[PATCH 4.4 58/58] net: mvpp2: fix dma unmapping of TX buffers for fragments

From: Greg Kroah-Hartman
Date: Fri Jan 06 2017 - 17:15:36 EST


4.4-stable review patch. If anyone has any objections, please let me know.

------------------

From: Thomas Petazzoni <thomas.petazzoni@xxxxxxxxxxxxxxxxxx>

commit 8354491c9d5b06709384cea91d13019bf5e61449 upstream.

Since commit 71ce391dfb784 ("net: mvpp2: enable proper per-CPU TX
buffers unmapping"), we are not correctly DMA unmapping TX buffers for
fragments.

Indeed, the mvpp2_txq_inc_put() function only stores in the
txq_cpu->tx_buffs[] array the physical address of the buffer to be
DMA-unmapped when skb != NULL. In addition, when DMA-unmapping, we use
skb_headlen(skb) to get the size to be unmapped. Both of this works fine
for TX descriptors that are associated directly to a SKB, but not the
ones that are used for fragments, with a NULL pointer as skb:

- We have a NULL physical address when calling DMA unmap
- skb_headlen(skb) crashes because skb is NULL

This causes random crashes when fragments are used.

To solve this problem, we need to:

- Store the physical address of the buffer to be unmapped
unconditionally, regardless of whether it is tied to a SKB or not.

- Store the length of the buffer to be unmapped, which requires a new
field.

Instead of adding a third array to store the length of the buffer to be
unmapped, and as suggested by David Miller, this commit refactors the
tx_buffs[] and tx_skb[] arrays of 'struct mvpp2_txq_pcpu' into a
separate structure 'mvpp2_txq_pcpu_buf', to which a 'size' field is
added. Therefore, instead of having three arrays to allocate/free, we
have a single one, which also improve data locality, reducing the
impact on the CPU cache.

Fixes: 71ce391dfb784 ("net: mvpp2: enable proper per-CPU TX buffers unmapping")
Reported-by: Raphael G <raphael.glon@xxxxxxxxxxxx>
Cc: Raphael G <raphael.glon@xxxxxxxxxxxx>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@xxxxxxxxxxxxxxxxxx>
Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

---
drivers/net/ethernet/marvell/mvpp2.c | 59 +++++++++++++++++------------------
1 file changed, 30 insertions(+), 29 deletions(-)

--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -772,6 +772,17 @@ struct mvpp2_rx_desc {
u32 reserved8;
};

+struct mvpp2_txq_pcpu_buf {
+ /* Transmitted SKB */
+ struct sk_buff *skb;
+
+ /* Physical address of transmitted buffer */
+ dma_addr_t phys;
+
+ /* Size transmitted */
+ size_t size;
+};
+
/* Per-CPU Tx queue control */
struct mvpp2_txq_pcpu {
int cpu;
@@ -787,11 +798,8 @@ struct mvpp2_txq_pcpu {
/* Number of Tx DMA descriptors reserved for each CPU */
int reserved_num;

- /* Array of transmitted skb */
- struct sk_buff **tx_skb;
-
- /* Array of transmitted buffers' physical addresses */
- dma_addr_t *tx_buffs;
+ /* Infos about transmitted buffers */
+ struct mvpp2_txq_pcpu_buf *buffs;

/* Index of last TX DMA descriptor that was inserted */
int txq_put_index;
@@ -981,10 +989,11 @@ static void mvpp2_txq_inc_put(struct mvp
struct sk_buff *skb,
struct mvpp2_tx_desc *tx_desc)
{
- txq_pcpu->tx_skb[txq_pcpu->txq_put_index] = skb;
- if (skb)
- txq_pcpu->tx_buffs[txq_pcpu->txq_put_index] =
- tx_desc->buf_phys_addr;
+ struct mvpp2_txq_pcpu_buf *tx_buf =
+ txq_pcpu->buffs + txq_pcpu->txq_put_index;
+ tx_buf->skb = skb;
+ tx_buf->size = tx_desc->data_size;
+ tx_buf->phys = tx_desc->buf_phys_addr;
txq_pcpu->txq_put_index++;
if (txq_pcpu->txq_put_index == txq_pcpu->size)
txq_pcpu->txq_put_index = 0;
@@ -4403,17 +4412,16 @@ static void mvpp2_txq_bufs_free(struct m
int i;

for (i = 0; i < num; i++) {
- dma_addr_t buf_phys_addr =
- txq_pcpu->tx_buffs[txq_pcpu->txq_get_index];
- struct sk_buff *skb = txq_pcpu->tx_skb[txq_pcpu->txq_get_index];
+ struct mvpp2_txq_pcpu_buf *tx_buf =
+ txq_pcpu->buffs + txq_pcpu->txq_get_index;

mvpp2_txq_inc_get(txq_pcpu);

- dma_unmap_single(port->dev->dev.parent, buf_phys_addr,
- skb_headlen(skb), DMA_TO_DEVICE);
- if (!skb)
+ dma_unmap_single(port->dev->dev.parent, tx_buf->phys,
+ tx_buf->size, DMA_TO_DEVICE);
+ if (!tx_buf->skb)
continue;
- dev_kfree_skb_any(skb);
+ dev_kfree_skb_any(tx_buf->skb);
}
}

@@ -4664,15 +4672,10 @@ static int mvpp2_txq_init(struct mvpp2_p
for_each_present_cpu(cpu) {
txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
txq_pcpu->size = txq->size;
- txq_pcpu->tx_skb = kmalloc(txq_pcpu->size *
- sizeof(*txq_pcpu->tx_skb),
- GFP_KERNEL);
- if (!txq_pcpu->tx_skb)
- goto error;
-
- txq_pcpu->tx_buffs = kmalloc(txq_pcpu->size *
- sizeof(dma_addr_t), GFP_KERNEL);
- if (!txq_pcpu->tx_buffs)
+ txq_pcpu->buffs = kmalloc(txq_pcpu->size *
+ sizeof(struct mvpp2_txq_pcpu_buf),
+ GFP_KERNEL);
+ if (!txq_pcpu->buffs)
goto error;

txq_pcpu->count = 0;
@@ -4686,8 +4689,7 @@ static int mvpp2_txq_init(struct mvpp2_p
error:
for_each_present_cpu(cpu) {
txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
- kfree(txq_pcpu->tx_skb);
- kfree(txq_pcpu->tx_buffs);
+ kfree(txq_pcpu->buffs);
}

dma_free_coherent(port->dev->dev.parent,
@@ -4706,8 +4708,7 @@ static void mvpp2_txq_deinit(struct mvpp

for_each_present_cpu(cpu) {
txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
- kfree(txq_pcpu->tx_skb);
- kfree(txq_pcpu->tx_buffs);
+ kfree(txq_pcpu->buffs);
}

if (txq->descs)