Re: [PATCH net-next v8 3/3] net: axienet: Introduce dmaengine support

From: Paolo Abeni
Date: Thu Oct 19 2023 - 05:51:53 EST


On Wed, 2023-10-18 at 00:47 +0530, Radhey Shyam Pandey wrote:
[...]
> @@ -727,6 +746,122 @@ static inline int axienet_check_tx_bd_space(struct axienet_local *lp,
> return 0;
> }
>
> +/**
> + * axienet_dma_tx_cb - DMA engine callback for TX channel.
> + * @data: Pointer to the axienet_local structure.
> + * @result: error reporting through dmaengine_result.
> + * This function is called by dmaengine driver for TX channel to notify
> + * that the transmit is done.
> + */
> +static void axienet_dma_tx_cb(void *data, const struct dmaengine_result *result)
> +{
> + struct axienet_local *lp = data;
> + struct skbuf_dma_descriptor *skbuf_dma;

Minor nit: please use the reverse x-mas tree order
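That is, longest declaration first, e.g.:

	struct skbuf_dma_descriptor *skbuf_dma;
	struct axienet_local *lp = data;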

> +
> + skbuf_dma = axienet_get_tx_desc(lp, lp->tx_ring_tail++);
> + u64_stats_update_begin(&lp->tx_stat_sync);
> + u64_stats_add(&lp->tx_bytes, skbuf_dma->skb->len);
> + u64_stats_add(&lp->tx_packets, 1);
> + u64_stats_update_end(&lp->tx_stat_sync);
> + dma_unmap_sg(lp->dev, skbuf_dma->sgl, skbuf_dma->sg_len, DMA_TO_DEVICE);
> + dev_consume_skb_any(skbuf_dma->skb);
> + if (CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX) > MAX_SKB_FRAGS + 1)
> + netif_wake_queue(lp->ndev);
> +}
> +
> +/**
> + * axienet_start_xmit_dmaengine - Starts the transmission.
> + * @skb: sk_buff pointer that contains data to be Txed.
> + * @ndev: Pointer to net_device structure.
> + *
> + * Return: NETDEV_TX_OK on success or any non space errors.
> + * NETDEV_TX_BUSY when free element in TX skb ring buffer
> + * is not available.
> + *
> + * This function is invoked to initiate transmission. The
> + * function sets the skbs, register dma callback API and submit
> + * the dma transaction.
> + * Additionally if checksum offloading is supported,
> + * it populates AXI Stream Control fields with appropriate values.
> + */
> +static netdev_tx_t
> +axienet_start_xmit_dmaengine(struct sk_buff *skb, struct net_device *ndev)
> +{
> + struct dma_async_tx_descriptor *dma_tx_desc = NULL;
> + struct axienet_local *lp = netdev_priv(ndev);
> + u32 app_metadata[DMA_NUM_APP_WORDS] = {0};
> + struct skbuf_dma_descriptor *skbuf_dma;
> + struct dma_device *dma_dev;
> + u32 csum_start_off;
> + u32 csum_index_off;
> + int sg_len;
> + int ret;
> +
> + dma_dev = lp->tx_chan->device;
> + sg_len = skb_shinfo(skb)->nr_frags + 1;
> + if (CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX) <= sg_len) {
> + netif_stop_queue(ndev);
> + if (net_ratelimit())
> + netdev_warn(ndev, "TX ring unexpectedly full\n");
> + return NETDEV_TX_BUSY;
> + }
> +
> + skbuf_dma = axienet_get_tx_desc(lp, lp->tx_ring_head);
> + if (!skbuf_dma) {
> + dev_kfree_skb_any(skb);
> + return NETDEV_TX_OK;

You can avoid some duplicate code by replacing this with:

goto drop_skb;

and adding the following at the bottom of this function:

drop_skb:
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;

> + }
> +
> + lp->tx_ring_head++;
> + sg_init_table(skbuf_dma->sgl, sg_len);
> + ret = skb_to_sgvec(skb, skbuf_dma->sgl, 0, skb->len);
> + if (ret < 0) {
> + dev_kfree_skb_any(skb);
> + return NETDEV_TX_OK;

Same here and below.

> + }
> +
> + ret = dma_map_sg(lp->dev, skbuf_dma->sgl, sg_len, DMA_TO_DEVICE);
> + if (!ret) {
> + dev_kfree_skb_any(skb);
> + return NETDEV_TX_OK;
> + }
> +
> + /* Fill up app fields for checksum */
> + if (skb->ip_summed == CHECKSUM_PARTIAL) {
> + if (lp->features & XAE_FEATURE_FULL_TX_CSUM) {
> + /* Tx Full Checksum Offload Enabled */
> + app_metadata[0] |= 2;
> + } else if (lp->features & XAE_FEATURE_PARTIAL_TX_CSUM) {
> + csum_start_off = skb_transport_offset(skb);
> + csum_index_off = csum_start_off + skb->csum_offset;
> + /* Tx Partial Checksum Offload Enabled */
> + app_metadata[0] |= 1;
> + app_metadata[1] = (csum_start_off << 16) | csum_index_off;
> + }
> + } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
> + app_metadata[0] |= 2; /* Tx Full Checksum Offload Enabled */
> + }
> +
> + dma_tx_desc = dma_dev->device_prep_slave_sg(lp->tx_chan, skbuf_dma->sgl,
> + sg_len, DMA_MEM_TO_DEV,
> + DMA_PREP_INTERRUPT, (void *)app_metadata);
> + if (!dma_tx_desc)
> + goto xmit_error_unmap_sg;

Are you leaking an skb here?

Also, you forgot to add the netif_txq_maybe_stop() call, as suggested
by Jakub in the previous revision.
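
Something along these lines after the descriptor has been submitted
should do - just a rough sketch, the thresholds below are only an
example:

	struct netdev_queue *txq = skb_get_tx_queue(ndev, skb);

	/* stop the queue when there is not enough room left for
	 * another fully fragmented skb; the TX completion callback
	 * wakes it up again
	 */
	netif_txq_maybe_stop(txq,
			     CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail,
					TX_BD_NUM_MAX),
			     MAX_SKB_FRAGS + 1, 2 * MAX_SKB_FRAGS);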

> +
> + skbuf_dma->skb = skb;
> + skbuf_dma->sg_len = sg_len;
> + dma_tx_desc->callback_param = lp;
> + dma_tx_desc->callback_result = axienet_dma_tx_cb;
> + dmaengine_submit(dma_tx_desc);
> + dma_async_issue_pending(lp->tx_chan);
> +
> + return NETDEV_TX_OK;
> +
> +xmit_error_unmap_sg:
> + dma_unmap_sg(lp->dev, skbuf_dma->sgl, sg_len, DMA_TO_DEVICE);

If you need to drop the skb (as I suspect), you can reuse the drop_skb
label here:

drop_skb:
dev_kfree_skb_any(skb);
> + return NETDEV_TX_OK;
> +}
> +
> /**
> * axienet_tx_poll - Invoked once a transmit is completed by the
> * Axi DMA Tx channel.
> @@ -893,6 +1028,42 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
> return NETDEV_TX_OK;
> }
>
> +/**
> + * axienet_dma_rx_cb - DMA engine callback for RX channel.
> + * @data: Pointer to the skbuf_dma_descriptor structure.
> + * @result: error reporting through dmaengine_result.
> + * This function is called by dmaengine driver for RX channel to notify
> + * that the packet is received.
> + */
> +static void axienet_dma_rx_cb(void *data, const struct dmaengine_result *result)
> +{
> + struct axienet_local *lp = data;
> + struct skbuf_dma_descriptor *skbuf_dma;
> + size_t meta_len, meta_max_len, rx_len;
> + struct sk_buff *skb;
> + u32 *app_metadata;

Minor nit: please respect the reverse x-mas tree order

> +
> + skbuf_dma = axienet_get_rx_desc(lp, lp->rx_ring_tail++);
> + skb = skbuf_dma->skb;
> + app_metadata = dmaengine_desc_get_metadata_ptr(skbuf_dma->desc, &meta_len,
> + &meta_max_len);
> + dma_unmap_single(lp->dev, skbuf_dma->dma_address, lp->max_frm_size,
> + DMA_FROM_DEVICE);
> + /* TODO: Derive app word index programmatically */
> + rx_len = (app_metadata[LEN_APP] & 0xFFFF);
> + skb_put(skb, rx_len);
> + skb->protocol = eth_type_trans(skb, lp->ndev);
> + skb->ip_summed = CHECKSUM_NONE;
> +
> + __netif_rx(skb);

It's a pity you can't leverage NAPI here.

I think that could be doable as a follow-up, but I'm unsure whether it
would fit the DMA engine model: in this callback you could cache the
ready DMA index (a single range should suffice) and schedule the NAPI
instance; the actual DMA processing would then be done in the NAPI poll.

Another possible follow-up could be introducing a "bulk" RX callback in
the DMA engine, to mitigate the indirect call overhead on a burst of RX
DMA completions - assuming the DMA engine actually generates such bursts.
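
Roughly something like this, just a sketch - the 'rx_dma_done' field
below does not exist yet, it's only there to give the idea:

static void axienet_dma_rx_cb(void *data, const struct dmaengine_result *result)
{
	struct axienet_local *lp = data;

	/* only record that one more RX descriptor completed ... */
	WRITE_ONCE(lp->rx_dma_done, READ_ONCE(lp->rx_dma_done) + 1);

	/* ... and defer the whole skb handling to the NAPI poll */
	napi_schedule(&lp->napi_rx);
}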

> + u64_stats_update_begin(&lp->rx_stat_sync);
> + u64_stats_add(&lp->rx_packets, 1);
> + u64_stats_add(&lp->rx_bytes, rx_len);
> + u64_stats_update_end(&lp->rx_stat_sync);
> + axienet_rx_submit_desc(lp->ndev);
> + dma_async_issue_pending(lp->rx_chan);
> +}
> +
> /**
> * axienet_rx_poll - Triggered by RX ISR to complete the BD processing.
> * @napi: Pointer to NAPI structure.
> @@ -1126,6 +1297,150 @@ static irqreturn_t axienet_eth_irq(int irq, void *_ndev)
>
> static void axienet_dma_err_handler(struct work_struct *work);
>
> +/**
> + * axienet_rx_submit_desc - Submit the rx descriptors to dmaengine.
> + * allocate skbuff, map the scatterlist and obtain a descriptor
> + * and then add the callback information and submit descriptor.
> + *
> + * @ndev: net_device pointer
> + *
> + *Return: 0, on success.
> + * non-zero error value on failure
> + */
> +static int axienet_rx_submit_desc(struct net_device *ndev)
> +{
> + struct dma_async_tx_descriptor *dma_rx_desc = NULL;
> + struct axienet_local *lp = netdev_priv(ndev);
> + struct skbuf_dma_descriptor *skbuf_dma;
> + struct sk_buff *skb;
> + dma_addr_t addr;
> + int ret;
> +
> + skbuf_dma = axienet_get_rx_desc(lp, lp->rx_ring_head);
> + if (!skbuf_dma)
> + return -ENOMEM;

Minor nit: here a newline would make the code more readable

> + lp->rx_ring_head++;
> + skb = netdev_alloc_skb(ndev, lp->max_frm_size);
> + if (!skb)
> + return -ENOMEM;

Another possible follow-up: usually the skb header is allocated just
before sending the packet to the network stack (e.g. just before the
__netif_rx() call) to be cache-friendly. Here you could allocate just
the data part and later use e.g. build_skb_around().
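
Something like this, as a very rough sketch (using napi_build_skb(),
which is basically build_skb_around() plus the skb head allocation):

	/* at submit time map only a data buffer, no skb yet */
	unsigned int truesize = SKB_DATA_ALIGN(lp->max_frm_size) +
				SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	void *buf = netdev_alloc_frag(truesize);

	addr = dma_map_single(lp->dev, buf, lp->max_frm_size, DMA_FROM_DEVICE);

	/* ... and in the RX completion path, just before __netif_rx(),
	 * wrap the buffer into an skb
	 */
	skb = napi_build_skb(buf, truesize);
	skb_put(skb, rx_len);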

> +
> + sg_init_table(skbuf_dma->sgl, 1);
> + addr = dma_map_single(lp->dev, skb->data, lp->max_frm_size, DMA_FROM_DEVICE);
> + if (unlikely(dma_mapping_error(lp->dev, addr))) {
> + if (net_ratelimit())
> + netdev_err(ndev, "DMA mapping error\n");
> + ret = -ENOMEM;
> + goto rx_submit_err_free_skb;
> + }
> + sg_dma_address(skbuf_dma->sgl) = addr;
> + sg_dma_len(skbuf_dma->sgl) = lp->max_frm_size;
> + dma_rx_desc = dmaengine_prep_slave_sg(lp->rx_chan, skbuf_dma->sgl,
> + 1, DMA_DEV_TO_MEM,
> + DMA_PREP_INTERRUPT);
> + if (!dma_rx_desc) {
> + ret = -EINVAL;
> + goto rx_submit_err_unmap_skb;
> + }
> +
> + skbuf_dma->skb = skb;
> + skbuf_dma->dma_address = sg_dma_address(skbuf_dma->sgl);
> + skbuf_dma->desc = dma_rx_desc;
> + dma_rx_desc->callback_param = lp;
> + dma_rx_desc->callback_result = axienet_dma_rx_cb;
> + dmaengine_submit(dma_rx_desc);
> +
> + return 0;
> +
> +rx_submit_err_unmap_skb:
> + dma_unmap_single(lp->dev, addr, lp->max_frm_size, DMA_FROM_DEVICE);
> +rx_submit_err_free_skb:
> + dev_kfree_skb(skb);
> + return ret;

It looks like the error code is ignored by the caller. Possibly you can
change this to a 'void' function.
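E.g., dropping 'ret' and the error codes:

	static void axienet_rx_submit_desc(struct net_device *ndev)
	{
		...
		skbuf_dma = axienet_get_rx_desc(lp, lp->rx_ring_head);
		if (!skbuf_dma)
			return;
		...
	rx_submit_err_unmap_skb:
		dma_unmap_single(lp->dev, addr, lp->max_frm_size, DMA_FROM_DEVICE);
	rx_submit_err_free_skb:
		dev_kfree_skb(skb);
	}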


Cheers,

Paolo