Re: [PATCH net-next 1/3] net: macb: queue tie-off or disable during WOL suspend

From: claudiu beznea
Date: Sat Feb 03 2024 - 10:38:31 EST


Hi, Vineeth,

On 30.01.2024 12:48, Vineeth Karumanchi wrote:
> When GEM is used as a wake device, it is not mandatory for the RX DMA
> to be active. The RX engine in IP only needs to receive and identify
> a wake packet through an interrupt. The wake packet is of no further
> significance; hence, it is not required to be copied into memory.
> By disabling RX DMA during suspend, we can avoid unnecessary DMA
> processing of any incoming traffic.
>
> During suspend, perform either of the below operations:
>
> tie-off/dummy descriptor: Disable unused queues by connecting
> them to a looped descriptor chain without free slots.
>
> queue disable: The newer IP version allows disabling individual queues.

I would add a dash line for each of these 2 items for better understanding.
E.g.:

- tie-off/dummy descriptior: ...
- queue disable: ...

>
> Co-developed-by: Harini Katakam <harini.katakam@xxxxxxx>
> Signed-off-by: Harini Katakam <harini.katakam@xxxxxxx>
> Signed-off-by: Vineeth Karumanchi <vineeth.karumanchi@xxxxxxx>
> ---
> drivers/net/ethernet/cadence/macb.h | 7 +++
> drivers/net/ethernet/cadence/macb_main.c | 58 +++++++++++++++++++++++-
> 2 files changed, 64 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
> index aa5700ac9c00..f68ff163aa18 100644
> --- a/drivers/net/ethernet/cadence/macb.h
> +++ b/drivers/net/ethernet/cadence/macb.h
> @@ -645,6 +645,9 @@
> #define GEM_T2OFST_OFFSET 0 /* offset value */
> #define GEM_T2OFST_SIZE 7
>
> +/* Bitfields in queue pointer registers */
> +#define GEM_RBQP_DISABLE BIT(0)

You have spaces after macro name. However the approach in this driver is to
define bit offset and lenght and use {MACB,GEM}_BIT() macros (see the above
bitfield definitions).

> +
> /* Offset for screener type 2 compare values (T2CMPOFST).
> * Note the offset is applied after the specified point,
> * e.g. GEM_T2COMPOFST_ETYPE denotes the EtherType field, so an offset
> @@ -737,6 +740,7 @@
> #define MACB_CAPS_HIGH_SPEED 0x02000000
> #define MACB_CAPS_CLK_HW_CHG 0x04000000
> #define MACB_CAPS_MACB_IS_EMAC 0x08000000
> +#define MACB_CAPS_QUEUE_DISABLE 0x00002000

Can you keep this sorted by value with the rest of the caps?

> #define MACB_CAPS_FIFO_MODE 0x10000000
> #define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000
> #define MACB_CAPS_SG_DISABLED 0x40000000
> @@ -1254,6 +1258,7 @@ struct macb {
> u32 (*macb_reg_readl)(struct macb *bp, int offset);
> void (*macb_reg_writel)(struct macb *bp, int offset, u32 value);
>
> + struct macb_dma_desc *rx_ring_tieoff;

Keep this ^

> size_t rx_buffer_size;
>
> unsigned int rx_ring_size;
> @@ -1276,6 +1281,8 @@ struct macb {
> struct gem_stats gem;
> } hw_stats;
>
> + dma_addr_t rx_ring_tieoff_dma;

And this ^ toghether.

> +
> struct macb_or_gem_ops macbgem_ops;
>
> struct mii_bus *mii_bus;
> diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
> index 898debfd4db3..47cbea58e6c3 100644
> --- a/drivers/net/ethernet/cadence/macb_main.c
> +++ b/drivers/net/ethernet/cadence/macb_main.c
> @@ -2479,6 +2479,12 @@ static void macb_free_consistent(struct macb *bp)
>
> bp->macbgem_ops.mog_free_rx_buffers(bp);
>
> + if (bp->rx_ring_tieoff) {
> + dma_free_coherent(&bp->pdev->dev, macb_dma_desc_get_size(bp),
> + bp->rx_ring_tieoff, bp->rx_ring_tieoff_dma);
> + bp->rx_ring_tieoff = NULL;
> + }
> +

Keep the reverse order of operation as oposed to macb_alloc_consistent,
thus move this before bp->macbgem_ops.mog_free_rx_buffers();

> for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
> kfree(queue->tx_skb);
> queue->tx_skb = NULL;
> @@ -2568,6 +2574,16 @@ static int macb_alloc_consistent(struct macb *bp)
> if (bp->macbgem_ops.mog_alloc_rx_buffers(bp))
> goto out_err;
>
> + /* Required for tie off descriptor for PM cases */
> + if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE)) {
> + bp->rx_ring_tieoff = dma_alloc_coherent(&bp->pdev->dev,
> + macb_dma_desc_get_size(bp),
> + &bp->rx_ring_tieoff_dma,
> + GFP_KERNEL);
> + if (!bp->rx_ring_tieoff)
> + goto out_err;

You also need to free the previously allocated rx buffers.

> + }
> +
> return 0;
>
> out_err:
> @@ -2575,6 +2591,16 @@ static int macb_alloc_consistent(struct macb *bp)
> return -ENOMEM;
> }
>
> +static void macb_init_tieoff(struct macb *bp)
> +{
> + struct macb_dma_desc *d = bp->rx_ring_tieoff;

s/d/desc to cope with the rest of descriptor usage in this file.

Add this here:

if (bp->caps & MACB_CAPS_QUEUE_DISABLE)
return;

to avoid checking it in different places where this function is called.

> + /* Setup a wrapping descriptor with no free slots
> + * (WRAP and USED) to tie off/disable unused RX queues.
> + */
> + macb_set_addr(bp, d, MACB_BIT(RX_WRAP) | MACB_BIT(RX_USED));
> + d->ctrl = 0;
> +}
> +
> static void gem_init_rings(struct macb *bp)
> {
> struct macb_queue *queue;
> @@ -2598,6 +2624,8 @@ static void gem_init_rings(struct macb *bp)
> gem_rx_refill(queue);
> }
>
> + if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE))
> + macb_init_tieoff(bp);
> }
>
> static void macb_init_rings(struct macb *bp)
> @@ -2615,6 +2643,8 @@ static void macb_init_rings(struct macb *bp)
> bp->queues[0].tx_head = 0;
> bp->queues[0].tx_tail = 0;
> desc->ctrl |= MACB_BIT(TX_WRAP);
> +
> + macb_init_tieoff(bp);
> }
>
> static void macb_reset_hw(struct macb *bp)
> @@ -4917,7 +4947,8 @@ static const struct macb_config sama7g5_emac_config = {
>
> static const struct macb_config versal_config = {
> .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO |
> - MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH | MACB_CAPS_NEED_TSUCLK,
> + MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH |
> + MACB_CAPS_QUEUE_DISABLE | MACB_CAPS_NEED_TSUCLK,

This should go in a different patch.

> .dma_burst_length = 16,
> .clk_init = macb_clk_init,
> .init = init_reset_optional,
> @@ -5214,6 +5245,7 @@ static int __maybe_unused macb_suspend(struct device *dev)
> struct macb_queue *queue;
> unsigned long flags;
> unsigned int q;
> + u32 ctrlmask;

val/tmp should be enough for this as you are, anyway, re-using it in the
next patch for different purpose.

> int err;
>
> if (!device_may_wakeup(&bp->dev->dev))
> @@ -5224,6 +5256,30 @@ static int __maybe_unused macb_suspend(struct device *dev)
>
> if (bp->wol & MACB_WOL_ENABLED) {
> spin_lock_irqsave(&bp->lock, flags);
> +
> + /* Disable Tx and Rx engines before disabling the queues,
> + * this is mandatory as per the IP spec sheet
> + */
> + ctrlmask = macb_readl(bp, NCR);
> + ctrlmask &= ~(MACB_BIT(TE) | MACB_BIT(RE));

You can remove this
> + macb_writel(bp, NCR, ctrlmask);

And add this:
macb_writel(bp, NCR, ctrlmask & ~(MACB_BIT(TE) | MACB_BIT(RE));

> + for (q = 0, queue = bp->queues; q < bp->num_queues;
> + ++q, ++queue) {
> + /* Disable RX queues */

Operation in this for loop could be moved in the the above IRQ disable
loop. Have you tried it? are there any issues with it?

> + if (bp->caps & MACB_CAPS_QUEUE_DISABLE) {
> + queue_writel(queue, RBQP, GEM_RBQP_DISABLE);
> + } else {
> + /* Tie off RX queues */
> + queue_writel(queue, RBQP,
> + lower_32_bits(bp->rx_ring_tieoff_dma));

I think this should be guarded by:
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> + queue_writel(queue, RBQPH,
> + upper_32_bits(bp->rx_ring_tieoff_dma));
> + }
> + }
> + /* Enable Receive engine */
> + ctrlmask = macb_readl(bp, NCR);

Is this needed?

> + ctrlmask |= MACB_BIT(RE);
> + macb_writel(bp, NCR, ctrlmask);

These could be merged

> /* Flush all status bits */
> macb_writel(bp, TSR, -1);
> macb_writel(bp, RSR, -1);