[PATCH net-next v4 1/6] net: ethernet: ti: am65-cpsw: Introduce multi queue Rx

From: Roger Quadros
Date: Tue Sep 10 2024 - 05:24:44 EST


am65-cpsw can support up to 8 queues at Rx.
Use a macro AM65_CPSW_MAX_QUEUES to indicate that.
As there is only one DMA channel for RX traffic, the
8 queues come as 8 flows in that channel.

By default, we will start with 1 flow as defined by the
macro AM65_CPSW_DEFAULT_RX_CHN_FLOWS.

The user can change the number of flows with ethtool, like so:
'ethtool -L ethx rx <N>'

All traffic will still come on flow 0. To get traffic on
different flows the Classifiers will need to be set up.

Signed-off-by: Roger Quadros <rogerq@xxxxxxxxxx>
Reviewed-by: Simon Horman <horms@xxxxxxxxxx>
---
Changelog:
v4:
- Use single macro AM65_CPSW_MAX_QUEUES for both TX and RX queues
to simplify code
- reuse am65_cpsw_get/set_per_queue_coalesce for am65_cpsw_get/set_coalesce.
- Return -EINVAL if unsupported tx/rx_coalesce_usecs in
am65_cpsw_set_coalesce.
- move am65_cpsw_nuss_remove_rx/tx_chns() to am65_cpsw_nuss_update_tx_rx_chns()
- don't set skip_fdq during k3_udma_glue_reset_rx_chn(). Fixes breakage
during ifdown/up.

v3:
- style fixes: reverse xmas tree and checkpatch.pl --max-line-length=80
- typo fix: Classifer -> Classifier
- added Reviewed-by Simon Horman
---
drivers/net/ethernet/ti/am65-cpsw-ethtool.c | 75 +++---
drivers/net/ethernet/ti/am65-cpsw-nuss.c | 385 ++++++++++++++++------------
drivers/net/ethernet/ti/am65-cpsw-nuss.h | 39 +--
3 files changed, 269 insertions(+), 230 deletions(-)

diff --git a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c
index 539d5ca82f52..9032444435e9 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c
@@ -427,9 +427,9 @@ static void am65_cpsw_get_channels(struct net_device *ndev,
{
struct am65_cpsw_common *common = am65_ndev_to_common(ndev);

- ch->max_rx = AM65_CPSW_MAX_RX_QUEUES;
- ch->max_tx = AM65_CPSW_MAX_TX_QUEUES;
- ch->rx_count = AM65_CPSW_MAX_RX_QUEUES;
+ ch->max_rx = AM65_CPSW_MAX_QUEUES;
+ ch->max_tx = AM65_CPSW_MAX_QUEUES;
+ ch->rx_count = common->rx_ch_num_flows;
ch->tx_count = common->tx_ch_num;
}

@@ -447,9 +447,8 @@ static int am65_cpsw_set_channels(struct net_device *ndev,
if (common->usage_count)
return -EBUSY;

- am65_cpsw_nuss_remove_tx_chns(common);
-
- return am65_cpsw_nuss_update_tx_chns(common, chs->tx_count);
+ return am65_cpsw_nuss_update_tx_rx_chns(common, chs->tx_count,
+ chs->rx_count);
}

static void
@@ -913,80 +912,64 @@ static void am65_cpsw_get_mm_stats(struct net_device *ndev,
s->MACMergeHoldCount = readl(base + AM65_CPSW_STATN_IET_TX_HOLD);
}

-static int am65_cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal,
- struct kernel_ethtool_coalesce *kernel_coal,
- struct netlink_ext_ack *extack)
-{
- struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
- struct am65_cpsw_tx_chn *tx_chn;
-
- tx_chn = &common->tx_chns[0];
-
- coal->rx_coalesce_usecs = common->rx_pace_timeout / 1000;
- coal->tx_coalesce_usecs = tx_chn->tx_pace_timeout / 1000;
-
- return 0;
-}
-
static int am65_cpsw_get_per_queue_coalesce(struct net_device *ndev, u32 queue,
struct ethtool_coalesce *coal)
{
struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
+ struct am65_cpsw_rx_flow *rx_flow;
struct am65_cpsw_tx_chn *tx_chn;

- if (queue >= AM65_CPSW_MAX_TX_QUEUES)
+ if (queue >= AM65_CPSW_MAX_QUEUES)
return -EINVAL;

tx_chn = &common->tx_chns[queue];
-
coal->tx_coalesce_usecs = tx_chn->tx_pace_timeout / 1000;

+ rx_flow = &common->rx_chns.flows[queue];
+ coal->rx_coalesce_usecs = rx_flow->rx_pace_timeout / 1000;
+
return 0;
}

-static int am65_cpsw_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal,
+static int am65_cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal,
struct kernel_ethtool_coalesce *kernel_coal,
struct netlink_ext_ack *extack)
{
- struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
- struct am65_cpsw_tx_chn *tx_chn;
-
- tx_chn = &common->tx_chns[0];
-
- if (coal->rx_coalesce_usecs && coal->rx_coalesce_usecs < 20)
- return -EINVAL;
-
- if (coal->tx_coalesce_usecs && coal->tx_coalesce_usecs < 20)
- return -EINVAL;
-
- common->rx_pace_timeout = coal->rx_coalesce_usecs * 1000;
- tx_chn->tx_pace_timeout = coal->tx_coalesce_usecs * 1000;
-
- return 0;
+ return am65_cpsw_get_per_queue_coalesce(ndev, 0, coal);
}

static int am65_cpsw_set_per_queue_coalesce(struct net_device *ndev, u32 queue,
struct ethtool_coalesce *coal)
{
struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
+ struct am65_cpsw_rx_flow *rx_flow;
struct am65_cpsw_tx_chn *tx_chn;

- if (queue >= AM65_CPSW_MAX_TX_QUEUES)
+ if (queue >= AM65_CPSW_MAX_QUEUES)
return -EINVAL;

tx_chn = &common->tx_chns[queue];
-
- if (coal->tx_coalesce_usecs && coal->tx_coalesce_usecs < 20) {
- dev_info(common->dev, "defaulting to min value of 20us for tx-usecs for tx-%u\n",
- queue);
- coal->tx_coalesce_usecs = 20;
- }
+ if (coal->tx_coalesce_usecs && coal->tx_coalesce_usecs < 20)
+ return -EINVAL;

tx_chn->tx_pace_timeout = coal->tx_coalesce_usecs * 1000;

+ rx_flow = &common->rx_chns.flows[queue];
+ if (coal->rx_coalesce_usecs && coal->rx_coalesce_usecs < 20)
+ return -EINVAL;
+
+ rx_flow->rx_pace_timeout = coal->rx_coalesce_usecs * 1000;
+
return 0;
}

+static int am65_cpsw_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
+{
+ return am65_cpsw_set_per_queue_coalesce(ndev, 0, coal);
+}
+
const struct ethtool_ops am65_cpsw_ethtool_ops_slave = {
.begin = am65_cpsw_ethtool_op_begin,
.complete = am65_cpsw_ethtool_op_complete,
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index b7e5d0fb5d19..76e62351b30b 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -138,7 +138,7 @@
AM65_CPSW_PN_TS_CTL_RX_ANX_F_EN)

#define AM65_CPSW_ALE_AGEOUT_DEFAULT 30
-/* Number of TX/RX descriptors */
+/* Number of TX/RX descriptors per channel/flow */
#define AM65_CPSW_MAX_TX_DESC 500
#define AM65_CPSW_MAX_RX_DESC 500

@@ -150,6 +150,7 @@
NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)

#define AM65_CPSW_DEFAULT_TX_CHNS 8
+#define AM65_CPSW_DEFAULT_RX_CHN_FLOWS 1

/* CPPI streaming packet interface */
#define AM65_CPSW_CPPI_TX_FLOW_ID 0x3FFF
@@ -331,7 +332,7 @@ static void am65_cpsw_nuss_ndo_host_tx_timeout(struct net_device *ndev,
}

static int am65_cpsw_nuss_rx_push(struct am65_cpsw_common *common,
- struct page *page)
+ struct page *page, u32 flow_idx)
{
struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns;
struct cppi5_host_desc_t *desc_rx;
@@ -364,7 +365,8 @@ static int am65_cpsw_nuss_rx_push(struct am65_cpsw_common *common,
swdata = cppi5_hdesc_get_swdata(desc_rx);
*((void **)swdata) = page_address(page);

- return k3_udma_glue_push_rx_chn(rx_chn->rx_chn, 0, desc_rx, desc_dma);
+ return k3_udma_glue_push_rx_chn(rx_chn->rx_chn, flow_idx,
+ desc_rx, desc_dma);
}

void am65_cpsw_nuss_set_p0_ptype(struct am65_cpsw_common *common)
@@ -399,22 +401,27 @@ static void am65_cpsw_init_port_emac_ale(struct am65_cpsw_port *port);
static void am65_cpsw_destroy_xdp_rxqs(struct am65_cpsw_common *common)
{
struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns;
+ struct am65_cpsw_rx_flow *flow;
struct xdp_rxq_info *rxq;
- int i;
+ int id, port;

- for (i = 0; i < common->port_num; i++) {
- if (!common->ports[i].ndev)
- continue;
+ for (id = 0; id < common->rx_ch_num_flows; id++) {
+ flow = &rx_chn->flows[id];

- rxq = &common->ports[i].xdp_rxq;
+ for (port = 0; port < common->port_num; port++) {
+ if (!common->ports[port].ndev)
+ continue;

- if (xdp_rxq_info_is_reg(rxq))
- xdp_rxq_info_unreg(rxq);
- }
+ rxq = &common->ports[port].xdp_rxq[id];
+
+ if (xdp_rxq_info_is_reg(rxq))
+ xdp_rxq_info_unreg(rxq);
+ }

- if (rx_chn->page_pool) {
- page_pool_destroy(rx_chn->page_pool);
- rx_chn->page_pool = NULL;
+ if (flow->page_pool) {
+ page_pool_destroy(flow->page_pool);
+ flow->page_pool = NULL;
+ }
}
}

@@ -428,31 +435,44 @@ static int am65_cpsw_create_xdp_rxqs(struct am65_cpsw_common *common)
.nid = dev_to_node(common->dev),
.dev = common->dev,
.dma_dir = DMA_BIDIRECTIONAL,
- .napi = &common->napi_rx,
+ /* .napi set dynamically */
};
+ struct am65_cpsw_rx_flow *flow;
struct xdp_rxq_info *rxq;
struct page_pool *pool;
- int i, ret;
-
- pool = page_pool_create(&pp_params);
- if (IS_ERR(pool))
- return PTR_ERR(pool);
+ int id, port, ret;
+
+ for (id = 0; id < common->rx_ch_num_flows; id++) {
+ flow = &rx_chn->flows[id];
+ pp_params.napi = &flow->napi_rx;
+ pool = page_pool_create(&pp_params);
+ if (IS_ERR(pool)) {
+ ret = PTR_ERR(pool);
+ goto err;
+ }

- rx_chn->page_pool = pool;
+ flow->page_pool = pool;

- for (i = 0; i < common->port_num; i++) {
- if (!common->ports[i].ndev)
- continue;
+ /* using same page pool is allowed as no running rx handlers
+ * simultaneously for both ndevs
+ */
+ for (port = 0; port < common->port_num; port++) {
+ if (!common->ports[port].ndev)
+ continue;

- rxq = &common->ports[i].xdp_rxq;
+ rxq = &common->ports[port].xdp_rxq[id];

- ret = xdp_rxq_info_reg(rxq, common->ports[i].ndev, i, 0);
- if (ret)
- goto err;
+ ret = xdp_rxq_info_reg(rxq, common->ports[port].ndev,
+ id, flow->napi_rx.napi_id);
+ if (ret)
+ goto err;

- ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool);
- if (ret)
- goto err;
+ ret = xdp_rxq_info_reg_mem_model(rxq,
+ MEM_TYPE_PAGE_POOL,
+ pool);
+ if (ret)
+ goto err;
+ }
}

return 0;
@@ -497,25 +517,27 @@ static enum am65_cpsw_tx_buf_type am65_cpsw_nuss_buf_type(struct am65_cpsw_tx_ch
desc_idx);
}

-static inline void am65_cpsw_put_page(struct am65_cpsw_rx_chn *rx_chn,
+static inline void am65_cpsw_put_page(struct am65_cpsw_rx_flow *flow,
struct page *page,
bool allow_direct,
int desc_idx)
{
- page_pool_put_full_page(rx_chn->page_pool, page, allow_direct);
- rx_chn->pages[desc_idx] = NULL;
+ page_pool_put_full_page(flow->page_pool, page, allow_direct);
+ flow->pages[desc_idx] = NULL;
}

static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma)
{
- struct am65_cpsw_rx_chn *rx_chn = data;
+ struct am65_cpsw_rx_flow *flow = data;
struct cppi5_host_desc_t *desc_rx;
+ struct am65_cpsw_rx_chn *rx_chn;
dma_addr_t buf_dma;
u32 buf_dma_len;
void *page_addr;
void **swdata;
int desc_idx;

+ rx_chn = &flow->common->rx_chns;
desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma);
swdata = cppi5_hdesc_get_swdata(desc_rx);
page_addr = *swdata;
@@ -526,7 +548,7 @@ static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma)

desc_idx = am65_cpsw_nuss_desc_idx(rx_chn->desc_pool, desc_rx,
rx_chn->dsize_log2);
- am65_cpsw_put_page(rx_chn, virt_to_page(page_addr), false, desc_idx);
+ am65_cpsw_put_page(flow, virt_to_page(page_addr), false, desc_idx);
}

static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn,
@@ -602,7 +624,8 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
struct am65_cpsw_host *host_p = am65_common_get_host(common);
struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns;
struct am65_cpsw_tx_chn *tx_chn = common->tx_chns;
- int port_idx, i, ret, tx;
+ int port_idx, i, ret, tx, flow_idx;
+ struct am65_cpsw_rx_flow *flow;
u32 val, port_mask;
struct page *page;

@@ -670,27 +693,26 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
return ret;
}

- for (i = 0; i < rx_chn->descs_num; i++) {
- page = page_pool_dev_alloc_pages(rx_chn->page_pool);
- if (!page) {
- ret = -ENOMEM;
- if (i)
+ for (flow_idx = 0; flow_idx < common->rx_ch_num_flows; flow_idx++) {
+ flow = &rx_chn->flows[flow_idx];
+ for (i = 0; i < AM65_CPSW_MAX_RX_DESC; i++) {
+ page = page_pool_dev_alloc_pages(flow->page_pool);
+ if (!page) {
+ dev_err(common->dev, "cannot allocate page in flow %d\n",
+ flow_idx);
+ ret = -ENOMEM;
goto fail_rx;
+ }
+ flow->pages[i] = page;

- return ret;
- }
- rx_chn->pages[i] = page;
-
- ret = am65_cpsw_nuss_rx_push(common, page);
- if (ret < 0) {
- dev_err(common->dev,
- "cannot submit page to channel rx: %d\n",
- ret);
- am65_cpsw_put_page(rx_chn, page, false, i);
- if (i)
+ ret = am65_cpsw_nuss_rx_push(common, page, flow_idx);
+ if (ret < 0) {
+ dev_err(common->dev,
+ "cannot submit page to rx channel flow %d, error %d\n",
+ flow_idx, ret);
+ am65_cpsw_put_page(flow, page, false, i);
goto fail_rx;
-
- return ret;
+ }
}
}

@@ -700,6 +722,14 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
goto fail_rx;
}

+ for (i = 0; i < common->rx_ch_num_flows ; i++) {
+ napi_enable(&rx_chn->flows[i].napi_rx);
+ if (rx_chn->flows[i].irq_disabled) {
+ rx_chn->flows[i].irq_disabled = false;
+ enable_irq(rx_chn->flows[i].irq);
+ }
+ }
+
for (tx = 0; tx < common->tx_ch_num; tx++) {
ret = k3_udma_glue_enable_tx_chn(tx_chn[tx].tx_chn);
if (ret) {
@@ -711,12 +741,6 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
napi_enable(&tx_chn[tx].napi_tx);
}

- napi_enable(&common->napi_rx);
- if (common->rx_irq_disabled) {
- common->rx_irq_disabled = false;
- enable_irq(rx_chn->irq);
- }
-
dev_dbg(common->dev, "cpsw_nuss started\n");
return 0;

@@ -727,11 +751,24 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
tx--;
}

+ for (flow_idx = 0; flow_idx < common->rx_ch_num_flows; flow_idx++) {
+ flow = &rx_chn->flows[flow_idx];
+ if (!flow->irq_disabled) {
+ disable_irq(flow->irq);
+ flow->irq_disabled = true;
+ }
+ napi_disable(&flow->napi_rx);
+ }
+
k3_udma_glue_disable_rx_chn(rx_chn->rx_chn);

fail_rx:
- k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, 0, rx_chn,
- am65_cpsw_nuss_rx_cleanup, 0);
+ for (i = 0; i < common->rx_ch_num_flows; i++)
+ k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, &rx_chn->flows[i],
+ am65_cpsw_nuss_rx_cleanup, 0);
+
+ am65_cpsw_destroy_xdp_rxqs(common);
+
return ret;
}

@@ -780,12 +817,12 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common)
dev_err(common->dev, "rx teardown timeout\n");
}

- napi_disable(&common->napi_rx);
- hrtimer_cancel(&common->rx_hrtimer);
-
- for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++)
- k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, rx_chn,
- am65_cpsw_nuss_rx_cleanup, !!i);
+ for (i = 0; i < common->rx_ch_num_flows; i++) {
+ napi_disable(&rx_chn->flows[i].napi_rx);
+ hrtimer_cancel(&rx_chn->flows[i].rx_hrtimer);
+ k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, &rx_chn->flows[i],
+ am65_cpsw_nuss_rx_cleanup, 0);
+ }

k3_udma_glue_disable_rx_chn(rx_chn->rx_chn);

@@ -794,10 +831,6 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common)
writel(0, common->cpsw_base + AM65_CPSW_REG_CTL);
writel(0, common->cpsw_base + AM65_CPSW_REG_STAT_PORT_EN);

- for (i = 0; i < rx_chn->descs_num; i++) {
- if (rx_chn->pages[i])
- am65_cpsw_put_page(rx_chn, rx_chn->pages[i], false, i);
- }
am65_cpsw_destroy_xdp_rxqs(common);

dev_dbg(common->dev, "cpsw_nuss stopped\n");
@@ -868,7 +901,7 @@ static int am65_cpsw_nuss_ndo_slave_open(struct net_device *ndev)
goto runtime_put;
}

- ret = netif_set_real_num_rx_queues(ndev, AM65_CPSW_MAX_RX_QUEUES);
+ ret = netif_set_real_num_rx_queues(ndev, common->rx_ch_num_flows);
if (ret) {
dev_err(common->dev, "cannot set real number of rx queues\n");
goto runtime_put;
@@ -992,12 +1025,12 @@ static int am65_cpsw_xdp_tx_frame(struct net_device *ndev,
return ret;
}

-static int am65_cpsw_run_xdp(struct am65_cpsw_common *common,
+static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow,
struct am65_cpsw_port *port,
struct xdp_buff *xdp,
int desc_idx, int cpu, int *len)
{
- struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns;
+ struct am65_cpsw_common *common = flow->common;
struct am65_cpsw_ndev_priv *ndev_priv;
struct net_device *ndev = port->ndev;
struct am65_cpsw_ndev_stats *stats;
@@ -1026,7 +1059,7 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_common *common,
ret = AM65_CPSW_XDP_PASS;
goto out;
case XDP_TX:
- tx_chn = &common->tx_chns[cpu % AM65_CPSW_MAX_TX_QUEUES];
+ tx_chn = &common->tx_chns[cpu % AM65_CPSW_MAX_QUEUES];
netif_txq = netdev_get_tx_queue(ndev, tx_chn->id);

xdpf = xdp_convert_buff_to_frame(xdp);
@@ -1068,7 +1101,8 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_common *common,
}

page = virt_to_head_page(xdp->data);
- am65_cpsw_put_page(rx_chn, page, true, desc_idx);
+ am65_cpsw_put_page(flow, page, true, desc_idx);
+
out:
return ret;
}
@@ -1106,11 +1140,12 @@ static void am65_cpsw_nuss_rx_csum(struct sk_buff *skb, u32 csum_info)
}
}

-static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common,
- u32 flow_idx, int cpu, int *xdp_state)
+static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
+ int cpu, int *xdp_state)
{
- struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns;
+ struct am65_cpsw_rx_chn *rx_chn = &flow->common->rx_chns;
u32 buf_dma_len, pkt_len, port_id = 0, csum_info;
+ struct am65_cpsw_common *common = flow->common;
struct am65_cpsw_ndev_priv *ndev_priv;
struct am65_cpsw_ndev_stats *stats;
struct cppi5_host_desc_t *desc_rx;
@@ -1120,6 +1155,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common,
struct am65_cpsw_port *port;
int headroom, desc_idx, ret;
struct net_device *ndev;
+ u32 flow_idx = flow->id;
struct sk_buff *skb;
struct xdp_buff xdp;
void *page_addr;
@@ -1174,10 +1210,10 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common,
}

if (port->xdp_prog) {
- xdp_init_buff(&xdp, PAGE_SIZE, &port->xdp_rxq);
+ xdp_init_buff(&xdp, PAGE_SIZE, &port->xdp_rxq[flow->id]);
xdp_prepare_buff(&xdp, page_addr, AM65_CPSW_HEADROOM,
pkt_len, false);
- *xdp_state = am65_cpsw_run_xdp(common, port, &xdp, desc_idx,
+ *xdp_state = am65_cpsw_run_xdp(flow, port, &xdp, desc_idx,
cpu, &pkt_len);
if (*xdp_state != AM65_CPSW_XDP_PASS)
goto allocate;
@@ -1195,7 +1231,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common,
skb_mark_for_recycle(skb);
skb->protocol = eth_type_trans(skb, ndev);
am65_cpsw_nuss_rx_csum(skb, csum_info);
- napi_gro_receive(&common->napi_rx, skb);
+ napi_gro_receive(&flow->napi_rx, skb);

stats = this_cpu_ptr(ndev_priv->stats);

@@ -1205,24 +1241,24 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common,
u64_stats_update_end(&stats->syncp);

allocate:
- new_page = page_pool_dev_alloc_pages(rx_chn->page_pool);
+ new_page = page_pool_dev_alloc_pages(flow->page_pool);
if (unlikely(!new_page)) {
dev_err(dev, "page alloc failed\n");
return -ENOMEM;
}

- rx_chn->pages[desc_idx] = new_page;
+ flow->pages[desc_idx] = new_page;

if (netif_dormant(ndev)) {
- am65_cpsw_put_page(rx_chn, new_page, true, desc_idx);
+ am65_cpsw_put_page(flow, new_page, true, desc_idx);
ndev->stats.rx_dropped++;
return 0;
}

requeue:
- ret = am65_cpsw_nuss_rx_push(common, new_page);
+ ret = am65_cpsw_nuss_rx_push(common, new_page, flow_idx);
if (WARN_ON(ret < 0)) {
- am65_cpsw_put_page(rx_chn, new_page, true, desc_idx);
+ am65_cpsw_put_page(flow, new_page, true, desc_idx);
ndev->stats.rx_errors++;
ndev->stats.rx_dropped++;
}
@@ -1232,38 +1268,32 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common,

static enum hrtimer_restart am65_cpsw_nuss_rx_timer_callback(struct hrtimer *timer)
{
- struct am65_cpsw_common *common =
- container_of(timer, struct am65_cpsw_common, rx_hrtimer);
+ struct am65_cpsw_rx_flow *flow = container_of(timer,
+ struct am65_cpsw_rx_flow,
+ rx_hrtimer);

- enable_irq(common->rx_chns.irq);
+ enable_irq(flow->irq);
return HRTIMER_NORESTART;
}

static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget)
{
- struct am65_cpsw_common *common = am65_cpsw_napi_to_common(napi_rx);
- int flow = AM65_CPSW_MAX_RX_FLOWS;
+ struct am65_cpsw_rx_flow *flow = am65_cpsw_napi_to_rx_flow(napi_rx);
+ struct am65_cpsw_common *common = flow->common;
int cpu = smp_processor_id();
int xdp_state_or = 0;
int cur_budget, ret;
int xdp_state;
int num_rx = 0;

- /* process every flow */
- while (flow--) {
- cur_budget = budget - num_rx;
-
- while (cur_budget--) {
- ret = am65_cpsw_nuss_rx_packets(common, flow, cpu,
- &xdp_state);
- xdp_state_or |= xdp_state;
- if (ret)
- break;
- num_rx++;
- }
-
- if (num_rx >= budget)
+ /* process only this flow */
+ cur_budget = budget;
+ while (cur_budget--) {
+ ret = am65_cpsw_nuss_rx_packets(flow, cpu, &xdp_state);
+ xdp_state_or |= xdp_state;
+ if (ret)
break;
+ num_rx++;
}

if (xdp_state_or & AM65_CPSW_XDP_REDIRECT)
@@ -1272,14 +1302,14 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget)
dev_dbg(common->dev, "%s num_rx:%d %d\n", __func__, num_rx, budget);

if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) {
- if (common->rx_irq_disabled) {
- common->rx_irq_disabled = false;
- if (unlikely(common->rx_pace_timeout)) {
- hrtimer_start(&common->rx_hrtimer,
- ns_to_ktime(common->rx_pace_timeout),
+ if (flow->irq_disabled) {
+ flow->irq_disabled = false;
+ if (unlikely(flow->rx_pace_timeout)) {
+ hrtimer_start(&flow->rx_hrtimer,
+ ns_to_ktime(flow->rx_pace_timeout),
HRTIMER_MODE_REL_PINNED);
} else {
- enable_irq(common->rx_chns.irq);
+ enable_irq(flow->irq);
}
}
}
@@ -1527,11 +1557,11 @@ static int am65_cpsw_nuss_tx_poll(struct napi_struct *napi_tx, int budget)

static irqreturn_t am65_cpsw_nuss_rx_irq(int irq, void *dev_id)
{
- struct am65_cpsw_common *common = dev_id;
+ struct am65_cpsw_rx_flow *flow = dev_id;

- common->rx_irq_disabled = true;
+ flow->irq_disabled = true;
disable_irq_nosync(irq);
- napi_schedule(&common->napi_rx);
+ napi_schedule(&flow->napi_rx);

return IRQ_HANDLED;
}
@@ -2176,7 +2206,7 @@ static void am65_cpsw_nuss_free_tx_chns(void *data)
}
}

-void am65_cpsw_nuss_remove_tx_chns(struct am65_cpsw_common *common)
+static void am65_cpsw_nuss_remove_tx_chns(struct am65_cpsw_common *common)
{
struct device *dev = common->dev;
int i;
@@ -2191,15 +2221,9 @@ void am65_cpsw_nuss_remove_tx_chns(struct am65_cpsw_common *common)
devm_free_irq(dev, tx_chn->irq, tx_chn);

netif_napi_del(&tx_chn->napi_tx);
-
- if (!IS_ERR_OR_NULL(tx_chn->desc_pool))
- k3_cppi_desc_pool_destroy(tx_chn->desc_pool);
-
- if (!IS_ERR_OR_NULL(tx_chn->tx_chn))
- k3_udma_glue_release_tx_chn(tx_chn->tx_chn);
-
- memset(tx_chn, 0, sizeof(*tx_chn));
}
+
+ am65_cpsw_nuss_free_tx_chns(common);
}

static int am65_cpsw_nuss_ndev_add_tx_napi(struct am65_cpsw_common *common)
@@ -2331,19 +2355,22 @@ static void am65_cpsw_nuss_free_rx_chns(void *data)
k3_udma_glue_release_rx_chn(rx_chn->rx_chn);
}

-static void am65_cpsw_nuss_remove_rx_chns(void *data)
+static void am65_cpsw_nuss_remove_rx_chns(struct am65_cpsw_common *common)
{
- struct am65_cpsw_common *common = data;
struct device *dev = common->dev;
struct am65_cpsw_rx_chn *rx_chn;
+ struct am65_cpsw_rx_flow *flows;
+ int i;

rx_chn = &common->rx_chns;
+ flows = rx_chn->flows;
devm_remove_action(dev, am65_cpsw_nuss_free_rx_chns, common);

- if (!(rx_chn->irq < 0))
- devm_free_irq(dev, rx_chn->irq, common);
-
- netif_napi_del(&common->napi_rx);
+ for (i = 0; i < common->rx_ch_num_flows; i++) {
+ if (!(flows[i].irq < 0))
+ devm_free_irq(dev, flows[i].irq, &flows[i]);
+ netif_napi_del(&flows[i].napi_rx);
+ }

am65_cpsw_nuss_free_rx_chns(common);

@@ -2356,6 +2383,7 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
struct k3_udma_glue_rx_channel_cfg rx_cfg = { 0 };
u32 max_desc_num = AM65_CPSW_MAX_RX_DESC;
struct device *dev = common->dev;
+ struct am65_cpsw_rx_flow *flow;
u32 hdesc_size, hdesc_size_out;
u32 fdqring_id;
int i, ret = 0;
@@ -2364,12 +2392,21 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
AM65_CPSW_NAV_SW_DATA_SIZE);

rx_cfg.swdata_size = AM65_CPSW_NAV_SW_DATA_SIZE;
- rx_cfg.flow_id_num = AM65_CPSW_MAX_RX_FLOWS;
+ rx_cfg.flow_id_num = common->rx_ch_num_flows;
rx_cfg.flow_id_base = common->rx_flow_id_base;

/* init all flows */
rx_chn->dev = dev;
- rx_chn->descs_num = max_desc_num;
+ rx_chn->descs_num = max_desc_num * rx_cfg.flow_id_num;
+
+ for (i = 0; i < common->rx_ch_num_flows; i++) {
+ flow = &rx_chn->flows[i];
+ flow->page_pool = NULL;
+ flow->pages = devm_kcalloc(dev, AM65_CPSW_MAX_RX_DESC,
+ sizeof(*flow->pages), GFP_KERNEL);
+ if (!flow->pages)
+ return -ENOMEM;
+ }

rx_chn->rx_chn = k3_udma_glue_request_rx_chn(dev, "rx", &rx_cfg);
if (IS_ERR(rx_chn->rx_chn)) {
@@ -2392,13 +2429,6 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
rx_chn->dsize_log2 = __fls(hdesc_size_out);
WARN_ON(hdesc_size_out != (1 << rx_chn->dsize_log2));

- rx_chn->page_pool = NULL;
-
- rx_chn->pages = devm_kcalloc(dev, rx_chn->descs_num,
- sizeof(*rx_chn->pages), GFP_KERNEL);
- if (!rx_chn->pages)
- return -ENOMEM;
-
common->rx_flow_id_base =
k3_udma_glue_rx_get_flow_id_base(rx_chn->rx_chn);
dev_info(dev, "set new flow-id-base %u\n", common->rx_flow_id_base);
@@ -2422,6 +2452,10 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
K3_UDMA_GLUE_SRC_TAG_LO_USE_REMOTE_SRC_TAG,
};

+ flow = &rx_chn->flows[i];
+ flow->id = i;
+ flow->common = common;
+
rx_flow_cfg.ring_rxfdq0_id = fdqring_id;
rx_flow_cfg.rx_cfg.size = max_desc_num;
rx_flow_cfg.rxfdq_cfg.size = max_desc_num;
@@ -2438,28 +2472,32 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
k3_udma_glue_rx_flow_get_fdq_id(rx_chn->rx_chn,
i);

- rx_chn->irq = k3_udma_glue_rx_get_irq(rx_chn->rx_chn, i);
-
- if (rx_chn->irq < 0) {
+ flow->irq = k3_udma_glue_rx_get_irq(rx_chn->rx_chn, i);
+ if (flow->irq <= 0) {
dev_err(dev, "Failed to get rx dma irq %d\n",
- rx_chn->irq);
- ret = rx_chn->irq;
+ flow->irq);
+ ret = flow->irq ? flow->irq : -ENXIO;
goto err;
}
- }
-
- netif_napi_add(common->dma_ndev, &common->napi_rx,
- am65_cpsw_nuss_rx_poll);
- hrtimer_init(&common->rx_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
- common->rx_hrtimer.function = &am65_cpsw_nuss_rx_timer_callback;

- ret = devm_request_irq(dev, rx_chn->irq,
- am65_cpsw_nuss_rx_irq,
- IRQF_TRIGGER_HIGH, dev_name(dev), common);
- if (ret) {
- dev_err(dev, "failure requesting rx irq %u, %d\n",
- rx_chn->irq, ret);
- goto err;
+ snprintf(flow->name,
+ sizeof(flow->name), "%s-rx%d",
+ dev_name(dev), i);
+ netif_napi_add(common->dma_ndev, &flow->napi_rx,
+ am65_cpsw_nuss_rx_poll);
+ hrtimer_init(&flow->rx_hrtimer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_PINNED);
+ flow->rx_hrtimer.function = &am65_cpsw_nuss_rx_timer_callback;
+
+ ret = devm_request_irq(dev, flow->irq,
+ am65_cpsw_nuss_rx_irq,
+ IRQF_TRIGGER_HIGH,
+ flow->name, flow);
+ if (ret) {
+ dev_err(dev, "failure requesting rx %d irq %u, %d\n",
+ i, flow->irq, ret);
+ goto err;
+ }
}

err:
@@ -2705,8 +2743,8 @@ am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx)
/* alloc netdev */
port->ndev = devm_alloc_etherdev_mqs(common->dev,
sizeof(struct am65_cpsw_ndev_priv),
- AM65_CPSW_MAX_TX_QUEUES,
- AM65_CPSW_MAX_RX_QUEUES);
+ AM65_CPSW_MAX_QUEUES,
+ AM65_CPSW_MAX_QUEUES);
if (!port->ndev) {
dev_err(dev, "error allocating slave net_device %u\n",
port->port_id);
@@ -3303,9 +3341,10 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
k3_udma_glue_disable_tx_chn(tx_chan[i].tx_chn);
}

- for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++)
- k3_udma_glue_reset_rx_chn(rx_chan->rx_chn, i, rx_chan,
- am65_cpsw_nuss_rx_cleanup, !!i);
+ for (i = 0; i < common->rx_ch_num_flows; i++)
+ k3_udma_glue_reset_rx_chn(rx_chan->rx_chn, i,
+ &rx_chan->flows[i],
+ am65_cpsw_nuss_rx_cleanup, 0);

k3_udma_glue_disable_rx_chn(rx_chan->rx_chn);

@@ -3346,12 +3385,21 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
return ret;
}

-int am65_cpsw_nuss_update_tx_chns(struct am65_cpsw_common *common, int num_tx)
+int am65_cpsw_nuss_update_tx_rx_chns(struct am65_cpsw_common *common,
+ int num_tx, int num_rx)
{
int ret;

+ am65_cpsw_nuss_remove_tx_chns(common);
+ am65_cpsw_nuss_remove_rx_chns(common);
+
common->tx_ch_num = num_tx;
+ common->rx_ch_num_flows = num_rx;
ret = am65_cpsw_nuss_init_tx_chns(common);
+ if (ret)
+ return ret;
+
+ ret = am65_cpsw_nuss_init_rx_chns(common);

return ret;
}
@@ -3481,6 +3529,7 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev)
common->rx_flow_id_base = -1;
init_completion(&common->tdown_complete);
common->tx_ch_num = AM65_CPSW_DEFAULT_TX_CHNS;
+ common->rx_ch_num_flows = AM65_CPSW_DEFAULT_RX_CHN_FLOWS;
common->pf_p0_rx_ptype_rrobin = false;
common->default_vlan = 1;

@@ -3672,8 +3721,10 @@ static int am65_cpsw_nuss_resume(struct device *dev)
return ret;

/* If RX IRQ was disabled before suspend, keep it disabled */
- if (common->rx_irq_disabled)
- disable_irq(common->rx_chns.irq);
+ for (i = 0; i < common->rx_ch_num_flows; i++) {
+ if (common->rx_chns.flows[i].irq_disabled)
+ disable_irq(common->rx_chns.flows[i].irq);
+ }

am65_cpts_resume(common->cpts);

diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
index e2ce2be320bd..dc8d544230dc 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
@@ -21,9 +21,7 @@ struct am65_cpts;

#define HOST_PORT_NUM 0

-#define AM65_CPSW_MAX_TX_QUEUES 8
-#define AM65_CPSW_MAX_RX_QUEUES 1
-#define AM65_CPSW_MAX_RX_FLOWS 1
+#define AM65_CPSW_MAX_QUEUES 8 /* both TX & RX */

#define AM65_CPSW_PORT_VLAN_REG_OFFSET 0x014

@@ -58,7 +56,7 @@ struct am65_cpsw_port {
struct am65_cpsw_qos qos;
struct devlink_port devlink_port;
struct bpf_prog *xdp_prog;
- struct xdp_rxq_info xdp_rxq;
+ struct xdp_rxq_info xdp_rxq[AM65_CPSW_MAX_QUEUES];
/* Only for suspend resume context */
u32 vid_context;
};
@@ -94,16 +92,27 @@ struct am65_cpsw_tx_chn {
u32 rate_mbps;
};

+struct am65_cpsw_rx_flow {
+ u32 id;
+ struct napi_struct napi_rx;
+ struct am65_cpsw_common *common;
+ int irq;
+ bool irq_disabled;
+ struct hrtimer rx_hrtimer;
+ unsigned long rx_pace_timeout;
+ struct page_pool *page_pool;
+ struct page **pages;
+ char name[32];
+};
+
struct am65_cpsw_rx_chn {
struct device *dev;
struct device *dma_dev;
struct k3_cppi_desc_pool *desc_pool;
struct k3_udma_glue_rx_channel *rx_chn;
- struct page_pool *page_pool;
- struct page **pages;
u32 descs_num;
unsigned char dsize_log2;
- int irq;
+ struct am65_cpsw_rx_flow flows[AM65_CPSW_MAX_QUEUES];
};

#define AM65_CPSW_QUIRK_I2027_NO_TX_CSUM BIT(0)
@@ -145,16 +154,12 @@ struct am65_cpsw_common {
u32 tx_ch_rate_msk;
u32 rx_flow_id_base;

- struct am65_cpsw_tx_chn tx_chns[AM65_CPSW_MAX_TX_QUEUES];
+ struct am65_cpsw_tx_chn tx_chns[AM65_CPSW_MAX_QUEUES];
struct completion tdown_complete;
atomic_t tdown_cnt;

+ int rx_ch_num_flows;
struct am65_cpsw_rx_chn rx_chns;
- struct napi_struct napi_rx;
-
- bool rx_irq_disabled;
- struct hrtimer rx_hrtimer;
- unsigned long rx_pace_timeout;

u32 nuss_ver;
u32 cpsw_ver;
@@ -203,8 +208,8 @@ struct am65_cpsw_ndev_priv {
#define am65_common_get_host(common) (&(common)->host)
#define am65_common_get_port(common, id) (&(common)->ports[(id) - 1])

-#define am65_cpsw_napi_to_common(pnapi) \
- container_of(pnapi, struct am65_cpsw_common, napi_rx)
+#define am65_cpsw_napi_to_rx_flow(pnapi) \
+ container_of(pnapi, struct am65_cpsw_rx_flow, napi_rx)
#define am65_cpsw_napi_to_tx_chn(pnapi) \
container_of(pnapi, struct am65_cpsw_tx_chn, napi_tx)

@@ -215,8 +220,8 @@ struct am65_cpsw_ndev_priv {
extern const struct ethtool_ops am65_cpsw_ethtool_ops_slave;

void am65_cpsw_nuss_set_p0_ptype(struct am65_cpsw_common *common);
-void am65_cpsw_nuss_remove_tx_chns(struct am65_cpsw_common *common);
-int am65_cpsw_nuss_update_tx_chns(struct am65_cpsw_common *common, int num_tx);
+int am65_cpsw_nuss_update_tx_rx_chns(struct am65_cpsw_common *common,
+ int num_tx, int num_rx);

bool am65_cpsw_port_dev_check(const struct net_device *dev);


--
2.34.1