Re: [PATCH net v3 2/2] net: ethernet: ti: am65-cpsw: avoid devm_alloc_etherdev, fix module removal

From: Roger Quadros
Date: Fri Oct 04 2024 - 08:58:10 EST


Hi Nicolas,

On 04/10/2024 12:09, Roger Quadros wrote:
> Hi Nicolas,
>
> On 04/10/2024 07:10, Nicolas Pitre wrote:
>> From: Nicolas Pitre <npitre@xxxxxxxxxxxx>
>>
>> Usage of devm_alloc_etherdev_mqs() conflicts with
>> am65_cpsw_nuss_cleanup_ndev() as the same struct net_device instances
>> get unregistered twice. Switch to alloc_etherdev_mqs() and make sure
>
> Do we know why the same net device gets unregistered twice?

On some boards there are 2 net devices per CPSW. So are those 2
getting unregistered?

After some investigation I found that the issue has to do with napi_list.
I don't know exactly why, but it oopses in free_netdev() during the
napi_list iteration:
list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
netif_napi_del(p);

If we clean up the napi list at remove, then I don't see the oops anymore.

>
>> am65_cpsw_nuss_cleanup_ndev() unregisters and frees those net_device
>> instances properly.
>>
>> With this, it is finally possible to rmmod the driver without oopsing
>> the kernel.
>>
>> Fixes: 93a76530316a ("net: ethernet: ti: introduce am65x/j721e gigabit eth subsystem driver")
>> Signed-off-by: Nicolas Pitre <npitre@xxxxxxxxxxxx>
>> ---

Can you please try the below patch instead?

diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index f6bc8a4dc687..e214547aeba7 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -2206,14 +2206,11 @@ static void am65_cpsw_nuss_free_tx_chns(void *data)
}
}

-static void am65_cpsw_nuss_remove_tx_chns(struct am65_cpsw_common *common)
+static void am65_cpsw_nuss_cleanup_tx_napi(struct am65_cpsw_common *common)
{
struct device *dev = common->dev;
int i;

- devm_remove_action(dev, am65_cpsw_nuss_free_tx_chns, common);
-
- common->tx_ch_rate_msk = 0;
for (i = 0; i < common->tx_ch_num; i++) {
struct am65_cpsw_tx_chn *tx_chn = &common->tx_chns[i];

@@ -2222,7 +2219,15 @@ static void am65_cpsw_nuss_remove_tx_chns(struct am65_cpsw_common *common)

netif_napi_del(&tx_chn->napi_tx);
}
+}
+
+static void am65_cpsw_nuss_remove_tx_chns(struct am65_cpsw_common *common)
+{
+ struct device *dev = common->dev;

+ devm_remove_action(dev, am65_cpsw_nuss_free_tx_chns, common);
+ common->tx_ch_rate_msk = 0;
+ am65_cpsw_nuss_cleanup_tx_napi(common);
am65_cpsw_nuss_free_tx_chns(common);
}

@@ -2355,25 +2360,27 @@ static void am65_cpsw_nuss_free_rx_chns(void *data)
k3_udma_glue_release_rx_chn(rx_chn->rx_chn);
}

-static void am65_cpsw_nuss_remove_rx_chns(struct am65_cpsw_common *common)
+static void am65_cpsw_nuss_cleanup_rx_napi(struct am65_cpsw_common *common)
{
struct device *dev = common->dev;
- struct am65_cpsw_rx_chn *rx_chn;
struct am65_cpsw_rx_flow *flows;
int i;

- rx_chn = &common->rx_chns;
- flows = rx_chn->flows;
- devm_remove_action(dev, am65_cpsw_nuss_free_rx_chns, common);
-
+ flows = common->rx_chns.flows;
for (i = 0; i < common->rx_ch_num_flows; i++) {
if (!(flows[i].irq < 0))
devm_free_irq(dev, flows[i].irq, &flows[i]);
netif_napi_del(&flows[i].napi_rx);
}
+}

- am65_cpsw_nuss_free_rx_chns(common);
+static void am65_cpsw_nuss_remove_rx_chns(struct am65_cpsw_common *common)
+{
+ struct device *dev = common->dev;

+ devm_remove_action(dev, am65_cpsw_nuss_free_rx_chns, common);
+ am65_cpsw_nuss_cleanup_rx_napi(common);
+ am65_cpsw_nuss_free_rx_chns(common);
common->rx_flow_id_base = -1;
}

@@ -2871,6 +2878,9 @@ static void am65_cpsw_nuss_cleanup_ndev(struct am65_cpsw_common *common)
if (port->ndev && port->ndev->reg_state == NETREG_REGISTERED)
unregister_netdev(port->ndev);
}
+
+ am65_cpsw_nuss_cleanup_rx_napi(common);
+ am65_cpsw_nuss_cleanup_tx_napi(common);
}

static void am65_cpsw_port_offload_fwd_mark_update(struct am65_cpsw_common *common)