[PATCH net v1 1/4] octeon_ep: fix race conditions in ndo_get_stats64

From: Shinas Rasheed
Date: Tue Dec 03 2024 - 02:22:36 EST


ndo_get_stats64() can race with ndo_stop(), which frees input and
output queue resources. Implement device state variable to protect
against such resource usage.

Fixes: 6a610a46bad1 ("octeon_ep: add support for ndo ops")
Signed-off-by: Shinas Rasheed <srasheed@xxxxxxxxxxx>
---
.../ethernet/marvell/octeon_ep/octep_main.c | 27 +++++++++++++++++++
.../ethernet/marvell/octeon_ep/octep_main.h | 8 ++++++
2 files changed, 35 insertions(+)

diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index 549436efc204..872b4848027c 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -733,6 +733,7 @@ static int octep_open(struct net_device *netdev)
if (ret > 0)
octep_link_up(netdev);

+ set_bit(OCTEP_DEV_STATE_OPEN, &oct->state);
return 0;

set_queues_err:
@@ -745,6 +746,11 @@ static int octep_open(struct net_device *netdev)
return -1;
}

+static bool octep_drv_busy(struct octep_device *oct)
+{
+ return test_bit(OCTEP_DEV_STATE_READ_STATS, &oct->state);
+}
+
/**
* octep_stop() - stop the octeon network device.
*
@@ -759,6 +765,14 @@ static int octep_stop(struct net_device *netdev)

netdev_info(netdev, "Stopping the device ...\n");

+ clear_bit(OCTEP_DEV_STATE_OPEN, &oct->state);
+ /* Make sure device state open is cleared so that no more
+ * stats fetch can happen intermittently
+ */
+ smp_mb__after_atomic();
+ while (octep_drv_busy(oct))
+ msleep(20);
+
octep_ctrl_net_set_link_status(oct, OCTEP_CTRL_NET_INVALID_VFID, false,
false);
octep_ctrl_net_set_rx_state(oct, OCTEP_CTRL_NET_INVALID_VFID, false,
@@ -1001,6 +1015,16 @@ static void octep_get_stats64(struct net_device *netdev,
&oct->iface_rx_stats,
&oct->iface_tx_stats);

+ set_bit(OCTEP_DEV_STATE_READ_STATS, &oct->state);
+ /* Make sure read stats state is set, so that ndo_stop
+ * doesn't clear resources as they are read
+ */
+ smp_mb__after_atomic();
+ if (!test_bit(OCTEP_DEV_STATE_OPEN, &oct->state)) {
+ clear_bit(OCTEP_DEV_STATE_READ_STATS, &oct->state);
+ return;
+ }
+
tx_packets = 0;
tx_bytes = 0;
rx_packets = 0;
@@ -1022,6 +1046,7 @@ static void octep_get_stats64(struct net_device *netdev,
stats->rx_errors = oct->iface_rx_stats.err_pkts;
stats->collisions = oct->iface_tx_stats.xscol;
stats->tx_fifo_errors = oct->iface_tx_stats.undflw;
+ clear_bit(OCTEP_DEV_STATE_READ_STATS, &oct->state);
}

/**
@@ -1526,6 +1551,8 @@ static int octep_sriov_disable(struct octep_device *oct)
pci_disable_sriov(pdev);
CFG_GET_ACTIVE_VFS(oct->conf) = 0;

+ clear_bit(OCTEP_DEV_STATE_OPEN, &oct->state);
+
return 0;
}

diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
index fee59e0e0138..78293366e7de 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
@@ -223,6 +223,12 @@ struct octep_pfvf_info {
u32 mbox_version;
};

+/* Device state */
+enum octep_dev_state {
+ OCTEP_DEV_STATE_OPEN,
+ OCTEP_DEV_STATE_READ_STATS,
+};
+
/* The Octeon device specific private data structure.
* Each Octeon device has this structure to represent all its components.
*/
@@ -318,6 +324,8 @@ struct octep_device {
atomic_t hb_miss_cnt;
/* Task to reset device on heartbeat miss */
struct delayed_work hb_task;
+ /* Device state */
+ unsigned long state;
};

static inline u16 OCTEP_MAJOR_REV(struct octep_device *oct)
--
2.25.1