Re: [RFC net-next v4 2/2] net/mlx5e: Add per queue netdev-genl stats

From: Tariq Toukan
Date: Thu Jun 06 2024 - 16:12:21 EST




On 04/06/2024 3:46, Joe Damato wrote:
./cli.py --spec netlink/specs/netdev.yaml \
--dump qstats-get --json '{"scope": "queue"}'

...snip

{'ifindex': 7,
'queue-id': 62,
'queue-type': 'rx',
'rx-alloc-fail': 0,
'rx-bytes': 105965251,
'rx-packets': 179790},
{'ifindex': 7,
'queue-id': 0,
'queue-type': 'tx',
'tx-bytes': 9402665,
'tx-packets': 17551},

...snip

Also tested with the script tools/testing/selftests/drivers/net/stats.py
in several scenarios to ensure stats tallying was correct:

- on boot (default queue counts)
- adjusting queue count up or down (ethtool -L eth0 combined ...)

The tools/testing/selftests/drivers/net/stats.py brings the device up,
so to test with the device down, I did the following:

$ ip link show eth4
7: eth4: <BROADCAST,MULTICAST> mtu 9000 qdisc mq state DOWN [..snip..]
[..snip..]

$ cat /proc/net/dev | grep eth4
eth4: 235710489 434811 [..snip rx..] 2878744 21227 [..snip tx..]

$ ./cli.py --spec ../../../Documentation/netlink/specs/netdev.yaml \
--dump qstats-get --json '{"ifindex": 7}'
[{'ifindex': 7,
'rx-alloc-fail': 0,
'rx-bytes': 235710489,
'rx-packets': 434811,
'tx-bytes': 2878744,
'tx-packets': 21227}]

Compare the values in /proc/net/dev match the output of cli for the same
device, even while the device is down.

Note that while the device is down, per queue stats output nothing
(because the device is down there are no queues):

This part is not true anymore.


$ ./cli.py --spec ../../../Documentation/netlink/specs/netdev.yaml \
--dump qstats-get --json '{"scope": "queue", "ifindex": 7}'
[]

Signed-off-by: Joe Damato <jdamato@xxxxxxxxxx>
---
.../net/ethernet/mellanox/mlx5/core/en_main.c | 138 ++++++++++++++++++
1 file changed, 138 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d03fd1c98eb6..76d64bbcf250 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -39,6 +39,7 @@
#include <linux/debugfs.h>
#include <linux/if_bridge.h>
#include <linux/filter.h>
+#include <net/netdev_queues.h>
#include <net/page_pool/types.h>
#include <net/pkt_sched.h>
#include <net/xdp_sock_drv.h>
@@ -5279,6 +5280,142 @@ static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev)
return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev));
}
+static void mlx5e_get_queue_stats_rx(struct net_device *dev, int i,
+ struct netdev_queue_stats_rx *stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_channel_stats *channel_stats;
+ struct mlx5e_rq_stats *xskrq_stats;
+ struct mlx5e_rq_stats *rq_stats;
+
+ ASSERT_RTNL();
+ if (mlx5e_is_uplink_rep(priv))
+ return;
+
+ /* ptp was ever opened, is currently open, and channel index matches i
+ * then export stats
+ */
+ if (priv->rx_ptp_opened && priv->channels.ptp) {
+ if (test_bit(MLX5E_PTP_STATE_RX, priv->channels.ptp->state) &&
+ priv->channels.ptp->rq.ix == i) {

PTP RQ index is naively assigned to zero:
rq->ix = MLX5E_PTP_CHANNEL_IX;

but this isn't to be used as the stats index.
Today, the PTP-RQ has no matcing rxq in the kernel level.
i.e. turning PTP-RQ on won't add a kernel-level RXQ to the real_num_rx_queues.
Maybe we better do.
If not, and the current state is kept, the best we can do is let the PTP-RQ naively contribute its queue-stat to channel 0.

+ rq_stats = &priv->ptp_stats.rq;
+ stats->packets = rq_stats->packets;
+ stats->bytes = rq_stats->bytes;
+ stats->alloc_fail = rq_stats->buff_alloc_err;
+ return;
+ }
+ }
+
+ channel_stats = priv->channel_stats[i];
+ xskrq_stats = &channel_stats->xskrq;
+ rq_stats = &channel_stats->rq;
+
+ stats->packets = rq_stats->packets + xskrq_stats->packets;
+ stats->bytes = rq_stats->bytes + xskrq_stats->bytes;
+ stats->alloc_fail = rq_stats->buff_alloc_err +
+ xskrq_stats->buff_alloc_err;
+}
+
+static void mlx5e_get_queue_stats_tx(struct net_device *dev, int i,
+ struct netdev_queue_stats_tx *stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_sq_stats *sq_stats;
+
+ ASSERT_RTNL();
+ /* no special case needed for ptp htb etc since txq2sq_stats is kept up
+ * to date for active sq_stats, otherwise get_base_stats takes care of
+ * inactive sqs.
+ */
+ sq_stats = priv->txq2sq_stats[i];
+ stats->packets = sq_stats->packets;
+ stats->bytes = sq_stats->bytes;
+}
+
+static void mlx5e_get_base_stats(struct net_device *dev,
+ struct netdev_queue_stats_rx *rx,
+ struct netdev_queue_stats_tx *tx)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int i, tc;
+
+ ASSERT_RTNL();
+ if (!mlx5e_is_uplink_rep(priv)) {
+ rx->packets = 0;
+ rx->bytes = 0;
+ rx->alloc_fail = 0;
+
+ for (i = priv->channels.params.num_channels; i < priv->stats_nch; i++) {
+ struct netdev_queue_stats_rx rx_i = {0};
+
+ mlx5e_get_queue_stats_rx(dev, i, &rx_i);
+
+ rx->packets += rx_i.packets;
+ rx->bytes += rx_i.bytes;
+ rx->alloc_fail += rx_i.alloc_fail;
+ }
+
+ if (priv->rx_ptp_opened) {
+ /* if PTP was opened, but is not currently open, then
+ * report the stats here. otherwise,
+ * mlx5e_get_queue_stats_rx will get it
+ */

We shouldn't care if the RQ is currently open. The stats are always there.
This applies to all RQs and SQs.

+ if (priv->channels.ptp &&
+ !test_bit(MLX5E_PTP_STATE_RX, priv->channels.ptp->state)) {
+ struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq;
+
+ rx->packets += rq_stats->packets;
+ rx->bytes += rq_stats->bytes;
+ }
+ }
+ }
+
+ tx->packets = 0;
+ tx->bytes = 0;
+
+ for (i = 0; i < priv->stats_nch; i++) {
+ struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i];
+
+ /* while iterating through all channels [0, stats_nch], there
+ * are two cases to handle:
+ *
+ * 1. the channel is available, so sum only the unavailable TCs
+ * [mlx5e_get_dcb_num_tc, max_opened_tc).
+ *
+ * 2. the channel is unavailable, so sum all TCs [0, max_opened_tc).
+ */

Even if the channel is not available, mlx5e_get_queue_stats_tx() accesses and returns its stats.
Here you need to only cover SQs that have no mapping in range [0..real_num_tx_queues - 1].

+ if (i < priv->channels.params.num_channels)
+ tc = mlx5e_get_dcb_num_tc(&priv->channels.params);
+ else
+ tc = 0;
+
+ for (; tc < priv->max_opened_tc; tc++) {
+ struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[tc];
+
+ tx->packets += sq_stats->packets;
+ tx->bytes += sq_stats->bytes;
+ }
+ }
+
+ if (priv->tx_ptp_opened) {
+ /* only report PTP TCs if it was opened but is now closed */
+ if (priv->channels.ptp && !test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state)) {
+ for (tc = 0; tc < priv->channels.ptp->num_tc; tc++) {
+ struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[tc];
+
+ tx->packets += sq_stats->packets;
+ tx->bytes += sq_stats->bytes;
+ }
+ }
+ }
+}
+
+static const struct netdev_stat_ops mlx5e_stat_ops = {
+ .get_queue_stats_rx = mlx5e_get_queue_stats_rx,
+ .get_queue_stats_tx = mlx5e_get_queue_stats_tx,
+ .get_base_stats = mlx5e_get_base_stats,
+};
+
static void mlx5e_build_nic_netdev(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -5296,6 +5433,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->watchdog_timeo = 15 * HZ;
+ netdev->stat_ops = &mlx5e_stat_ops;
netdev->ethtool_ops = &mlx5e_ethtool_ops;
netdev->vlan_features |= NETIF_F_SG;