Re: [PATCH net v2 2/4] gve: Fix backward stats when interface goes down or configuration is adjusted
From: Pin-yen Lin
Date: Mon Apr 27 2026 - 16:35:34 EST
On Fri, Apr 24, 2026 at 5:24 PM Harshitha Ramamurthy
<hramamurthy@xxxxxxxxxx> wrote:
>
> From: Debarghya Kundu <debarghyak@xxxxxxxxxx>
>
> gve_get_base_stats() sets all the stats to 0, so the stats go backwards
> when interface goes down or configuration is adjusted.
>
> Fix this by persisting baseline stats across interface down.
>
> Cc: stable@xxxxxxxxxxxxxxx
> Fixes: 2e5e0932dff5 ("gve: add support for basic queue stats")
> Signed-off-by: Debarghya Kundu <debarghyak@xxxxxxxxxx>
> Co-developed-by: Pin-yen Lin <treapking@xxxxxxxxxx>
> Signed-off-by: Pin-yen Lin <treapking@xxxxxxxxxx>
> Signed-off-by: Harshitha Ramamurthy <hramamurthy@xxxxxxxxxx>
> ---
> Changes in v2:
> - Add a NULL pointer check in gve_get_ring_err_stats() (Sashiko)
> - Use local variable to prevent inflates from u64_stats_fetch_retry()
> (Sashiko)
> - Add u64_stats_fetch/begin to protect base stats (Sashiko)
>
> drivers/net/ethernet/google/gve/gve.h | 7 ++
> drivers/net/ethernet/google/gve/gve_main.c | 88 ++++++++++++++++++++--
> 2 files changed, 88 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
> index 1d66d3834f7e..702b1641d984 100644
> --- a/drivers/net/ethernet/google/gve/gve.h
> +++ b/drivers/net/ethernet/google/gve/gve.h
> @@ -794,6 +794,10 @@ struct gve_ptp {
> struct gve_priv *priv;
> };
>
> +struct gve_ring_err_stats {
> + u64 rx_alloc_fails;
> +};
> +
> struct gve_priv {
> struct net_device *dev;
> struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */
> @@ -883,6 +887,9 @@ struct gve_priv {
> unsigned long service_task_flags;
> unsigned long state_flags;
>
> + struct gve_ring_err_stats base_ring_err_stats;
> + struct rtnl_link_stats64 base_net_stats;
> + struct u64_stats_sync base_statss; /* sync stats for 32bit archs */
Sashiko says:
Is u64_stats_init(&priv->base_statss) ever explicitly called during driver
probe?
netdev_alloc_priv() zero-allocates the memory, but on 32-bit architectures
or when lockdep is enabled, the underlying seqcount_t requires explicit
initialization to set up its lockdep class key.
Could failing to initialize it trigger a lockdep splat when the interface
goes down and u64_stats_update_begin() is invoked?
Yes, this is missing in this series. We will include this in v3.
> struct gve_stats_report *stats_report;
> u64 stats_report_len;
> dma_addr_t stats_report_bus; /* dma address for the stats report */
> diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
> index ef00d9ca1643..1fec8e1e4821 100644
> --- a/drivers/net/ethernet/google/gve/gve_main.c
> +++ b/drivers/net/ethernet/google/gve/gve_main.c
> @@ -106,9 +106,34 @@ static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
> return gve_tx_dqo(skb, dev);
> }
>
> -static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
> +static void gve_add_base_stats(struct gve_priv *priv,
> + struct rtnl_link_stats64 *s)
> +{
> + struct rtnl_link_stats64 *base_stats = &priv->base_net_stats;
> + unsigned int start;
> + u64 rx_packets, rx_bytes, rx_dropped, tx_packets, tx_bytes, tx_dropped;
> +
> + do {
> + start = u64_stats_fetch_begin(&priv->base_statss);
> + rx_packets = base_stats->rx_packets;
> + rx_bytes = base_stats->rx_bytes;
> + rx_dropped = base_stats->rx_dropped;
> + tx_packets = base_stats->tx_packets;
> + tx_bytes = base_stats->tx_bytes;
> + tx_dropped = base_stats->tx_dropped;
> + } while (u64_stats_fetch_retry(&priv->base_statss, start));
> +
> + s->rx_packets += rx_packets;
> + s->rx_bytes += rx_bytes;
> + s->rx_dropped += rx_dropped;
> + s->tx_packets += tx_packets;
> + s->tx_bytes += tx_bytes;
> + s->tx_dropped += tx_dropped;
> +}
> +
> +static void gve_get_ring_stats(struct gve_priv *priv,
> + struct rtnl_link_stats64 *s)
> {
> - struct gve_priv *priv = netdev_priv(dev);
> unsigned int start;
> u64 packets, bytes;
> int num_tx_queues;
> @@ -143,6 +168,14 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
> }
> }
>
> +static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
> +{
> + struct gve_priv *priv = netdev_priv(dev);
> +
> + gve_get_ring_stats(priv, s);
> + gve_add_base_stats(priv, s);
> +}
> +
> static int gve_alloc_flow_rule_caches(struct gve_priv *priv)
> {
> struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
> @@ -1533,6 +1566,29 @@ static int gve_queues_stop(struct gve_priv *priv)
> return gve_reset_recovery(priv, false);
> }
>
> +static void gve_get_ring_err_stats(struct gve_priv *priv,
> + struct gve_ring_err_stats *err_stats)
> +{
> + int ring;
> +
> + if (!priv->rx)
> + return;
> +
> + for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
> + struct gve_rx_ring *rx = &priv->rx[ring];
> + unsigned int start;
> + u64 rx_alloc_fails;
> +
> + do {
> + start = u64_stats_fetch_begin(&rx->statss);
> + rx_alloc_fails = rx->rx_skb_alloc_fail +
> + rx->rx_buf_alloc_fail;
> + } while (u64_stats_fetch_retry(&rx->statss, start));
> +
> + err_stats->rx_alloc_fails += rx_alloc_fails;
> + }
> +}
> +
> static int gve_close(struct net_device *dev)
> {
> struct gve_priv *priv = netdev_priv(dev);
> @@ -1542,6 +1598,12 @@ static int gve_close(struct net_device *dev)
> if (err)
> return err;
>
> + /* Save ring queue and err stats before closing the interface */
> + u64_stats_update_begin(&priv->base_statss);
Sashiko says:
Is there a risk of a deadlock here on 32-bit platforms?
ndo_get_stats64 (which calls gve_get_stats()) can execute concurrently in
softirq context. u64_stats_update_begin() on 32-bit platforms acquires a
seqcount lock but does not disable bottom halves.
If a softirq interrupts this update on the same CPU, could the lockless
reader spin infinitely waiting for the sequence counter to become even,
causing a hard deadlock?
Should this update be wrapped in local_bh_disable() and local_bh_enable()?
We will add this in v3.
> + gve_get_ring_stats(priv, &priv->base_net_stats);
> + gve_get_ring_err_stats(priv, &priv->base_ring_err_stats);
> + u64_stats_update_end(&priv->base_statss);
> +
> gve_queues_mem_remove(priv);
Sashiko says:
Will this sequence cause a transient double-counting of statistics?
The active ring stats are accumulated into base_net_stats, but the ring
pointers (priv->rx, priv->tx) remain active and are not cleared until
gve_queues_mem_remove() executes later.
If gve_get_stats() executes concurrently during this window, does it sum both
the active ring stats and the newly updated base_net_stats (which now
incorporates those same ring stats)?
One additional pre-existing issue related to this code path: does
gve_queues_mem_remove() leave a window for a use-after-free?
ndo_get_stats64 executes locklessly under RCU and checks if (priv->rx)
before reading ring stats. gve_queues_mem_remove() frees priv->rx using
kvfree(), but sets the pointer to NULL only after the memory is freed.
If ndo_get_stats64 checks priv->rx, gets preempted, and resumes after
gve_close() frees the array, could it dereference freed memory?
Both point to a pre-existing race between stats and gve_close(). We
will address this in v3.
> return 0;
> }
> @@ -2784,12 +2846,24 @@ static void gve_get_base_stats(struct net_device *dev,
> struct netdev_queue_stats_rx *rx,
> struct netdev_queue_stats_tx *tx)
> {
> - rx->packets = 0;
> - rx->bytes = 0;
> - rx->alloc_fail = 0;
> + const struct gve_ring_err_stats *base_err_stats;
> + const struct rtnl_link_stats64 *base_stats;
> + struct gve_priv *priv;
> + unsigned int start;
>
> - tx->packets = 0;
> - tx->bytes = 0;
> + priv = netdev_priv(dev);
> + base_stats = &priv->base_net_stats;
> + base_err_stats = &priv->base_ring_err_stats;
> +
> + do {
> + start = u64_stats_fetch_begin(&priv->base_statss);
> + rx->packets = base_stats->rx_packets;
> + rx->bytes = base_stats->rx_bytes;
> + rx->alloc_fail = base_err_stats->rx_alloc_fails;
> +
> + tx->packets = base_stats->tx_packets;
> + tx->bytes = base_stats->tx_bytes;
> + } while (u64_stats_fetch_retry(&priv->base_statss, start));
> }
>
> static const struct netdev_stat_ops gve_stat_ops = {
> --
> 2.54.0.545.g6539524ca2-goog
>
Regards,
Pin-yen