Re: [PATCH net-next v5 2/3] gve: make nic clock reads thread safe

From: Jordan Rhee

Date: Tue May 05 2026 - 17:40:08 EST

On Tue, Apr 28, 2026 at 6:28 PM Harshitha Ramamurthy
<hramamurthy@xxxxxxxxxx> wrote:
>
> From: Ankit Garg <nktgrg@xxxxxxxxxx>
>
> Add a mutex to protect the shared DMA buffer that receives NIC
> timestamp reports. The NIC timestamp will be read from two different
> threads: the periodic worker and upcoming `gettimex64`.
>
> Move clock registration to the last step of initialization to ensure
> that all data needed by the clock module is initialized before
> the clock is exposed to usermode.
>
> Reviewed-by: Joshua Washington <joshwash@xxxxxxxxxx>
> Signed-off-by: Ankit Garg <nktgrg@xxxxxxxxxx>
> Signed-off-by: Jordan Rhee <jordanrhee@xxxxxxxxxx>
> Signed-off-by: Harshitha Ramamurthy <hramamurthy@xxxxxxxxxx>
> ---
> Changes in v3:
> - Reorder init/teardown to register PTP clock last, and simplify code
> - Move ptp-related members from gve_priv to gve_ptp
> - Only assign priv->ptp after ptp module is successfully initialized
> ---
> drivers/net/ethernet/google/gve/gve.h | 12 +-
> drivers/net/ethernet/google/gve/gve_ethtool.c | 3 +-
> drivers/net/ethernet/google/gve/gve_ptp.c | 134 ++++++++----------
> 3 files changed, 63 insertions(+), 86 deletions(-)
>
> diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
> index 1d66d3834f7e..7b69d0cfc0d5 100644
> --- a/drivers/net/ethernet/google/gve/gve.h
> +++ b/drivers/net/ethernet/google/gve/gve.h
> @@ -792,6 +792,9 @@ struct gve_ptp {
> struct ptp_clock_info info;
> struct ptp_clock *clock;
> struct gve_priv *priv;
> + struct mutex nic_ts_read_lock; /* Protects nic_ts_report */
> + struct gve_nic_ts_report *nic_ts_report;
> + dma_addr_t nic_ts_report_bus;
> };
>
> struct gve_priv {
> @@ -923,8 +926,6 @@ struct gve_priv {
> bool nic_timestamp_supported;
> struct gve_ptp *ptp;
> struct kernel_hwtstamp_config ts_config;
> - struct gve_nic_ts_report *nic_ts_report;
> - dma_addr_t nic_ts_report_bus;
> u64 last_sync_nic_counter; /* Clock counter from last NIC TS report */
> };
>
> @@ -1201,7 +1202,7 @@ static inline bool gve_supports_xdp_xmit(struct gve_priv *priv)
>
> static inline bool gve_is_clock_enabled(struct gve_priv *priv)
> {
> - return priv->nic_ts_report;
> + return priv->ptp;
> }
>
> /* gqi napi handler defined in gve_main.c */
> @@ -1321,14 +1322,9 @@ int gve_flow_rules_reset(struct gve_priv *priv);
> int gve_init_rss_config(struct gve_priv *priv, u16 num_queues);
> /* PTP and timestamping */
> #if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
> -int gve_clock_nic_ts_read(struct gve_priv *priv);
> int gve_init_clock(struct gve_priv *priv);
> void gve_teardown_clock(struct gve_priv *priv);
> #else /* CONFIG_PTP_1588_CLOCK */
> -static inline int gve_clock_nic_ts_read(struct gve_priv *priv)
> -{
> - return -EOPNOTSUPP;
> -}
>
> static inline int gve_init_clock(struct gve_priv *priv)
> {
> diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
> index dc2213b5ce24..4fd7e8a442c5 100644
> --- a/drivers/net/ethernet/google/gve/gve_ethtool.c
> +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
> @@ -972,8 +972,7 @@ static int gve_get_ts_info(struct net_device *netdev,
> info->rx_filters |= BIT(HWTSTAMP_FILTER_NONE) |
> BIT(HWTSTAMP_FILTER_ALL);
>
> - if (priv->ptp)
> - info->phc_index = ptp_clock_index(priv->ptp->clock);
> + info->phc_index = ptp_clock_index(priv->ptp->clock);
> }
>
> return 0;
> diff --git a/drivers/net/ethernet/google/gve/gve_ptp.c b/drivers/net/ethernet/google/gve/gve_ptp.c
> index 06b1cf4a5efc..ad15f1209a83 100644
> --- a/drivers/net/ethernet/google/gve/gve_ptp.c
> +++ b/drivers/net/ethernet/google/gve/gve_ptp.c
> @@ -11,19 +11,20 @@
> #define GVE_NIC_TS_SYNC_INTERVAL_MS 250
>
> /* Read the nic timestamp from hardware via the admin queue. */
> -int gve_clock_nic_ts_read(struct gve_priv *priv)
> +static int gve_clock_nic_ts_read(struct gve_ptp *ptp, u64 *nic_raw)
> {
> - u64 nic_raw;
> int err;
>
> - err = gve_adminq_report_nic_ts(priv, priv->nic_ts_report_bus);
> + mutex_lock(&ptp->nic_ts_read_lock);
> + err = gve_adminq_report_nic_ts(ptp->priv, ptp->nic_ts_report_bus);
> if (err)
> - return err;
> + goto out;
>
> - nic_raw = be64_to_cpu(priv->nic_ts_report->nic_timestamp);
> - WRITE_ONCE(priv->last_sync_nic_counter, nic_raw);
> + *nic_raw = be64_to_cpu(ptp->nic_ts_report->nic_timestamp);
>
> - return 0;
> +out:
> + mutex_unlock(&ptp->nic_ts_read_lock);
> + return err;
> }
>
> static int gve_ptp_gettimex64(struct ptp_clock_info *info,
> @@ -41,17 +42,21 @@ static int gve_ptp_settime64(struct ptp_clock_info *info,
>
> static long gve_ptp_do_aux_work(struct ptp_clock_info *info)
> {
> - const struct gve_ptp *ptp = container_of(info, struct gve_ptp, info);
> + struct gve_ptp *ptp = container_of(info, struct gve_ptp, info);
> struct gve_priv *priv = ptp->priv;
> + u64 nic_raw;
> int err;
>
> if (gve_get_reset_in_progress(priv) || !gve_get_admin_queue_ok(priv))
> goto out;
>
> - err = gve_clock_nic_ts_read(priv);
> - if (err && net_ratelimit())
> - dev_err(&priv->pdev->dev,
> - "%s read err %d\n", __func__, err);
> + err = gve_clock_nic_ts_read(ptp, &nic_raw);
> + if (err) {
> + dev_err_ratelimited(&priv->pdev->dev, "%s read err %d\n",
> + __func__, err);
> + goto out;
> + }
> + WRITE_ONCE(priv->last_sync_nic_counter, nic_raw);

Sashiko says:
---
This problem wasn't introduced by this patch, but since the patch moves
the WRITE_ONCE() operation, does it cause a torn read on 32-bit
architectures?

The 64-bit variable last_sync_nic_counter is updated in process context (by
the PTP worker thread) using WRITE_ONCE() and read concurrently in NAPI
softirq context (gve_rx_get_hwtstamp()) using READ_ONCE().

On 32-bit architectures, READ_ONCE() and WRITE_ONCE() on 64-bit variables
do not guarantee atomicity and will compile into two separate 32-bit memory
accesses.

If a softirq interrupts the worker thread exactly between the two 32-bit
writes, could it read a corrupted 64-bit value combining halves of the old
and new timestamps?

Because the upper 32 bits wrap roughly every 4.3 seconds, a torn read will
cause a multi-second jump in the calculated packet timestamps, corrupting
PTP synchronization and packet captures.

Would it be better to use a seqcount, u64_stats_sync, or atomic64_t to
synchronize these accesses?
---

GVE only runs on x86_64 and ARM64, so this won't hit in the wild. It
might be good to fix from a code correctness perspective, but I
believe it's beyond the scope of this patch which is to introduce
thread safety to gve_clock_nic_ts_read().

>
> out:
> return msecs_to_jiffies(GVE_NIC_TS_SYNC_INTERVAL_MS);
> @@ -65,94 +70,71 @@ static const struct ptp_clock_info gve_ptp_caps = {
> .do_aux_work = gve_ptp_do_aux_work,
> };
>
> -static int gve_ptp_init(struct gve_priv *priv)
> +int gve_init_clock(struct gve_priv *priv)
> {
> struct gve_ptp *ptp;
> + u64 nic_raw;
> int err;
>
> - priv->ptp = kzalloc_obj(*priv->ptp);
> - if (!priv->ptp)
> + ptp = kzalloc_obj(*priv->ptp);
> + if (!ptp)
> return -ENOMEM;
>
> - ptp = priv->ptp;
> ptp->info = gve_ptp_caps;
> - ptp->clock = ptp_clock_register(&ptp->info, &priv->pdev->dev);
> -
> - if (IS_ERR(ptp->clock)) {
> - dev_err(&priv->pdev->dev, "PTP clock registration failed\n");
> - err = PTR_ERR(ptp->clock);
> - goto free_ptp;
> - }
> -
> ptp->priv = priv;
> - return 0;
> -
> -free_ptp:
> - kfree(ptp);
> - priv->ptp = NULL;
> - return err;
> -}
> -
> -static void gve_ptp_release(struct gve_priv *priv)
> -{
> - struct gve_ptp *ptp = priv->ptp;
> -
> - if (!ptp)
> - return;
> -
> - if (ptp->clock)
> - ptp_clock_unregister(ptp->clock);
> -
> - kfree(ptp);
> - priv->ptp = NULL;
> -}
> -
> -int gve_init_clock(struct gve_priv *priv)
> -{
> - int err;
> -
> - err = gve_ptp_init(priv);
> - if (err)
> - return err;
> -
> - priv->nic_ts_report =
> + mutex_init(&ptp->nic_ts_read_lock);
> + ptp->nic_ts_report =
> dma_alloc_coherent(&priv->pdev->dev,
> sizeof(struct gve_nic_ts_report),
> - &priv->nic_ts_report_bus,
> - GFP_KERNEL);
> - if (!priv->nic_ts_report) {
> + &ptp->nic_ts_report_bus, GFP_KERNEL);
> + if (!ptp->nic_ts_report) {
> dev_err(&priv->pdev->dev, "%s dma alloc error\n", __func__);
> err = -ENOMEM;
> - goto release_ptp;
> + goto free_ptp;
> }
> - err = gve_clock_nic_ts_read(priv);
> +
> + err = gve_clock_nic_ts_read(ptp, &nic_raw);
> if (err) {
> dev_err(&priv->pdev->dev, "failed to read NIC clock %d\n", err);
> - goto release_nic_ts_report;
> + goto free_dma_mem;
> }
> - ptp_schedule_worker(priv->ptp->clock,
> + WRITE_ONCE(priv->last_sync_nic_counter, nic_raw);
> +
> + ptp->clock = ptp_clock_register(&ptp->info, &priv->pdev->dev);
> + if (IS_ERR(ptp->clock)) {
> + dev_err(&priv->pdev->dev, "PTP clock registration failed\n");
> + err = PTR_ERR(ptp->clock);
> + goto free_dma_mem;
> + }
> +
> + priv->ptp = ptp;

Sashiko says:
---
This problem wasn't introduced by this patch, but since the patch attempts
to fix a related publication race by moving the clock registration, could
this priv->ptp assignment be reordered before ptp->clock is fully
initialized?

If gve_get_ts_info() reads priv->ptp locklessly (for example, when called
from sock_timestamping_bind_phc()), could it see an uninitialized
ptp->clock because smp_store_release() wasn't used here?
---

The only place that priv->ptp is accessed is from
gve_is_clock_enabled(), which is called from the following places:
- gve_get_ts_info() ethtool op
- No risk of race with gve_init_clock() when called from probe()
because it runs *before* netdev is registered. The ethool operation
cannot be dispatched before the netdev is registered.
- No risk of race with gve_init_clock() when called in reset
recovery path because rtnl and netdev locks are held across
gve_reset(), and ethtool operations also take the rtnl/netdev locks.
- gve_set_ts_config() ndo op and gve_xdp_rx_timestamp()
- No risk of race because gve_init_clock() runs before netdev is registered

> + ptp_schedule_worker(ptp->clock,
> msecs_to_jiffies(GVE_NIC_TS_SYNC_INTERVAL_MS));
>
> return 0;
>
> -release_nic_ts_report:
> - dma_free_coherent(&priv->pdev->dev,
> - sizeof(struct gve_nic_ts_report),
> - priv->nic_ts_report, priv->nic_ts_report_bus);
> - priv->nic_ts_report = NULL;
> -release_ptp:
> - gve_ptp_release(priv);
> +free_dma_mem:
> + dma_free_coherent(&priv->pdev->dev, sizeof(struct gve_nic_ts_report),
> + ptp->nic_ts_report, ptp->nic_ts_report_bus);
> + ptp->nic_ts_report = NULL;
> +free_ptp:
> + mutex_destroy(&ptp->nic_ts_read_lock);
> + kfree(ptp);
> return err;
> }
>
> void gve_teardown_clock(struct gve_priv *priv)
> {
> - gve_ptp_release(priv);
> + struct gve_ptp *ptp = priv->ptp;
>
> - if (priv->nic_ts_report) {
> - dma_free_coherent(&priv->pdev->dev,
> - sizeof(struct gve_nic_ts_report),
> - priv->nic_ts_report, priv->nic_ts_report_bus);
> - priv->nic_ts_report = NULL;
> - }
> + if (!ptp)
> + return;
> +
> + priv->ptp = NULL;
> + ptp_clock_unregister(ptp->clock);
> + dma_free_coherent(&priv->pdev->dev, sizeof(struct gve_nic_ts_report),
> + ptp->nic_ts_report, ptp->nic_ts_report_bus);
> + ptp->nic_ts_report = NULL;
> + mutex_destroy(&ptp->nic_ts_read_lock);
> + kfree(ptp);

Sashiko says:
---
Can this synchronous kfree() cause a use-after-free or a NULL pointer
dereference?

If a concurrent device reset executes gve_teardown_clock(), it sets
priv->ptp = NULL and frees ptp synchronously without an RCU grace period
(such as using kfree_rcu() or synchronize_rcu()).

If gve_get_ts_info() reads priv->ptp locklessly, could it dereference
priv->ptp->clock while it is being freed, leading to a Time-Of-Check to
Time-Of-Use (TOCTOU) bug?
---

Reset cannot race against the gve_get_ts_info() ethtool op because of
the netdev/rtnl locks.

> }
> --
> 2.54.0.545.g6539524ca2-goog
>