Re: [PATCH net-next v2 08/15] gve: refactor gve_init_priv for reset path
From: Harshitha Ramamurthy
Date: Thu Jun 04 2026 - 23:58:31 EST
On Tue, Jun 2, 2026 at 4:59 PM Harshitha Ramamurthy
<hramamurthy@xxxxxxxxxx> wrote:
>
> The driver does not need to renegotiate all properties with
> the device on a reset since those should stay constant through
> a reset. Hence change gve_init_priv() into a method that only
> sets these properties into the priv structure and hence needs
> to be only called once during gve_probe().
>
> To achieve this end state of gve_init_priv(), do the following:
> - introduce gve_adminq_init() which writes the driver version register
> and allocates the AdminQ and call it in gve_probe()
> - move gve_adminq_get_device_properties() into gve_probe()
> - introduce gve_setup_device() which deals with device setup logic and
> call it in gve_probe()
> - resetting no. of registered pages is moved into gve_register_qpls()
> since that is a QPL specific property.
>
> With these changes, gve_adminq_get_device_properties() and
> gve_init_priv() are only called once during gve_probe.
> gve_reset_recovery() can now bypass full initialization and call these
> targeted setup functions directly.
>
> This prepares the driver to add mailbox mode's control plane
> initialization and device properties negotiation in the same place
> as is done in AdminQ mode in the upcoming patches when adding the
> mailbox ABI.
>
> These changes are only code movement, no functional change.
>
> Reviewed-by: Willem de Bruijn <willemb@xxxxxxxxxx>
> Reviewed-by: Jordan Rhee <jordanrhee@xxxxxxxxxx>
> Signed-off-by: Harshitha Ramamurthy <hramamurthy@xxxxxxxxxx>
> ---
> drivers/net/ethernet/google/gve/gve.h | 2 +
> drivers/net/ethernet/google/gve/gve_adminq.c | 12 +-
> drivers/net/ethernet/google/gve/gve_adminq.h | 2 +-
> drivers/net/ethernet/google/gve/gve_main.c | 132 +++++++++++--------
> 4 files changed, 92 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
> index bd48c3d7a2b2..72588d201e5d 100644
> --- a/drivers/net/ethernet/google/gve/gve.h
> +++ b/drivers/net/ethernet/google/gve/gve.h
> @@ -1252,6 +1252,8 @@ static inline bool gve_is_clock_enabled(struct gve_priv *priv)
> return priv->nic_ts_report;
> }
>
> +void gve_adminq_write_version(u8 __iomem *driver_version_register);
> +
> /* gqi napi handler defined in gve_main.c */
> int gve_napi_poll(struct napi_struct *napi, int budget);
>
> diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
> index 923a5c737258..e7956d2768b6 100644
> --- a/drivers/net/ethernet/google/gve/gve_adminq.c
> +++ b/drivers/net/ethernet/google/gve/gve_adminq.c
> @@ -296,8 +296,10 @@ gve_process_device_options(struct gve_priv *priv,
> return 0;
> }
>
> -int gve_adminq_alloc(struct device *dev, struct gve_priv *priv)
> +static int gve_adminq_alloc(struct gve_priv *priv)
> {
> + struct device *dev = &priv->pdev->dev;
> +
> priv->adminq_pool = dma_pool_create("adminq_pool", dev,
> GVE_ADMINQ_BUFFER_SIZE, 0, 0);
> if (unlikely(!priv->adminq_pool))
> @@ -353,6 +355,14 @@ int gve_adminq_alloc(struct device *dev, struct gve_priv *priv)
> return 0;
> }
>
> +int gve_adminq_init(struct gve_priv *priv)
> +{
> + struct gve_registers __iomem *reg_bar = priv->reg_bar0;
> +
> + gve_adminq_write_version(®_bar->driver_version);
> + return gve_adminq_alloc(priv);
> +}
> +
> void gve_adminq_release(struct gve_priv *priv)
> {
> int i = 0;
> diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
> index ac3ef9fd8d24..d07e9c6f279d 100644
> --- a/drivers/net/ethernet/google/gve/gve_adminq.h
> +++ b/drivers/net/ethernet/google/gve/gve_adminq.h
> @@ -619,7 +619,7 @@ union gve_adminq_command {
>
> static_assert(sizeof(union gve_adminq_command) == 64);
>
> -int gve_adminq_alloc(struct device *dev, struct gve_priv *priv);
> +int gve_adminq_init(struct gve_priv *priv);
> void gve_adminq_free(struct gve_priv *priv);
> void gve_adminq_release(struct gve_priv *priv);
> int gve_adminq_describe_device(struct gve_priv *priv);
> diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
> index d693caed7e3d..746ff69a28dd 100644
> --- a/drivers/net/ethernet/google/gve/gve_main.c
> +++ b/drivers/net/ethernet/google/gve/gve_main.c
> @@ -797,6 +797,8 @@ static int gve_register_qpls(struct gve_priv *priv)
> int err;
> int i;
>
> + priv->num_registered_pages = 0;
> +
Commenting on Sashiko's report:
"When gve_reset_recovery() runs with was_up=false, gve_open() is not
called and gve_register_qpls() is not reached, so num_registered_pages
is no longer zeroed on that path. Is that intentional, given the
symmetry with gve_unregister_qpl() decrements?
Although this number will get reset every time queues are created, to
be more symmetric, resetting this in gve_recover() which calls
gve_setup_device or in gve_setup_device is better. Will fix in v3.
> num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv));
> num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
>
> @@ -2414,6 +2416,33 @@ static void gve_set_buf_sizes(struct gve_priv *priv)
> priv->header_buf_size = device_info->header_buf_size;
> }
>
> +static int gve_setup_device(struct gve_priv *priv)
> +{
> + int err;
> +
> + priv->xsk_pools = bitmap_zalloc(priv->rx_cfg.max_queues, GFP_KERNEL);
Commenting on Sashiko's report:
"This is a pre-existing issue, but does this allocation cause an XDP socket
DMA mapping leak and silently break XDP sockets during a device reset?
When a device reset occurs, such as from a transmit timeout, the teardown
process frees the priv->xsk_pools bitmap without unmapping the active XDP
socket DMA mappings."
Will send a separate net fix for this pre-existing issue.
> + if (!priv->xsk_pools) {
> + err = -ENOMEM;
> + goto err;
> + }
> +
> + gve_set_netdev_xdp_features(priv);
> + if (!gve_is_gqi(priv))
> + priv->dev->xdp_metadata_ops = &gve_xdp_metadata_ops;
> +
> + err = gve_setup_device_resources(priv);
> + if (err)
> + goto err_free_xsk_bitmap;
> +
> + return 0;
> +
> +err_free_xsk_bitmap:
> + bitmap_free(priv->xsk_pools);
> + priv->xsk_pools = NULL;
> +err:
> + return err;
> +}
> +
> static const struct gve_ctrl_ops gve_adminq_ops = {
> .map_db_bar = gve_adminq_map_db_bar,
> .unmap_db_bar = gve_adminq_unmap_db_bar,
> @@ -2421,36 +2450,18 @@ static const struct gve_ctrl_ops gve_adminq_ops = {
> .set_num_ntfy_blks = gve_adminq_set_num_ntfy_blks,
> };
>
> -static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
> +static int gve_init_priv(struct gve_priv *priv)
> {
> struct gve_device_info *device_info = &priv->device_info;
> int err;
>
> - /* Set up the adminq */
> - err = gve_adminq_alloc(&priv->pdev->dev, priv);
> - if (err) {
> - dev_err(&priv->pdev->dev,
> - "Failed to alloc admin queue: err=%d\n", err);
> - return err;
> - }
> -
> - priv->num_registered_pages = 0;
> -
> - if (skip_describe_device)
> - goto setup_device;
> -
> - device_info->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
> - err = gve_adminq_get_device_properties(priv);
> - if (err)
> - goto err;
> -
> priv->queue_format = priv->device_info.queue_format;
>
> err = priv->ctrl_ops->set_num_ntfy_blks(priv);
> if (err) {
> dev_err(&priv->pdev->dev,
> "Could not setup notify blocks: err=%d\n", err);
> - goto err;
> + return err;
> }
>
> priv->ctrl_ops->set_num_queues(priv);
> @@ -2473,10 +2484,8 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
> netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
> }
>
> - if (gve_set_mtu(priv)) {
> - err = -EINVAL;
> - goto err;
> - }
> + if (gve_set_mtu(priv))
> + return -EINVAL;
>
> priv->num_event_counters = device_info->num_event_counters;
>
> @@ -2501,30 +2510,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
> priv->ts_config.tx_type = HWTSTAMP_TX_OFF;
> priv->ts_config.rx_filter = HWTSTAMP_FILTER_NONE;
> priv->nic_timestamp_supported = device_info->nic_timestamp_supported;
> -
> -setup_device:
> - priv->xsk_pools = bitmap_zalloc(priv->rx_cfg.max_queues, GFP_KERNEL);
> - if (!priv->xsk_pools) {
> - err = -ENOMEM;
> - goto err;
> - }
> -
> - gve_set_netdev_xdp_features(priv);
> - if (!gve_is_gqi(priv))
> - priv->dev->xdp_metadata_ops = &gve_xdp_metadata_ops;
> -
> - err = gve_setup_device_resources(priv);
> - if (err)
> - goto err_free_xsk_bitmap;
> -
> return 0;
> -
> -err_free_xsk_bitmap:
> - bitmap_free(priv->xsk_pools);
> - priv->xsk_pools = NULL;
> -err:
> - gve_adminq_free(priv);
> - return err;
> }
>
> static void gve_teardown_priv_resources(struct gve_priv *priv)
> @@ -2554,15 +2540,29 @@ static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
> {
> int err;
>
> - err = gve_init_priv(priv, true);
> - if (err)
> + err = gve_adminq_init(priv);
> + if (err) {
> + dev_err(&priv->pdev->dev,
> + "Failed to alloc admin queue: err=%d\n", err);
> goto err;
> + }
> +
> + err = gve_setup_device(priv);
> + if (err)
> + goto err_free_adminq;
> if (was_up) {
> err = gve_open(priv->dev);
> if (err)
> - goto err;
> + goto err_free_device;
> }
> return 0;
> +
> +err_free_device:
> + gve_teardown_device_resources(priv);
> + bitmap_free(priv->xsk_pools);
> + priv->xsk_pools = NULL;
> +err_free_adminq:
> + gve_adminq_free(priv);
"If gve_open() fails here, the error path tears down device resources (freeing
priv->ntfy_blocks) and the admin queue DMA pool via gve_adminq_free().
However, the net_device remains registered. If a user subsequently attempts to
bring the interface up, gve_open() is invoked again. Because gve_open()
currently does not check if resources are initialized, it proceeds to call
gve_queues_start()."
Correct, if gve_open() fails, will just return error instead. Will fix in v3.
> err:
> dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
> gve_turndown(priv);
> @@ -2605,7 +2605,7 @@ int gve_reset(struct gve_priv *priv, bool attempt_teardown)
> return err;
> }
>
> -static void gve_write_version(u8 __iomem *driver_version_register)
> +void gve_adminq_write_version(u8 __iomem *driver_version_register)
> {
> const char *c = gve_version_prefix;
>
> @@ -2838,7 +2838,6 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> goto abort_with_pci_region;
> }
>
> - gve_write_version(®_bar->driver_version);
> /* Get max queues to alloc etherdev */
> max_tx_queues = ioread32be(®_bar->max_tx_queues);
> max_rx_queues = ioread32be(®_bar->max_rx_queues);
> @@ -2885,13 +2884,28 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> priv->rx_cfg.packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
> priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
>
> + err = gve_adminq_init(priv);
> + if (err) {
> + dev_err(&priv->pdev->dev,
> + "Failed to alloc admin queue: err=%d\n", err);
> + goto abort_with_netdev;
> + }
> +
> + priv->device_info.queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
> + err = gve_adminq_get_device_properties(priv);
> + if (err) {
> + dev_err(&priv->pdev->dev,
> + "Could not get device information: err=%d\n", err);
> + goto abort_with_adminq;
> + }
> +
> /* Set adminq ctrl ops */
> priv->ctrl_ops = &gve_adminq_ops;
>
> err = priv->ctrl_ops->map_db_bar(priv);
> if (err) {
> err = -ENOMEM;
> - goto abort_with_netdev;
> + goto abort_with_adminq;
> }
>
> gve_set_probe_in_progress(priv);
> @@ -2906,10 +2920,17 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> priv->tx_cfg.max_queues = max_tx_queues;
> priv->rx_cfg.max_queues = max_rx_queues;
>
> - err = gve_init_priv(priv, false);
> + err = gve_init_priv(priv);
> if (err)
> goto abort_with_wq;
>
> + err = gve_setup_device(priv);
> + if (err) {
> + dev_err(&priv->pdev->dev,
> + "Could not setup device: err=%d\n", err);
> + goto abort_with_wq;
> + }
> +
> if (!gve_is_gqi(priv) && !gve_is_qpl(priv))
> dev->netmem_tx = NETMEM_TX_DMA;
>
> @@ -2932,6 +2953,9 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> abort_with_unmap_db_bar:
> priv->ctrl_ops->unmap_db_bar(priv);
>
> +abort_with_adminq:
> + gve_adminq_free(priv);
> +
> abort_with_netdev:
> free_netdev(dev);
>
> --
> 2.54.0.1013.g208068f2d8-goog
>