RE: [PATCH net-next 4/4] net: mana: Support hibernation and kexec

From: Haiyang Zhang
Date: Mon Nov 01 2021 - 11:12:33 EST




> -----Original Message-----
> From: Dexuan Cui <decui@xxxxxxxxxxxxx>
> Sent: Monday, November 1, 2021 3:03 AM
> To: Haiyang Zhang <haiyangz@xxxxxxxxxxxxx>; davem@xxxxxxxxxxxxx;
> kuba@xxxxxxxxxx; gustavoars@xxxxxxxxxx; netdev@xxxxxxxxxxxxxxx
> Cc: KY Srinivasan <kys@xxxxxxxxxxxxx>; stephen@xxxxxxxxxxxxxxxxxx;
> wei.liu@xxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx; linux-
> hyperv@xxxxxxxxxxxxxxx; Shachar Raindel <shacharr@xxxxxxxxxxxxx>; Paul
> Rosswurm <paulros@xxxxxxxxxxxxx>; olaf@xxxxxxxxx; vkuznets
> <vkuznets@xxxxxxxxxx>
> Subject: RE: [PATCH net-next 4/4] net: mana: Support hibernation and
> kexec
>
> > From: Haiyang Zhang <haiyangz@xxxxxxxxxxxxx>
> > Sent: Saturday, October 30, 2021 8:55 AM
> > >
> > > @@ -1844,44 +1845,70 @@ int mana_probe(struct gdma_dev *gd)
> > > if (err)
> > > return err;
> > >
> > > - ac = kzalloc(sizeof(*ac), GFP_KERNEL);
> > > - if (!ac)
> > > - return -ENOMEM;
> > > + if (!resuming) {
> > > + ac = kzalloc(sizeof(*ac), GFP_KERNEL);
> > > + if (!ac)
> > > + return -ENOMEM;
> > >
> > > - ac->gdma_dev = gd;
> > > - ac->num_ports = 1;
> > > - gd->driver_data = ac;
> > > + ac->gdma_dev = gd;
> > > + gd->driver_data = ac;
> > > + }
> > >
> > > err = mana_create_eq(ac);
> > > if (err)
> > > goto out;
> > >
> > > err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION,
> > > MANA_MINOR_VERSION,
> > > - MANA_MICRO_VERSION, &ac->num_ports);
> > > + MANA_MICRO_VERSION, &num_ports);
> > > if (err)
> > > goto out;
> > >
> > > + if (!resuming) {
> > > + ac->num_ports = num_ports;
> > > + } else {
> > > + if (ac->num_ports != num_ports) {
> > > + dev_err(dev, "The number of vPorts changed: %d->%d\n",
> > > + ac->num_ports, num_ports);
> > > + err = -EPROTO;
> > > + goto out;
> > > + }
> > > + }
> > > +
> > > + if (ac->num_ports == 0)
> > > + dev_err(dev, "Failed to detect any vPort\n");
> > > +
> > > if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
> > > ac->num_ports = MAX_PORTS_IN_MANA_DEV;
> > >
> > > - for (i = 0; i < ac->num_ports; i++) {
> > > - err = mana_probe_port(ac, i, &ac->ports[i]);
> > > - if (err)
> > > - break;
> > > + if (!resuming) {
> > > + for (i = 0; i < ac->num_ports; i++) {
> > > + err = mana_probe_port(ac, i, &ac->ports[i]);
> > > + if (err)
> > > + break;
> > > + }
> > > + } else {
> > > + for (i = 0; i < ac->num_ports; i++) {
> > > + rtnl_lock();
> > > + err = mana_attach(ac->ports[i]);
> > > + rtnl_unlock();
> > > + if (err)
> > > + break;
> > > + }
> > > }
> > > out:
> > > if (err)
> > > - mana_remove(gd);
> > > + mana_remove(gd, false);
> >
> > The "goto out" can happen in both resuming true/false cases,
> > should the error handling path deal with the two cases
> > differently?
>
> Let me make the below change in v2. Please let me know
> if any further change is needed:
>
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -1850,8 +1850,10 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
>
> if (!resuming) {
> ac = kzalloc(sizeof(*ac), GFP_KERNEL);
> - if (!ac)
> - return -ENOMEM;
> + if (!ac) {
> + err = -ENOMEM;
> + goto out;
> + }
>
> ac->gdma_dev = gd;
> gd->driver_data = ac;
> @@ -1872,8 +1874,8 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
> if (ac->num_ports != num_ports) {
> dev_err(dev, "The number of vPorts changed: %d-
> >%d\n",
> ac->num_ports, num_ports);
> - err = -EPROTO;
> - goto out;
> + /* It's unsafe to proceed. */
> + return -EPROTO;
> }
> }
>
> @@ -1886,22 +1888,36 @@ int mana_probe(struct gdma_dev *gd, bool
> resuming)
> if (!resuming) {
> for (i = 0; i < ac->num_ports; i++) {
> err = mana_probe_port(ac, i, &ac->ports[i]);
> - if (err)
> - break;
> + if (err) {
> + dev_err(dev, "Failed to probe
> vPort %u: %d\n",
> + i, err);
> + goto out;
> + }
> }
> } else {
> for (i = 0; i < ac->num_ports; i++) {
> rtnl_lock();
> err = mana_attach(ac->ports[i]);
> rtnl_unlock();
> - if (err)
> - break;
> +
> + if (err) {
> + netdev_err(ac->ports[i],
> + "Failed to resume
> vPort %u: %d\n",
> + i, err);
> + return err;
> + }
> }
> }
> +
> + return 0;
> out:
> - if (err)
> - mana_remove(gd, false);
> + /* In the resuming path, it's safer to leave the device in the
> failed
> + * state than try to invoke mana_detach().
> + */
> + if (resuming)
> + return err;
>
> + mana_remove(gd, false);
> return err;
> }
>
> @@ -1919,7 +1935,7 @@ void mana_remove(struct gdma_dev *gd, bool
> suspending)
> if (!ndev) {
> if (i == 0)
> dev_err(dev, "No net device to
> remove\n");
> - goto out;
> + break;
> }
>
> /* All cleanup actions should stay after rtnl_lock(),
> otherwise
>
> For your easy reviewing, the new code of the function in v2 will be:
>
> int mana_probe(struct gdma_dev *gd, bool resuming)
> {
> struct gdma_context *gc = gd->gdma_context;
> struct mana_context *ac = gd->driver_data;
> struct device *dev = gc->dev;
> u16 num_ports = 0;
> int err;
> int i;
>
> dev_info(dev,
> "Microsoft Azure Network Adapter protocol
> version: %d.%d.%d\n",
> MANA_MAJOR_VERSION, MANA_MINOR_VERSION,
> MANA_MICRO_VERSION);
>
> err = mana_gd_register_device(gd);
> if (err)
> return err;
>
> if (!resuming) {
> ac = kzalloc(sizeof(*ac), GFP_KERNEL);
> if (!ac) {
> err = -ENOMEM;
> goto out;
> }
>
> ac->gdma_dev = gd;
> gd->driver_data = ac;
> }
>
> err = mana_create_eq(ac);
> if (err)
> goto out;
>
> err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION,
> MANA_MINOR_VERSION,
> MANA_MICRO_VERSION, &num_ports);
> if (err)
> goto out;
>
> if (!resuming) {
> ac->num_ports = num_ports;
> } else {
> if (ac->num_ports != num_ports) {
> dev_err(dev, "The number of vPorts changed: %d-
> >%d\n",
> ac->num_ports, num_ports);
> /* It's unsafe to proceed. */
> return -EPROTO;
> }
> }
>
> if (ac->num_ports == 0)
> dev_err(dev, "Failed to detect any vPort\n");
>
> if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
> ac->num_ports = MAX_PORTS_IN_MANA_DEV;
>
> if (!resuming) {
> for (i = 0; i < ac->num_ports; i++) {
> err = mana_probe_port(ac, i, &ac->ports[i]);
> if (err) {
> dev_err(dev, "Failed to probe
> vPort %u: %d\n",
> i, err);
> goto out;
> }
> }
> } else {
> for (i = 0; i < ac->num_ports; i++) {
> rtnl_lock();
> err = mana_attach(ac->ports[i]);
> rtnl_unlock();
>
> if (err) {
> netdev_err(ac->ports[i],
> "Failed to resume
> vPort %u: %d\n",
> i, err);
> return err;
> }
> }
> }
>
> return 0;
> out:
> /* In the resuming path, it's safer to leave the device in the
> failed
> * state than try to invoke mana_detach().
> */
> if (resuming)
> return err;
>
> mana_remove(gd, false);
> return err;
> }

The new code looks good!

Thanks,
- Haiyang