RE: [PATCH v2] EDAC/versalnet: Refactor memory controller initialization and cleanup

From: Datta, Shubhrajyoti

Date: Thu Feb 26 2026 - 13:14:10 EST


[AMD Official Use Only - AMD Internal Distribution Only]

> -----Original Message-----
> From: Borislav Petkov <bp@xxxxxxxxx>
> Sent: Sunday, November 9, 2025 9:29 PM
> To: Datta, Shubhrajyoti <shubhrajyoti.datta@xxxxxxx>
> Cc: linux-kernel@xxxxxxxxxxxxxxx; linux-edac@xxxxxxxxxxxxxxx;
> shubhrajyoti.datta@xxxxxxxxx; Tony Luck <tony.luck@xxxxxxxxx>; James Morse
> <james.morse@xxxxxxx>; Mauro Carvalho Chehab <mchehab@xxxxxxxxxx>;
> Robert Richter <rric@xxxxxxxxxx>
> Subject: Re: [PATCH v2] EDAC/versalnet: Refactor memory controller
> initialization and cleanup
>
> Caution: This message originated from an External Source. Use proper caution
> when opening attachments, clicking links, or responding.
>
>
> On Tue, Nov 04, 2025 at 03:09:20PM +0530, Shubhrajyoti Datta wrote:
> > Simplify the initialization and cleanup flow for Versal Net DDRMC
> > controllers in the EDAC driver.
> >
> > Introduce `init_single_versalnet()` for per-controller setup and
> > `init_versalnet()` for looping through NUM_CONTROLLERS, also add
> > rollback logic to handle partial init failures.
> >
> > Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta@xxxxxxx>
> > ---
> >
> > Changes in v2:
> > - Rename init_single_versalnet() to init_mc() for clarity.
> > - Rename remove_single_versalnet() to remove_mc() to match naming
> >   convention.
> > - Simplify error handling in init_versalnet() by replacing goto with a rollback
> >   loop.
> > - Reduce indentation and consolidate cleanup logic.
>
> Better, here are some more improvements and cleanups on top. You probably
> should apply the diff to better see what I mean:
>
> - do the kzalloc allocations first
> - publish the structures only after they've been initialized properly so that
> you don't need to unwind unnecessarily when it fails later
> - remove_versalnet() is now trivial
>
> Do run it on the hw and have the code fail at certain places on purpose to make
> sure the unwinding happens properly.

Tested on the hardware.

>
> HTH.
>
> ---
>
> diff --git a/drivers/edac/versalnet_edac.c b/drivers/edac/versalnet_edac.c
> index 01edc7408a5c..dc6108f7cee3 100644
> --- a/drivers/edac/versalnet_edac.c
> +++ b/drivers/edac/versalnet_edac.c
> @@ -70,6 +70,8 @@
> #define XDDR5_BUS_WIDTH_32 1
> #define XDDR5_BUS_WIDTH_16 2
>
> +#define MC_NAME_LEN 32
> +
> /**
> * struct ecc_error_info - ECC error log information.
> * @burstpos: Burst position.
> @@ -758,7 +760,7 @@ static void versal_edac_release(struct device *dev)
> kfree(dev);
> }
>
> -static void remove_mc(struct mc_priv *priv, int i)
> +static void remove_one_mc(struct mc_priv *priv, int i)
> {
> struct mem_ctl_info *mci;
>
> @@ -768,7 +770,7 @@ static void remove_mc(struct mc_priv *priv, int i)
> edac_mc_free(mci);
> }
>
> -static int init_mc(struct mc_priv *priv, struct platform_device *pdev, int i)
> +static int init_one_mc(struct mc_priv *priv, struct platform_device
> +*pdev, int i)
> {
> u32 num_chans, rank, dwidth, config;
> struct edac_mc_layer layers[2];
> @@ -809,41 +811,54 @@ static int init_mc(struct mc_priv *priv, struct
> platform_device *pdev, int i)
> layers[1].is_virt_csrow = false;
>
> rc = -ENOMEM;
> - mci = edac_mc_alloc(i, ARRAY_SIZE(layers), layers,
> - sizeof(struct mc_priv));
> - if (!mci) {
> - edac_printk(KERN_ERR, EDAC_MC, "Failed memory allocation for
> MC%d\n", i);
> + name = kzalloc(MC_NAME_LEN, GFP_KERNEL);
> + if (!name)
> return rc;
> - }
> - priv->mci[i] = mci;
> - priv->dwidth = dt;
>
> dev = kzalloc(sizeof(*dev), GFP_KERNEL);
> if (!dev)
> - goto err_mc_free;
> - dev->release = versal_edac_release;
> - name = kmalloc(32, GFP_KERNEL);
> + goto err_name_free;
> +
> + mci = edac_mc_alloc(i, ARRAY_SIZE(layers), layers, sizeof(struct mc_priv));
> + if (!mci) {
> + edac_printk(KERN_ERR, EDAC_MC, "Failed memory allocation for
> MC%d\n", i);
> + goto err_dev_free;
> + }
> +
> sprintf(name, "versal-net-ddrmc5-edac-%d", i);
> +
> dev->init_name = name;
> + dev->release = versal_edac_release;
> +
> rc = device_register(dev);
> if (rc)
> goto err_mc_free;
>
> mci->pdev = dev;
> -
> - platform_set_drvdata(pdev, priv);
> -
> mc_init(mci, dev);
> +
> rc = edac_mc_add_mc(mci);
> if (rc) {
> edac_printk(KERN_ERR, EDAC_MC, "Failed to register MC%d with EDAC
> core\n", i);
> goto err_unreg;
> }
> +
> + priv->mci[i] = mci;
> + priv->dwidth = dt;
> +
> + platform_set_drvdata(pdev, priv);
> +
> return 0;
> +
> err_unreg:
> device_unregister(mci->pdev);
> err_mc_free:
> edac_mc_free(mci);
> +err_dev_free:
> + kfree(dev);
> +err_name_free:
> + kfree(name);
> +
> return rc;
> }
>
> @@ -852,10 +867,10 @@ static int init_versalnet(struct mc_priv *priv, struct
> platform_device *pdev)
> int rc, i;
>
> for (i = 0; i < NUM_CONTROLLERS; i++) {
> - rc = init_mc(priv, pdev, i);
> + rc = init_one_mc(priv, pdev, i);
> if (rc) {
> while (i--)
> - remove_mc(priv, i);
> + remove_one_mc(priv, i);
> return rc;
> }
> }
> @@ -864,14 +879,8 @@ static int init_versalnet(struct mc_priv *priv, struct
> platform_device *pdev)
>
> static void remove_versalnet(struct mc_priv *priv) {
> - struct mem_ctl_info *mci;
> - int i;
> -
> - for (i = 0; i < NUM_CONTROLLERS; i++) {
> - device_unregister(priv->mci[i]->pdev);
> - mci = edac_mc_del_mc(priv->mci[i]->pdev);
> - edac_mc_free(mci);
> - }
> + for (int i = 0; i < NUM_CONTROLLERS; i++)
> + remove_one_mc(priv, i);
> }
>
> static int mc_probe(struct platform_device *pdev)
>
> --
> Regards/Gruss,
> Boris.
>
> https://people.kernel.org/tglx/notes-about-netiquette