Re: [PATCH V3 3/4] pmdomain: core: Fix debugfs node creation failure

From: Dmitry Baryshkov
Date: Mon Oct 07 2024 - 13:33:49 EST


On Mon, Oct 07, 2024 at 11:36:41AM GMT, Sibi Sankar wrote:
> The domain attributes returned by the perf protocol can end up
> reporting identical names across domains, resulting in debugfs
> node creation failure. Fix this failure by ensuring that pm domains
> get a unique name using ida in pm_genpd_init.

Can we make this opt-in or opt-out? Seeing numeric suffixes next to
well-known power domain names (e.g. those comin from RPMh or the CPU
domains) is a bit strange. Or maybe you can limit the IDA suffix just to
the SCMI / perf domains?

>
> Logs: [X1E reports 'NCC' for all its scmi perf domains]
> debugfs: Directory 'NCC' with parent 'pm_genpd' already present!
> debugfs: Directory 'NCC' with parent 'pm_genpd' already present!
>
> Reported-by: Johan Hovold <johan+linaro@xxxxxxxxxx>
> Closes: https://lore.kernel.org/lkml/ZoQjAWse2YxwyRJv@xxxxxxxxxxxxxxxxxxxx/
> Fixes: 718072ceb211 ("PM: domains: create debugfs nodes when adding power domains")
> Fix-suggested-by: Ulf Hansson <ulf.hansson@xxxxxxxxxx>

Just "Suggested-by: ..."

> Signed-off-by: Sibi Sankar <quic_sibis@xxxxxxxxxxx>
> ---
>
> genpd names with ida appended:
> power-domain-cpu0_0
> power-domain-cpu1_1
> ....
> ebi_18
> gfx_19
> ...
> NCC_56
> NCC_57
> NCC_58
>
> genpd summary with ida appended:
> domain status children performance
> /device runtime status managed by
> ------------------------------------------------------------------------------
> NCC_58 on 0
> NCC_57 on 0
> NCC_56 on 0
> ...
> gfx_19 off-0 0
> ebi_18 off-0 0
> ...
> power-domain-cpu1_1 off-0 0
> genpd:0:cpu1 suspended 0 SW
> power-domain-cpu0_0 off-0 0
> genpd:0:cpu0 suspended 0 SW
>
> drivers/pmdomain/core.c | 40 ++++++++++++++++++++++++---------------
> include/linux/pm_domain.h | 1 +
> 2 files changed, 26 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c
> index 5ede0f7eda09..631cb732bb39 100644
> --- a/drivers/pmdomain/core.c
> +++ b/drivers/pmdomain/core.c
> @@ -7,6 +7,7 @@
> #define pr_fmt(fmt) "PM: " fmt
>
> #include <linux/delay.h>
> +#include <linux/idr.h>
> #include <linux/kernel.h>
> #include <linux/io.h>
> #include <linux/platform_device.h>
> @@ -23,6 +24,9 @@
> #include <linux/cpu.h>
> #include <linux/debugfs.h>
>
> +/* Provides a unique ID for each genpd device */
> +static DEFINE_IDA(genpd_ida);
> +
> #define GENPD_RETRY_MAX_MS 250 /* Approximate */
>
> #define GENPD_DEV_CALLBACK(genpd, type, callback, dev) \
> @@ -189,7 +193,7 @@ static inline bool irq_safe_dev_in_sleep_domain(struct device *dev,
>
> if (ret)
> dev_warn_once(dev, "PM domain %s will not be powered off\n",
> - genpd->name);
> + dev_name(&genpd->dev));
>
> return ret;
> }
> @@ -274,7 +278,7 @@ static void genpd_debug_remove(struct generic_pm_domain *genpd)
> if (!genpd_debugfs_dir)
> return;
>
> - debugfs_lookup_and_remove(genpd->name, genpd_debugfs_dir);
> + debugfs_lookup_and_remove(dev_name(&genpd->dev), genpd_debugfs_dir);
> }
>
> static void genpd_update_accounting(struct generic_pm_domain *genpd)
> @@ -731,7 +735,7 @@ static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed)
> genpd->states[state_idx].power_on_latency_ns = elapsed_ns;
> genpd->gd->max_off_time_changed = true;
> pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n",
> - genpd->name, "on", elapsed_ns);
> + dev_name(&genpd->dev), "on", elapsed_ns);
>
> out:
> raw_notifier_call_chain(&genpd->power_notifiers, GENPD_NOTIFY_ON, NULL);
> @@ -782,7 +786,7 @@ static int _genpd_power_off(struct generic_pm_domain *genpd, bool timed)
> genpd->states[state_idx].power_off_latency_ns = elapsed_ns;
> genpd->gd->max_off_time_changed = true;
> pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n",
> - genpd->name, "off", elapsed_ns);
> + dev_name(&genpd->dev), "off", elapsed_ns);
>
> out:
> raw_notifier_call_chain(&genpd->power_notifiers, GENPD_NOTIFY_OFF,
> @@ -1940,7 +1944,7 @@ int dev_pm_genpd_add_notifier(struct device *dev, struct notifier_block *nb)
>
> if (ret) {
> dev_warn(dev, "failed to add notifier for PM domain %s\n",
> - genpd->name);
> + dev_name(&genpd->dev));
> return ret;
> }
>
> @@ -1987,7 +1991,7 @@ int dev_pm_genpd_remove_notifier(struct device *dev)
>
> if (ret) {
> dev_warn(dev, "failed to remove notifier for PM domain %s\n",
> - genpd->name);
> + dev_name(&genpd->dev));
> return ret;
> }
>
> @@ -2013,7 +2017,7 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd,
> */
> if (!genpd_is_irq_safe(genpd) && genpd_is_irq_safe(subdomain)) {
> WARN(1, "Parent %s of subdomain %s must be IRQ safe\n",
> - genpd->name, subdomain->name);
> + dev_name(&genpd->dev), subdomain->name);
> return -EINVAL;
> }
>
> @@ -2088,7 +2092,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
>
> if (!list_empty(&subdomain->parent_links) || subdomain->device_count) {
> pr_warn("%s: unable to remove subdomain %s\n",
> - genpd->name, subdomain->name);
> + dev_name(&genpd->dev), subdomain->name);
> ret = -EBUSY;
> goto out;
> }
> @@ -2264,8 +2268,13 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
> if (ret)
> return ret;
>
> + ret = ida_alloc(&genpd_ida, GFP_KERNEL);
> + if (ret < 0)
> + return ret;
> + genpd->device_id = ret;
> +
> device_initialize(&genpd->dev);
> - dev_set_name(&genpd->dev, "%s", genpd->name);
> + dev_set_name(&genpd->dev, "%s_%u", genpd->name, genpd->device_id);
>
> mutex_lock(&gpd_list_lock);
> list_add(&genpd->gpd_list_node, &gpd_list);
> @@ -2287,13 +2296,13 @@ static int genpd_remove(struct generic_pm_domain *genpd)
>
> if (genpd->has_provider) {
> genpd_unlock(genpd);
> - pr_err("Provider present, unable to remove %s\n", genpd->name);
> + pr_err("Provider present, unable to remove %s\n", dev_name(&genpd->dev));
> return -EBUSY;
> }
>
> if (!list_empty(&genpd->parent_links) || genpd->device_count) {
> genpd_unlock(genpd);
> - pr_err("%s: unable to remove %s\n", __func__, genpd->name);
> + pr_err("%s: unable to remove %s\n", __func__, dev_name(&genpd->dev));
> return -EBUSY;
> }
>
> @@ -2307,9 +2316,10 @@ static int genpd_remove(struct generic_pm_domain *genpd)
> genpd_unlock(genpd);
> genpd_debug_remove(genpd);
> cancel_work_sync(&genpd->power_off_work);
> + ida_free(&genpd_ida, genpd->device_id);
> genpd_free_data(genpd);
>
> - pr_debug("%s: removed %s\n", __func__, genpd->name);
> + pr_debug("%s: removed %s\n", __func__, dev_name(&genpd->dev));
>
> return 0;
> }
> @@ -3272,12 +3282,12 @@ static int genpd_summary_one(struct seq_file *s,
> else
> snprintf(state, sizeof(state), "%s",
> status_lookup[genpd->status]);
> - seq_printf(s, "%-30s %-30s %u", genpd->name, state, genpd->performance_state);
> + seq_printf(s, "%-30s %-30s %u", dev_name(&genpd->dev), state, genpd->performance_state);
>
> /*
> * Modifications on the list require holding locks on both
> * parent and child, so we are safe.
> - * Also genpd->name is immutable.
> + * Also the device name is immutable.
> */
> list_for_each_entry(link, &genpd->parent_links, parent_node) {
> if (list_is_first(&link->parent_node, &genpd->parent_links))
> @@ -3502,7 +3512,7 @@ static void genpd_debug_add(struct generic_pm_domain *genpd)
> if (!genpd_debugfs_dir)
> return;
>
> - d = debugfs_create_dir(genpd->name, genpd_debugfs_dir);
> + d = debugfs_create_dir(dev_name(&genpd->dev), genpd_debugfs_dir);
>
> debugfs_create_file("current_state", 0444,
> d, genpd, &status_fops);
> diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
> index b637ec14025f..738df5296ec7 100644
> --- a/include/linux/pm_domain.h
> +++ b/include/linux/pm_domain.h
> @@ -163,6 +163,7 @@ struct generic_pm_domain {
> atomic_t sd_count; /* Number of subdomains with power "on" */
> enum gpd_status status; /* Current state of the domain */
> unsigned int device_count; /* Number of devices */
> + unsigned int device_id; /* unique device id */
> unsigned int suspended_count; /* System suspend device counter */
> unsigned int prepared_count; /* Suspend counter of prepared devices */
> unsigned int performance_state; /* Aggregated max performance state */
> --
> 2.34.1
>

--
With best wishes
Dmitry