Re: [PATCH v4 1/4] PM / EM: add devices to Energy Model

From: Quentin Perret
Date: Fri Mar 13 2020 - 06:04:17 EST


Hi Lukasz,

On Monday 09 Mar 2020 at 13:41:14 (+0000), Lukasz Luba wrote:
<snip>
> diff --git a/drivers/opp/of.c b/drivers/opp/of.c
> index 9cd8f0adacae..0efd6cf6d023 100644
> --- a/drivers/opp/of.c
> +++ b/drivers/opp/of.c
> @@ -1047,9 +1047,8 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node);
> * calculation failed because of missing parameters, 0 otherwise.
> */
> static int __maybe_unused _get_cpu_power(unsigned long *mW, unsigned long *kHz,
> - int cpu)
> + struct device *cpu_dev)
> {
> - struct device *cpu_dev;
> struct dev_pm_opp *opp;
> struct device_node *np;
> unsigned long mV, Hz;
> @@ -1057,10 +1056,6 @@ static int __maybe_unused _get_cpu_power(unsigned long *mW, unsigned long *kHz,
> u64 tmp;
> int ret;
>
> - cpu_dev = get_cpu_device(cpu);
> - if (!cpu_dev)
> - return -ENODEV;
> -
> np = of_node_get(cpu_dev->of_node);
> if (!np)
> return -EINVAL;
> @@ -1128,6 +1123,6 @@ void dev_pm_opp_of_register_em(struct cpumask *cpus)
> if (ret || !cap)
> return;
>
> - em_register_perf_domain(cpus, nr_opp, &em_cb);
> + em_register_perf_domain(cpu_dev, nr_opp, &em_cb, cpus);

Any reason for not checking the return value here ? You added a nice
check in scmi_get_cpu_power(), perhaps do the same thing here ?

> }
> EXPORT_SYMBOL_GPL(dev_pm_opp_of_register_em);
> diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c
> index fe83d7a210d4..fcf2dab1b3b8 100644
> --- a/drivers/thermal/cpufreq_cooling.c
> +++ b/drivers/thermal/cpufreq_cooling.c
> @@ -333,18 +333,18 @@ static inline bool em_is_sane(struct cpufreq_cooling_device *cpufreq_cdev,
> return false;
>
> policy = cpufreq_cdev->policy;
> - if (!cpumask_equal(policy->related_cpus, to_cpumask(em->cpus))) {
> + if (!cpumask_equal(policy->related_cpus, em_span_cpus(em))) {
> pr_err("The span of pd %*pbl is misaligned with cpufreq policy %*pbl\n",
> - cpumask_pr_args(to_cpumask(em->cpus)),
> + cpumask_pr_args(em_span_cpus(em)),
> cpumask_pr_args(policy->related_cpus));
> return false;
> }
>
> nr_levels = cpufreq_cdev->max_level + 1;
> - if (em->nr_cap_states != nr_levels) {
> + if (em->nr_perf_states != nr_levels) {
> pr_err("The number of cap states in pd %*pbl (%u) doesn't match the number of cooling levels (%u)\n",

s/cap states/performance states

> - cpumask_pr_args(to_cpumask(em->cpus)),
> - em->nr_cap_states, nr_levels);
> + cpumask_pr_args(em_span_cpus(em)),
> + em->nr_perf_states, nr_levels);
> return false;
> }

<snip>
> +static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
> + int nr_states, struct em_data_callback *cb)
> {
> unsigned long opp_eff, prev_opp_eff = ULONG_MAX;
> unsigned long power, freq, prev_freq = 0;
> - int i, ret, cpu = cpumask_first(span);
> - struct em_cap_state *table;
> - struct em_perf_domain *pd;
> + struct em_perf_state *table;
> + int i, ret;
> u64 fmax;
>
> - if (!cb->active_power)
> - return NULL;
> -
> - pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL);
> - if (!pd)
> - return NULL;
> -
> table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL);
> if (!table)
> - goto free_pd;
> + return -ENOMEM;
>
> - /* Build the list of capacity states for this performance domain */
> + /* Build the list of performance states for this performance domain */
> for (i = 0, freq = 0; i < nr_states; i++, freq++) {
> /*
> * active_power() is a driver callback which ceils 'freq' to
> - * lowest capacity state of 'cpu' above 'freq' and updates
> + * lowest performance state of 'dev' above 'freq' and updates
> * 'power' and 'freq' accordingly.
> */
> - ret = cb->active_power(&power, &freq, cpu);
> + ret = cb->active_power(&power, &freq, dev);
> if (ret) {
> - pr_err("pd%d: invalid cap. state: %d\n", cpu, ret);
> + dev_err(dev, "EM: invalid perf. state: %d\n",
> + ret);

Not easy to figure out which device has a problem with this. I'm
guessing you went that way since this is called before ida_simple_get() ?
Could that be refactored to make the error message more useful ?

> goto free_cs_table;
> }

<snip>
> +/**
> + * em_unregister_perf_domain() - Unregister Energy Model (EM) for the device
> + * @dev : Device for which the EM is registered
> + *
> + * Try to unregister the EM for the specified device (it checks current
> + * reference counter). The EM for CPUs will not be freed.
> + */
> +void em_unregister_perf_domain(struct device *dev)
> +{
> + struct em_device *em_dev, *tmp;
> +
> + if (IS_ERR_OR_NULL(dev))
> + return;
> +
> + /* We don't support freeing CPU structures in hotplug */
> + if (_is_cpu_device(dev))
> + return;

Can we WARN() here ?

> +
> + mutex_lock(&em_pd_mutex);
> +
> + if (list_empty(&em_pd_dev_list)) {
> + mutex_unlock(&em_pd_mutex);
> + return;
> + }
> +
> + list_for_each_entry_safe(em_dev, tmp, &em_pd_dev_list, em_dev_list) {
> + if (em_dev->dev == dev) {
> + kref_put(&em_dev->kref, _em_release);
> + break;
> + }
> + }
> +
> + mutex_unlock(&em_pd_mutex);
> +}
> +EXPORT_SYMBOL_GPL(em_unregister_perf_domain);

Otherwise this looks pretty good to me. So, with these small nits
addressed:

Acked-by: Quentin Perret <qperret@xxxxxxxxxx>

Thanks!
Quentin