Re: [PATCH 1/2] thermal: devfreq_cooling: Use PM QoS to set frequency limits
From: Leonard Crestez
Date: Tue Jan 14 2020 - 11:08:45 EST
On 10.01.2020 19:49, Matthias Kaehlcke wrote:
> Now that devfreq supports limiting the frequency range of a device
> through PM QoS make use of it instead of disabling OPPs that should
> not be used.
>
> Signed-off-by: Matthias Kaehlcke <mka@xxxxxxxxxxxx>
> ---
It is not obvious, but this changes behavior when the min and max
requests conflict (min > max): with PM QoS a MIN_FREQUENCY request takes
precedence, but if higher OPPs are disabled then this will override
MIN_FREQUENCY.
There are very few users of this functionality so I don't think there
are any systems that depend on this behaving one way or the other, but
perhaps it should be mentioned in the commit message?
As far as I can tell the only user of devfreq_cooling in upstream is
drivers/gpu/drm/panfrost?
> drivers/thermal/devfreq_cooling.c | 66 ++++++++++---------------------
> 1 file changed, 20 insertions(+), 46 deletions(-)
>
> diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
> index ef59256887ff..3a63603afcf2 100644
> --- a/drivers/thermal/devfreq_cooling.c
> +++ b/drivers/thermal/devfreq_cooling.c
> @@ -24,11 +24,13 @@
> #include <linux/idr.h>
> #include <linux/slab.h>
> #include <linux/pm_opp.h>
> +#include <linux/pm_qos.h>
> #include <linux/thermal.h>
>
> #include <trace/events/thermal.h>
>
> -#define SCALE_ERROR_MITIGATION 100
> +#define HZ_PER_KHZ 1000
> +#define SCALE_ERROR_MITIGATION 100
>
> static DEFINE_IDA(devfreq_ida);
>
> @@ -65,49 +67,9 @@ struct devfreq_cooling_device {
> struct devfreq_cooling_power *power_ops;
> u32 res_util;
> int capped_state;
> + struct dev_pm_qos_request req_max_freq;
> };
>
> -/**
> - * partition_enable_opps() - disable all opps above a given state
> - * @dfc: Pointer to devfreq we are operating on
> - * @cdev_state: cooling device state we're setting
> - *
> - * Go through the OPPs of the device, enabling all OPPs until
> - * @cdev_state and disabling those frequencies above it.
> - */
> -static int partition_enable_opps(struct devfreq_cooling_device *dfc,
> - unsigned long cdev_state)
> -{
> - int i;
> - struct device *dev = dfc->devfreq->dev.parent;
> -
> - for (i = 0; i < dfc->freq_table_size; i++) {
> - struct dev_pm_opp *opp;
> - int ret = 0;
> - unsigned int freq = dfc->freq_table[i];
> - bool want_enable = i >= cdev_state ? true : false;
> -
> - opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable);
> -
> - if (PTR_ERR(opp) == -ERANGE)
> - continue;
> - else if (IS_ERR(opp))
> - return PTR_ERR(opp);
> -
> - dev_pm_opp_put(opp);
> -
> - if (want_enable)
> - ret = dev_pm_opp_enable(dev, freq);
> - else
> - ret = dev_pm_opp_disable(dev, freq);
> -
> - if (ret)
> - return ret;
> - }
> -
> - return 0;
> -}
> -
> static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
> unsigned long *state)
> {
> @@ -134,7 +96,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
> struct devfreq_cooling_device *dfc = cdev->devdata;
> struct devfreq *df = dfc->devfreq;
> struct device *dev = df->dev.parent;
> - int ret;
> + unsigned long freq;
>
> if (state == dfc->cooling_state)
> return 0;
> @@ -144,9 +106,10 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
> if (state >= dfc->freq_table_size)
> return -EINVAL;
>
> - ret = partition_enable_opps(dfc, state);
> - if (ret)
> - return ret;
> + freq = dfc->freq_table[state];
> +
> + dev_pm_qos_update_request(&dfc->req_max_freq,
> + DIV_ROUND_UP(freq, HZ_PER_KHZ));
>
> dfc->cooling_state = state;
>
> @@ -529,6 +492,12 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
> if (err)
> goto free_dfc;
>
> + err = dev_pm_qos_add_request(df->dev.parent, &dfc->req_max_freq,
> + DEV_PM_QOS_MAX_FREQUENCY,
> + PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
> + if (err < 0)
> + goto remove_qos_req;
> +
> err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL);
> if (err < 0)
> goto free_tables;
> @@ -552,6 +521,10 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
>
> release_ida:
> ida_simple_remove(&devfreq_ida, dfc->id);
> +
> +remove_qos_req:
> + dev_pm_qos_remove_request(&dfc->req_max_freq);
> +
A quirk of the dev_pm_qos API is that dev_pm_qos_remove_request prints a
WARN splat if !dev_pm_qos_request_active and this can be true on
dev_pm_qos_add_request error.
I dealt with this by checking dev_pm_qos_request_active explicitly but
perhaps dev_pm_qos API could be changed? In general "free/release"
functions shouldn't complain if there's nothing to do.
> free_tables:
> kfree(dfc->power_table);
> kfree(dfc->freq_table);
> @@ -600,6 +573,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
>
> thermal_cooling_device_unregister(dfc->cdev);
> ida_simple_remove(&devfreq_ida, dfc->id);
> + dev_pm_qos_remove_request(&dfc->req_max_freq);
> kfree(dfc->power_table);
> kfree(dfc->freq_table);