Re: [PATCH v2] thermal: devfreq_cooling: Use PM QoS to set frequency limits
From: Matthias Kaehlcke
Date: Wed Mar 11 2020 - 20:35:31 EST
Is any further action needed from my side, or can this land?
Thanks
Matthias
On Fri, Jan 17, 2020 at 02:22:02PM +0900, Chanwoo Choi wrote:
> On 1/17/20 8:12 AM, Matthias Kaehlcke wrote:
> > Now that devfreq supports limiting the frequency range of a device
> > through PM QoS, make use of it instead of disabling OPPs that should
> > not be used.
> >
> > The switch from disabling OPPs to PM QoS introduces a subtle behavioral
> > change in case of conflicting requests (min > max): PM QoS gives
> > precedence to the MIN_FREQUENCY request, while higher OPPs disabled
> > with dev_pm_opp_disable() would override MIN_FREQUENCY.
> >
> > Signed-off-by: Matthias Kaehlcke <mka@xxxxxxxxxxxx>
> > ---
> >
> > Changes in v2:
> > - added documentation for 'req_max_freq'
> > - fixed jumps in of_devfreq_cooling_register_power() unwind
> > - added comment about behavioral change to the commit message
> >
> > drivers/thermal/devfreq_cooling.c | 70 ++++++++++---------------------
> > 1 file changed, 23 insertions(+), 47 deletions(-)
> >
> > diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
> > index ef59256887ff63..cbbaf5bc425d1a 100644
> > --- a/drivers/thermal/devfreq_cooling.c
> > +++ b/drivers/thermal/devfreq_cooling.c
> > @@ -24,11 +24,13 @@
> > #include <linux/idr.h>
> > #include <linux/slab.h>
> > #include <linux/pm_opp.h>
> > +#include <linux/pm_qos.h>
> > #include <linux/thermal.h>
> >
> > #include <trace/events/thermal.h>
> >
> > -#define SCALE_ERROR_MITIGATION 100
> > +#define HZ_PER_KHZ 1000
> > +#define SCALE_ERROR_MITIGATION 100
> >
> > static DEFINE_IDA(devfreq_ida);
> >
> > @@ -53,6 +55,8 @@ static DEFINE_IDA(devfreq_ida);
> > * 'utilization' (which is 'busy_time / 'total_time').
> > * The 'res_util' range is from 100 to (power_table[state] * 100)
> > * for the corresponding 'state'.
> > + * @req_max_freq: PM QoS request for limiting the maximum frequency
> > + * of the devfreq device.
> > */
> > struct devfreq_cooling_device {
> > int id;
> > @@ -65,49 +69,9 @@ struct devfreq_cooling_device {
> > struct devfreq_cooling_power *power_ops;
> > u32 res_util;
> > int capped_state;
> > + struct dev_pm_qos_request req_max_freq;
> > };
> >
> > -/**
> > - * partition_enable_opps() - disable all opps above a given state
> > - * @dfc: Pointer to devfreq we are operating on
> > - * @cdev_state: cooling device state we're setting
> > - *
> > - * Go through the OPPs of the device, enabling all OPPs until
> > - * @cdev_state and disabling those frequencies above it.
> > - */
> > -static int partition_enable_opps(struct devfreq_cooling_device *dfc,
> > - unsigned long cdev_state)
> > -{
> > - int i;
> > - struct device *dev = dfc->devfreq->dev.parent;
> > -
> > - for (i = 0; i < dfc->freq_table_size; i++) {
> > - struct dev_pm_opp *opp;
> > - int ret = 0;
> > - unsigned int freq = dfc->freq_table[i];
> > - bool want_enable = i >= cdev_state ? true : false;
> > -
> > - opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable);
> > -
> > - if (PTR_ERR(opp) == -ERANGE)
> > - continue;
> > - else if (IS_ERR(opp))
> > - return PTR_ERR(opp);
> > -
> > - dev_pm_opp_put(opp);
> > -
> > - if (want_enable)
> > - ret = dev_pm_opp_enable(dev, freq);
> > - else
> > - ret = dev_pm_opp_disable(dev, freq);
> > -
> > - if (ret)
> > - return ret;
> > - }
> > -
> > - return 0;
> > -}
> > -
> > static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
> > unsigned long *state)
> > {
> > @@ -134,7 +98,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
> > struct devfreq_cooling_device *dfc = cdev->devdata;
> > struct devfreq *df = dfc->devfreq;
> > struct device *dev = df->dev.parent;
> > - int ret;
> > + unsigned long freq;
> >
> > if (state == dfc->cooling_state)
> > return 0;
> > @@ -144,9 +108,10 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
> > if (state >= dfc->freq_table_size)
> > return -EINVAL;
> >
> > - ret = partition_enable_opps(dfc, state);
> > - if (ret)
> > - return ret;
> > + freq = dfc->freq_table[state];
> > +
> > + dev_pm_qos_update_request(&dfc->req_max_freq,
> > + DIV_ROUND_UP(freq, HZ_PER_KHZ));
> >
> > dfc->cooling_state = state;
> >
> > @@ -529,9 +494,15 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
> > if (err)
> > goto free_dfc;
> >
> > - err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL);
> > + err = dev_pm_qos_add_request(df->dev.parent, &dfc->req_max_freq,
> > + DEV_PM_QOS_MAX_FREQUENCY,
> > + PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
> > if (err < 0)
> > goto free_tables;
> > +
> > + err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL);
> > + if (err < 0)
> > + goto remove_qos_req;
> > dfc->id = err;
> >
> > snprintf(dev_name, sizeof(dev_name), "thermal-devfreq-%d", dfc->id);
> > @@ -552,6 +523,10 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
> >
> > release_ida:
> > ida_simple_remove(&devfreq_ida, dfc->id);
> > +
> > +remove_qos_req:
> > + dev_pm_qos_remove_request(&dfc->req_max_freq);
> > +
> > free_tables:
> > kfree(dfc->power_table);
> > kfree(dfc->freq_table);
> > @@ -600,6 +575,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
> >
> > thermal_cooling_device_unregister(dfc->cdev);
> > ida_simple_remove(&devfreq_ida, dfc->id);
> > + dev_pm_qos_remove_request(&dfc->req_max_freq);
> > kfree(dfc->power_table);
> > kfree(dfc->freq_table);
> >
> >
>
> Reviewed-by: Chanwoo Choi <cw00.choi@xxxxxxxxxxx>
>
> --
> Best Regards,
> Chanwoo Choi
> Samsung Electronics