Re: [Patch v4 5/6] thermal/cpu-cooling: Update thermal pressure in case of a maximum frequency capping
From: Vincent Guittot
Date: Thu Oct 31 2019 - 12:38:43 EST
On Thu, 31 Oct 2019 at 17:29, Dietmar Eggemann <dietmar.eggemann@xxxxxxx> wrote:
>
> On 22.10.19 22:34, Thara Gopinath wrote:
> > Thermal governors can request that a cpu's maximum supported frequency
> > be capped in case of an overheat event. This in turn means that the
> > maximum capacity available for tasks to run on the particular cpu is
> > reduced. The delta between the original maximum capacity and the capped
> > maximum capacity is known as thermal pressure. Enable the cpufreq cooling
> > device to update the thermal pressure in the event of a capped
> > maximum frequency.
> >
> > Signed-off-by: Thara Gopinath <thara.gopinath@xxxxxxxxxx>
> > ---
> > drivers/thermal/cpu_cooling.c | 31 +++++++++++++++++++++++++++++--
> > 1 file changed, 29 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
> > index 391f397..2e6a979 100644
> > --- a/drivers/thermal/cpu_cooling.c
> > +++ b/drivers/thermal/cpu_cooling.c
> > @@ -218,6 +218,23 @@ static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
> > }
> >
> > /**
> > + * update_sched_max_capacity - update scheduler about change in cpu
> > + * max frequency.
> > + * @policy - cpufreq policy whose max frequency is capped.
> > + */
> > +static void update_sched_max_capacity(struct cpumask *cpus,
> > + unsigned int cur_max_freq,
> > + unsigned int max_freq)
> > +{
> > + int cpu;
> > + unsigned long capacity = (cur_max_freq << SCHED_CAPACITY_SHIFT) /
> > + max_freq;
> > +
> > + for_each_cpu(cpu, cpus)
> > + update_thermal_pressure(cpu, capacity);
> > +}
> > +
> > +/**
> > * get_load() - get load for a cpu since last updated
> > * @cpufreq_cdev: &struct cpufreq_cooling_device for this cpu
> > * @cpu: cpu number
> > @@ -320,6 +337,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
> > unsigned long state)
> > {
> > struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
> > + int ret;
> >
> > /* Request state should be less than max_level */
> > if (WARN_ON(state > cpufreq_cdev->max_level))
> > @@ -331,8 +349,17 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
> >
> > cpufreq_cdev->cpufreq_state = state;
> >
> > - return dev_pm_qos_update_request(&cpufreq_cdev->qos_req,
> > - cpufreq_cdev->freq_table[state].frequency);
> > + ret = dev_pm_qos_update_request
> > + (&cpufreq_cdev->qos_req,
> > + cpufreq_cdev->freq_table[state].frequency);
> > +
> > + if (ret > 0)
> > + update_sched_max_capacity
> > + (cpufreq_cdev->policy->cpus,
> > + cpufreq_cdev->freq_table[state].frequency,
> > + cpufreq_cdev->policy->cpuinfo.max_freq);
> > +
> > + return ret;
> > }
> >
> > /**
> >
>
> Why not get rid of update_sched_max_capacity() entirely and call
> update_thermal_pressure() in cpu_cooling.c directly? That saves one level in
> the call chain and would mean less code for this feature.
But then you add complexity to update_thermal_pressure(), which now has to
deal with a cpumask and compute a frequency ratio.
IMHO, it's cleaner to keep update_thermal_pressure() as simple as it is now.
>
> Just compile tested on arm64:
>
> diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
> index 3211b4d3a899..bf36995013b0 100644
> --- a/drivers/thermal/cpu_cooling.c
> +++ b/drivers/thermal/cpu_cooling.c
> @@ -217,23 +217,6 @@ static u32 cpu_power_to_freq(struct
> cpufreq_cooling_device *cpufreq_cdev,
> return freq_table[i - 1].frequency;
> }
>
> -/**
> - * update_sched_max_capacity - update scheduler about change in cpu
> - * max frequency.
> - * @policy - cpufreq policy whose max frequency is capped.
> - */
> -static void update_sched_max_capacity(struct cpumask *cpus,
> - unsigned int cur_max_freq,
> - unsigned int max_freq)
> -{
> - int cpu;
> - unsigned long capacity = (cur_max_freq << SCHED_CAPACITY_SHIFT) /
> - max_freq;
> -
> - for_each_cpu(cpu, cpus)
> - update_thermal_pressure(cpu, capacity);
> -}
> -
> /**
> * get_load() - get load for a cpu since last updated
> * @cpufreq_cdev: &struct cpufreq_cooling_device for this cpu
> @@ -353,7 +336,7 @@ static int cpufreq_set_cur_state(struct
> thermal_cooling_device *cdev,
> cpufreq_cdev->freq_table[state].frequency);
>
> if (ret > 0)
> - update_sched_max_capacity
> + update_thermal_pressure
> (cpufreq_cdev->policy->cpus,
> cpufreq_cdev->freq_table[state].frequency,
> cpufreq_cdev->policy->cpuinfo.max_freq);
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 55dfe9634f67..5707813c7621 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1985,9 +1985,9 @@ static inline void rseq_syscall(struct pt_regs *regs)
> #endif
>
> #ifdef CONFIG_SMP
> -void update_thermal_pressure(int cpu, u64 capacity);
> +void update_thermal_pressure(struct cpumask *cpus, unsigned int cur,
> unsigned int max);
> #else
> -static inline void update_thermal_pressure(int cpu, u64 capacity)
> +static inline void update_thermal_pressure(struct cpumask *cpus,
> unsigned int cur, unsigned int max);
> {
> }
> #endif
> diff --git a/kernel/sched/thermal.c b/kernel/sched/thermal.c
> index 0da31e12a5ff..691bdd79597a 100644
> --- a/kernel/sched/thermal.c
> +++ b/kernel/sched/thermal.c
> @@ -43,17 +43,16 @@ static DEFINE_PER_CPU(unsigned long, delta_capacity);
> * the arch_scale_cpu_capacity and capped capacity is stored in per cpu
> * delta_capacity.
> */
> -void update_thermal_pressure(int cpu, u64 capped_freq_ratio)
> +void update_thermal_pressure(struct cpumask *cpus, unsigned int cur,
> unsigned int max)
> {
> - unsigned long __capacity, delta;
> + int cpu;
>
> - /* Normalize the capped freq ratio */
> - __capacity = (capped_freq_ratio * arch_scale_cpu_capacity(cpu)) >>
> -
> SCHED_CAPACITY_SHIFT;
> - delta = arch_scale_cpu_capacity(cpu) - __capacity;
> - pr_debug("updating cpu%d thermal pressure to %lu\n", cpu, delta);
> + for_each_cpu(cpu, cpus) {
> + unsigned long scale_cap = arch_scale_cpu_capacity(cpu);
> + unsigned long cur_cap = cur * scale_cap / max;
>
> - per_cpu(delta_capacity, cpu) = delta;
> + per_cpu(delta_capacity, cpu) = scale_cap - cur_cap;
> + }
> }