Re: [Patch v4 5/6] thermal/cpu-cooling: Update thermal pressure in case of a maximum frequency capping

From: Dietmar Eggemann
Date: Thu Oct 31 2019 - 12:29:52 EST


On 22.10.19 22:34, Thara Gopinath wrote:
> Thermal governors can request for a cpu's maximum supported frequency
> to be capped in case of an overheat event. This in turn means that the
> maximum capacity available for tasks to run on the particular cpu is
> reduced. Delta between the original maximum capacity and capped
> maximum capacity is known as thermal pressure. Enable cpufreq cooling
> device to update the thermal pressure in event of a capped
> maximum frequency.
>
> Signed-off-by: Thara Gopinath <thara.gopinath@xxxxxxxxxx>
> ---
> drivers/thermal/cpu_cooling.c | 31 +++++++++++++++++++++++++++++--
> 1 file changed, 29 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
> index 391f397..2e6a979 100644
> --- a/drivers/thermal/cpu_cooling.c
> +++ b/drivers/thermal/cpu_cooling.c
> @@ -218,6 +218,23 @@ static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
> }
>
> /**
> + * update_sched_max_capacity - update scheduler about change in cpu
> + * max frequency.
> + * @policy - cpufreq policy whose max frequency is capped.
> + */
> +static void update_sched_max_capacity(struct cpumask *cpus,
> + unsigned int cur_max_freq,
> + unsigned int max_freq)
> +{
> + int cpu;
> + unsigned long capacity = (cur_max_freq << SCHED_CAPACITY_SHIFT) /
> + max_freq;
> +
> + for_each_cpu(cpu, cpus)
> + update_thermal_pressure(cpu, capacity);
> +}
> +
> +/**
> * get_load() - get load for a cpu since last updated
> * @cpufreq_cdev: &struct cpufreq_cooling_device for this cpu
> * @cpu: cpu number
> @@ -320,6 +337,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
> unsigned long state)
> {
> struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
> + int ret;
>
> /* Request state should be less than max_level */
> if (WARN_ON(state > cpufreq_cdev->max_level))
> @@ -331,8 +349,17 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
>
> cpufreq_cdev->cpufreq_state = state;
>
> - return dev_pm_qos_update_request(&cpufreq_cdev->qos_req,
> - cpufreq_cdev->freq_table[state].frequency);
> + ret = dev_pm_qos_update_request
> + (&cpufreq_cdev->qos_req,
> + cpufreq_cdev->freq_table[state].frequency);
> +
> + if (ret > 0)
> + update_sched_max_capacity
> + (cpufreq_cdev->policy->cpus,
> + cpufreq_cdev->freq_table[state].frequency,
> + cpufreq_cdev->policy->cpuinfo.max_freq);
> +
> + return ret;
> }
>
> /**
>

Why not get rid of update_sched_max_capacity() entirely and call
update_thermal_pressure() from cpu_cooling.c directly? That saves one level
in the call chain and means less code for this feature.

Only compile-tested on arm64:

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 3211b4d3a899..bf36995013b0 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -217,23 +217,6 @@ static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
return freq_table[i - 1].frequency;
}

-/**
- * update_sched_max_capacity - update scheduler about change in cpu
- * max frequency.
- * @policy - cpufreq policy whose max frequency is capped.
- */
-static void update_sched_max_capacity(struct cpumask *cpus,
- unsigned int cur_max_freq,
- unsigned int max_freq)
-{
- int cpu;
- unsigned long capacity = (cur_max_freq << SCHED_CAPACITY_SHIFT) /
- max_freq;
-
- for_each_cpu(cpu, cpus)
- update_thermal_pressure(cpu, capacity);
-}
-
/**
* get_load() - get load for a cpu since last updated
* @cpufreq_cdev: &struct cpufreq_cooling_device for this cpu
@@ -353,7 +336,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
cpufreq_cdev->freq_table[state].frequency);

if (ret > 0)
- update_sched_max_capacity
+ update_thermal_pressure
(cpufreq_cdev->policy->cpus,
cpufreq_cdev->freq_table[state].frequency,
cpufreq_cdev->policy->cpuinfo.max_freq);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 55dfe9634f67..5707813c7621 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1985,9 +1985,9 @@ static inline void rseq_syscall(struct pt_regs *regs)
#endif

#ifdef CONFIG_SMP
-void update_thermal_pressure(int cpu, u64 capacity);
+void update_thermal_pressure(struct cpumask *cpus, unsigned int cur,
unsigned int max);
#else
-static inline void update_thermal_pressure(int cpu, u64 capacity)
+static inline void update_thermal_pressure(struct cpumask *cpus,
unsigned int cur, unsigned int max);
{
}
#endif
diff --git a/kernel/sched/thermal.c b/kernel/sched/thermal.c
index 0da31e12a5ff..691bdd79597a 100644
--- a/kernel/sched/thermal.c
+++ b/kernel/sched/thermal.c
@@ -43,17 +43,16 @@ static DEFINE_PER_CPU(unsigned long, delta_capacity);
* the arch_scale_cpu_capacity and capped capacity is stored in per cpu
* delta_capacity.
*/
-void update_thermal_pressure(int cpu, u64 capped_freq_ratio)
+void update_thermal_pressure(struct cpumask *cpus, unsigned int cur,
unsigned int max)
{
- unsigned long __capacity, delta;
+ int cpu;

- /* Normalize the capped freq ratio */
- __capacity = (capped_freq_ratio * arch_scale_cpu_capacity(cpu)) >>
-
SCHED_CAPACITY_SHIFT;
- delta = arch_scale_cpu_capacity(cpu) - __capacity;
- pr_debug("updating cpu%d thermal pressure to %lu\n", cpu, delta);
+ for_each_cpu(cpu, cpus) {
+ unsigned long scale_cap = arch_scale_cpu_capacity(cpu);
+ unsigned long cur_cap = cur * scale_cap / max;

- per_cpu(delta_capacity, cpu) = delta;
+ per_cpu(delta_capacity, cpu) = scale_cap - cur_cap;
+ }
}