[RFC PATCH 2/2] thermal/cpufreq_cooling: Use idle_time to get cpu_load when scx_enabled

From: Xuewen Yan

Date: Fri Mar 20 2026 - 07:35:01 EST


From: Di Shen <di.shen@xxxxxxxxxx>

Recently, while enabling sched-ext debugging, we observed abnormal behavior
in our thermal power_allocator’s temperature control.
Through debugging, we found that the CPU util was too low, causing
the CPU frequency to remain unrestricted.

This issue stems from the fact that in the sched_cpu_util() function,
when scx is enabled, cpu_util_cfs becomes zero. As a result,
the thermal subsystem perceives an extremely low CPU utilization,
which degrades the effectiveness of the power_allocator’s control.

scx_cpuperf_target() is not a usable substitute either: it reflects the
targeted performance, not the utilisation, so we cannot use it here.

Until a perfect solution is found, computing the CPU load from idle_time
when scx is enabled might be a better approach.

Co-developed-by: Xuewen Yan <xuewen.yan@xxxxxxxxxx>
Signed-off-by: Xuewen Yan <xuewen.yan@xxxxxxxxxx>
Signed-off-by: Di Shen <di.shen@xxxxxxxxxx>
---
Previous discussion:
https://lore.kernel.org/all/5a5d565b-33ac-4d5c-b0dd-1353324a6117@xxxxxxx/

---
drivers/thermal/cpufreq_cooling.c | 54 ++++++++++++++++++++-----------
1 file changed, 35 insertions(+), 19 deletions(-)

diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c
index d030dbeb2973..e8fa70a95d00 100644
--- a/drivers/thermal/cpufreq_cooling.c
+++ b/drivers/thermal/cpufreq_cooling.c
@@ -24,6 +24,9 @@
#include <linux/units.h>

#include "thermal_trace.h"
+#ifdef CONFIG_SCHED_CLASS_EXT
+#include "../../kernel/sched/sched.h"
+#endif

/*
* Cooling state <-> CPUFreq frequency
@@ -72,7 +75,7 @@ struct cpufreq_cooling_device {
struct em_perf_domain *em;
struct cpufreq_policy *policy;
struct thermal_cooling_device_ops cooling_ops;
-#ifndef CONFIG_SMP
+#if !defined(CONFIG_SMP) || defined(CONFIG_SCHED_CLASS_EXT)
struct time_in_idle *idle_time;
#endif
struct freq_qos_request qos_req;
@@ -147,23 +150,9 @@ static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
return freq;
}

-/**
- * get_load() - get load for a cpu
- * @cpufreq_cdev: struct cpufreq_cooling_device for the cpu
- * @cpu: cpu number
- *
- * Return: The average load of cpu @cpu in percentage since this
- * function was last called.
- */
-#ifdef CONFIG_SMP
-static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu)
-{
- unsigned long util = sched_cpu_util(cpu);
-
- return (util * 100) / arch_scale_cpu_capacity(cpu);
-}
-#else /* !CONFIG_SMP */
-static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu)
+#if !defined(CONFIG_SMP) || defined(CONFIG_SCHED_CLASS_EXT)
+static u32 get_load_from_idle_time(struct cpufreq_cooling_device *cpufreq_cdev,
+ int cpu)
{
u32 load;
u64 now, now_idle, delta_time, delta_idle;
@@ -183,8 +172,35 @@ static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu)

return load;
}
-#endif /* CONFIG_SMP */
+#endif /* !defined(CONFIG_SMP) || defined(CONFIG_SCHED_CLASS_EXT) */

+/**
+ * get_load() - get load for a cpu
+ * @cpufreq_cdev: struct cpufreq_cooling_device for the cpu
+ * @cpu: cpu number
+ *
+ * Return: The load of cpu @cpu in percentage: from get_load_from_idle_time()
+ * on !SMP or while scx_enabled(), else sched_cpu_util() scaled by capacity.
+ */
+#ifndef CONFIG_SMP
+static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev,
+ int cpu)
+{
+ return get_load_from_idle_time(cpufreq_cdev, cpu);
+}
+#else /* CONFIG_SMP */
+static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu)
+{
+ unsigned long util;
+
+#ifdef CONFIG_SCHED_CLASS_EXT
+ if (scx_enabled())
+ return get_load_from_idle_time(cpufreq_cdev, cpu);
+#endif
+ util = sched_cpu_util(cpu);
+ return (util * 100) / arch_scale_cpu_capacity(cpu);
+}
+#endif /* !CONFIG_SMP */
/**
* get_dynamic_power() - calculate the dynamic power
* @cpufreq_cdev: &cpufreq_cooling_device for this cdev
--
2.25.1