[PATCH v3 5/5] sched/pelt: Remove shift of thermal clock

From: Vincent Guittot
Date: Mon Jan 08 2024 - 08:50:42 EST


The optional shift of the clock used by the thermal/hw load avg was
introduced to handle the case where the signal was not always a
high-frequency hw signal. Now that cpufreq provides a signal for firmware
and SW pressure, we can remove this exception and always keep this PELT
signal aligned with the other signals.
Mark the sched_thermal_decay_shift boot parameter as deprecated.

Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
---
.../admin-guide/kernel-parameters.txt | 1 +
kernel/sched/core.c | 2 +-
kernel/sched/fair.c | 10 ++--------
kernel/sched/sched.h | 18 ------------------
4 files changed, 4 insertions(+), 27 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 65731b060e3f..2ee15522b15d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5722,6 +5722,7 @@
but is useful for debugging and performance tuning.

sched_thermal_decay_shift=
+ [Deprecated]
[KNL, SMP] Set a decay shift for scheduler thermal
pressure signal. Thermal pressure signal follows the
default decay period of other scheduler pelt
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a6f084bdf1c5..c68e47bfd5ae 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5670,7 +5670,7 @@ void scheduler_tick(void)

update_rq_clock(rq);
hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
- update_hw_load_avg(rq_clock_hw(rq), rq, hw_pressure);
+ update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure);
curr->sched_class->task_tick(rq, curr, 0);
if (sched_feat(LATENCY_WARN))
resched_latency = cpu_resched_latency(rq);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f1c3d600d6d6..d5ba6cdb141c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -78,15 +78,9 @@ static unsigned int normalized_sysctl_sched_base_slice = 750000ULL;

const_debug unsigned int sysctl_sched_migration_cost = 500000UL;

-int sched_hw_decay_shift;
static int __init setup_sched_thermal_decay_shift(char *str)
{
- int _shift = 0;
-
- if (kstrtoint(str, 0, &_shift))
- pr_warn("Unable to set scheduler thermal pressure decay shift parameter\n");
-
- sched_hw_decay_shift = clamp(_shift, 0, 10);
+ pr_warn("Ignoring the deprecated sched_thermal_decay_shift= option\n");
return 1;
}
__setup("sched_thermal_decay_shift=", setup_sched_thermal_decay_shift);
@@ -9247,7 +9241,7 @@ static bool __update_blocked_others(struct rq *rq, bool *done)

decayed = update_rt_rq_load_avg(now, rq, curr_class == &rt_sched_class) |
update_dl_rq_load_avg(now, rq, curr_class == &dl_sched_class) |
- update_hw_load_avg(rq_clock_hw(rq), rq, hw_pressure) |
+ update_hw_load_avg(now, rq, hw_pressure) |
update_irq_load_avg(rq, 0);

if (others_have_blocked(rq))
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 677d24202eec..6fc6718a1060 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1520,24 +1520,6 @@ static inline u64 rq_clock_task(struct rq *rq)
return rq->clock_task;
}

-/**
- * By default the decay is the default pelt decay period.
- * The decay shift can change the decay period in
- * multiples of 32.
- * Decay shift Decay period(ms)
- * 0 32
- * 1 64
- * 2 128
- * 3 256
- * 4 512
- */
-extern int sched_hw_decay_shift;
-
-static inline u64 rq_clock_hw(struct rq *rq)
-{
- return rq_clock_task(rq) >> sched_hw_decay_shift;
-}
-
static inline void rq_clock_skip_update(struct rq *rq)
{
lockdep_assert_rq_held(rq);
--
2.34.1