[PATCH] fixup: handle long ticks

From: Qais Yousef
Date: Sat Jun 15 2024 - 20:30:41 EST


Signed-off-by: Qais Yousef <qyousef@xxxxxxxxxxx>
---
 include/linux/sched/cpufreq.h    |  1 +
 kernel/sched/cpufreq_schedutil.c | 64 +++++++++++++++++---------------
 kernel/sched/fair.c              | 13 ++++---
 3 files changed, 44 insertions(+), 34 deletions(-)
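
A small userspace sketch (not part of the patch; names and constants are
illustrative stand-ins) of the decision the new check in
sugov_should_update_freq() implements: an enqueue-triggered request is
dropped unless at least one base slice has elapsed since the last
frequency update, a forced request bypasses the driver's transition
delay, and everything else falls back to the usual rate limit. The real
code additionally handles limits_changed and work_in_progress, which are
omitted here.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_MSEC		1000000ULL
#define BASE_SLICE_NS		(3 * NSEC_PER_MSEC)	/* stand-in for sysctl_sched_base_slice */
#define FREQ_UPDATE_DELAY_NS	(10 * NSEC_PER_MSEC)	/* stand-in for freq_update_delay_ns */

/* Bit values mirror SCHED_CPUFREQ_FORCE_UPDATE / SCHED_CPUFREQ_TASK_ENQUEUED. */
#define FLAG_FORCE_UPDATE	(1U << 1)
#define FLAG_TASK_ENQUEUED	(1U << 2)

static bool should_update(uint64_t now, uint64_t last_update, unsigned int flags)
{
	uint64_t delta_ns = now - last_update;

	/* A newly enqueued fair task triggers at most one update per base slice. */
	if ((flags & FLAG_TASK_ENQUEUED) && delta_ns < BASE_SLICE_NS)
		return false;

	/* Forced updates ignore the driver's transition delay. */
	if (flags & FLAG_FORCE_UPDATE)
		return true;

	return delta_ns >= FREQ_UPDATE_DELAY_NS;
}

int main(void)
{
	/* 2ms after the last update, enqueue-triggered: within the base slice, rejected (0). */
	printf("%d\n", should_update(2 * NSEC_PER_MSEC, 0, FLAG_TASK_ENQUEUED));
	/* 12ms after the last update, enqueue-triggered: past both limits, allowed (1). */
	printf("%d\n", should_update(12 * NSEC_PER_MSEC, 0, FLAG_TASK_ENQUEUED));
	/* 5ms after the last update, forced: bypasses the 10ms transition delay (1). */
	printf("%d\n", should_update(5 * NSEC_PER_MSEC, 0, FLAG_FORCE_UPDATE));
	return 0;
}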

diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h
index 2d0a45aba16f..5409a9f79cc0 100644
--- a/include/linux/sched/cpufreq.h
+++ b/include/linux/sched/cpufreq.h
@@ -10,6 +10,7 @@

#define SCHED_CPUFREQ_IOWAIT (1U << 0)
#define SCHED_CPUFREQ_FORCE_UPDATE (1U << 1) /* ignore transition_delay_us */
+#define SCHED_CPUFREQ_TASK_ENQUEUED (1U << 2) /* new fair task was enqueued */

#ifdef CONFIG_CPU_FREQ
struct cpufreq_policy;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index e8b65b75e7f3..4cdaca0a984e 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -64,6 +64,27 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time,
{
s64 delta_ns;

+ delta_ns = time - sg_policy->last_freq_update_time;
+
+ /*
+ * We want to update cpufreq at context switch, but on systems with
+ * long TICK values this can happen a long time after the enqueue, and
+ * more tasks could pile up in the meantime, potentially leaving us
+ * running at an inadequate frequency for an extended period of time.
+ *
+ * This logic only applies when a new fair task was enqueued on the
+ * CPU. We'd still like to defer to context switch as much as possible,
+ * but to avoid the potential delays mentioned above, check whether the
+ * additional task warrants sending an update sooner.
+ *
+ * We want to ensure there's at least one update every
+ * sysctl_sched_base_slice.
+ */
+ if (likely(flags & SCHED_CPUFREQ_TASK_ENQUEUED)) {
+ if (delta_ns < sysctl_sched_base_slice)
+ return false;
+ }
+
/*
* Since cpufreq_update_util() is called with rq->lock held for
* the @target_cpu, our per-CPU data is fully serialized.
@@ -91,8 +112,6 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time,
if (unlikely(flags & SCHED_CPUFREQ_FORCE_UPDATE))
return true;

- delta_ns = time - sg_policy->last_freq_update_time;
-
return delta_ns >= sg_policy->freq_update_delay_ns;
}

@@ -257,6 +276,8 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT;
bool forced_update = flags & SCHED_CPUFREQ_FORCE_UPDATE;

+ sg_cpu->last_update = time;
+
/* Reset boost if the CPU appears to have been idle enough */
if (sg_cpu->iowait_boost && !forced_update &&
sugov_iowait_reset(sg_cpu, time, set_iowait_boost))
@@ -362,30 +383,17 @@ static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
#endif /* CONFIG_NO_HZ_COMMON */

-/*
- * Make sugov_should_update_freq() ignore the rate limit when DL
- * has increased the utilization.
- */
-static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
-{
- if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_min)
- sg_cpu->sg_policy->limits_changed = true;
-}
-
static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
u64 time, unsigned long max_cap,
unsigned int flags)
{
unsigned long boost;

- sugov_iowait_boost(sg_cpu, time, flags);
- sg_cpu->last_update = time;
-
- ignore_dl_rate_limit(sg_cpu);
-
if (!sugov_should_update_freq(sg_cpu->sg_policy, time, flags))
return false;

+ sugov_iowait_boost(sg_cpu, time, flags);
+
boost = sugov_iowait_apply(sg_cpu, time, max_cap, flags);
sugov_get_util(sg_cpu, boost);

@@ -510,22 +518,20 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)

raw_spin_lock(&sg_policy->update_lock);

- sugov_iowait_boost(sg_cpu, time, flags);
- sg_cpu->last_update = time;
+ if (!sugov_should_update_freq(sg_policy, time, flags))
+ goto unlock;

- ignore_dl_rate_limit(sg_cpu);
+ sugov_iowait_boost(sg_cpu, time, flags);

- if (sugov_should_update_freq(sg_policy, time, flags)) {
- next_f = sugov_next_freq_shared(sg_cpu, time, flags);
+ next_f = sugov_next_freq_shared(sg_cpu, time, flags);

- if (!sugov_update_next_freq(sg_policy, time, next_f, flags))
- goto unlock;
+ if (!sugov_update_next_freq(sg_policy, time, next_f, flags))
+ goto unlock;

- if (sg_policy->policy->fast_switch_enabled)
- cpufreq_driver_fast_switch(sg_policy->policy, next_f);
- else
- sugov_deferred_update(sg_policy);
- }
+ if (sg_policy->policy->fast_switch_enabled)
+ cpufreq_driver_fast_switch(sg_policy->policy, next_f);
+ else
+ sugov_deferred_update(sg_policy);
unlock:
raw_spin_unlock(&sg_policy->update_lock);
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8b87640f386b..3945aa938436 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8314,7 +8314,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
int cse_is_idle, pse_is_idle;

if (unlikely(se == pse))
- return;
+ goto nopreempt;

/*
* This is possible from callers such as attach_tasks(), in which we
@@ -8323,7 +8323,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
* next-buddy nomination below.
*/
if (unlikely(throttled_hierarchy(cfs_rq_of(pse))))
- return;
+ goto nopreempt;

if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK)) {
set_next_buddy(pse);
@@ -8340,7 +8340,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
* below.
*/
if (test_tsk_need_resched(curr))
- return;
+ goto nopreempt;

/* Idle tasks are by definition preempted by non-idle tasks. */
if (unlikely(task_has_idle_policy(curr)) &&
@@ -8352,7 +8352,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
* is driven by the tick):
*/
if (unlikely(p->policy != SCHED_NORMAL) || !sched_feat(WAKEUP_PREEMPTION))
- return;
+ goto nopreempt;

find_matching_se(&se, &pse);
WARN_ON_ONCE(!pse);
@@ -8367,7 +8367,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
if (cse_is_idle && !pse_is_idle)
goto preempt;
if (cse_is_idle != pse_is_idle)
- return;
+ goto nopreempt;

cfs_rq = cfs_rq_of(se);
update_curr(cfs_rq);
@@ -8378,6 +8378,9 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
if (pick_eevdf(cfs_rq) == pse)
goto preempt;

+nopreempt:
+ if (rq->cfs.decayed && rq->cfs.h_nr_running > 1)
+ cpufreq_update_util(rq, SCHED_CPUFREQ_TASK_ENQUEUED);
return;

preempt:
--
2.34.1