[PATCH v2 26/35] sched/fair: handle tick expiry under lazy preemption

From: Ankur Arora
Date: Mon May 27 2024 - 20:41:29 EST


The default policy for lazy scheduling is to reschedule at exit-to-user,
so do that for all but deadline tasks. For deadline tasks, once the task
is no longer leftmost, force it to be scheduled away immediately.

Always scheduling lazily, however, runs into the 'hog' problem -- the
target task might be running in the kernel and might not relinquish the
CPU on its own.

Handle that by upgrading the ignored tif_resched(RESCHED_LAZY) bit to
tif_resched(RESCHED_NOW) at the next tick.
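
As a rough illustration of the intended behaviour, here is a stand-alone
user-space model of the tick-time upgrade (a sketch, not kernel code: the
names translate(), resched() and the struct task fields are made up for
the example, and the RESCHED_FORCE branch is assumed from the enum in
sched.h rather than taken from the resched_opt_translate() hunk below):

#include <stdbool.h>
#include <stdio.h>

enum resched_opt { RESCHED_DEFAULT, RESCHED_FORCE, RESCHED_TICK };
enum resched_bit { RESCHED_LAZY, RESCHED_NOW };

struct task {
	bool lazy;	/* models tif_resched(RESCHED_LAZY) */
	bool now;	/* models tif_resched(RESCHED_NOW)  */
	bool is_idle;
};

/* Loosely mirrors the decision made in resched_opt_translate() below. */
static enum resched_bit translate(struct task *curr, enum resched_opt opt)
{
	if (opt == RESCHED_FORCE || curr->is_idle)
		return RESCHED_NOW;

	/* Tick with the lazy bit still pending: upgrade to NOW. */
	if (opt == RESCHED_TICK && curr->lazy)
		return RESCHED_NOW;

	return RESCHED_LAZY;
}

static void resched(struct task *curr, enum resched_opt opt)
{
	if (translate(curr, opt) == RESCHED_NOW)
		curr->now = true;
	else
		curr->lazy = true;
}

int main(void)
{
	struct task hog = { 0 };

	resched(&hog, RESCHED_TICK);	/* first tick: only the lazy bit  */
	printf("after tick 1: lazy=%d now=%d\n", hog.lazy, hog.now);
	resched(&hog, RESCHED_TICK);	/* second tick: forced reschedule */
	printf("after tick 2: lazy=%d now=%d\n", hog.lazy, hog.now);
	return 0;
}

The hog keeps running with only the lazy bit set after the first tick and
is forced out (RESCHED_NOW) on the second, which is the behaviour the
core.c hunk below implements.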

Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Juri Lelli <juri.lelli@xxxxxxxxxx>
Cc: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Originally-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Link: https://lore.kernel.org/lkml/87jzshhexi.ffs@tglx/
Signed-off-by: Ankur Arora <ankur.a.arora@xxxxxxxxxx>
---
 kernel/sched/core.c     | 8 ++++++++
 kernel/sched/deadline.c | 5 ++++-
 kernel/sched/fair.c     | 2 +-
 kernel/sched/rt.c       | 2 +-
 kernel/sched/sched.h    | 6 ++++++
 5 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e838328d93d1..2bc7f636267d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1051,6 +1051,14 @@ static resched_t resched_opt_translate(struct task_struct *curr,
 	if (is_idle_task(curr))
 		return RESCHED_NOW;
 
+	if (opt == RESCHED_TICK &&
+	    unlikely(__test_tsk_need_resched(curr, RESCHED_LAZY)))
+		/*
+		 * If the task hasn't switched away by the second tick,
+		 * force it away by upgrading to TIF_NEED_RESCHED.
+		 */
+		return RESCHED_NOW;
+
 	return RESCHED_LAZY;
 }

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index d24d6bfee293..cb0dd77508b1 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1378,8 +1378,11 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
 				enqueue_task_dl(rq, dl_task_of(dl_se), ENQUEUE_REPLENISH);
 		}
 
+		/*
+		 * We are not leftmost anymore. Reschedule straight away.
+		 */
 		if (!is_leftmost(dl_se, &rq->dl))
-			resched_curr(rq);
+			__resched_curr(rq, RESCHED_FORCE);
 	}
 
 	/*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index dd34709f294c..faa6afe0af0d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -12615,7 +12615,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 	}
 
 	if (resched)
-		resched_curr(rq);
+		resched_curr_tick(rq);
 
 	if (static_branch_unlikely(&sched_numa_balancing))
 		task_tick_numa(rq, curr);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index f0a6c9bb890b..4713783bbdef 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1023,7 +1023,7 @@ static void update_curr_rt(struct rq *rq)
 			rt_rq->rt_time += delta_exec;
 			exceeded = sched_rt_runtime_exceeded(rt_rq);
 			if (exceeded)
-				resched_curr(rq);
+				resched_curr_tick(rq);
 			raw_spin_unlock(&rt_rq->rt_runtime_lock);
 			if (exceeded)
 				do_start_rt_bandwidth(sched_rt_bandwidth(rt_rq));
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e5e4747fbef2..107c5fc2b7bb 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2467,6 +2467,7 @@ extern void reweight_task(struct task_struct *p, int prio);
 enum resched_opt {
 	RESCHED_DEFAULT,
 	RESCHED_FORCE,
+	RESCHED_TICK,
 };
 
 extern void __resched_curr(struct rq *rq, enum resched_opt opt);
@@ -2476,6 +2477,11 @@ static inline void resched_curr(struct rq *rq)
 	__resched_curr(rq, RESCHED_DEFAULT);
 }
 
+static inline void resched_curr_tick(struct rq *rq)
+{
+	__resched_curr(rq, RESCHED_TICK);
+}
+
 extern void resched_cpu(int cpu);
 
 extern struct rt_bandwidth def_rt_bandwidth;
--
2.31.1