[PATCH 16/24] sched: Teach dequeue_task() about special task states

From: Peter Zijlstra
Date: Sat Jul 27 2024 - 07:04:20 EST


Since special task states must not suffer spurious wakeups, and the
proposed delayed dequeue can cause exactly these (under some boundary
conditions), propagate this knowledge into dequeue_task() such that it
can do the right thing.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
kernel/sched/core.c | 7 ++++++-
kernel/sched/sched.h | 3 ++-
2 files changed, 8 insertions(+), 2 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6521,11 +6521,16 @@ static void __sched notrace __schedule(u
if (signal_pending_state(prev_state, prev)) {
WRITE_ONCE(prev->__state, TASK_RUNNING);
} else {
+ int flags = DEQUEUE_NOCLOCK;
+
prev->sched_contributes_to_load =
(prev_state & TASK_UNINTERRUPTIBLE) &&
!(prev_state & TASK_NOLOAD) &&
!(prev_state & TASK_FROZEN);

+ if (unlikely(is_special_task_state(prev_state)))
+ flags |= DEQUEUE_SPECIAL;
+
/*
* __schedule() ttwu()
* prev_state = prev->state; if (p->on_rq && ...)
@@ -6537,7 +6542,7 @@ static void __sched notrace __schedule(u
*
* After this, schedule() must not care about p->state any more.
*/
- block_task(rq, prev, DEQUEUE_NOCLOCK);
+ block_task(rq, prev, flags);
}
switch_count = &prev->nvcsw;
}
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2254,10 +2254,11 @@ extern const u32 sched_prio_to_wmult[40
*
*/

-#define DEQUEUE_SLEEP 0x01
+#define DEQUEUE_SLEEP 0x01 /* Matches ENQUEUE_WAKEUP */
#define DEQUEUE_SAVE 0x02 /* Matches ENQUEUE_RESTORE */
#define DEQUEUE_MOVE 0x04 /* Matches ENQUEUE_MOVE */
#define DEQUEUE_NOCLOCK 0x08 /* Matches ENQUEUE_NOCLOCK */
+#define DEQUEUE_SPECIAL 0x10
#define DEQUEUE_MIGRATING 0x100 /* Matches ENQUEUE_MIGRATING */
#define DEQUEUE_DELAYED 0x200 /* Matches ENQUEUE_DELAYED */