Re: [PATCH v7 21/23] sched: Add find_exec_ctx helper

From: Metin Kaya
Date: Fri Dec 22 2023 - 06:59:09 EST


On 20/12/2023 12:18 am, John Stultz wrote:
From: Connor O'Brien <connoro@xxxxxxxxxx>

Add a helper to find the runnable owner down a chain of blocked waiters

This patch was broken out from a larger chain migration
patch originally by Connor O'Brien.

Cc: Joel Fernandes <joelaf@xxxxxxxxxx>
Cc: Qais Yousef <qyousef@xxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Juri Lelli <juri.lelli@xxxxxxxxxx>
Cc: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Cc: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
Cc: Valentin Schneider <vschneid@xxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Ben Segall <bsegall@xxxxxxxxxx>
Cc: Zimuzo Ezeozue <zezeozue@xxxxxxxxxx>
Cc: Youssef Esmat <youssefesmat@xxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Daniel Bristot de Oliveira <bristot@xxxxxxxxxx>
Cc: Will Deacon <will@xxxxxxxxxx>
Cc: Waiman Long <longman@xxxxxxxxxx>
Cc: Boqun Feng <boqun.feng@xxxxxxxxx>
Cc: "Paul E. McKenney" <paulmck@xxxxxxxxxx>
Cc: Metin Kaya <Metin.Kaya@xxxxxxx>
Cc: Xuewen Yan <xuewen.yan94@xxxxxxxxx>
Cc: K Prateek Nayak <kprateek.nayak@xxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: kernel-team@xxxxxxxxxxx
Signed-off-by: Connor O'Brien <connoro@xxxxxxxxxx>
[jstultz: split out from larger chain migration patch]
Signed-off-by: John Stultz <jstultz@xxxxxxxxxx>
---
kernel/sched/core.c | 42 +++++++++++++++++++++++++++++++++++++++++
kernel/sched/cpupri.c | 11 ++++++++---
kernel/sched/deadline.c | 15 +++++++++++++--
kernel/sched/rt.c | 9 ++++++++-
kernel/sched/sched.h | 10 ++++++++++
5 files changed, 81 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0c212dcd4b7a..77a79d5f829a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3896,6 +3896,48 @@ static void activate_blocked_entities(struct rq *target_rq,
}
raw_spin_unlock_irqrestore(&owner->blocked_lock, flags);
}
+
+static inline bool task_queued_on_rq(struct rq *rq, struct task_struct *task)
+{
+ if (!task_on_rq_queued(task))
+ return false;
+ smp_rmb();
+ if (task_rq(task) != rq)
+ return false;
+ smp_rmb();
+ if (!task_on_rq_queued(task))
+ return false;

* Super-nit: we may want to have empty lines between `if` blocks and before/after `smp_rmb()` calls.

* I did not understand why we call `task_on_rq_queued(task)` twice. Should we have an explanatory comment before the function definition?

+ return true;
+}
+
+/*
+ * Returns the unblocked task at the end of the blocked chain starting with p
+ * if that chain is composed entirely of tasks enqueued on rq, or NULL otherwise.
+ */
+struct task_struct *find_exec_ctx(struct rq *rq, struct task_struct *p)
+{
+ struct task_struct *exec_ctx, *owner;
+ struct mutex *mutex;
+
+ if (!sched_proxy_exec())
+ return p;
+
+ lockdep_assert_rq_held(rq);
+
+ for (exec_ctx = p; task_is_blocked(exec_ctx) && !task_on_cpu(rq, exec_ctx);
+ exec_ctx = owner) {
+ mutex = exec_ctx->blocked_on;
+ owner = __mutex_owner(mutex);
+ if (owner == exec_ctx)
+ break;
+
+ if (!task_queued_on_rq(rq, owner) || task_current_selected(rq, owner)) {
+ exec_ctx = NULL;
+ break;
+ }
+ }
+ return exec_ctx;
+}
#else /* !CONFIG_SCHED_PROXY_EXEC */
static inline void do_activate_task(struct rq *rq, struct task_struct *p,
int en_flags)
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 15e947a3ded7..53be78afdd07 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -96,12 +96,17 @@ static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p,
if (skip)
return 0;
- if (cpumask_any_and(&p->cpus_mask, vec->mask) >= nr_cpu_ids)
+ if ((p && cpumask_any_and(&p->cpus_mask, vec->mask) >= nr_cpu_ids) ||
+ (!p && cpumask_any(vec->mask) >= nr_cpu_ids))
return 0;
if (lowest_mask) {
- cpumask_and(lowest_mask, &p->cpus_mask, vec->mask);
- cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
+ if (p) {
+ cpumask_and(lowest_mask, &p->cpus_mask, vec->mask);
+ cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
+ } else {
+ cpumask_copy(lowest_mask, vec->mask);
+ }

I think the changes in `cpupri.c` should be part of the previous patch (`sched: Push execution and scheduler context split into deadline and rt paths`), since they don't seem to be related to find_exec_ctx().

/*
* We have to ensure that we have at least one bit
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 999bd17f11c4..21e56ac58e32 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1866,6 +1866,8 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
{
+ struct task_struct *exec_ctx;
+
/*
* Current can't be migrated, useless to reschedule,
* let's hope p can move out.
@@ -1874,12 +1876,16 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
!cpudl_find(&rq->rd->cpudl, rq_selected(rq), rq->curr, NULL))
return;
+ exec_ctx = find_exec_ctx(rq, p);
+ if (task_current(rq, exec_ctx))
+ return;
+
/*
* p is migratable, so let's not schedule it and
* see if it is pushed or pulled somewhere else.
*/
if (p->nr_cpus_allowed != 1 &&
- cpudl_find(&rq->rd->cpudl, p, p, NULL))
+ cpudl_find(&rq->rd->cpudl, p, exec_ctx, NULL))
return;
resched_curr(rq);
@@ -2169,12 +2175,17 @@ static int find_later_rq(struct task_struct *sched_ctx, struct task_struct *exec
/* Locks the rq it finds */
static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
{
+ struct task_struct *exec_ctx;
struct rq *later_rq = NULL;
int tries;
int cpu;
for (tries = 0; tries < DL_MAX_TRIES; tries++) {
- cpu = find_later_rq(task, task);
+ exec_ctx = find_exec_ctx(rq, task);
+ if (!exec_ctx)
+ break;
+
+ cpu = find_later_rq(task, exec_ctx);

Super-nit: this empty line should be removed to keep logically connected lines closer.
The same applies to find_lock_lowest_rq().

if ((cpu == -1) || (cpu == rq->cpu))
break;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 6371b0fca4ad..f8134d062fa3 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1640,6 +1640,11 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
!cpupri_find(&rq->rd->cpupri, rq_selected(rq), rq->curr, NULL))
return;
+ /* No reason to preempt since rq->curr wouldn't change anyway */
+ exec_ctx = find_exec_ctx(rq, p);
+ if (task_current(rq, exec_ctx))
+ return;
+
/*
* p is migratable, so let's not schedule it and
* see if it is pushed or pulled somewhere else.
@@ -1933,12 +1938,14 @@ static int find_lowest_rq(struct task_struct *sched_ctx, struct task_struct *exe
/* Will lock the rq it finds */
static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
{
+ struct task_struct *exec_ctx;
struct rq *lowest_rq = NULL;
int tries;
int cpu;
for (tries = 0; tries < RT_MAX_TRIES; tries++) {
- cpu = find_lowest_rq(task, task);
+ exec_ctx = find_exec_ctx(rq, task);
+ cpu = find_lowest_rq(task, exec_ctx);
if ((cpu == -1) || (cpu == rq->cpu))
break;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ef3d327e267c..6cd473224cfe 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3564,6 +3564,16 @@ int task_is_pushable(struct rq *rq, struct task_struct *p, int cpu)
return 0;
}
+
+#ifdef CONFIG_SCHED_PROXY_EXEC
+struct task_struct *find_exec_ctx(struct rq *rq, struct task_struct *p);
+#else /* !CONFIG_SCHED_PROXY_EXEC */
+static inline
+struct task_struct *find_exec_ctx(struct rq *rq, struct task_struct *p)
+{
+ return p;
+}
+#endif /* CONFIG_SCHED_PROXY_EXEC */
#endif

Nit: the `#ifdef CONFIG_SMP` block becomes bigger after this hunk. We should append `/* CONFIG_SMP */` to the `#endif` line just above, IMHO.

#endif /* _KERNEL_SCHED_SCHED_H */