Re: [RFC PATCH v15 7/7] sched: Start blocked_on chain processing in find_proxy_task()

From: Peter Zijlstra
Date: Mon Mar 17 2025 - 12:55:01 EST


On Wed, Mar 12, 2025 at 03:11:37PM -0700, John Stultz wrote:
> @@ -6668,47 +6676,138 @@ static bool proxy_deactivate(struct rq *rq, struct task_struct *donor)
> }
>
> /*
> + * Find runnable lock owner to proxy for mutex blocked donor
> + *
> + * Follow the blocked-on relation:
> + * task->blocked_on -> mutex->owner -> task...
> + *
> + * Lock order:
> + *
> + * p->pi_lock
> + * rq->lock
> + * mutex->wait_lock
> + *
> + * Returns the task that is going to be used as execution context (the one
> + * that is actually going to be run on cpu_of(rq)).
> */
> static struct task_struct *
> find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf)
> {
> + struct task_struct *owner = NULL;
> + struct task_struct *ret = NULL;
> + int this_cpu = cpu_of(rq);
> + struct task_struct *p;
> struct mutex *mutex;
>
> + /* Follow blocked_on chain. */
> + for (p = donor; task_is_blocked(p); p = owner) {
> + mutex = p->blocked_on;
> + /* Something changed in the chain, so pick again */
> + if (!mutex)
> + return NULL;
> /*
> + * By taking mutex->wait_lock we hold off concurrent mutex_unlock()
> + * and ensure @owner sticks around.
> */
> + raw_spin_lock(&mutex->wait_lock);

This comment is only true if you kill __mutex_unlock_fast(), which I
don't think you did in the previous patches.
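
For reference, the unlock fast path looks roughly like this (paraphrased
from kernel/locking/mutex.c): it drops ownership with a bare
release-cmpxchg on ->owner and never takes ->wait_lock, so holding
wait_lock alone does not hold off a concurrent unlock or pin @owner:

	static __always_inline bool __mutex_unlock_fast(struct mutex *lock)
	{
		unsigned long curr = (unsigned long)current;

		/* Releases ownership without ever touching ->wait_lock. */
		return atomic_long_try_cmpxchg_release(&lock->owner, &curr, 0UL);
	}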

> +
> + /* Check again that p is blocked with wait_lock held */
> + if (mutex != __get_task_blocked_on(p)) {
> + /*
> + * Something changed in the blocked_on chain and
> + * we don't know if only at this level. So, let's
> + * just bail out completely and let __schedule
> + * figure things out (pick_again loop).
> + */
> + goto out;
> + }
> +
> + owner = __mutex_owner(mutex);
> + if (!owner) {
> + __clear_task_blocked_on(p, mutex);
> + ret = p;
> + goto out;
> + }
> +
> + if (task_cpu(owner) != this_cpu) {
> + /* XXX Don't handle migrations yet */
> + if (!proxy_deactivate(rq, donor))
> + goto deactivate_failed;
> + goto out;
> + }
> +
> + if (task_on_rq_migrating(owner)) {
> + /*
> + * One of the chain of mutex owners is currently migrating to this
> + * CPU, but has not yet been enqueued because we are holding the
> + * rq lock. As a simple solution, just schedule rq->idle to give
> + * the migration a chance to complete. Much like the migrate_task
> + * case we should end up back in find_proxy_task(), this time
> + * hopefully with all relevant tasks already enqueued.
> + */
> + raw_spin_unlock(&mutex->wait_lock);
> + return proxy_resched_idle(rq);
> + }
> +
> + if (!owner->on_rq) {
> + /* XXX Don't handle blocked owners yet */
> + if (!proxy_deactivate(rq, donor))
> + goto deactivate_failed;
> + goto out;
> + }
> +
> + if (owner->se.sched_delayed) {
> + /* XXX Don't handle delayed dequeue yet */
> + if (!proxy_deactivate(rq, donor))
> + goto deactivate_failed;
> + goto out;
> + }
> +
> + if (owner == p) {
> + /*
> + * It's possible we interleave with mutex_unlock like:
> + *
> + * lock(&rq->lock);
> + * find_proxy_task()
> + * mutex_unlock()
> + * lock(&wait_lock);
> + * donor(owner) = current->blocked_donor;
> + * unlock(&wait_lock);
> + *
> + * wake_up_q();
> + * ...
> + * ttwu_runnable()
> + * __task_rq_lock()
> + * lock(&wait_lock);
> + * owner == p
> + *
> + * Which leaves us to finish the ttwu_runnable() and make it go.
> + *
> + * So schedule rq->idle so that ttwu_runnable can get the rq lock
> + * and mark owner as running.
> + */
> + raw_spin_unlock(&mutex->wait_lock);
> + return proxy_resched_idle(rq);
> + }
>
> /*
> + * OK, now we're absolutely sure @owner is on this
> + * rq, therefore holding @rq->lock is sufficient to
> + * guarantee its existence, as per ttwu_remote().
> */
> raw_spin_unlock(&mutex->wait_lock);
> }
> +
> + WARN_ON_ONCE(owner && !owner->on_rq);
> + return owner;
> +
> +deactivate_failed:
> + /*
> + * XXX: For now, if deactivation failed, set donor
> + * as unblocked, as we aren't doing proxy-migrations
> + * yet (more logic will be needed then).
> + */
> + donor->blocked_on = NULL; /* XXX not following locking rules :( */
> out:
> raw_spin_unlock(&mutex->wait_lock);
> return NULL; /* do pick_next_task again */


Also, something like the below might be a cleanup -- I didn't check your
full series.

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6653,7 +6653,7 @@ proxy_resched_idle(struct rq *rq)
return rq->idle;
}

-static bool proxy_deactivate(struct rq *rq, struct task_struct *donor)
+static bool __proxy_deactivate(struct rq *rq, struct task_struct *donor)
{
unsigned long state = READ_ONCE(donor->__state);

@@ -6673,6 +6673,19 @@ static bool proxy_deactivate(struct rq *
return try_to_block_task(rq, donor, state, true);
}

+static struct task_struct *proxy_deactivate(struct rq *rq, struct task_struct *donor)
+{
+ if (!__proxy_deactivate(rq, donor)) {
+ /*
+ * XXX: For now, if deactivation failed, set donor
+ * as unblocked, as we aren't doing proxy-migrations
+ * yet (more logic will be needed then).
+ */
+ donor->blocked_on = NULL;
+ }
+ return NULL;
+}
+
/*
* Find runnable lock owner to proxy for mutex blocked donor
*
@@ -6691,23 +6704,17 @@ static bool proxy_deactivate(struct rq *
static struct task_struct *
find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf)
{
- struct task_struct *owner = NULL;
- struct task_struct *ret = NULL;
+ struct task_struct *owner, *p;
int this_cpu = cpu_of(rq);
- struct task_struct *p;
struct mutex *mutex;

/* Follow blocked_on chain. */
- for (p = donor; task_is_blocked(p); p = owner) {
- mutex = p->blocked_on;
- /* Something changed in the chain, so pick again */
- if (!mutex)
- return NULL;
+ for (p = donor; (mutex = p->blocked_on); p = owner) {
/*
* By taking mutex->wait_lock we hold off concurrent mutex_unlock()
* and ensure @owner sticks around.
*/
- raw_spin_lock(&mutex->wait_lock);
+ guard(raw_spinlock)(&mutex->wait_lock);

/* Check again that p is blocked with wait_lock held */
if (mutex != __get_task_blocked_on(p)) {
@@ -6717,22 +6724,17 @@ find_proxy_task(struct rq *rq, struct ta
* just bail out completely and let __schedule
* figure things out (pick_again loop).
*/
- goto out;
+ return NULL;
}

owner = __mutex_owner(mutex);
if (!owner) {
__clear_task_blocked_on(p, mutex);
- ret = p;
- goto out;
+ return p;
}

- if (task_cpu(owner) != this_cpu) {
- /* XXX Don't handle migrations yet */
- if (!proxy_deactivate(rq, donor))
- goto deactivate_failed;
- goto out;
- }
+ if (task_cpu(owner) != this_cpu)
+ return proxy_deactivate(rq, donor);

if (task_on_rq_migrating(owner)) {
/*
@@ -6743,22 +6745,17 @@ find_proxy_task(struct rq *rq, struct ta
* case we should end up back in find_proxy_task(), this time
* hopefully with all relevant tasks already enqueued.
*/
- raw_spin_unlock(&mutex->wait_lock);
- return proxy_resched_idle(rq);
+ goto ret_idle;
}

if (!owner->on_rq) {
/* XXX Don't handle blocked owners yet */
- if (!proxy_deactivate(rq, donor))
- goto deactivate_failed;
- goto out;
+ return proxy_deactivate(rq, donor);
}

if (owner->se.sched_delayed) {
/* XXX Don't handle delayed dequeue yet */
- if (!proxy_deactivate(rq, donor))
- goto deactivate_failed;
- goto out;
+ return proxy_deactivate(rq, donor);
}

if (owner == p) {
@@ -6784,8 +6781,7 @@ find_proxy_task(struct rq *rq, struct ta
* So schedule rq->idle so that ttwu_runnable can get the rq lock
* and mark owner as running.
*/
- raw_spin_unlock(&mutex->wait_lock);
- return proxy_resched_idle(rq);
+ goto ret_idle;
}

/*
@@ -6793,22 +6789,13 @@ find_proxy_task(struct rq *rq, struct ta
* rq, therefore holding @rq->lock is sufficient to
* guarantee its existence, as per ttwu_remote().
*/
- raw_spin_unlock(&mutex->wait_lock);
}

WARN_ON_ONCE(owner && !owner->on_rq);
return owner;

-deactivate_failed:
- /*
- * XXX: For now, if deactivation failed, set donor
- * as unblocked, as we aren't doing proxy-migrations
- * yet (more logic will be needed then).
- */
- donor->blocked_on = NULL; /* XXX not following locking rules :( */
-out:
- raw_spin_unlock(&mutex->wait_lock);
- return NULL; /* do pick_next_task again */
+ret_idle:
+ return proxy_resched_idle(rq);
}
#else /* SCHED_PROXY_EXEC */
static struct task_struct *
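
What lets all the explicit raw_spin_unlock() calls and the
out:/deactivate_failed: labels disappear above is the scope-based
cleanup machinery from include/linux/cleanup.h: guard(raw_spinlock)
takes the lock and arranges for raw_spin_unlock() to run automatically
on every exit from the enclosing scope, including the early returns and
the end of each loop iteration. A minimal userspace sketch of the idea
(toy names only, not the actual kernel macro expansion):

	#include <stdio.h>

	/* Toy stand-ins for raw_spinlock_t and its ops. */
	typedef struct { int locked; } raw_spinlock_t;

	static void raw_spin_lock(raw_spinlock_t *l)   { l->locked = 1; }
	static void raw_spin_unlock(raw_spinlock_t *l) { l->locked = 0; }

	typedef struct { raw_spinlock_t *lock; } guard_t;

	static guard_t guard_ctor(raw_spinlock_t *l)
	{
		guard_t g = { .lock = l };
		raw_spin_lock(g.lock);
		return g;
	}

	static void guard_dtor(guard_t *g)
	{
		raw_spin_unlock(g->lock);	/* runs on every scope exit */
	}

	/* guard(raw_spinlock)(l) expands to something of this shape: */
	#define guard_raw_spinlock(l) \
		guard_t __g __attribute__((cleanup(guard_dtor))) = guard_ctor(l)

	static raw_spinlock_t wait_lock;

	static int demo(int bail)
	{
		guard_raw_spinlock(&wait_lock);

		if (bail)
			return -1;	/* lock dropped automatically here */

		return 0;		/* ... and here */
	}

	int main(void)
	{
		demo(1);
		/* prints 0: the early return released the lock */
		printf("locked after early return: %d\n", wait_lock.locked);
		return 0;
	}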