Re: [PATCH 1/2] sched: proxy-exec: Close race causing workqueue work being delayed

From: K Prateek Nayak

Date: Tue Apr 28 2026 - 11:38:38 EST


On 4/28/2026 6:45 PM, K Prateek Nayak wrote:
> I was trying something like the below, but somewhere I'm missing a
> clear_task_blocked_on() for PROXY_WAKING before going back into
> mutex_lock_common():

And this is the piece I seem to have been missing:

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8142fba59ad94..a8679b759398c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7046,6 +7046,9 @@ static void __sched notrace __schedule(int sched_mode)
switch_count = &prev->nvcsw;
}

+ if (!prev_state && task_is_blocked(prev))
+ clear_task_blocked_on(prev, NULL);
+
pick_again:
assert_balance_callbacks_empty(rq);
next = pick_next_task(rq, rq->donor, &rf);
---

With that, it survives test-ww_mutex and a sched-messaging run without
any splats.

>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 8ec3b6d7d718b..6ea74aecc5fbd 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -586,6 +586,7 @@ struct sched_entity {
> unsigned char sched_delayed;
> unsigned char rel_deadline;
> unsigned char custom_slice;
> + unsigned char sched_proxy;
> /* hole */
>
> u64 exec_start;
> @@ -2222,6 +2223,7 @@ static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *
> * clearing the relationship with a different lock.
> */
> WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m && p->blocked_on != PROXY_WAKING);
> + WRITE_ONCE(p->se.sched_proxy, 0);
> p->blocked_on = NULL;
> }
>
> @@ -2250,6 +2252,8 @@ static inline void __set_task_blocked_on_waking(struct task_struct *p, struct mu
> * the relationship with a different lock.
> */
> WARN_ON_ONCE(m && p->blocked_on != m && p->blocked_on != PROXY_WAKING);
> + /* Force the task down proxy_force_return() path. */
> + WRITE_ONCE(p->se.sched_proxy, 1);
> p->blocked_on = PROXY_WAKING;
> }
>
> diff --git a/init/init_task.c b/init/init_task.c
> index b5f48ebdc2b6e..8e8fc680fcd21 100644
> --- a/init/init_task.c
> +++ b/init/init_task.c
> @@ -118,6 +118,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
> },
> .se = {
> .group_node = LIST_HEAD_INIT(init_task.se.group_node),
> + .sched_proxy = 0,
> },
> .rt = {
> .run_list = LIST_HEAD_INIT(init_task.rt.run_list),
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 49cd5d2171613..8142fba59ad94 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -4395,6 +4395,7 @@ static void __sched_fork(u64 clone_flags, struct task_struct *p)
> p->se.nr_migrations = 0;
> p->se.vruntime = 0;
> p->se.vlag = 0;
> + p->se.sched_proxy = 0;
> INIT_LIST_HEAD(&p->se.group_node);
>
> /* A delayed task cannot be in clone(). */
> @@ -6535,8 +6536,13 @@ static bool try_to_block_task(struct rq *rq, struct task_struct *p,
> * blocked on a mutex, and we want to keep it on the runqueue
> * to be selectable for proxy-execution.
> */
> - if (!should_block)
> + if (!should_block) {
> + guard(raw_spinlock)(&p->blocked_lock);
> + /* Stable against race */
> + if (task_is_blocked(p))
> + WRITE_ONCE(p->se.sched_proxy, 1);
> return false;
> + }
>
> p->sched_contributes_to_load =
> (task_state & TASK_UNINTERRUPTIBLE) &&
> @@ -6765,11 +6771,15 @@ find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf)
> bool curr_in_chain = false;
> int this_cpu = cpu_of(rq);
> struct task_struct *p;
> - struct mutex *mutex;
> int owner_cpu;
>
> /* Follow blocked_on chain. */
> - for (p = donor; (mutex = p->blocked_on); p = owner) {
> + for (p = donor; READ_ONCE(p->se.sched_proxy); p = owner) {
> + struct mutex *mutex = p->blocked_on;
> +
> + if (!mutex)
> + return NULL;
> +
> /* if its PROXY_WAKING, do return migration or run if current */
> if (mutex == PROXY_WAKING) {
> if (task_current(rq, p)) {
> @@ -6787,7 +6797,7 @@ find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf)
> guard(raw_spinlock)(&p->blocked_lock);
>
> /* Check again that p is blocked with blocked_lock held */
> - if (mutex != __get_task_blocked_on(p)) {
> + if (!p->se.sched_proxy || mutex != __get_task_blocked_on(p)) {
> /*
> * Something changed in the blocked_on chain and
> * we don't know if only at this level. So, let's
> @@ -7044,7 +7054,7 @@ static void __sched notrace __schedule(int sched_mode)
> struct task_struct *prev_donor = rq->donor;
>
> rq_set_donor(rq, next);
> - if (unlikely(next->blocked_on)) {
> + if (unlikely(READ_ONCE(next->se.sched_proxy))) {
> next = find_proxy_task(rq, next, &rf);
> if (!next) {
> zap_balance_callbacks(rq);
> ---
>
>> next = find_proxy_task(rq, next, &rf);
>> if (!next) {
>> zap_balance_callbacks(rq);
>

--
Thanks and Regards,
Prateek