Re: sched_ext/lavd hard lockup in old call_rcu_tasks_generic needadjust path

From: Paul E. McKenney

Date: Tue Jun 16 2026 - 23:25:15 EST


On Tue, Jun 16, 2026 at 11:56:19AM +0100, Matt Fleming wrote:
> On Fri, Jun 12, 2026 at 07:00:31AM -0700, Paul E. McKenney wrote:
> >
> > Huh. Looks like I did not implement RCU Tasks Trace in terms of SRCU
> > any time too soon. But there is also RCU Tasks and RCU Tasks Rude.
> >
> > If we don't backport the SRCU patches, then the obvious alternative is
> > for call_rcu_tasks*() to defer to IRQ work when invoked with interrupts
> > disabled. Or is there a better way?
>
> What about something like this?

Not bad, actually.

But we are going to have to similarly bypass call_rcu_tasks_generic()
soon enough, though only when interrupts are disabled at the time
of the call. One complication is that the bypass would need to interact
with rcu_barrier_tasks_generic(), which would need to remove any
pending callbacks from bypassed call_rcu_tasks_generic() invocations
(as in llist_del_all()) and then enqueue those callbacks. A well-placed
raw_spin_trylock_rcu_node(rtpcp) should suffice for the serialization.
The trick is that rcu_barrier_tasks_generic() is not obligated to
wait on later calls to call_rcu_tasks_generic().

Is this something that you would be interested in digging into?

Thanx, Paul

> ----8<----
>
> From 6b2dc5002f3413f4f89eb1735259d065b7003a52 Mon Sep 17 00:00:00 2001
> From: Matt Fleming <mfleming@xxxxxxxxxxxxxx>
> Date: Mon, 15 Jun 2026 11:19:43 +0100
> Subject: [PATCH] rcu-tasks: Defer callback queue adjustment to irq_work
>
> call_rcu_tasks_generic() can run from BPF task-storage teardown while
> sched_ext still holds rq->lock. The RCU Tasks kthread can concurrently
> hold cbs_gbl_lock while printing under it, then wake a task on the same
> rq while the caller waits for cbs_gbl_lock.
>
> Queue the adjustment through irq_work instead. This keeps callback
> enqueueing synchronous while moving cbs_gbl_lock acquisition out of the
> caller context.
>
> Signed-off-by: Matt Fleming <mfleming@xxxxxxxxxxxxxx>
> ---
> kernel/rcu/tasks.h | 33 +++++++++++++++++++++++----------
> 1 file changed, 23 insertions(+), 10 deletions(-)
>
> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
> index 2dc044fd126e..92aead9fc200 100644
> --- a/kernel/rcu/tasks.h
> +++ b/kernel/rcu/tasks.h
> @@ -104,6 +104,7 @@ struct rcu_tasks {
> unsigned long n_ipis;
> unsigned long n_ipis_fails;
> struct task_struct *kthread_ptr;
> + struct irq_work cbs_adjust_irq_work;
> unsigned long lazy_jiffies;
> rcu_tasks_gp_func_t gp_func;
> pregp_func_t pregp_func;
> @@ -129,6 +130,7 @@ struct rcu_tasks {
> };
>
> static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp);
> +static void call_rcu_tasks_iw_adjust(struct irq_work *iwp);
>
> #define DEFINE_RCU_TASKS(rt_name, gp, call, n) \
> static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = { \
> @@ -144,6 +146,7 @@ static struct rcu_tasks rt_name = \
> .call_func = call, \
> .wait_state = TASK_UNINTERRUPTIBLE, \
> .rtpcpu = &rt_name ## __percpu, \
> + .cbs_adjust_irq_work = IRQ_WORK_INIT_HARD(call_rcu_tasks_iw_adjust), \
> .lazy_jiffies = DIV_ROUND_UP(HZ, 4), \
> .name = n, \
> .percpu_enqueue_shift = order_base_2(CONFIG_NR_CPUS), \
> @@ -342,6 +345,24 @@ static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp)
> rcuwait_wake_up(&rtp->cbs_wait);
> }
>
> +static void call_rcu_tasks_iw_adjust(struct irq_work *iwp)
> +{
> + unsigned long flags;
> + bool expanded = false;
> + struct rcu_tasks *rtp = container_of(iwp, struct rcu_tasks, cbs_adjust_irq_work);
> +
> + raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
> + if (rtp->percpu_enqueue_lim != rcu_task_cpu_ids) {
> + WRITE_ONCE(rtp->percpu_enqueue_shift, 0);
> + WRITE_ONCE(rtp->percpu_dequeue_lim, rcu_task_cpu_ids);
> + smp_store_release(&rtp->percpu_enqueue_lim, rcu_task_cpu_ids);
> + expanded = true;
> + }
> + raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
> + if (expanded)
> + pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name);
> +}
> +
> // Enqueue a callback for the specified flavor of Tasks RCU.
> static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
> struct rcu_tasks *rtp)
> @@ -389,16 +410,8 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
> rtpcp->urgent_gp = 3;
> rcu_segcblist_enqueue(&rtpcp->cblist, rhp);
> raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
> - if (unlikely(needadjust)) {
> - raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
> - if (rtp->percpu_enqueue_lim != rcu_task_cpu_ids) {
> - WRITE_ONCE(rtp->percpu_enqueue_shift, 0);
> - WRITE_ONCE(rtp->percpu_dequeue_lim, rcu_task_cpu_ids);
> - smp_store_release(&rtp->percpu_enqueue_lim, rcu_task_cpu_ids);
> - pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name);
> - }
> - raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
> - }
> + if (unlikely(needadjust))
> + irq_work_queue(&rtp->cbs_adjust_irq_work);
> rcu_read_unlock();
> /* We can't create the thread unless interrupts are enabled. */
> if (needwake && READ_ONCE(rtp->kthread_ptr))
> --
> 2.43.0
>