Re: [PATCH 1/5] sched/fair: Drop redundant RCU read lock in NOHZ kick path

From: Vincent Guittot

Date: Mon May 11 2026 - 09:07:40 EST


On Sat, 9 May 2026 at 20:10, Andrea Righi <arighi@xxxxxxxxxx> wrote:
>
> nohz_balancer_kick() is reached from sched_balance_trigger(), which is
> called from sched_tick(). sched_tick() runs with IRQs disabled, so the
> additional rcu_read_lock/unlock() used around sched_domain accesses in
> this path is redundant. Rely on the existing IRQ-disabled context (and
> the rcu_dereference_all() checking) instead.
>
> The same applies to set_cpu_sd_state_idle(), called from the idle entry
> path with IRQs disabled, and to set_cpu_sd_state_busy(), reachable via
> nohz_balance_exit_idle() from two contexts: nohz_balancer_kick() (IRQs
> disabled, as above) and sched_cpu_deactivate() (the CPUHP_AP_ACTIVE
> teardown, which runs under cpus_write_lock(), so it cannot race with
> sched-domain rebuilds). In both cases the rcu_dereference_all()
> validation is sufficient.
>
> No functional change intended.
>
> Cc: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
> Cc: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
> Suggested-by: K Prateek Nayak <kprateek.nayak@xxxxxxx>
> Reviewed-by: K Prateek Nayak <kprateek.nayak@xxxxxxx>
> Signed-off-by: Andrea Righi <arighi@xxxxxxxxxx>

Reviewed-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>

> ---
> kernel/sched/fair.c | 38 +++++++++++---------------------------
> 1 file changed, 11 insertions(+), 27 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 3ebec186f9823..6b059ee80b631 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -12785,8 +12785,6 @@ static void nohz_balancer_kick(struct rq *rq)
> goto out;
> }
>
> - rcu_read_lock();
> -
> sd = rcu_dereference_all(rq->sd);
> if (sd) {
> /*
> @@ -12794,8 +12792,8 @@ static void nohz_balancer_kick(struct rq *rq)
> * capacity, kick the ILB to see if there's a better CPU to run on:
> */
> if (rq->cfs.h_nr_runnable >= 1 && check_cpu_capacity(rq, sd)) {
> - flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
> - goto unlock;
> + flags |= NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
> + goto out;
> }
> }
>
> @@ -12811,8 +12809,8 @@ static void nohz_balancer_kick(struct rq *rq)
> */
> for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
> if (sched_asym(sd, i, cpu)) {
> - flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
> - goto unlock;
> + flags |= NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
> + goto out;
> }
> }
> }
> @@ -12823,10 +12821,8 @@ static void nohz_balancer_kick(struct rq *rq)
> * When ASYM_CPUCAPACITY; see if there's a higher capacity CPU
> * to run the misfit task on.
> */
> - if (check_misfit_status(rq)) {
> - flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
> - goto unlock;
> - }
> + if (check_misfit_status(rq))
> + flags |= NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
>
> /*
> * For asymmetric systems, we do not want to nicely balance
> @@ -12835,7 +12831,7 @@ static void nohz_balancer_kick(struct rq *rq)
> *
> * Skip the LLC logic because it's not relevant in that case.
> */
> - goto unlock;
> + goto out;
> }
>
> sds = rcu_dereference_all(per_cpu(sd_llc_shared, cpu));
> @@ -12850,13 +12846,9 @@ static void nohz_balancer_kick(struct rq *rq)
> * like this LLC domain has tasks we could move.
> */
> nr_busy = atomic_read(&sds->nr_busy_cpus);
> - if (nr_busy > 1) {
> - flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
> - goto unlock;
> - }
> + if (nr_busy > 1)
> + flags |= NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
> }
> -unlock:
> - rcu_read_unlock();
> out:
> if (READ_ONCE(nohz.needs_update))
> flags |= NOHZ_NEXT_KICK;
> @@ -12868,17 +12860,13 @@ static void nohz_balancer_kick(struct rq *rq)
> static void set_cpu_sd_state_busy(int cpu)
> {
> struct sched_domain *sd;
> -
> - rcu_read_lock();
> sd = rcu_dereference_all(per_cpu(sd_llc, cpu));
>
> if (!sd || !sd->nohz_idle)
> - goto unlock;
> + return;
> sd->nohz_idle = 0;
>
> atomic_inc(&sd->shared->nr_busy_cpus);
> -unlock:
> - rcu_read_unlock();
> }
>
> void nohz_balance_exit_idle(struct rq *rq)
> @@ -12897,17 +12885,13 @@ void nohz_balance_exit_idle(struct rq *rq)
> static void set_cpu_sd_state_idle(int cpu)
> {
> struct sched_domain *sd;
> -
> - rcu_read_lock();
> sd = rcu_dereference_all(per_cpu(sd_llc, cpu));
>
> if (!sd || sd->nohz_idle)
> - goto unlock;
> + return;
> sd->nohz_idle = 1;
>
> atomic_dec(&sd->shared->nr_busy_cpus);
> -unlock:
> - rcu_read_unlock();
> }
>
> /*
> --
> 2.54.0
>