[PATCH 1/6] sched/fair: Use guard(rcu) for sched_domain RCU sections
From: Andrea Righi
Date: Tue Apr 28 2026 - 01:17:49 EST
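Use the scoped guard(rcu)() helper to safely access sched_domain
pointers, replacing the open-coded rcu_read_lock()/rcu_read_unlock()
pairs and their goto-unlock exit paths.
In nohz_balancer_kick(), move the sched_domain checks into a new helper,
nohz_balancer_needs_kick(), so that the guard covers only those checks.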
No functional change intended; this is preparation for topology work,
where sched_domain lifetimes are easier to reason about with explicit,
scope-bounded RCU critical sections.
Suggested-by: K Prateek Nayak <kprateek.nayak@xxxxxxx>
Signed-off-by: Andrea Righi <arighi@xxxxxxxxxx>
---
kernel/sched/fair.c | 141 ++++++++++++++++++++++----------------------
1 file changed, 71 insertions(+), 70 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 69361c63353ad..fc0828150c780 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8083,6 +8083,8 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
*/
lockdep_assert_irqs_disabled();
+ guard(rcu)();
+
if (choose_idle_cpu(target, p) &&
asym_fits_cpu(task_util, util_min, util_max, target))
return target;
@@ -12701,55 +12703,16 @@ static void kick_ilb(unsigned int flags)
}
/*
- * Current decision point for kicking the idle load balancer in the presence
- * of idle CPUs in the system.
+ * Decide whether the ILB needs a stats and/or balance kick based on
+ * sched_domain state.
*/
-static void nohz_balancer_kick(struct rq *rq)
+static bool nohz_balancer_needs_kick(struct rq *rq)
{
- unsigned long now = jiffies;
struct sched_domain_shared *sds;
struct sched_domain *sd;
int nr_busy, i, cpu = rq->cpu;
- unsigned int flags = 0;
-
- if (unlikely(rq->idle_balance))
- return;
-
- /*
- * We may be recently in ticked or tickless idle mode. At the first
- * busy tick after returning from idle, we will update the busy stats.
- */
- nohz_balance_exit_idle(rq);
-
- if (READ_ONCE(nohz.has_blocked_load) &&
- time_after(now, READ_ONCE(nohz.next_blocked)))
- flags = NOHZ_STATS_KICK;
-
- /*
- * Most of the time system is not 100% busy. i.e nohz.nr_cpus > 0
- * Skip the read if time is not due.
- *
- * If none are in tickless mode, there maybe a narrow window
- * (28 jiffies, HZ=1000) where flags maybe set and kick_ilb called.
- * But idle load balancing is not done as find_new_ilb fails.
- * That's very rare. So read nohz.nr_cpus only if time is due.
- */
- if (time_before(now, nohz.next_balance))
- goto out;
- /*
- * None are in tickless mode and hence no need for NOHZ idle load
- * balancing
- */
- if (unlikely(cpumask_empty(nohz.idle_cpus_mask)))
- return;
-
- if (rq->nr_running >= 2) {
- flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
- goto out;
- }
-
- rcu_read_lock();
+ guard(rcu)();
sd = rcu_dereference_all(rq->sd);
if (sd) {
@@ -12757,10 +12720,8 @@ static void nohz_balancer_kick(struct rq *rq)
* If there's a runnable CFS task and the current CPU has reduced
* capacity, kick the ILB to see if there's a better CPU to run on:
*/
- if (rq->cfs.h_nr_runnable >= 1 && check_cpu_capacity(rq, sd)) {
- flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
- goto unlock;
- }
+ if (rq->cfs.h_nr_runnable >= 1 && check_cpu_capacity(rq, sd))
+ return true;
}
sd = rcu_dereference_all(per_cpu(sd_asym_packing, cpu));
@@ -12774,10 +12735,8 @@ static void nohz_balancer_kick(struct rq *rq)
* preferred CPU must be idle.
*/
for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
- if (sched_asym(sd, i, cpu)) {
- flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
- goto unlock;
- }
+ if (sched_asym(sd, i, cpu))
+ return true;
}
}
@@ -12787,10 +12746,8 @@ static void nohz_balancer_kick(struct rq *rq)
* When ASYM_CPUCAPACITY; see if there's a higher capacity CPU
* to run the misfit task on.
*/
- if (check_misfit_status(rq)) {
- flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
- goto unlock;
- }
+ if (check_misfit_status(rq))
+ return true;
/*
* For asymmetric systems, we do not want to nicely balance
@@ -12799,7 +12756,7 @@ static void nohz_balancer_kick(struct rq *rq)
*
* Skip the LLC logic because it's not relevant in that case.
*/
- goto unlock;
+ return false;
}
sds = rcu_dereference_all(per_cpu(sd_llc_shared, cpu));
@@ -12814,13 +12771,61 @@ static void nohz_balancer_kick(struct rq *rq)
* like this LLC domain has tasks we could move.
*/
nr_busy = atomic_read(&sds->nr_busy_cpus);
- if (nr_busy > 1) {
- flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
- goto unlock;
- }
+ if (nr_busy > 1)
+ return true;
}
-unlock:
- rcu_read_unlock();
+
+ return false;
+}
+
+/*
+ * Current decision point for kicking the idle load balancer in the presence
+ * of idle CPUs in the system.
+ */
+static void nohz_balancer_kick(struct rq *rq)
+{
+ unsigned long now = jiffies;
+ unsigned int flags = 0;
+
+ if (unlikely(rq->idle_balance))
+ return;
+
+ /*
+ * We may be recently in ticked or tickless idle mode. At the first
+ * busy tick after returning from idle, we will update the busy stats.
+ */
+ nohz_balance_exit_idle(rq);
+
+ if (READ_ONCE(nohz.has_blocked_load) &&
+ time_after(now, READ_ONCE(nohz.next_blocked)))
+ flags = NOHZ_STATS_KICK;
+
+ /*
+ * Most of the time system is not 100% busy. i.e nohz.nr_cpus > 0
+ * Skip the read if time is not due.
+ *
+ * If none are in tickless mode, there maybe a narrow window
+ * (28 jiffies, HZ=1000) where flags maybe set and kick_ilb called.
+ * But idle load balancing is not done as find_new_ilb fails.
+ * That's very rare. So read nohz.nr_cpus only if time is due.
+ */
+ if (time_before(now, nohz.next_balance))
+ goto out;
+
+ /*
+ * None are in tickless mode and hence no need for NOHZ idle load
+ * balancing
+ */
+ if (unlikely(cpumask_empty(nohz.idle_cpus_mask)))
+ return;
+
+ if (rq->nr_running >= 2) {
+ flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
+ goto out;
+ }
+
+ if (nohz_balancer_needs_kick(rq))
+ flags |= NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
out:
if (READ_ONCE(nohz.needs_update))
flags |= NOHZ_NEXT_KICK;
@@ -12833,16 +12838,14 @@ static void set_cpu_sd_state_busy(int cpu)
{
struct sched_domain *sd;
- rcu_read_lock();
+ guard(rcu)();
sd = rcu_dereference_all(per_cpu(sd_llc, cpu));
if (!sd || !sd->nohz_idle)
- goto unlock;
+ return;
sd->nohz_idle = 0;
atomic_inc(&sd->shared->nr_busy_cpus);
-unlock:
- rcu_read_unlock();
}
void nohz_balance_exit_idle(struct rq *rq)
@@ -12862,16 +12865,14 @@ static void set_cpu_sd_state_idle(int cpu)
{
struct sched_domain *sd;
- rcu_read_lock();
+ guard(rcu)();
sd = rcu_dereference_all(per_cpu(sd_llc, cpu));
if (!sd || sd->nohz_idle)
- goto unlock;
+ return;
sd->nohz_idle = 1;
atomic_dec(&sd->shared->nr_busy_cpus);
-unlock:
- rcu_read_unlock();
}
/*
--
2.54.0