Re: [PATCH v5 0/2] sched/fair: Optimize some active balance logic

From: Peter Zijlstra

Date: Thu Jun 18 2026 - 06:56:51 EST

And since I've been staring at this code far too long, I accidentally
did the below cleanup on top.

---
Subject: sched/fair: Reflow sched_balance_rq()
From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Date: Thu Jun 18 10:51:49 CEST 2026

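Reflow to reduce indenting: handle the ld_moved case first and bail out
early when no active balance is needed. Add a raw_spin_rq_lock_irqsave
guard so the busiest rq lock is dropped automatically on the early exits.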

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
kernel/sched/fair.c | 136 ++++++++++++++++++++++++---------------------------
kernel/sched/sched.h | 19 ++++++-
2 files changed, 82 insertions(+), 73 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -13437,82 +13437,78 @@ static int sched_balance_rq(int this_cpu
}
}

- if (!ld_moved) {
- schedstat_inc(sd->lb_failed[idle]);
+ if (ld_moved) {
+ sd->nr_balance_failed = 0;
+ goto out_unbalanced;
+ }
+
+ schedstat_inc(sd->lb_failed[idle]);
+ /*
+ * Increment the failure counter only on periodic balance.
+ * We do not want newidle balance, which can be very
+ * frequent, pollute the failure counter causing
+ * excessive cache_hot migrations and active balances.
+ *
+ * Similarly for migration_misfit which is not related to
+ * load/util migration, don't pollute nr_balance_failed.
+ *
+ * The same for cache aware scheduling's allowance for
+ * load imbalance. If regular load balance does not
+ * migrate task due to LLC locality, it is expected
+ * behavior, so don't pollute nr_balance_failed.
+ * See can_migrate_task().
+ */
+ if (idle != CPU_NEWLY_IDLE &&
+ env.migration_type != migrate_misfit &&
+ !(env.flags & LBF_LLC_PINNED))
+ sd->nr_balance_failed++;
+
+ if (!need_active_balance(&env))
+ goto out_unbalanced;
+
+ scoped_guard (raw_spin_rq_lock_irqsave, busiest) {
/*
- * Increment the failure counter only on periodic balance.
- * We do not want newidle balance, which can be very
- * frequent, pollute the failure counter causing
- * excessive cache_hot migrations and active balances.
- *
- * Similarly for migration_misfit which is not related to
- * load/util migration, don't pollute nr_balance_failed.
- *
- * The same for cache aware scheduling's allowance for
- * load imbalance. If regular load balance does not
- * migrate task due to LLC locality, it is a expected
- * behavior and don't pollute nr_balance_failed.
- * See can_migrate_task().
+ * Don't kick the active_load_balance_cpu_stop,
+ * if the curr task on busiest CPU can't be
+ * moved to this_cpu:
*/
- if (idle != CPU_NEWLY_IDLE &&
- env.migration_type != migrate_misfit &&
- !(env.flags & LBF_LLC_PINNED))
- sd->nr_balance_failed++;
-
- if (need_active_balance(&env)) {
- unsigned long flags;
-
- raw_spin_rq_lock_irqsave(busiest, flags);
-
- /*
- * Don't kick the active_load_balance_cpu_stop,
- * if the curr task on busiest CPU can't be
- * moved to this_cpu:
- */
- if (!cpumask_test_cpu(this_cpu, busiest->curr->cpus_ptr)) {
- raw_spin_rq_unlock_irqrestore(busiest, flags);
- goto out_one_pinned;
- }
-
- /* Record that we found at least one task that could run on this_cpu */
- env.flags &= ~LBF_ALL_PINNED;
-
- /*
- * ->active_balance synchronizes accesses to
- * ->active_balance_work. Once set, it's cleared
- * only after active load balance is finished.
- */
- if (busiest->active_balance)
- goto no_active_balance;
-
- /*
- * @busiest dropped its rq_lock in the middle of
- * scheduling out its ->curr task (->on_rq := 0), no
- * need to forcefully punt it away with active balance.
- */
- if (!busiest->curr->on_rq)
- goto no_active_balance;
-
- busiest->active_balance = 1;
- busiest->push_cpu = this_cpu;
- active_balance = 1;
-no_active_balance:
- preempt_disable();
- raw_spin_rq_unlock_irqrestore(busiest, flags);
- if (active_balance) {
- stop_one_cpu_nowait(cpu_of(busiest),
- active_load_balance_cpu_stop, busiest,
- &busiest->active_balance_work);
- }
- preempt_enable();
- }
- } else {
- sd->nr_balance_failed = 0;
+ if (!cpumask_test_cpu(this_cpu, busiest->curr->cpus_ptr))
+ goto out_one_pinned;
+
+ /* Record that we found at least one task that could run on this_cpu */
+ env.flags &= ~LBF_ALL_PINNED;
+
+ /*
+ * ->active_balance synchronizes accesses to
+ * ->active_balance_work. Once set, it's cleared
+ * only after active load balance is finished.
+ */
+ if (busiest->active_balance)
+ goto out_unbalanced;
+
+ /*
+ * @busiest dropped its rq_lock in the middle of
+ * scheduling out its ->curr task (->on_rq := 0), no
+ * need to forcefully punt it away with active balance.
+ */
+ if (!busiest->curr->on_rq)
+ goto out_unbalanced;
+
+ busiest->active_balance = 1;
+ busiest->push_cpu = this_cpu;
+ active_balance = 1;
+ preempt_disable();
}
+ if (active_balance) {
+ stop_one_cpu_nowait(cpu_of(busiest),
+ active_load_balance_cpu_stop, busiest,
+ &busiest->active_balance_work);
+ }
+ preempt_enable();

+out_unbalanced:
/* We were unbalanced, so reset the balancing interval */
sd->balance_interval = sd->min_interval;
-
goto out;

out_balanced:
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2018,7 +2018,8 @@ DEFINE_LOCK_GUARD_1(rq_lock, struct rq,
rq_unlock(_T->lock, &_T->rf),
struct rq_flags rf)

-DECLARE_LOCK_GUARD_1_ATTRS(rq_lock, __acquires(__rq_lockp(_T)), __releases(__rq_lockp(*(struct rq **)_T)));
+DECLARE_LOCK_GUARD_1_ATTRS(rq_lock, __acquires(__rq_lockp(_T)),
+ __releases(__rq_lockp(*(struct rq **)_T)));
#define class_rq_lock_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(rq_lock, _T)

DEFINE_LOCK_GUARD_1(rq_lock_irq, struct rq,
@@ -2026,7 +2027,8 @@ DEFINE_LOCK_GUARD_1(rq_lock_irq, struct
rq_unlock_irq(_T->lock, &_T->rf),
struct rq_flags rf)

-DECLARE_LOCK_GUARD_1_ATTRS(rq_lock_irq, __acquires(__rq_lockp(_T)), __releases(__rq_lockp(*(struct rq **)_T)));
+DECLARE_LOCK_GUARD_1_ATTRS(rq_lock_irq, __acquires(__rq_lockp(_T)),
+ __releases(__rq_lockp(*(struct rq **)_T)));
#define class_rq_lock_irq_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(rq_lock_irq, _T)

DEFINE_LOCK_GUARD_1(rq_lock_irqsave, struct rq,
@@ -2034,9 +2036,20 @@ DEFINE_LOCK_GUARD_1(rq_lock_irqsave, str
rq_unlock_irqrestore(_T->lock, &_T->rf),
struct rq_flags rf)

-DECLARE_LOCK_GUARD_1_ATTRS(rq_lock_irqsave, __acquires(__rq_lockp(_T)), __releases(__rq_lockp(*(struct rq **)_T)));
+DECLARE_LOCK_GUARD_1_ATTRS(rq_lock_irqsave, __acquires(__rq_lockp(_T)),
+ __releases(__rq_lockp(*(struct rq **)_T)));
#define class_rq_lock_irqsave_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(rq_lock_irqsave, _T)

+DEFINE_LOCK_GUARD_1(raw_spin_rq_lock_irqsave, struct rq,
+ raw_spin_rq_lock_irqsave(_T->lock, _T->flags),
+ raw_spin_rq_unlock_irqrestore(_T->lock, _T->flags),
+ unsigned long flags)
+
+DECLARE_LOCK_GUARD_1_ATTRS(raw_spin_rq_lock_irqsave, __acquires(__rq_lockp(_T)),
+ __releases(__rq_lockp(*(struct rq **)_T)));
+#define class_raw_spin_rq_lock_irqsave_constructor(_T) \
+ WITH_LOCK_GUARD_1_ATTRS(raw_spin_rq_lock_irqsave, _T)
+
#define this_rq_lock_irq(...) __acquire_ret(_this_rq_lock_irq(__VA_ARGS__), __rq_lockp(__ret))
static inline struct rq *_this_rq_lock_irq(struct rq_flags *rf) __acquires_ret
{