[tip: sched/core] sched/fair: Add cgroup_mode: up
From: tip-bot2 for Peter Zijlstra
Date: Tue Jun 30 2026 - 05:08:20 EST
The following commit has been merged into the sched/core branch of tip:
Commit-ID: 80ad6d3338ebe136b0c43c5caa962304a8454e3e
Gitweb: https://git.kernel.org/tip/80ad6d3338ebe136b0c43c5caa962304a8454e3e
Author: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate: Thu, 12 Mar 2026 14:59:06 +01:00
Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Tue, 30 Jun 2026 10:56:52 +02:00
sched/fair: Add cgroup_mode: up
Instead of calculating the proportional fraction of the group weight for each
CPU, just give each CPU the full measure, ignoring these pesky SMP problems.
This makes the SMP cgroup fraction (F_g_n) equal to 1, and ensures a single
task in a cgroup competes on equal footing with a task in a level above.
However, as already explored, this is not a very good policy because it gets
the SMP weight distribution wrong. Included for completeness.
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Link: https://patch.msgid.link/20260605124051.450303977%40infradead.org
---
kernel/sched/debug.c | 5 ++++-
kernel/sched/fair.c | 31 +++++++++++++++++++++++++++++--
kernel/sched/sched.h | 1 +
3 files changed, 34 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 507e486..53b9e82 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -634,9 +634,11 @@ static void debugfs_fair_server_init(void)
}
#ifdef CONFIG_FAIR_GROUP_SCHED
-static int cgroup_mode = 0;
+static int cgroup_mode = 1;
+/* See __sched_cgroup_mode_update(). */
static const char *cgroup_mode_str[] = {
+ "up",
"smp",
};
@@ -666,6 +668,7 @@ static ssize_t sched_cgroup_write(struct file *filp, const char __user *ubuf,
if (mode < 0)
return mode;
+ __sched_cgroup_mode_update(mode);
WRITE_ONCE(cgroup_mode, mode);
*ppos += cnt;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e89edbd..3f8a280 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -38,6 +38,7 @@
#include <linux/sched/isolation.h>
#include <linux/sched/nohz.h>
#include <linux/sched/prio.h>
+#include <linux/static_call.h>
#include <linux/cpuidle.h>
#include <linux/interrupt.h>
@@ -4800,7 +4801,7 @@ static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
*
* hence icky!
*/
-static long calc_group_shares(struct cfs_rq *cfs_rq)
+static long calc_smp_shares(struct cfs_rq *cfs_rq)
{
long tg_weight, tg_shares, load, shares;
struct task_group *tg = cfs_rq->tg;
@@ -4835,6 +4836,32 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
}
/*
+ * Ignore this pesky SMP stuff, use (4).
+ */
+static long calc_up_shares(struct cfs_rq *cfs_rq)
+{
+ struct task_group *tg = cfs_rq->tg;
+ return READ_ONCE(tg->shares);
+}
+
+DEFINE_STATIC_CALL(calc_group_shares, calc_smp_shares);
+
+void __sched_cgroup_mode_update(int mode)
+{
+ long (*func)(struct cfs_rq *);
+ switch (mode) {
+ case 0:
+ func = &calc_up_shares;
+ break;
+ case 1:
+ default:
+ func = &calc_smp_shares;
+ break;
+ }
+ static_call_update(calc_group_shares, func);
+}
+
+/*
* Recomputes the group entity based on the current state of its group
* runqueue.
*/
@@ -4850,7 +4877,7 @@ static void update_cfs_group(struct sched_entity *se)
if (!gcfs_rq || !gcfs_rq->load.weight)
return;
- shares = calc_group_shares(gcfs_rq);
+ shares = static_call(calc_group_shares)(gcfs_rq);
if (unlikely(se->load.weight != shares))
reweight_entity(cfs_rq_of(se), se, shares);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 56acf50..922170b 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -571,6 +571,7 @@ extern void free_fair_sched_group(struct task_group *tg);
extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
extern void online_fair_sched_group(struct task_group *tg);
extern void unregister_fair_sched_group(struct task_group *tg);
+extern void __sched_cgroup_mode_update(int mode);
#else /* !CONFIG_FAIR_GROUP_SCHED: */
static inline void free_fair_sched_group(struct task_group *tg) { }
static inline int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)