Re: [PATCH 2/2] sched_ext: Add cpuperf support

From: Tejun Heo
Date: Tue Jul 02 2024 - 13:57:00 EST


Hello,

So, maybe something like this. It's not the prettiest but avoids adding
indirect calls to fair and rt while allowing sched_ext to report what the
BPF scheduler wants. Only compile tested. Would something like this work for
the use cases you have on mind?

Thanks.

Index: work/kernel/sched/core.c
===================================================================
--- work.orig/kernel/sched/core.c
+++ work/kernel/sched/core.c
@@ -1671,6 +1671,20 @@ static inline void uclamp_rq_dec_id(stru
}
}

+bool sched_uclamp_enabled(void)
+{
+ return true;
+}
+
+static bool class_supports_uclamp(const struct sched_class *class)
+{
+ if (likely(class->uclamp_enabled == sched_uclamp_enabled))
+ return true;
+ if (!class->uclamp_enabled)
+ return false;
+ return class->uclamp_enabled();
+}
+
static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
{
enum uclamp_id clamp_id;
@@ -1684,7 +1698,7 @@ static inline void uclamp_rq_inc(struct
if (!static_branch_unlikely(&sched_uclamp_used))
return;

- if (unlikely(!p->sched_class->uclamp_enabled))
+ if (class_supports_uclamp(p->sched_class))
return;

for_each_clamp_id(clamp_id)
@@ -1708,7 +1722,7 @@ static inline void uclamp_rq_dec(struct
if (!static_branch_unlikely(&sched_uclamp_used))
return;

- if (unlikely(!p->sched_class->uclamp_enabled))
+ if (class_supports_uclamp(p->sched_class))
return;

for_each_clamp_id(clamp_id)
Index: work/kernel/sched/ext.c
===================================================================
--- work.orig/kernel/sched/ext.c
+++ work/kernel/sched/ext.c
@@ -116,10 +116,17 @@ enum scx_ops_flags {
*/
SCX_OPS_SWITCH_PARTIAL = 1LLU << 3,

+ /*
+ * Disable built-in uclamp support. Can be useful when the BPF scheduler
+ * wants to implement custom uclamp support.
+ */
+ SCX_OPS_DISABLE_UCLAMP = 1LLU << 4,
+
SCX_OPS_ALL_FLAGS = SCX_OPS_KEEP_BUILTIN_IDLE |
SCX_OPS_ENQ_LAST |
SCX_OPS_ENQ_EXITING |
- SCX_OPS_SWITCH_PARTIAL,
+ SCX_OPS_SWITCH_PARTIAL |
+ SCX_OPS_DISABLE_UCLAMP,
};

/* argument container for ops.init_task() */
@@ -3437,6 +3444,13 @@ static void switched_from_scx(struct rq
static void wakeup_preempt_scx(struct rq *rq, struct task_struct *p,int wake_flags) {}
static void switched_to_scx(struct rq *rq, struct task_struct *p) {}

+#ifdef CONFIG_UCLAMP_TASK
+static bool uclamp_enabled_scx(void)
+{
+ return !(scx_ops.flags & SCX_OPS_DISABLE_UCLAMP);
+}
+#endif
+
int scx_check_setscheduler(struct task_struct *p, int policy)
{
lockdep_assert_rq_held(task_rq(p));
@@ -3522,7 +3536,7 @@ DEFINE_SCHED_CLASS(ext) = {
.update_curr = update_curr_scx,

#ifdef CONFIG_UCLAMP_TASK
- .uclamp_enabled = 1,
+ .uclamp_enabled = uclamp_enabled_scx,
#endif
};

Index: work/kernel/sched/fair.c
===================================================================
--- work.orig/kernel/sched/fair.c
+++ work/kernel/sched/fair.c
@@ -13228,9 +13228,7 @@ DEFINE_SCHED_CLASS(fair) = {
.task_is_throttled = task_is_throttled_fair,
#endif

-#ifdef CONFIG_UCLAMP_TASK
- .uclamp_enabled = 1,
-#endif
+ SCHED_CLASS_UCLAMP_ENABLED
};

#ifdef CONFIG_SCHED_DEBUG
Index: work/kernel/sched/rt.c
===================================================================
--- work.orig/kernel/sched/rt.c
+++ work/kernel/sched/rt.c
@@ -2681,9 +2681,7 @@ DEFINE_SCHED_CLASS(rt) = {
.task_is_throttled = task_is_throttled_rt,
#endif

-#ifdef CONFIG_UCLAMP_TASK
- .uclamp_enabled = 1,
-#endif
+ SCHED_CLASS_UCLAMP_ENABLED
};

#ifdef CONFIG_RT_GROUP_SCHED
Index: work/kernel/sched/sched.h
===================================================================
--- work.orig/kernel/sched/sched.h
+++ work/kernel/sched/sched.h
@@ -2339,11 +2339,6 @@ struct affinity_context {
extern s64 update_curr_common(struct rq *rq);

struct sched_class {
-
-#ifdef CONFIG_UCLAMP_TASK
- int uclamp_enabled;
-#endif
-
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*yield_task) (struct rq *rq);
@@ -2405,8 +2400,21 @@ struct sched_class {
#ifdef CONFIG_SCHED_CORE
int (*task_is_throttled)(struct task_struct *p, int cpu);
#endif
+
+#ifdef CONFIG_UCLAMP_TASK
+ bool (*uclamp_enabled)(void);
+#endif
};

+#ifdef CONFIG_UCLAMP_TASK
+bool sched_uclamp_enabled(void);
+
+#define SCHED_CLASS_UCLAMP_ENABLED \
+ .uclamp_enabled = sched_uclamp_enabled,
+#else
+#define SCHED_CLASS_UCLAMP_ENABLED
+#endif
+
static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
{
WARN_ON_ONCE(rq->curr != prev);