[PATCH] sched: psi: pass enqueue/dequeue flags to psi callbacks directly
From: Johannes Weiner
Date: Fri Oct 11 2024 - 06:18:07 EST
What psi needs to do on each enqueue and dequeue has gotten more
subtle, and the generic sched code trying to distill this into a bool
for the callbacks is awkward.
Pass the flags directly and let psi parse them. For that to work, the
#include "stats.h" (which has the psi callback implementations) needs
to be below the flag definitions in "sched.h". Move that section
further down, next to some of the other accounting stuff.
This also puts the ENQUEUE_SAVE/RESTORE branch behind the psi jump
label, slightly reducing overhead when PSI=y but runtime disabled.
Link: https://lore.kernel.org/lkml/20241011083323.GL17263@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx/
Suggested-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>
---
kernel/sched/core.c | 12 +++++-----
kernel/sched/sched.h | 56 ++++++++++++++++++++++----------------------
kernel/sched/stats.h | 29 +++++++++++++++--------
3 files changed, 53 insertions(+), 44 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 527502a86ff9..42cf181bf3ab 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2019,10 +2019,10 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
*/
uclamp_rq_inc(rq, p);
- if (!(flags & ENQUEUE_RESTORE)) {
+ psi_enqueue(p, flags);
+
+ if (!(flags & ENQUEUE_RESTORE))
sched_info_enqueue(rq, p);
- psi_enqueue(p, flags & ENQUEUE_MIGRATED);
- }
if (sched_core_enabled(rq))
sched_core_enqueue(rq, p);
@@ -2039,10 +2039,10 @@ inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
if (!(flags & DEQUEUE_NOCLOCK))
update_rq_clock(rq);
- if (!(flags & DEQUEUE_SAVE)) {
+ if (!(flags & DEQUEUE_SAVE))
sched_info_dequeue(rq, p);
- psi_dequeue(p, !(flags & DEQUEUE_SLEEP));
- }
+
+ psi_dequeue(p, flags);
/*
* Must be before ->dequeue_task() because ->dequeue_task() can 'fail'
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b1c3588a8f00..bbda03f17126 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2085,34 +2085,6 @@ static inline const struct cpumask *task_user_cpus(struct task_struct *p)
#endif /* CONFIG_SMP */
-#include "stats.h"
-
-#if defined(CONFIG_SCHED_CORE) && defined(CONFIG_SCHEDSTATS)
-
-extern void __sched_core_account_forceidle(struct rq *rq);
-
-static inline void sched_core_account_forceidle(struct rq *rq)
-{
- if (schedstat_enabled())
- __sched_core_account_forceidle(rq);
-}
-
-extern void __sched_core_tick(struct rq *rq);
-
-static inline void sched_core_tick(struct rq *rq)
-{
- if (sched_core_enabled(rq) && schedstat_enabled())
- __sched_core_tick(rq);
-}
-
-#else /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS): */
-
-static inline void sched_core_account_forceidle(struct rq *rq) { }
-
-static inline void sched_core_tick(struct rq *rq) { }
-
-#endif /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS) */
-
#ifdef CONFIG_CGROUP_SCHED
/*
@@ -3166,6 +3138,34 @@ extern void nohz_run_idle_balance(int cpu);
static inline void nohz_run_idle_balance(int cpu) { }
#endif
+#include "stats.h"
+
+#if defined(CONFIG_SCHED_CORE) && defined(CONFIG_SCHEDSTATS)
+
+extern void __sched_core_account_forceidle(struct rq *rq);
+
+static inline void sched_core_account_forceidle(struct rq *rq)
+{
+ if (schedstat_enabled())
+ __sched_core_account_forceidle(rq);
+}
+
+extern void __sched_core_tick(struct rq *rq);
+
+static inline void sched_core_tick(struct rq *rq)
+{
+ if (sched_core_enabled(rq) && schedstat_enabled())
+ __sched_core_tick(rq);
+}
+
+#else /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS): */
+
+static inline void sched_core_account_forceidle(struct rq *rq) { }
+
+static inline void sched_core_tick(struct rq *rq) { }
+
+#endif /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS) */
+
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
struct irqtime {
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 767e098a3bd1..8ee0add5a48a 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -127,21 +127,25 @@ static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
* go through migration requeues. In this case, *sleeping* states need
* to be transferred.
*/
-static inline void psi_enqueue(struct task_struct *p, bool migrate)
+static inline void psi_enqueue(struct task_struct *p, int flags)
{
int clear = 0, set = 0;
if (static_branch_likely(&psi_disabled))
return;
+ /* Same runqueue, nothing changed for psi */
+ if (flags & ENQUEUE_RESTORE)
+ return;
+
if (p->se.sched_delayed) {
/* CPU migration of "sleeping" task */
- SCHED_WARN_ON(!migrate);
+ SCHED_WARN_ON(!(flags & ENQUEUE_MIGRATED));
if (p->in_memstall)
set |= TSK_MEMSTALL;
if (p->in_iowait)
set |= TSK_IOWAIT;
- } else if (migrate) {
+ } else if (flags & ENQUEUE_MIGRATED) {
/* CPU migration of runnable task */
set = TSK_RUNNING;
if (p->in_memstall)
@@ -158,17 +162,14 @@ static inline void psi_enqueue(struct task_struct *p, bool migrate)
psi_task_change(p, clear, set);
}
-static inline void psi_dequeue(struct task_struct *p, bool migrate)
+static inline void psi_dequeue(struct task_struct *p, int flags)
{
if (static_branch_likely(&psi_disabled))
return;
- /*
- * When migrating a task to another CPU, clear all psi
- * state. The enqueue callback above will work it out.
- */
- if (migrate)
- psi_task_change(p, p->psi_flags, 0);
+ /* Same runqueue, nothing changed for psi */
+ if (flags & DEQUEUE_SAVE)
+ return;
/*
* A voluntary sleep is a dequeue followed by a task switch. To
@@ -176,6 +177,14 @@ static inline void psi_dequeue(struct task_struct *p, bool migrate)
* TSK_RUNNING and TSK_IOWAIT for us when it moves TSK_ONCPU.
* Do nothing here.
*/
+ if (flags & DEQUEUE_SLEEP)
+ return;
+
+ /*
+ * When migrating a task to another CPU, clear all psi
+ * state. The enqueue callback above will work it out.
+ */
+ psi_task_change(p, p->psi_flags, 0);
}
static inline void psi_ttwu_dequeue(struct task_struct *p)
--
2.47.0