[tip: sched/core] sched/core: Optimize pick_next_task()

From: tip-bot2 for Peter Zijlstra
Date: Mon Nov 11 2019 - 04:33:05 EST


The following commit has been merged into the sched/core branch of tip:

Commit-ID: 5d7d605642b28a5911198a405a6072f091bfbee6
Gitweb: https://git.kernel.org/tip/5d7d605642b28a5911198a405a6072f091bfbee6
Author: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate: Fri, 08 Nov 2019 14:15:57 +01:00
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitterDate: Mon, 11 Nov 2019 08:35:19 +01:00

sched/core: Optimize pick_next_task()

Ever since we moved the sched_class definitions into their own files,
the expression {fair,idle}_sched_class.pick_next_task() is in fact no
longer a compile-time constant and results in an indirect call
(barring LTO).

Fix that by exposing pick_next_task_{fair,idle}() directly; this gets
rid of the indirect call (and RETPOLINE) on the fast path.

Also remove the unlikely() from the idle case: it is in fact /the/ way
we select idle -- and that is a very common thing to do.

Performance for will-it-scale/sched_yield improves by 2% (as reported
by 0-day).

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: bsegall@xxxxxxxxxx
Cc: dietmar.eggemann@xxxxxxx
Cc: juri.lelli@xxxxxxxxxx
Cc: ktkhai@xxxxxxxxxxxxx
Cc: mgorman@xxxxxxx
Cc: qais.yousef@xxxxxxx
Cc: qperret@xxxxxxxxxx
Cc: rostedt@xxxxxxxxxxx
Cc: valentin.schneider@xxxxxxx
Cc: vincent.guittot@xxxxxxxxxx
Link: https://lkml.kernel.org/r/20191108131909.603037345@xxxxxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
kernel/sched/core.c | 6 +++---
kernel/sched/fair.c | 2 +-
kernel/sched/idle.c | 2 +-
kernel/sched/sched.h | 3 +++
4 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 59c4f29..7cf6547 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3917,14 +3917,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
prev->sched_class == &fair_sched_class) &&
rq->nr_running == rq->cfs.h_nr_running)) {

- p = fair_sched_class.pick_next_task(rq, prev, rf);
+ p = pick_next_task_fair(rq, prev, rf);
if (unlikely(p == RETRY_TASK))
goto restart;

/* Assumes fair_sched_class->next == idle_sched_class */
- if (unlikely(!p)) {
+ if (!p) {
put_prev_task(rq, prev);
- p = idle_sched_class.pick_next_task(rq, NULL, NULL);
+ p = pick_next_task_idle(rq, NULL, NULL);
}

return p;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c48a695..da81451 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6611,7 +6611,7 @@ preempt:
set_last_buddy(se);
}

-static struct task_struct *
+struct task_struct *
pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
struct cfs_rq *cfs_rq = &rq->cfs;
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 179d1d4..0fdceac 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -391,7 +391,7 @@ static void set_next_task_idle(struct rq *rq, struct task_struct *next)
schedstat_inc(rq->sched_goidle);
}

-static struct task_struct *
+struct task_struct *
pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
struct task_struct *next = rq->idle;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c8870c5..66172a3 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1821,6 +1821,9 @@ static inline bool sched_fair_runnable(struct rq *rq)
return rq->cfs.nr_running > 0;
}

+extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
+extern struct task_struct *pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
+
#ifdef CONFIG_SMP

extern void update_group_capacity(struct sched_domain *sd, int cpu);