[PATCH 4/7] sched: Optimize pick_next_task()

From: Peter Zijlstra
Date: Fri Nov 08 2019 - 08:21:51 EST


Ever since we moved the sched_class definitions into their own files,
the constant expression {fair,idle}_sched_class.pick_next_task() is
not in fact a compile time constant anymore and results in an indirect
call (barring LTO).

Fix that by exposing pick_next_task_{fair,idle}() directly; this gets
rid of the indirect call (and RETPOLINE) on the fast path.

Also remove the unlikely() from the idle case; it is in fact /the/ way
we select idle -- and that is a very common thing to do.

Performance for will-it-scale/sched_yield improves by 2% (as reported
by 0-day).

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
kernel/sched/core.c | 6 +++---
kernel/sched/fair.c | 2 +-
kernel/sched/idle.c | 2 +-
kernel/sched/sched.h | 3 +++
4 files changed, 8 insertions(+), 5 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3917,14 +3917,14 @@ pick_next_task(struct rq *rq, struct tas
prev->sched_class == &fair_sched_class) &&
rq->nr_running == rq->cfs.h_nr_running)) {

- p = fair_sched_class.pick_next_task(rq, prev, rf);
+ p = pick_next_task_fair(rq, prev, rf);
if (unlikely(p == RETRY_TASK))
goto restart;

/* Assumes fair_sched_class->next == idle_sched_class */
- if (unlikely(!p)) {
+ if (!p) {
put_prev_task(rq, prev);
- p = idle_sched_class.pick_next_task(rq, NULL, NULL);
+ p = pick_next_task_idle(rq, NULL, NULL);
}

return p;
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6599,7 +6599,7 @@ static void check_preempt_wakeup(struct
set_last_buddy(se);
}

-static struct task_struct *
+struct task_struct *
pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
struct cfs_rq *cfs_rq = &rq->cfs;
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -391,7 +391,7 @@ static void set_next_task_idle(struct rq
schedstat_inc(rq->sched_goidle);
}

-static struct task_struct *
+struct task_struct *
pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
struct task_struct *next = rq->idle;
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1821,6 +1821,9 @@ static inline bool sched_fair_runnable(s
return rq->cfs.nr_running > 0;
}

+extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
+extern struct task_struct *pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
+
#ifdef CONFIG_SMP

extern void update_group_capacity(struct sched_domain *sd, int cpu);