[PATCH 3/3] sched/core: Make tasks with the same cookie pair up on SMT siblings

From: Cruz Zhao
Date: Tue Jun 28 2022 - 03:57:44 EST


If the numbers of tasks in the runqueues of the SMT siblings are equal, we
call the core balanced; otherwise it is unbalanced. If the core is balanced,
every time we pick the next task we can pick a pair of tasks with the same
cookie, one for each SMT sibling, and forced idle is avoided.

- Migrate the task if the source core and destination core can be balanced
If ck->nr_running of src_cpu is the highest within the source core, and
ck->nr_running of dst_cpu is the lowest within the destination core,
migrate the task.

- Select a cookie-matched idle CPU, or an idle CPU with the lowest
ck->nr_running within its core
In the fast path of task wakeup, if ck->nr_running of the CPU is the
lowest within its core, we can select this CPU for the wakeup.

- Find the cookie-matched idlest CPU, or a cookie-matched CPU with the lowest
ck->nr_running within its core
In the slow path of task wakeup, if ck->nr_running of the CPU is the
lowest within its core, we can select this CPU for the wakeup.

Signed-off-by: Cruz Zhao <CruzZhao@xxxxxxxxxxxxxxxxx>
---
kernel/sched/fair.c | 4 ++--
kernel/sched/sched.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 78795a9..c18a716 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6096,7 +6096,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
struct rq *rq = cpu_rq(i);

- if (!sched_core_cookie_match(rq, p))
+ if (!sched_core_cookie_match(NULL, rq, p))
continue;

if (sched_idle_cpu(i))
@@ -7681,7 +7681,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
* Don't migrate task if the task's cookie does not match
* with the destination CPU's core cookie.
*/
- if (!sched_core_cookie_match(cpu_rq(env->dst_cpu), p))
+ if (!(sched_core_cookie_match(env->src_rq, env->dst_rq, p)))
return 1;

if (sysctl_sched_migration_cost == 0)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d852c67..ee0e558 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1195,6 +1195,56 @@ struct sched_core_cookie {
};

/*
+ * Pairing tasks that share a cookie across SMT siblings avoids much forced idle, so on
+ * wakeup and load balance we try to form and keep such pairs. Returns true if @p has no
+ * cookie or may be placed on @dst_rq; a NULL @src_rq skips the source-side check.
+ */
+static inline bool
+sched_core_make_pair_balance(struct rq *src_rq, struct rq *dst_rq, struct task_struct *p)
+{
+ struct sched_core_cookie *ck = (struct sched_core_cookie *)p->core_cookie;
+ unsigned int src_cpu, dst_cpu, t;
+ unsigned int src_nr_running, dst_nr_running;
+
+ if (!ck)
+ return true;
+
+ /*
+ * On load balance, refuse unless ck->nr_running on src_cpu is strictly higher than
+ * on every SMT sibling. With core scheduling off on src_rq the move is allowed.
+ */
+ if (src_rq) {
+ if (!sched_core_enabled(src_rq))
+ return true;
+ src_cpu = cpu_of(src_rq);
+ src_nr_running = *per_cpu_ptr(ck->nr_running, src_cpu);
+ for_each_cpu(t, cpu_smt_mask(src_cpu)) {
+ if (t == src_cpu)
+ continue;
+ if (*per_cpu_ptr(ck->nr_running, t) >= src_nr_running)
+ return false;
+ }
+
+ }
+
+ /*
+ * p pairs up on the dst core only if ck->nr_running on dst_cpu is strictly lower
+ * than on every SMT sibling; then p can be woken on, or migrated to, dst_rq.
+ */
+ dst_cpu = cpu_of(dst_rq);
+ dst_nr_running = *per_cpu_ptr(ck->nr_running, dst_cpu);
+ for_each_cpu(t, cpu_smt_mask(dst_cpu)) {
+ if (t == dst_cpu)
+ continue;
+ if (*per_cpu_ptr(ck->nr_running, t) <= dst_nr_running)
+ return false;
+ }
+
+ return true;
+}
+
+
+/*
* Helpers to check if the CPU's core cookie matches with the task's cookie
* when core scheduling is enabled.
* A special case is that the task's cookie always matches with CPU's core
@@ -1206,19 +1256,21 @@ static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
if (!sched_core_enabled(rq))
return true;

- return rq->core->core_cookie == p->core_cookie;
+ return rq->core->core_cookie == p->core_cookie ||
+ sched_core_make_pair_balance(NULL, rq, p);
}

-static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
+static inline bool
+sched_core_cookie_match(struct rq *src_rq, struct rq *dst_rq, struct task_struct *p)
{
bool idle_core = true;
int cpu;

/* Ignore cookie match if core scheduler is not enabled on the CPU. */
- if (!sched_core_enabled(rq))
+ if (!sched_core_enabled(dst_rq))
return true;

- for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) {
+ for_each_cpu(cpu, cpu_smt_mask(cpu_of(dst_rq))) {
if (!available_idle_cpu(cpu)) {
idle_core = false;
break;
@@ -1229,7 +1281,8 @@ static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
* A CPU in an idle core is always the best choice for tasks with
* cookies.
*/
- return idle_core || rq->core->core_cookie == p->core_cookie;
+ return idle_core || dst_rq->core->core_cookie == p->core_cookie ||
+ sched_core_make_pair_balance(src_rq, dst_rq, p);
}

static inline bool sched_group_cookie_match(struct rq *rq,
@@ -1243,7 +1296,7 @@ static inline bool sched_group_cookie_match(struct rq *rq,
return true;

for_each_cpu_and(cpu, sched_group_span(group), p->cpus_ptr) {
- if (sched_core_cookie_match(rq, p))
+ if (sched_core_cookie_match(NULL, rq, p))
return true;
}
return false;
--
1.8.3.1