[PATCH 3/3 v2] sched: Enforce fairness between cpu threads

From: Tim Chen
Date: Tue Aug 06 2019 - 15:50:45 EST


A CPU thread could be suppressed by its sibling for an extended time.
Implement a budget for force idling so that all CPU threads have an
equal chance to run.

Signed-off-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
---
kernel/sched/core.c | 43 +++++++++++++++++++++++++++++++++++++++++
kernel/sched/deadline.c | 1 +
kernel/sched/fair.c | 11 +++++++++++
kernel/sched/rt.c | 1 +
kernel/sched/sched.h | 4 ++++
5 files changed, 60 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0f893853766c..de83dcb84495 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -207,6 +207,46 @@ static struct task_struct *sched_core_next(struct task_struct *p, unsigned long
return p;
}

+/*
+ * Charge @exec to @p's runqueue force-idle budget while a sibling is forced
+ * idle; refill every online CPU in the SMT mask once all budgets are used up.
+ */
+void account_core_idletime(struct task_struct *p, u64 exec)
+{
+	struct rq *rq = task_rq(p), *sib_rq;
+	const struct cpumask *siblings;
+	bool sibling_forced_idle = false;
+	bool others_exhausted = true;
+	int this_cpu, sib;
+
+	if (!sched_core_enabled(rq))
+		return;
+
+	this_cpu = task_cpu(p);
+	siblings = cpu_smt_mask(this_cpu);
+	for_each_cpu(sib, siblings) {
+		if (sib == this_cpu || cpu_is_offline(sib))
+			continue;
+		sib_rq = cpu_rq(sib);
+		if (sib_rq->core_forceidle)
+			sibling_forced_idle = true;
+		/* Any other sibling with budget left blocks the refill. */
+		if (sib_rq->core_idle_allowance > 0)
+			others_exhausted = false;
+	}
+
+	if (sibling_forced_idle)
+		rq->core_idle_allowance -= (s64) exec;
+
+	if (rq->core_idle_allowance >= 0 || !others_exhausted)
+		return;
+
+	for_each_cpu(sib, siblings) {
+		if (!cpu_is_offline(sib))
+			cpu_rq(sib)->core_idle_allowance += (s64) SCHED_IDLE_ALLOWANCE;
+	}
+}
+
/*
* The static-key + stop-machine variable are needed such that:
*
@@ -273,6 +313,8 @@ void sched_core_put(void)

static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { }
static inline void sched_core_dequeue(struct rq *rq, struct task_struct *p) { }
+/* No-op stub used when core scheduling is compiled out. */
+static inline void account_core_idletime(struct task_struct *p, u64 exec) { }

#endif /* CONFIG_SCHED_CORE */

@@ -6773,6 +6815,7 @@ void __init sched_init(void)
rq->core_enabled = 0;
rq->core_tree = RB_ROOT;
rq->core_forceidle = false;
+ rq->core_idle_allowance = (s64) SCHED_IDLE_ALLOWANCE;

rq->core_cookie = 0UL;
#endif
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 64fc444f44f9..684c64a95ec7 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1175,6 +1175,7 @@ static void update_curr_dl(struct rq *rq)

curr->se.sum_exec_runtime += delta_exec;
account_group_exec_runtime(curr, delta_exec);
+ account_core_idletime(curr, delta_exec);

curr->se.exec_start = now;
cgroup_account_cputime(curr, delta_exec);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e289b6e1545b..f65270784c28 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -611,6 +611,16 @@ bool prio_less_fair(struct task_struct *a, struct task_struct *b)
* Normalize the vruntime if tasks are in different cpus.
*/
if (task_cpu(a) != task_cpu(b)) {
+
+ if (a->core_cookie != b->core_cookie) {
+ /*
+ * Will be force idling one thread,
+ * pick the thread that has more allowance.
+ */
+ return (task_rq(a)->core_idle_allowance <
+ task_rq(b)->core_idle_allowance) ? true : false;
+ }
+
b_vruntime -= task_cfs_rq(b)->min_vruntime;
b_vruntime += task_cfs_rq(a)->min_vruntime;

@@ -817,6 +827,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
cgroup_account_cputime(curtask, delta_exec);
account_group_exec_runtime(curtask, delta_exec);
+ account_core_idletime(curtask, delta_exec);
}

account_cfs_rq_runtime(cfs_rq, delta_exec);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 81557224548c..6f18e1455778 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -971,6 +971,7 @@ static void update_curr_rt(struct rq *rq)

curr->se.sum_exec_runtime += delta_exec;
account_group_exec_runtime(curr, delta_exec);
+ account_core_idletime(curr, delta_exec);

curr->se.exec_start = now;
cgroup_account_cputime(curr, delta_exec);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index bdabe7ce1152..927334b2078c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -963,6 +963,7 @@ struct rq {
struct task_struct *core_pick;
unsigned int core_enabled;
unsigned int core_sched_seq;
+ s64 core_idle_allowance;
struct rb_root core_tree;
bool core_forceidle;

@@ -999,6 +1000,8 @@ static inline int cpu_of(struct rq *rq)
}

#ifdef CONFIG_SCHED_CORE
+#define SCHED_IDLE_ALLOWANCE 5000000 /* force-idle budget per refill: 5 msec in ns */
+
DECLARE_STATIC_KEY_FALSE(__sched_core_enabled);

static inline bool sched_core_enabled(struct rq *rq)
@@ -1016,6 +1019,7 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq)

extern void queue_core_balance(struct rq *rq);
extern bool prio_less_fair(struct task_struct *a, struct task_struct *b);
+extern void account_core_idletime(struct task_struct *p, u64 exec);

#else /* !CONFIG_SCHED_CORE */

--
2.20.1