[PATCH 2/2] sched: Enforce fairness between cpu threads

From: Tim Chen
Date: Tue Aug 06 2019 - 15:50:45 EST


A CPU thread can be suppressed by its sibling for an extended time
when it is forced idle. Implement a budget for force idling, so that
all CPU threads get an equal chance to run.

Signed-off-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
---
kernel/sched/core.c | 40 ++++++++++++++++++++++++++++++++++++++++++++
kernel/sched/fair.c | 12 ++++++++++++
kernel/sched/sched.h | 4 ++++
3 files changed, 56 insertions(+)

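Not part of the commit message, just a note for reviewers: below is a
minimal user-space model of the allowance/refill logic this patch
adds. NR_SIBLINGS, charge() and main() are illustrative names only,
not kernel code; the 5 msec budget mirrors SCHED_IDLE_ALLOWANCE.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define NR_SIBLINGS	2
	#define IDLE_ALLOWANCE	5000000LL	/* 5 msec, in nsec */

	static int64_t allowance[NR_SIBLINGS];
	static bool force_idle[NR_SIBLINGS];

	/*
	 * Models account_core_idletime(): charge @exec nsec to @cpu
	 * while any sibling is forced idle; refill all threads once
	 * every sibling is out of allowance and ours went negative.
	 */
	static void charge(int cpu, uint64_t exec)
	{
		bool sibling_forced_idle = false, refill = true;
		int i;

		for (i = 0; i < NR_SIBLINGS; i++) {
			if (i == cpu)
				continue;
			if (force_idle[i])
				sibling_forced_idle = true;
			/* Only refill if everyone ran out of allowance */
			if (allowance[i] > 0)
				refill = false;
		}

		if (sibling_forced_idle)
			allowance[cpu] -= (int64_t)exec;

		if (allowance[cpu] < 0 && refill)
			for (i = 0; i < NR_SIBLINGS; i++)
				allowance[i] += IDLE_ALLOWANCE;
	}

	int main(void)
	{
		int i;

		for (i = 0; i < NR_SIBLINGS; i++)
			allowance[i] = IDLE_ALLOWANCE;

		force_idle[1] = true;	/* thread 1 is forced idle */
		for (i = 0; i < 7; i++)
			charge(0, 1000000);	/* 1 msec slices on thread 0 */

		/* Prints -2000000: thread 0 overdrew its 5 msec budget */
		printf("thread 0 allowance: %lld nsec\n",
		       (long long)allowance[0]);
		return 0;
	}

With thread 0 overdrawn, the cross-cpu check in prio_less_fair()
below favors the sibling with the larger remaining allowance, so
thread 1 gets to run next.
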
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 567eba50dc38..e22042883723 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -207,6 +207,44 @@ static struct task_struct *sched_core_next(struct task_struct *p, unsigned long
return p;
}

+void account_core_idletime(struct task_struct *p, u64 exec)
+{
+ const struct cpumask *smt_mask;
+ struct rq *rq;
+ bool force_idle, refill;
+ int i, cpu;
+
+ rq = task_rq(p);
+ if (!sched_core_enabled(rq) || !p->core_cookie)
+ return;
+
+ cpu = task_cpu(p);
+ force_idle = false;
+ refill = true;
+ smt_mask = cpu_smt_mask(cpu);
+
+ for_each_cpu(i, smt_mask) {
+ if (cpu == i)
+ continue;
+
+ if (cpu_rq(i)->core_forceidle)
+ force_idle = true;
+
+ /* Only refill if everyone has run out of allowance */
+ if (cpu_rq(i)->core_idle_allowance > 0)
+ refill = false;
+ }
+
+ if (force_idle)
+ rq->core_idle_allowance -= (s64) exec;
+
+ if (rq->core_idle_allowance < 0 && refill) {
+ for_each_cpu(i, smt_mask) {
+ cpu_rq(i)->core_idle_allowance += (s64) SCHED_IDLE_ALLOWANCE;
+ }
+ }
+}
+
/*
* The static-key + stop-machine variable are needed such that:
*
@@ -273,6 +311,7 @@ void sched_core_put(void)

static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { }
static inline void sched_core_dequeue(struct rq *rq, struct task_struct *p) { }
+static inline void account_core_idletime(struct task_struct *p, u64 exec) { }

#endif /* CONFIG_SCHED_CORE */

@@ -6773,6 +6812,7 @@ void __init sched_init(void)
rq->core_enabled = 0;
rq->core_tree = RB_ROOT;
rq->core_forceidle = false;
+ rq->core_idle_allowance = (s64) SCHED_IDLE_ALLOWANCE;

rq->core_cookie = 0UL;
#endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e289b6e1545b..d4f9ea03296e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -611,6 +611,17 @@ bool prio_less_fair(struct task_struct *a, struct task_struct *b)
* Normalize the vruntime if tasks are in different cpus.
*/
if (task_cpu(a) != task_cpu(b)) {
+
+ if (a->core_cookie && b->core_cookie &&
+ a->core_cookie != b->core_cookie) {
+ /*
+ * One of the threads will be force idled;
+ * favor the one that has more allowance left.
+ */
+ return task_rq(a)->core_idle_allowance <=
+ task_rq(b)->core_idle_allowance;
+ }
+
b_vruntime -= task_cfs_rq(b)->min_vruntime;
b_vruntime += task_cfs_rq(a)->min_vruntime;

@@ -817,6 +828,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
cgroup_account_cputime(curtask, delta_exec);
account_group_exec_runtime(curtask, delta_exec);
+ account_core_idletime(curtask, delta_exec);
}

account_cfs_rq_runtime(cfs_rq, delta_exec);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index bdabe7ce1152..927334b2078c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -963,6 +963,7 @@ struct rq {
struct task_struct *core_pick;
unsigned int core_enabled;
unsigned int core_sched_seq;
+ s64 core_idle_allowance;
struct rb_root core_tree;
bool core_forceidle;

@@ -999,6 +1000,8 @@ static inline int cpu_of(struct rq *rq)
}

#ifdef CONFIG_SCHED_CORE
+#define SCHED_IDLE_ALLOWANCE 5000000 /* 5 msec, in nsec */
+
DECLARE_STATIC_KEY_FALSE(__sched_core_enabled);

static inline bool sched_core_enabled(struct rq *rq)
@@ -1016,6 +1019,7 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq)

extern void queue_core_balance(struct rq *rq);
extern bool prio_less_fair(struct task_struct *a, struct task_struct *b);
+extern void account_core_idletime(struct task_struct *p, u64 exec);

#else /* !CONFIG_SCHED_CORE */

--
2.20.1