Re: [PATCH 1/3] sched/fair: Introduce scaled capacity awareness in find_idlest_cpu code path

From: Rohit Jain
Date: Thu Oct 12 2017 - 13:00:05 EST


Hi Joel, Atish,

Moving our off-line discussion to LKML, just so everyone is on the same
page. I actually like this version now: it outperforms my previous code
and makes the code simpler too, so I am on board with it.

Since we need a fast way of returning an idle CPU in the
select_idle_sibling path, I think that code can remain as it is (or
maybe we can argue about the patch on that thread).

If what I said above makes sense to everyone, I will send out a v6.

As always, please let me know what you think.

Thanks,
Rohit

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 56f343b..a1f622c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5724,7 +5724,7 @@ static int cpu_util_wake(int cpu, struct task_struct *p);

 static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
 {
-	return capacity_orig_of(cpu) - cpu_util_wake(cpu, p);
+	return capacity_of(cpu) - cpu_util_wake(cpu, p);
 }
 
 /*
@@ -5870,6 +5870,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
 	unsigned long load, min_load = ULONG_MAX;
 	unsigned int min_exit_latency = UINT_MAX;
 	u64 latest_idle_timestamp = 0;
+	unsigned int idle_cpu_cap = 0;
 	int least_loaded_cpu = this_cpu;
 	int shallowest_idle_cpu = -1;
 	int i;
@@ -5881,6 +5882,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
 	/* Traverse only the allowed CPUs */
 	for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
 		if (idle_cpu(i)) {
+			int idle_candidate = -1;
 			struct rq *rq = cpu_rq(i);
 			struct cpuidle_state *idle = idle_get_state(rq);
 			if (idle && idle->exit_latency < min_exit_latency) {
@@ -5891,7 +5893,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
 				 */
 				min_exit_latency = idle->exit_latency;
 				latest_idle_timestamp = rq->idle_stamp;
-				shallowest_idle_cpu = i;
+				idle_candidate = i;
 			} else if ((!idle || idle->exit_latency == min_exit_latency) &&
 				   rq->idle_stamp > latest_idle_timestamp) {
 				/*
@@ -5900,8 +5902,14 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
 				 * a warmer cache.
 				 */
 				latest_idle_timestamp = rq->idle_stamp;
-				shallowest_idle_cpu = i;
+				idle_candidate = i;
 			}
+
+			if (idle_candidate != -1 &&
+			    (capacity_of(idle_candidate) > idle_cpu_cap)) {
+				shallowest_idle_cpu = idle_candidate;
+				idle_cpu_cap = capacity_of(idle_candidate);
+			}
 		} else if (shallowest_idle_cpu == -1) {
 			load = weighted_cpuload(cpu_rq(i));
 			if (load < min_load || (load == min_load && i == this_cpu)) {
--
2.7.4
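
For anyone skimming the diff, the net effect is: a CPU still has to win
the existing shallowest-idle-state comparison (or the most-recently-idle
tiebreak) to become a candidate, but a candidate only replaces the
current pick when its capacity_of() value, i.e. the capacity left after
RT/IRQ scaling, is the highest seen so far. Below is a minimal
standalone sketch of that rule; the struct, helper names, and inputs
are hypothetical, not the kernel code, and the idle_stamp tiebreak is
omitted for brevity:

/*
 * Standalone sketch of the pick logic in the diff above; the struct,
 * helper names, and inputs are hypothetical, not kernel code. The
 * idle_stamp tiebreak is omitted for brevity.
 */
#include <limits.h>

struct cpu_stat {
	int exit_latency;	/* exit latency of the CPU's idle state */
	unsigned long capacity;	/* capacity left after RT/IRQ scaling */
};

/* Returns the chosen idle CPU's index, or -1 if none was idle. */
static int pick_idle_cpu(const struct cpu_stat *cpus, int n)
{
	int min_exit_latency = INT_MAX;
	unsigned long max_cap = 0;
	int pick = -1;
	int i;

	for (i = 0; i < n; i++) {
		int candidate = -1;

		/* Unchanged rule: prefer the shallowest idle state. */
		if (cpus[i].exit_latency < min_exit_latency) {
			min_exit_latency = cpus[i].exit_latency;
			candidate = i;
		}

		/*
		 * New rule: a candidate replaces the current pick only
		 * if its scaled capacity is the highest seen so far.
		 */
		if (candidate != -1 && cpus[i].capacity > max_cap) {
			pick = candidate;
			max_cap = cpus[i].capacity;
		}
	}

	return pick;
}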


On 10/07/2017 04:48 PM, Rohit Jain wrote:
While looking for idle CPUs for a waking task, we should also account
for the delays caused by the bandwidth reduction from RT/IRQ tasks.

This patch does that by trying to find a higher-capacity CPU with
minimum wake-up latency.

Signed-off-by: Rohit Jain <rohit.k.jain@xxxxxxxxxx>
---
kernel/sched/fair.c | 27 ++++++++++++++++++++++++---
1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0107280..eaede50 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5579,6 +5579,11 @@ static unsigned long capacity_orig_of(int cpu)
 	return cpu_rq(cpu)->cpu_capacity_orig;
 }
 
+static inline bool full_capacity(int cpu)
+{
+	return (capacity_of(cpu) >= (capacity_orig_of(cpu)*768 >> 10));
+}
+
 static unsigned long cpu_avg_load_per_task(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
@@ -5865,8 +5870,10 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 	unsigned long load, min_load = ULONG_MAX;
 	unsigned int min_exit_latency = UINT_MAX;
 	u64 latest_idle_timestamp = 0;
+	unsigned int backup_cap = 0;
 	int least_loaded_cpu = this_cpu;
 	int shallowest_idle_cpu = -1;
+	int shallowest_idle_cpu_backup = -1;
 	int i;
 
 	/* Check if we have any choice: */
@@ -5876,6 +5883,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 	/* Traverse only the allowed CPUs */
 	for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
 		if (idle_cpu(i)) {
+			int idle_candidate = -1;
 			struct rq *rq = cpu_rq(i);
 			struct cpuidle_state *idle = idle_get_state(rq);
 			if (idle && idle->exit_latency < min_exit_latency) {
@@ -5886,7 +5894,7 @@
 				 */
 				min_exit_latency = idle->exit_latency;
 				latest_idle_timestamp = rq->idle_stamp;
-				shallowest_idle_cpu = i;
+				idle_candidate = i;
 			} else if ((!idle || idle->exit_latency == min_exit_latency) &&
 				   rq->idle_stamp > latest_idle_timestamp) {
 				/*
@@ -5895,7 +5903,16 @@
 				 * a warmer cache.
 				 */
 				latest_idle_timestamp = rq->idle_stamp;
-				shallowest_idle_cpu = i;
+				idle_candidate = i;
+			}
+
+			if (idle_candidate != -1) {
+				if (full_capacity(idle_candidate)) {
+					shallowest_idle_cpu = idle_candidate;
+				} else if (capacity_of(idle_candidate) > backup_cap) {
+					shallowest_idle_cpu_backup = idle_candidate;
+					backup_cap = capacity_of(idle_candidate);
+				}
 			}
 		} else if (shallowest_idle_cpu == -1) {
 			load = weighted_cpuload(cpu_rq(i));
@@ -5906,7 +5923,11 @@
 		}
 	}
 
-	return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
+	if (shallowest_idle_cpu != -1)
+		return shallowest_idle_cpu;
+
+	return (shallowest_idle_cpu_backup != -1 ?
+		shallowest_idle_cpu_backup : least_loaded_cpu);
 }
 
 #ifdef CONFIG_SCHED_SMT
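
As a side note on the full_capacity() test quoted above:
capacity_orig_of(cpu)*768 >> 10 is fixed-point arithmetic for a 75%
threshold, since 768/1024 = 0.75. Worked with illustrative numbers (not
taken from any real machine):

	capacity_orig_of(cpu) = 1024
	threshold = (1024 * 768) >> 10 = 768	/* i.e. 75% */

So an idle CPU is picked outright while capacity_of() >= 768, i.e.
while RT/IRQ activity has consumed less than 25% of its original
capacity; otherwise it is only remembered as the highest-capacity
backup (shallowest_idle_cpu_backup).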