[PATCH v2] sched: Distinguish between idle_cpu calls based on desired effect

From: Rohit Jain
Date: Wed May 09 2018 - 12:40:35 EST


Commit 247f2f6f3c70 ("sched/core: Don't schedule threads on pre-empted
vCPUs") made idle_cpu() return 0 when the vCPU is preempted, so that
threads are not scheduled on such a CPU. However, idle_cpu() is also used
in other places to check whether the CPU is actually idle. Hence, split
the function: idle_cpu() again reports only whether the CPU is idle, while
the new available_idle_cpu() additionally checks that the vCPU is not
preempted and is used where a CPU is being picked for enqueuing work.

Changelog:
v1->v2:
* Changed the comment to say available_idle_cpu

Signed-off-by: Rohit Jain <rohit.k.jain@xxxxxxxxxx>
---
include/linux/sched.h | 1 +
kernel/sched/core.c | 14 ++++++++++++++
kernel/sched/fair.c | 20 ++++++++++----------
3 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c241370..959a858 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1504,6 +1504,7 @@ static inline int task_nice(const struct task_struct *p)
extern int can_nice(const struct task_struct *p, const int nice);
extern int task_curr(const struct task_struct *p);
extern int idle_cpu(int cpu);
+extern int available_idle_cpu(int cpu);
extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
extern int sched_setattr(struct task_struct *, const struct sched_attr *);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4e0ebae..4064187 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4029,6 +4029,20 @@ int idle_cpu(int cpu)
return 0;
#endif

+ return 1;
+}
+
+/**
+ * available_idle_cpu - is a given CPU idle for enqueuing work.
+ * @cpu: the processor in question.
+ *
+ * Return: 1 if the CPU is idle and its vCPU is not preempted. 0 otherwise.
+ */
+int available_idle_cpu(int cpu)
+{
+ if (!idle_cpu(cpu))
+ return 0;
+
if (vcpu_is_preempted(cpu))
return 0;

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1f6a23a..a0b0da4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5870,8 +5870,8 @@ wake_affine_idle(int this_cpu, int prev_cpu, int sync)
* a cpufreq perspective, it's better to have higher utilisation
* on one CPU.
*/
- if (idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
- return idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
+ if (available_idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
+ return available_idle_cpu(prev_cpu) ? prev_cpu : this_cpu;

if (sync && cpu_rq(this_cpu)->nr_running == 1)
return this_cpu;
@@ -6157,7 +6157,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this

/* Traverse only the allowed CPUs */
for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
- if (idle_cpu(i)) {
+ if (available_idle_cpu(i)) {
struct rq *rq = cpu_rq(i);
struct cpuidle_state *idle = idle_get_state(rq);
if (idle && idle->exit_latency < min_exit_latency) {
@@ -6286,7 +6286,7 @@ void __update_idle_core(struct rq *rq)
if (cpu == core)
continue;

- if (!idle_cpu(cpu))
+ if (!available_idle_cpu(cpu))
goto unlock;
}

@@ -6318,7 +6318,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int

for_each_cpu(cpu, cpu_smt_mask(core)) {
cpumask_clear_cpu(cpu, cpus);
- if (!idle_cpu(cpu))
+ if (!available_idle_cpu(cpu))
idle = false;
}

@@ -6347,7 +6347,7 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
for_each_cpu(cpu, cpu_smt_mask(target)) {
if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
continue;
- if (idle_cpu(cpu))
+ if (available_idle_cpu(cpu))
return cpu;
}

@@ -6410,7 +6410,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
return -1;
if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
continue;
- if (idle_cpu(cpu))
+ if (available_idle_cpu(cpu))
break;
}

@@ -6430,13 +6430,13 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
struct sched_domain *sd;
int i, recent_used_cpu;

- if (idle_cpu(target))
+ if (available_idle_cpu(target))
return target;

/*
* If the previous CPU is cache affine and idle, don't be stupid:
*/
- if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
+ if (prev != target && cpus_share_cache(prev, target) && available_idle_cpu(prev))
return prev;

/* Check a recently used CPU as a potential idle candidate: */
@@ -6444,7 +6444,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
if (recent_used_cpu != prev &&
recent_used_cpu != target &&
cpus_share_cache(recent_used_cpu, target) &&
- idle_cpu(recent_used_cpu) &&
+ available_idle_cpu(recent_used_cpu) &&
cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
/*
* Replace recent_used_cpu with prev as it is a potential
--
2.7.4