[PATCH v2 8/8] sched/fair: Don't iterate if no idle CPUs

From: Srikar Dronamraju
Date: Thu May 06 2021 - 12:46:39 EST


Now that nr_busy_cpus for an LLC is updated in the idle callbacks, the
scheduler can detect when all threads of an LLC are busy. In such cases,
it can skip searching that LLC for an idle CPU on which to run the wakee
thread.

Cc: LKML <linux-kernel@xxxxxxxxxxxxxxx>
Cc: Gautham R Shenoy <ego@xxxxxxxxxxxxxxxxxx>
Cc: Parth Shah <parth@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Valentin Schneider <valentin.schneider@xxxxxxx>
Cc: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
Cc: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxxx>
Signed-off-by: Srikar Dronamraju <srikar@xxxxxxxxxxxxxxxxxx>
---
kernel/sched/fair.c | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c70f0889258f..83104d3bd0f9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -715,7 +715,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
#include "pelt.h"
#ifdef CONFIG_SMP

-static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
+static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu, bool idle);
static unsigned long task_h_load(struct task_struct *p);
static unsigned long capacity_of(int cpu);

@@ -5870,7 +5870,8 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,

static inline bool test_reset_idle_core(struct sched_domain_shared *sds, int val);

-static int wake_affine_idler_llc(struct task_struct *p, int pref_cpu, int try_cpu, int sync)
+static int wake_affine_idler_llc(struct task_struct *p, int pref_cpu, int try_cpu,
+ int sync, bool *idle)
{
int tnr_busy, tllc_size, pnr_busy, pllc_size;
struct sched_domain_shared *pref_sds, *try_sds;
@@ -5905,8 +5906,10 @@ static int wake_affine_idler_llc(struct task_struct *p, int pref_cpu, int try_cp
pllc_size = per_cpu(sd_llc_size, pref_cpu);
tllc_size = per_cpu(sd_llc_size, try_cpu);

- if (tnr_busy == tllc_size && pnr_busy == pllc_size)
+ if (tnr_busy == tllc_size && pnr_busy == pllc_size) {
+ *idle = false;
return nr_cpumask_bits;
+ }

diff = tnr_busy * pllc_size - pnr_busy * tllc_size;
if (diff > 0)
@@ -5918,7 +5921,7 @@ static int wake_affine_idler_llc(struct task_struct *p, int pref_cpu, int try_cp
}

static int wake_affine(struct sched_domain *sd, struct task_struct *p,
- int this_cpu, int prev_cpu, int sync)
+ int this_cpu, int prev_cpu, int sync, bool *idle)
{
bool share_caches = cpus_share_cache(prev_cpu, this_cpu);
int target = nr_cpumask_bits;
@@ -5927,7 +5930,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
target = wake_affine_idle(this_cpu, prev_cpu);

else if (sched_feat(WA_IDLER_LLC) && !share_caches)
- target = wake_affine_idler_llc(p, this_cpu, prev_cpu, sync);
+ target = wake_affine_idler_llc(p, this_cpu, prev_cpu, sync, idle);

if (sched_feat(WA_WEIGHT) && target == nr_cpumask_bits)
target = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);
@@ -6343,7 +6346,7 @@ static inline bool asym_fits_capacity(int task_util, int cpu)
/*
* Try and locate an idle core/thread in the LLC cache domain.
*/
-static int select_idle_sibling(struct task_struct *p, int prev, int target)
+static int select_idle_sibling(struct task_struct *p, int prev, int target, bool idle)
{
struct sched_domain *sd;
unsigned long task_util;
@@ -6420,6 +6423,9 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
}
}

+ if (!idle)
+ return target;
+
sd = rcu_dereference(per_cpu(sd_llc, target));
if (!sd)
return target;
@@ -6828,6 +6834,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
int want_affine = 0;
/* SD_flags and WF_flags share the first nibble */
int sd_flag = wake_flags & 0xF;
+ bool idle = true;

if (wake_flags & WF_TTWU) {
record_wakee(p);
@@ -6851,7 +6858,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
if (cpu != prev_cpu)
- new_cpu = wake_affine(tmp, p, cpu, prev_cpu, sync);
+ new_cpu = wake_affine(tmp, p, cpu, prev_cpu, sync, &idle);

sd = NULL; /* Prefer wake_affine over balance flags */
break;
@@ -6868,7 +6875,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
} else if (wake_flags & WF_TTWU) { /* XXX always ? */
/* Fast path */
- new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
+ new_cpu = select_idle_sibling(p, prev_cpu, new_cpu, idle);

if (want_affine)
current->recent_used_cpu = cpu;
--
2.18.2