[PATCH v3 12/19] sched,rt: Use cpumask_any*_distribute()

From: Peter Zijlstra
Date: Thu Oct 15 2020 - 07:10:46 EST


Replace a bunch of cpumask_any*() instances with
cpumask_any*_distribute(), by injecting this little bit of random in
cpu selection, we reduce the chance two competing balance operations
working off the same lowest_mask pick the same CPU.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
include/linux/cpumask.h | 6 ++++++
kernel/sched/deadline.c | 6 +++---
kernel/sched/rt.c | 6 +++---
lib/cpumask.c | 18 ++++++++++++++++++
4 files changed, 30 insertions(+), 6 deletions(-)

--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -199,6 +199,11 @@ static inline int cpumask_any_and_distri
return cpumask_next_and(-1, src1p, src2p);
}

+static inline int cpumask_any_distribute(const struct cpumask *srcp)
+{
+ return cpumask_first(srcp);
+}
+
#define for_each_cpu(cpu, mask) \
for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
#define for_each_cpu_not(cpu, mask) \
@@ -252,6 +257,7 @@ int cpumask_any_but(const struct cpumask
unsigned int cpumask_local_spread(unsigned int i, int node);
int cpumask_any_and_distribute(const struct cpumask *src1p,
const struct cpumask *src2p);
+int cpumask_any_distribute(const struct cpumask *srcp);

/**
* for_each_cpu - iterate over every cpu in a mask
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1981,8 +1981,8 @@ static int find_later_rq(struct task_str
return this_cpu;
}

- best_cpu = cpumask_first_and(later_mask,
- sched_domain_span(sd));
+ best_cpu = cpumask_any_and_distribute(later_mask,
+ sched_domain_span(sd));
/*
* Last chance: if a CPU being in both later_mask
* and current sd span is valid, that becomes our
@@ -2004,7 +2004,7 @@ static int find_later_rq(struct task_str
if (this_cpu != -1)
return this_cpu;

- cpu = cpumask_any(later_mask);
+ cpu = cpumask_any_distribute(later_mask);
if (cpu < nr_cpu_ids)
return cpu;

--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1752,8 +1752,8 @@ static int find_lowest_rq(struct task_st
return this_cpu;
}

- best_cpu = cpumask_first_and(lowest_mask,
- sched_domain_span(sd));
+ best_cpu = cpumask_any_and_distribute(lowest_mask,
+ sched_domain_span(sd));
if (best_cpu < nr_cpu_ids) {
rcu_read_unlock();
return best_cpu;
@@ -1770,7 +1770,7 @@ static int find_lowest_rq(struct task_st
if (this_cpu != -1)
return this_cpu;

- cpu = cpumask_any(lowest_mask);
+ cpu = cpumask_any_distribute(lowest_mask);
if (cpu < nr_cpu_ids)
return cpu;

--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -267,3 +267,21 @@ int cpumask_any_and_distribute(const str
return next;
}
EXPORT_SYMBOL(cpumask_any_and_distribute);
+
+int cpumask_any_distribute(const struct cpumask *srcp)
+{
+ int next, prev;
+
+ /* NOTE: our first selection will skip 0. */
+ prev = __this_cpu_read(distribute_cpu_mask_prev);
+
+ next = cpumask_next(prev, srcp);
+ if (next >= nr_cpu_ids)
+ next = cpumask_first(srcp);
+
+ if (next < nr_cpu_ids)
+ __this_cpu_write(distribute_cpu_mask_prev, next);
+
+ return next;
+}
+EXPORT_SYMBOL(cpumask_any_distribute);