Re: [PATCH] sched/isolation: Prefer housekeeping cpu in local node

From: Peter Zijlstra
Date: Thu Jun 20 2019 - 08:43:27 EST


On Thu, Jun 20, 2019 at 07:36:54PM +0800, Wanpeng Li wrote:
> From: Wanpeng Li <wanpengli@xxxxxxxxxxx>
>
> In real product setups, there will be housekeeping CPUs in each node; it
> is preferable to do housekeeping from the local node, falling back to the
> global online cpumask if no housekeeping CPU can be found in the local node.
>
> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Frederic Weisbecker <frederic@xxxxxxxxxx>
> Signed-off-by: Wanpeng Li <wanpengli@xxxxxxxxxxx>
> ---
> kernel/sched/isolation.c | 11 +++++++++--
> 1 file changed, 9 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
> index 123ea07..9eb6805 100644
> --- a/kernel/sched/isolation.c
> +++ b/kernel/sched/isolation.c
> @@ -16,9 +16,16 @@ static unsigned int housekeeping_flags;
>
> int housekeeping_any_cpu(enum hk_flags flags)
> {
> + int cpu;
> +
> if (static_branch_unlikely(&housekeeping_overridden))
> - if (housekeeping_flags & flags)
> - return cpumask_any_and(housekeeping_mask, cpu_online_mask);
> + if (housekeeping_flags & flags) {
> + cpu = cpumask_any_and(housekeeping_mask, cpu_cpu_mask(smp_processor_id()));
> + if (cpu < nr_cpu_ids)
> + return cpu;
> + else
> + return cpumask_any_and(housekeeping_mask, cpu_online_mask);
> + }
> return smp_processor_id();
> }
> EXPORT_SYMBOL_GPL(housekeeping_any_cpu);

Why not something like so? IIRC there are more places that want this, but
I can't seem to remember quite where.

diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 123ea07a3f3b..1cceab5f094c 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -16,9 +16,15 @@ static unsigned int housekeeping_flags;

int housekeeping_any_cpu(enum hk_flags flags)
{
- if (static_branch_unlikely(&housekeeping_overridden))
- if (housekeeping_flags & flags)
+ if (static_branch_unlikely(&housekeeping_overridden)) {
+ if (housekeeping_flags & flags) {
+ cpu = sched_numa_find_closest(housekeeping_mask, smp_processor_id());
+ if (cpu < nr_cpu_ids)
+ return cpu;
+
return cpumask_any_and(housekeeping_mask, cpu_online_mask);
+ }
+ }
return smp_processor_id();
}
EXPORT_SYMBOL_GPL(housekeeping_any_cpu);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b08dee29ef5e..0db7431c7207 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1212,9 +1212,6 @@ enum numa_topology_type {
extern enum numa_topology_type sched_numa_topology_type;
extern int sched_max_numa_distance;
extern bool find_numa_distance(int distance);
-#endif
-
-#ifdef CONFIG_NUMA
extern void sched_init_numa(void);
extern void sched_domains_numa_masks_set(unsigned int cpu);
extern void sched_domains_numa_masks_clear(unsigned int cpu);
@@ -1224,6 +1221,8 @@ static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
#endif

+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
+
#ifdef CONFIG_NUMA_BALANCING
/* The regions in numa_faults array from task_struct */
enum numa_faults_stats {
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 63184cf0d0d7..408e94a6637c 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1726,6 +1726,20 @@ void sched_domains_numa_masks_clear(unsigned int cpu)

#endif /* CONFIG_NUMA */

+int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
+{
+#ifdef CONFIG_NUMA
+ int i, j = cpu_to_node(cpu);
+
+ for (i = 0; i < sched_domains_numa_levels; i++) {
+ cpu = cpumask_any_and(cpus, sched_domains_numa_mask[i][j]);
+ if (cpu < nr_cpu_ids)
+ return cpu;
+ }
+#endif
+ return nr_cpu_ids;
+}
+
static int __sdt_alloc(const struct cpumask *cpu_map)
{
struct sched_domain_topology_level *tl;