[tip:sched/core] sched/isolation: Move isolcpus= handling to the housekeeping code

From: tip-bot for Frederic Weisbecker
Date: Fri Oct 27 2017 - 08:10:32 EST


Commit-ID: edb9382175c3ebdced8ffdb3e0f20052ad9fdbe9
Gitweb: https://git.kernel.org/tip/edb9382175c3ebdced8ffdb3e0f20052ad9fdbe9
Author: Frederic Weisbecker <frederic@xxxxxxxxxx>
AuthorDate: Fri, 27 Oct 2017 04:42:37 +0200
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Fri, 27 Oct 2017 09:55:30 +0200

sched/isolation: Move isolcpus= handling to the housekeeping code

We want to centralize the isolation features, to be done by the housekeeping
subsystem and scheduler domain isolation is a significant part of it.

No intended behaviour change, we just reuse the housekeeping cpumask
and core code.

Signed-off-by: Frederic Weisbecker <frederic@xxxxxxxxxx>
Acked-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Chris Metcalf <cmetcalf@xxxxxxxxxxxx>
Cc: Christoph Lameter <cl@xxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Luiz Capitulino <lcapitulino@xxxxxxxxxx>
Cc: Mike Galbraith <efault@xxxxxx>
Cc: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxx>
Cc: Wanpeng Li <kernellwp@xxxxxxxxx>
Link: http://lkml.kernel.org/r/1509072159-31808-11-git-send-email-frederic@xxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
drivers/base/cpu.c | 11 ++++++-
include/linux/sched.h | 2 --
include/linux/sched/isolation.h | 1 +
kernel/cgroup/cpuset.c | 15 ++++------
kernel/sched/core.c | 16 +----------
kernel/sched/isolation.c | 63 ++++++++++++++++++++++++++++++++---------
kernel/sched/topology.c | 24 ++++------------
7 files changed, 73 insertions(+), 59 deletions(-)

diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 321cd7b..a73ab95 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -18,6 +18,7 @@
#include <linux/cpufeature.h>
#include <linux/tick.h>
#include <linux/pm_qos.h>
+#include <linux/sched/isolation.h>

#include "base.h"

@@ -271,8 +272,16 @@ static ssize_t print_cpus_isolated(struct device *dev,
struct device_attribute *attr, char *buf)
{
int n = 0, len = PAGE_SIZE-2;
+ cpumask_var_t isolated;

- n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(cpu_isolated_map));
+ if (!alloc_cpumask_var(&isolated, GFP_KERNEL))
+ return -ENOMEM;
+
+ cpumask_andnot(isolated, cpu_possible_mask,
+ housekeeping_cpumask(HK_FLAG_DOMAIN));
+ n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(isolated));
+
+ free_cpumask_var(isolated);

return n;
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0f897df..1b0cc0d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -165,8 +165,6 @@ struct task_group;
/* Task command name length: */
#define TASK_COMM_LEN 16

-extern cpumask_var_t cpu_isolated_map;
-
extern void scheduler_tick(void);

#define MAX_SCHEDULE_TIMEOUT LONG_MAX
diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index e53cfa9..d849431 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -11,6 +11,7 @@ enum hk_flags {
HK_FLAG_MISC = (1 << 2),
HK_FLAG_SCHED = (1 << 3),
HK_FLAG_TICK = (1 << 4),
+ HK_FLAG_DOMAIN = (1 << 5),
};

#ifdef CONFIG_CPU_ISOLATION
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 4657e29..f7efa7b 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -57,7 +57,7 @@
#include <linux/backing-dev.h>
#include <linux/sort.h>
#include <linux/oom.h>
-
+#include <linux/sched/isolation.h>
#include <linux/uaccess.h>
#include <linux/atomic.h>
#include <linux/mutex.h>
@@ -656,7 +656,6 @@ static int generate_sched_domains(cpumask_var_t **domains,
int csn; /* how many cpuset ptrs in csa so far */
int i, j, k; /* indices for partition finding loops */
cpumask_var_t *doms; /* resulting partition; i.e. sched domains */
- cpumask_var_t non_isolated_cpus; /* load balanced CPUs */
struct sched_domain_attr *dattr; /* attributes for custom domains */
int ndoms = 0; /* number of sched domains in result */
int nslot; /* next empty doms[] struct cpumask slot */
@@ -666,10 +665,6 @@ static int generate_sched_domains(cpumask_var_t **domains,
dattr = NULL;
csa = NULL;

- if (!alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL))
- goto done;
- cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
-
/* Special case for the 99% of systems with one, full, sched domain */
if (is_sched_load_balance(&top_cpuset)) {
ndoms = 1;
@@ -683,7 +678,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
update_domain_attr_tree(dattr, &top_cpuset);
}
cpumask_and(doms[0], top_cpuset.effective_cpus,
- non_isolated_cpus);
+ housekeeping_cpumask(HK_FLAG_DOMAIN));

goto done;
}
@@ -707,7 +702,8 @@ static int generate_sched_domains(cpumask_var_t **domains,
*/
if (!cpumask_empty(cp->cpus_allowed) &&
!(is_sched_load_balance(cp) &&
- cpumask_intersects(cp->cpus_allowed, non_isolated_cpus)))
+ cpumask_intersects(cp->cpus_allowed,
+ housekeeping_cpumask(HK_FLAG_DOMAIN))))
continue;

if (is_sched_load_balance(cp))
@@ -789,7 +785,7 @@ restart:

if (apn == b->pn) {
cpumask_or(dp, dp, b->effective_cpus);
- cpumask_and(dp, dp, non_isolated_cpus);
+ cpumask_and(dp, dp, housekeeping_cpumask(HK_FLAG_DOMAIN));
if (dattr)
update_domain_attr_tree(dattr + nslot, b);

@@ -802,7 +798,6 @@ restart:
BUG_ON(nslot != ndoms);

done:
- free_cpumask_var(non_isolated_cpus);
kfree(csa);

/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2210c02..1a55c84 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -84,9 +84,6 @@ __read_mostly int scheduler_running;
*/
int sysctl_sched_rt_runtime = 950000;

-/* CPUs with isolated domains */
-cpumask_var_t cpu_isolated_map;
-
/*
* __task_rq_lock - lock the rq @p resides on.
*/
@@ -5735,10 +5732,6 @@ static inline void sched_init_smt(void) { }

void __init sched_init_smp(void)
{
- cpumask_var_t non_isolated_cpus;
-
- alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
-
sched_init_numa();

/*
@@ -5748,16 +5741,12 @@ void __init sched_init_smp(void)
*/
mutex_lock(&sched_domains_mutex);
sched_init_domains(cpu_active_mask);
- cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
- if (cpumask_empty(non_isolated_cpus))
- cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
mutex_unlock(&sched_domains_mutex);

/* Move init over to a non-isolated CPU */
- if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
+ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
BUG();
sched_init_granularity();
- free_cpumask_var(non_isolated_cpus);

init_sched_rt_class();
init_sched_dl_class();
@@ -5961,9 +5950,6 @@ void __init sched_init(void)
calc_load_update = jiffies + LOAD_FREQ;

#ifdef CONFIG_SMP
- /* May be allocated at isolcpus cmdline parse time */
- if (cpu_isolated_map == NULL)
- zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
idle_thread_set_boot_cpu();
set_cpu_rq_start_time(smp_processor_id());
#endif
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 1f61e44..8f666bc 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -63,32 +63,69 @@ void __init housekeeping_init(void)
WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
}

-#ifdef CONFIG_NO_HZ_FULL
-static int __init housekeeping_nohz_full_setup(char *str)
+static int __init housekeeping_setup(char *str, enum hk_flags flags)
{
cpumask_var_t non_housekeeping_mask;
+ int err;

alloc_bootmem_cpumask_var(&non_housekeeping_mask);
- if (cpulist_parse(str, non_housekeeping_mask) < 0) {
- pr_warn("Housekeeping: Incorrect nohz_full cpumask\n");
+ err = cpulist_parse(str, non_housekeeping_mask);
+ if (err < 0 || cpumask_last(non_housekeeping_mask) >= nr_cpu_ids) {
+ pr_warn("Housekeeping: nohz_full= or isolcpus= incorrect CPU range\n");
free_bootmem_cpumask_var(non_housekeeping_mask);
return 0;
}

- alloc_bootmem_cpumask_var(&housekeeping_mask);
- cpumask_andnot(housekeeping_mask, cpu_possible_mask, non_housekeeping_mask);
-
- if (cpumask_empty(housekeeping_mask))
- cpumask_set_cpu(smp_processor_id(), housekeeping_mask);
+ if (!housekeeping_flags) {
+ alloc_bootmem_cpumask_var(&housekeeping_mask);
+ cpumask_andnot(housekeeping_mask,
+ cpu_possible_mask, non_housekeeping_mask);
+ if (cpumask_empty(housekeeping_mask))
+ cpumask_set_cpu(smp_processor_id(), housekeeping_mask);
+ } else {
+ cpumask_var_t tmp;
+
+ alloc_bootmem_cpumask_var(&tmp);
+ cpumask_andnot(tmp, cpu_possible_mask, non_housekeeping_mask);
+ if (!cpumask_equal(tmp, housekeeping_mask)) {
+ pr_warn("Housekeeping: nohz_full= must match isolcpus=\n");
+ free_bootmem_cpumask_var(tmp);
+ free_bootmem_cpumask_var(non_housekeeping_mask);
+ return 0;
+ }
+ free_bootmem_cpumask_var(tmp);
+ }

- housekeeping_flags = HK_FLAG_TICK | HK_FLAG_TIMER |
- HK_FLAG_RCU | HK_FLAG_MISC;
+ if ((flags & HK_FLAG_TICK) && !(housekeeping_flags & HK_FLAG_TICK)) {
+ if (IS_ENABLED(CONFIG_NO_HZ_FULL)) {
+ tick_nohz_full_setup(non_housekeeping_mask);
+ } else {
+ pr_warn("Housekeeping: nohz unsupported."
+ " Build with CONFIG_NO_HZ_FULL\n");
+ free_bootmem_cpumask_var(non_housekeeping_mask);
+ return 0;
+ }
+ }

- tick_nohz_full_setup(non_housekeeping_mask);
+ housekeeping_flags |= flags;

free_bootmem_cpumask_var(non_housekeeping_mask);

return 1;
}
+
+static int __init housekeeping_nohz_full_setup(char *str)
+{
+ unsigned int flags;
+
+ flags = HK_FLAG_TICK | HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC;
+
+ return housekeeping_setup(str, flags);
+}
__setup("nohz_full=", housekeeping_nohz_full_setup);
-#endif
+
+static int __init housekeeping_isolcpus_setup(char *str)
+{
+ return housekeeping_setup(str, HK_FLAG_DOMAIN);
+}
+__setup("isolcpus=", housekeeping_isolcpus_setup);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index e3d31b0..2e6b912 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -3,6 +3,7 @@
*/
#include <linux/sched.h>
#include <linux/mutex.h>
+#include <linux/sched/isolation.h>

#include "sched.h"

@@ -469,21 +470,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
update_top_cache_domain(cpu);
}

-/* Setup the mask of CPUs configured for isolated domains */
-static int __init isolated_cpu_setup(char *str)
-{
- int ret;
-
- alloc_bootmem_cpumask_var(&cpu_isolated_map);
- ret = cpulist_parse(str, cpu_isolated_map);
- if (ret || cpumask_last(cpu_isolated_map) >= nr_cpu_ids) {
- pr_err("sched: Error, all isolcpus= values must be between 0 and %u - ignoring them.\n", nr_cpu_ids-1);
- return 0;
- }
- return 1;
-}
-__setup("isolcpus=", isolated_cpu_setup);
-
struct s_data {
struct sched_domain ** __percpu sd;
struct root_domain *rd;
@@ -1792,7 +1778,7 @@ int sched_init_domains(const struct cpumask *cpu_map)
doms_cur = alloc_sched_domains(ndoms_cur);
if (!doms_cur)
doms_cur = &fallback_doms;
- cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
+ cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_FLAG_DOMAIN));
err = build_sched_domains(doms_cur[0], NULL);
register_sched_domain_sysctl();

@@ -1875,7 +1861,8 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
doms_new = alloc_sched_domains(1);
if (doms_new) {
n = 1;
- cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
+ cpumask_and(doms_new[0], cpu_active_mask,
+ housekeeping_cpumask(HK_FLAG_DOMAIN));
}
} else {
n = ndoms_new;
@@ -1898,7 +1885,8 @@ match1:
if (!doms_new) {
n = 0;
doms_new = &fallback_doms;
- cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
+ cpumask_and(doms_new[0], cpu_active_mask,
+ housekeeping_cpumask(HK_FLAG_DOMAIN));
}

/* Build new domains: */