[patch 05/15] sched: Consolidate the notifier maze

From: Thomas Gleixner
Date: Thu Mar 10 2016 - 07:08:34 EST


We can maintain the ordering of the scheduler cpu hotplug functionality nicely
in one notifier. Get rid of the maze.

Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
include/linux/cpu.h | 12 +--
kernel/sched/core.c | 174 ++++++++++++++++++++--------------------------------
2 files changed, 73 insertions(+), 113 deletions(-)

--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -61,19 +61,15 @@ struct notifier_block;
enum {
/*
* SCHED_ACTIVE marks a cpu which is coming up active during
- * CPU_ONLINE and CPU_DOWN_FAILED and must be the first
- * notifier. CPUSET_ACTIVE adjusts cpuset according to
- * cpu_active mask right after SCHED_ACTIVE. During
- * CPU_DOWN_PREPARE, SCHED_INACTIVE and CPUSET_INACTIVE are
- * ordered in the similar way.
+ * CPU_ONLINE and CPU_DOWN_FAILED and must be the first notifier. It
+ * also adjusts cpuset according to cpu_active mask right after activating
+ * the cpu. During CPU_DOWN_PREPARE, SCHED_INACTIVE reverses the operation.
*
* This ordering guarantees consistent cpu_active mask and
* migration behavior to all cpu notifiers.
*/
CPU_PRI_SCHED_ACTIVE = INT_MAX,
- CPU_PRI_CPUSET_ACTIVE = INT_MAX - 1,
- CPU_PRI_SCHED_INACTIVE = INT_MIN + 1,
- CPU_PRI_CPUSET_INACTIVE = INT_MIN,
+ CPU_PRI_SCHED_INACTIVE = INT_MIN,

/* migration should happen before other stuff but after perf */
CPU_PRI_PERF = 20,
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5720,39 +5720,6 @@ static void set_cpu_rq_start_time(unsign
rq->age_stamp = sched_clock_cpu(cpu);
}

-static int sched_cpu_active(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- int cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_DOWN_FAILED:
- set_cpu_active(cpu, true);
- return NOTIFY_OK;
-
- default:
- return NOTIFY_DONE;
- }
-}
-
-static int sched_cpu_inactive(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_DOWN_PREPARE:
- set_cpu_active((long)hcpu, false);
- return NOTIFY_OK;
- default:
- return NOTIFY_DONE;
- }
-}
-
-int sched_cpu_starting(unsigned int cpu)
-{
- set_cpu_rq_start_time(cpu);
- return 0;
-}
-
static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */

#ifdef CONFIG_SCHED_DEBUG
@@ -6895,10 +6862,13 @@ static void sched_init_numa(void)
init_numa_topology_type();
}

-static void sched_domains_numa_masks_set(int cpu)
+static void sched_domains_numa_masks_set(unsigned int cpu)
{
- int i, j;
int node = cpu_to_node(cpu);
+ int i, j;
+
+ if (!sched_smp_initialized)
+ return;

for (i = 0; i < sched_domains_numa_levels; i++) {
for (j = 0; j < nr_node_ids; j++) {
@@ -6908,54 +6878,23 @@ static void sched_domains_numa_masks_set
}
}

-static void sched_domains_numa_masks_clear(int cpu)
+static void sched_domains_numa_masks_clear(unsigned int cpu)
{
int i, j;
+
+ if (!sched_smp_initialized)
+ return;
+
for (i = 0; i < sched_domains_numa_levels; i++) {
for (j = 0; j < nr_node_ids; j++)
cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
}
}

-/*
- * Update sched_domains_numa_masks[level][node] array when new cpus
- * are onlined.
- */
-static int sched_domains_numa_masks_update(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
-{
- int cpu = (long)hcpu;
-
- if (!sched_smp_initialized)
- return NOTIFY_DONE;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- sched_domains_numa_masks_set(cpu);
- break;
-
- case CPU_DEAD:
- sched_domains_numa_masks_clear(cpu);
- break;
-
- default:
- return NOTIFY_DONE;
- }
-
- return NOTIFY_OK;
-}
#else
-static inline void sched_init_numa(void)
-{
-}
-
-static int sched_domains_numa_masks_update(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
-{
- return 0;
-}
+static inline void sched_init_numa(void) { }
+static void sched_domains_numa_masks_set(unsigned int cpu) { }
+static void sched_domains_numa_masks_clear(unsigned int cpu) { }
#endif /* CONFIG_NUMA */

static int __sdt_alloc(const struct cpumask *cpu_map)
@@ -7345,16 +7284,12 @@ static int num_cpus_frozen; /* used to m
* If we come here as part of a suspend/resume, don't touch cpusets because we
* want to restore it back to its original state upon resume anyway.
*/
-static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
- void *hcpu)
+static void cpuset_cpu_active(bool frozen)
{
if (!sched_smp_initialized)
- return NOTIFY_DONE;
-
- switch (action) {
- case CPU_ONLINE_FROZEN:
- case CPU_DOWN_FAILED_FROZEN:
+ return;

+ if (frozen) {
/*
* num_cpus_frozen tracks how many CPUs are involved in suspend
* resume sequence. As long as this is not the last online
@@ -7364,38 +7299,28 @@ static int cpuset_cpu_active(struct noti
num_cpus_frozen--;
if (likely(num_cpus_frozen)) {
partition_sched_domains(1, NULL, NULL);
- break;
+ return;
}
-
/*
* This is the last CPU online operation. So fall through and
* restore the original sched domains by considering the
* cpuset configurations.
*/
-
- case CPU_ONLINE:
- cpuset_update_active_cpus(true);
- break;
- default:
- return NOTIFY_DONE;
}
- return NOTIFY_OK;
+ cpuset_update_active_cpus(true);
}

-static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
- void *hcpu)
+static int cpuset_cpu_inactive(unsigned int cpu, bool frozen)
{
unsigned long flags;
- long cpu = (long)hcpu;
struct dl_bw *dl_b;
bool overflow;
int cpus;

if (!sched_smp_initialized)
- return NOTIFY_DONE;
+ return 0;

- switch (action) {
- case CPU_DOWN_PREPARE:
+ if (!frozen) {
rcu_read_lock_sched();
dl_b = dl_bw_of(cpu);

@@ -7407,17 +7332,60 @@ static int cpuset_cpu_inactive(struct no
rcu_read_unlock_sched();

if (overflow)
- return notifier_from_errno(-EBUSY);
+ return -EBUSY;
cpuset_update_active_cpus(false);
- break;
- case CPU_DOWN_PREPARE_FROZEN:
+ } else {
num_cpus_frozen++;
partition_sched_domains(1, NULL, NULL);
- break;
+ }
+ return 0;
+}
+
+static int sched_cpu_active(struct notifier_block *nfb, unsigned long action,
+ void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_DOWN_FAILED:
+ case CPU_ONLINE:
+ set_cpu_active(cpu, true);
+ sched_domains_numa_masks_set(cpu);
+ cpuset_cpu_active(action & CPU_TASKS_FROZEN);
+ return NOTIFY_OK;
default:
return NOTIFY_DONE;
}
- return NOTIFY_OK;
+}
+
+static int sched_cpu_inactive(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+ int ret;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_DOWN_PREPARE:
+ set_cpu_active(cpu, false);
+ ret = cpuset_cpu_inactive(cpu, action & CPU_TASKS_FROZEN);
+ if (ret) {
+ set_cpu_active(cpu, true);
+ return notifier_from_errno(ret);
+ }
+ return NOTIFY_OK;
+
+ case CPU_DEAD:
+ sched_domains_numa_masks_clear(cpu);
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+int sched_cpu_starting(unsigned int cpu)
+{
+ set_cpu_rq_start_time(cpu);
+ return 0;
}

void __init sched_init_smp(void)
@@ -7469,10 +7437,6 @@ static int __init migration_init(void)
cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);

- hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE);
- hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
- hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
-
return 0;
}
early_initcall(migration_init);