[GIT PULL] scheduler fixes

From: Ingo Molnar
Date: Thu Sep 17 2015 - 04:06:26 EST


Linus,

Please pull the latest sched-urgent-for-linus git tree from:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git sched-urgent-for-linus

# HEAD: 5473e0cc37c03c576adbda7591a6cc8e37c1bb7f sched: 'Annotate' migrate_tasks()

A migrate_tasks() locking fix, and a late-coming nohz change plus a nohz debug
check.

Thanks,

Ingo

------------------>
Frederic Weisbecker (1):
nohz: Assert existing housekeepers when nohz full enabled

Vatika Harlalka (1):
nohz: Affine unpinned timers to housekeepers

Wanpeng Li (1):
sched: 'Annotate' migrate_tasks()


include/linux/tick.h | 9 +++++++++
kernel/sched/core.c | 36 +++++++++++++++++++++++++++++++-----
kernel/time/tick-sched.c | 15 +++++++++++----
3 files changed, 51 insertions(+), 9 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 48d901f83f92..e312219ff823 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -147,11 +147,20 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask)
cpumask_or(mask, mask, tick_nohz_full_mask);
}

+static inline int housekeeping_any_cpu(void)
+{
+ return cpumask_any_and(housekeeping_mask, cpu_online_mask);
+}
+
extern void tick_nohz_full_kick(void);
extern void tick_nohz_full_kick_cpu(int cpu);
extern void tick_nohz_full_kick_all(void);
extern void __tick_nohz_task_switch(void);
#else
+static inline int housekeeping_any_cpu(void)
+{
+ return smp_processor_id();
+}
static inline bool tick_nohz_full_enabled(void) { return false; }
static inline bool tick_nohz_full_cpu(int cpu) { return false; }
static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8b864ecee0e1..9b786704d34b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -623,18 +623,21 @@ int get_nohz_timer_target(void)
int i, cpu = smp_processor_id();
struct sched_domain *sd;

- if (!idle_cpu(cpu))
+ if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu))
return cpu;

rcu_read_lock();
for_each_domain(cpu, sd) {
for_each_cpu(i, sched_domain_span(sd)) {
- if (!idle_cpu(i)) {
+ if (!idle_cpu(i) && is_housekeeping_cpu(cpu)) {
cpu = i;
goto unlock;
}
}
}
+
+ if (!is_housekeeping_cpu(cpu))
+ cpu = housekeeping_any_cpu();
unlock:
rcu_read_unlock();
return cpu;
@@ -5180,24 +5183,47 @@ static void migrate_tasks(struct rq *dead_rq)
break;

/*
- * Ensure rq->lock covers the entire task selection
- * until the migration.
+ * pick_next_task assumes pinned rq->lock.
*/
lockdep_pin_lock(&rq->lock);
next = pick_next_task(rq, &fake_task);
BUG_ON(!next);
next->sched_class->put_prev_task(rq, next);

+ /*
+ * Rules for changing task_struct::cpus_allowed are holding
+ * both pi_lock and rq->lock, such that holding either
+ * stabilizes the mask.
+ *
+ * Drop rq->lock is not quite as disastrous as it usually is
+ * because !cpu_active at this point, which means load-balance
+ * will not interfere. Also, stop-machine.
+ */
+ lockdep_unpin_lock(&rq->lock);
+ raw_spin_unlock(&rq->lock);
+ raw_spin_lock(&next->pi_lock);
+ raw_spin_lock(&rq->lock);
+
+ /*
+ * Since we're inside stop-machine, _nothing_ should have
+ * changed the task, WARN if weird stuff happened, because in
+ * that case the above rq->lock drop is a fail too.
+ */
+ if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
+ raw_spin_unlock(&next->pi_lock);
+ continue;
+ }
+
/* Find suitable destination for @next, with force if needed. */
dest_cpu = select_fallback_rq(dead_rq->cpu, next);

- lockdep_unpin_lock(&rq->lock);
rq = __migrate_task(rq, next, dest_cpu);
if (rq != dead_rq) {
raw_spin_unlock(&rq->lock);
rq = dead_rq;
raw_spin_lock(&rq->lock);
}
+ raw_spin_unlock(&next->pi_lock);
}

rq->stop = stop;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3319e16f31e5..7c7ec4515983 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -290,16 +290,17 @@ static int __init tick_nohz_full_setup(char *str)
__setup("nohz_full=", tick_nohz_full_setup);

static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
+ unsigned long action,
+ void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;

switch (action & ~CPU_TASKS_FROZEN) {
case CPU_DOWN_PREPARE:
/*
- * If we handle the timekeeping duty for full dynticks CPUs,
- * we can't safely shutdown that CPU.
+ * The boot CPU handles housekeeping duty (unbound timers,
+ * workqueues, timekeeping, ...) on behalf of full dynticks
+ * CPUs. It must remain online when nohz full is enabled.
*/
if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
return NOTIFY_BAD;
@@ -370,6 +371,12 @@ void __init tick_nohz_init(void)
cpu_notifier(tick_nohz_cpu_down_callback, 0);
pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
cpumask_pr_args(tick_nohz_full_mask));
+
+ /*
+ * We need at least one CPU to handle housekeeping work such
+ * as timekeeping, unbound timers, workqueues, ...
+ */
+ WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
}
#endif

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/