Re: [PATCH 3/4] sched: WARN when migrating to an offline CPU

From: Peter Zijlstra
Date: Fri Sep 29 2017 - 07:11:35 EST


On Thu, Sep 28, 2017 at 01:42:49PM +0200, Peter Zijlstra wrote:
> On Thu, Sep 28, 2017 at 11:03:10AM +0000, Levin, Alexander (Sasha Levin) wrote:
> > On Thu, Sep 28, 2017 at 12:35:41PM +0200, Peter Zijlstra wrote:
> > >On Thu, Sep 28, 2017 at 02:14:15AM -0700, Sasha Levin wrote:
>
> > >> [2035565360.446794] Unregister pv shared memory for cpu 2
> > >> [2035565360.467930] numa_remove_cpu cpu 2 node 2: mask now 6
> > >> [2035565360.471431] ------------[ cut here ]------------
> > >> [2035565360.472548] WARNING: CPU: 2 PID: 24 at > kernel/sched/core.c:1178 set_task_cpu (kernel/sched/core.c:1157)
> > >> [2035565360.473840] Modules linked in:
> > >> [2035565360.474632] CPU: 2 PID: 24 Comm: migration/2 Not tainted > 4.14.0-rc2-next-20170927+ #252
> > >
> > >Urgh, weird. That really shouldn't happen. Can you easily reproduce?
> >
> > Looks like yes. Seems like it's enough to stress CPU hotplug + trinity.
>
> OK, I'll see if I can reproduce building kernels and hotplug stress.
> Otherwise I'll try and cook up some debug patches for you.

I can't seem to trigger :-(

Can you please run with the below patch and:

# echo 1 > /proc/sys/kernel/traceoff_on_warning

---
kernel/sched/core.c | 3 +++
kernel/sched/fair.c | 10 ++++++++++
2 files changed, 13 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 18a6966567da..c613f7756981 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5592,6 +5592,7 @@ int sched_cpu_activate(unsigned int cpu)
struct rq_flags rf;

set_cpu_active(cpu, true);
+ trace_printk("active: %d mask: %*pbl\n", cpu, cpumask_pr_args(cpu_active_mask));

if (sched_smp_initialized) {
sched_domains_numa_masks_set(cpu);
@@ -5624,6 +5625,7 @@ int sched_cpu_deactivate(unsigned int cpu)
int ret;

set_cpu_active(cpu, false);
+ trace_printk("not-active: %d mask: %*pbl\n", cpu, cpumask_pr_args(cpu_active_mask));
/*
* We've cleared cpu_active_mask, wait for all preempt-disabled and RCU
* users of this state to go away such that all new such users will
@@ -5632,6 +5634,7 @@ int sched_cpu_deactivate(unsigned int cpu)
* Do sync before park smpboot threads to take care the rcu boost case.
*/
synchronize_rcu_mult(call_rcu, call_rcu_sched);
+ trace_printk("rcu-sync: %d\n", cpu);

if (!sched_smp_initialized)
return 0;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 70ba32e08a23..cb8f43a59f33 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8150,8 +8150,11 @@ static int load_balance(int this_cpu, struct rq *this_rq,
.tasks = LIST_HEAD_INIT(env.tasks),
};

+
cpumask_and(cpus, sched_domain_span(sd), cpu_active_mask);

+ trace_printk("dst_cpu: %d cpus: %*pbl\n", this_cpu, cpumask_pr_args(cpus));
+
schedstat_inc(sd->lb_count[idle]);

redo:
@@ -8248,6 +8251,9 @@ static int load_balance(int this_cpu, struct rq *this_rq,

env.dst_rq = cpu_rq(env.new_dst_cpu);
env.dst_cpu = env.new_dst_cpu;
+
+ trace_printk("dst_cpu: %d\n", env.dst_cpu);
+
env.flags &= ~LBF_DST_PINNED;
env.loop = 0;
env.loop_break = sched_nr_migrate_break;
@@ -8465,6 +8471,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)

update_blocked_averages(this_cpu);
rcu_read_lock();
+ trace_printk("rcu-read-lock: %d\n", this_cpu);
for_each_domain(this_cpu, sd) {
int continue_balancing = 1;
u64 t0, domain_cost;
@@ -8500,6 +8507,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
if (pulled_task || this_rq->nr_running > 0)
break;
}
+ trace_printk("rcu-read-unlock: %d\n", this_cpu);
rcu_read_unlock();

raw_spin_lock(&this_rq->lock);
@@ -8790,6 +8798,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
update_blocked_averages(cpu);

rcu_read_lock();
+ trace_printk("rcu-read-lock: %d\n", cpu);
for_each_domain(cpu, sd) {
/*
* Decay the newidle max times here because this is a regular
@@ -8853,6 +8862,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
rq->max_idle_balance_cost =
max((u64)sysctl_sched_migration_cost, max_cost);
}
+ trace_printk("rcu-read-unlock: %d\n", cpu);
rcu_read_unlock();

/*