Re: hotplug lockdep splat (tip)

From: Peter Zijlstra
Date: Mon Sep 04 2017 - 11:37:38 EST


On Mon, Sep 04, 2017 at 03:27:07PM +0200, Mike Galbraith wrote:
> Well, flavor of gripe changed.

>
> [ 164.114290] ======================================================
> [ 164.115146] WARNING: possible circular locking dependency detected
> [ 164.115751] 4.13.0.g90abd70-tip-lockdep #4 Tainted: G E
> [ 164.116348] ------------------------------------------------------
> [ 164.116919] cpuhp/0/12 is trying to acquire lock:
> [ 164.117381] (cpuhp_state){+.+.}, at: [<ffffffff8108e4aa>] cpuhp_thread_fun+0x2a/0x160
> [ 164.118097]
> but now in release context of a crosslock acquired at the following:
> [ 164.118845] ((complete)&per_cpu_ptr(&cpuhp_state, i)->done#2){+.+.}, at: [<ffffffff8108e71f>] cpuhp_issue_call+0x13f/0x170
> [ 164.119789]
> which lock already depends on the new lock.
>
> [ 164.120483]
> the existing dependency chain (in reverse order) is:
> [ 164.121121]
> -> #2 ((complete)&per_cpu_ptr(&cpuhp_state, i)->done#2){+.+.}:

> [ 164.122741] wait_for_completion+0x53/0x1a0
> [ 164.123181] takedown_cpu+0x8a/0xf0

So this is:

  takedown_cpu()
    irq_lock_sparse
    wait_for_completion(st->done);

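For reference, the relevant ordering in takedown_cpu() is roughly the
following (a paraphrased sketch of kernel/cpu.c, not the verbatim code):

  static int takedown_cpu(unsigned int cpu)
  {
          struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

          /* Block irq alloc/free while the dying CPU is reorganized */
          irq_lock_sparse();

          /* ... take the CPU down via stop_machine ... */

          /* Wait for the hotplug thread to signal it is done */
          wait_for_completion(&st->done);

          irq_unlock_sparse();
          return 0;
  }

That is, st->done is waited on with sparse_irq_lock held, which is what
links sparse_irq_lock to the st->done completion.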

> -> #1 (sparse_irq_lock){+.+.}:

> [ 164.131664] irq_lock_sparse+0x17/0x20
> [ 164.132039] irq_affinity_online_cpu+0x18/0xd0
> [ 164.132463] cpuhp_invoke_callback+0x1f6/0x830
> [ 164.132928] cpuhp_up_callbacks+0x37/0xb0
> [ 164.133487] cpuhp_thread_fun+0x14f/0x160

This is:

  cpuhp_state
    irq_lock_sparse

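irq_affinity_online_cpu() runs as a hotplug callback, i.e. with the
cpuhp_state map already held; roughly (paraphrased from
kernel/irq/cpuhotplug.c, details elided):

  /* invoked via cpuhp_invoke_callback(), so under cpuhp_state */
  int irq_affinity_online_cpu(unsigned int cpu)
  {
          irq_lock_sparse();

          /* ... walk the irq descriptors and restore the managed
           * affinities for this CPU ... */

          irq_unlock_sparse();
          return 0;
  }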

> [ 164.148780] complete+0x29/0x60
> [ 164.149064] cpuhp_thread_fun+0xa1/0x160

And this is I think:

  cpuhp_thread_fun()
    cpuhp_state
    complete(st->done)

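Both the #1 and #0 sides of cpuhp_state come out of cpuhp_thread_fun();
sketched (again paraphrased, with the error handling and state juggling
left out):

  static void cpuhp_thread_fun(unsigned int cpu)
  {
          struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

          lock_map_acquire(&cpuhp_state_lock_map);

          /* cpuhp_invoke_callback() and friends; this is where
           * irq_lock_sparse() gets taken for #1 above */

          lock_map_release(&cpuhp_state_lock_map);

          /* crossrelease ties this to the cpuhp_state acquire above */
          complete(&st->done);
  }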

Which then spells deadlock like:

  CPU0                    CPU1                    CPU2

  cpuhp_state
                          irq_lock_sparse()
                                                  cpuhp_state
                          wait_for_completion()
  irq_lock_sparse()
                                                  complete()

or something, because CPU0 needs to wait for CPU1's irq_lock_sparse
which will wait for CPU2's completion, which in turn waits for CPU0's
cpuhp_state.


Now, this again mixes up and down chains, but now on cpuhp_state.


But I cannot reproduce this.. I've let:

  while :; do
    echo 0 > /sys/devices/system/cpu/cpu1/online ;
    echo 1 > /sys/devices/system/cpu/cpu1/online ;
  done

run for a while, but nothing... :/

Doth the beloweth make nice?


---
kernel/cpu.c | 47 +++++++++++++++++++++++++++++++++++++++--------
1 file changed, 39 insertions(+), 8 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index acf5308fad51..2ab324d7ff7b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -67,11 +67,15 @@ struct cpuhp_cpu_state {
static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);

#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
-static struct lock_class_key cpuhp_state_key;
+static struct lock_class_key cpuhp_state_up_key;
+#ifdef CONFIG_HOTPLUG_CPU
+static struct lock_class_key cpuhp_state_down_key;
+#endif
static struct lockdep_map cpuhp_state_lock_map =
- STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key);
+ STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_key);
#endif

+
/**
* cpuhp_step - Hotplug state machine step
* @name: Name of the step
@@ -533,6 +537,28 @@ void __init cpuhp_threads_init(void)
kthread_unpark(this_cpu_read(cpuhp_state.thread));
}

+/*
+ * _cpu_down() and _cpu_up() have different lock ordering wrt st->done, but
+ * because these two functions are globally serialized and st->done is private
+ * to them, we can simply re-init st->done for each of them to separate the
+ * lock chains.
+ *
+ * Must be macro to ensure we have two different call sites.
+ */
+#ifdef CONFIG_LOCKDEP
+#define lockdep_reinit_st_done() \
+do { \
+ int __cpu; \
+ for_each_possible_cpu(__cpu) { \
+ struct cpuhp_cpu_state *st = \
+ per_cpu_ptr(&cpuhp_state, __cpu); \
+ init_completion(&st->done); \
+ } \
+} while(0)
+#else
+#define lockdep_reinit_st_done()
+#endif
+
#ifdef CONFIG_HOTPLUG_CPU
/**
* clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
@@ -676,12 +702,6 @@ void cpuhp_report_idle_dead(void)
cpuhp_complete_idle_dead, st, 0);
}

-#else
-#define takedown_cpu NULL
-#endif
-
-#ifdef CONFIG_HOTPLUG_CPU
-
/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
enum cpuhp_state target)
@@ -697,6 +717,10 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,

cpus_write_lock();

+ lockdep_reinit_st_done();
+ lockdep_init_map(&cpuhp_state_lock_map, "cpuhp_state-down",
+ &cpuhp_state_down_key, 0);
+
cpuhp_tasks_frozen = tasks_frozen;

prev_state = st->state;
@@ -759,6 +783,9 @@ int cpu_down(unsigned int cpu)
return do_cpu_down(cpu, CPUHP_OFFLINE);
}
EXPORT_SYMBOL(cpu_down);
+
+#else
+#define takedown_cpu NULL
#endif /*CONFIG_HOTPLUG_CPU*/

/**
@@ -806,6 +833,10 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)

cpus_write_lock();

+ lockdep_reinit_st_done();
+ lockdep_init_map(&cpuhp_state_lock_map, "cpuhp_state-up",
+ &cpuhp_state_up_key, 0);
+
if (!cpu_present(cpu)) {
ret = -EINVAL;
goto out;