Re: [PATCH] rxrpc: struct mutex cannot be used for rxrpc_call::user_mutex

From: Peter Zijlstra
Date: Thu Dec 19 2019 - 04:05:46 EST


On Wed, Dec 18, 2019 at 12:28:01PM -0800, Davidlohr Bueso wrote:
> Hmm so fyi __crash_kexec() is another one, but can be called in hard-irq, and
> it's extremely obvious that the trylock+unlock occurs in the same context.

Hurmph, that unlock 'never' happens if I read it right :-) Still,
something like the below ought to cure it I suppose.

> It would be nice to automate this...

Automate what exactly? We'll stick your WARN back in on the next round.


---
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 15d70a90b50d..2faf2ec33032 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -47,6 +47,26 @@

DEFINE_MUTEX(kexec_mutex);

+static void kexec_lock(void)
+{
+ /*
+ * LOCK kexec_mutex cmpxchg(&panic_cpu, INVALID, cpu)
+ * MB MB
+ * panic_cpu == INVALID kexec_mutex == LOCKED
+ *
+ * Ensures either we observe the cmpxchg, or crash_kernel() observes
+ * our lock acquisition.
+ */
+ mutex_lock(&kexec_mutex);
+ smp_mb();
+ atomic_cond_load_acquire(&panic_cpu, VAL == PANIC_CPU_INVALID);
+}
+
+static void kexec_unlock(void)
+{
+ mutex_unlock(&kexec_mutex);
+}
+
/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;

@@ -937,24 +957,13 @@ int kexec_load_disabled;
*/
void __noclone __crash_kexec(struct pt_regs *regs)
{
- /* Take the kexec_mutex here to prevent sys_kexec_load
- * running on one cpu from replacing the crash kernel
- * we are using after a panic on a different cpu.
- *
- * If the crash kernel was not located in a fixed area
- * of memory the xchg(&kexec_crash_image) would be
- * sufficient. But since I reuse the memory...
- */
- if (mutex_trylock(&kexec_mutex)) {
- if (kexec_crash_image) {
- struct pt_regs fixed_regs;
-
- crash_setup_regs(&fixed_regs, regs);
- crash_save_vmcoreinfo();
- machine_crash_shutdown(&fixed_regs);
- machine_kexec(kexec_crash_image);
- }
- mutex_unlock(&kexec_mutex);
+ if (kexec_crash_image) {
+ struct pt_regs fixed_regs;
+
+ crash_setup_regs(&fixed_regs, regs);
+ crash_save_vmcoreinfo();
+ machine_crash_shutdown(&fixed_regs);
+ machine_kexec(kexec_crash_image);
}
}
STACK_FRAME_NON_STANDARD(__crash_kexec);
@@ -973,7 +982,11 @@ void crash_kexec(struct pt_regs *regs)
if (old_cpu == PANIC_CPU_INVALID) {
/* This is the 1st CPU which comes here, so go ahead. */
printk_safe_flush_on_panic();
- __crash_kexec(regs);
+ /*
+ * Orders against kexec_lock(), see the comment there.
+ */
+ if (!mutex_is_locked(&kexec_mutex))
+ __crash_kexec(regs);

/*
* Reset panic_cpu to allow another panic()/crash_kexec()
@@ -987,10 +1000,10 @@ size_t crash_get_memory_size(void)
{
size_t size = 0;

- mutex_lock(&kexec_mutex);
+ kexec_lock();
if (crashk_res.end != crashk_res.start)
size = resource_size(&crashk_res);
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return size;
}

@@ -1010,7 +1023,7 @@ int crash_shrink_memory(unsigned long new_size)
unsigned long old_size;
struct resource *ram_res;

- mutex_lock(&kexec_mutex);
+ kexec_lock();

if (kexec_crash_image) {
ret = -ENOENT;
@@ -1048,7 +1061,7 @@ int crash_shrink_memory(unsigned long new_size)
insert_resource(&iomem_resource, ram_res);

unlock:
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return ret;
}