[RFC][PATCH] spinlock_debug: report spinlock lockup from unlock

From: Sergey Senozhatsky
Date: Sat Dec 17 2016 - 11:20:12 EST


There is a race window between the point when __spin_lock_debug()
detects spinlock lockup and the time when CPU that caused the
lockup receives its backtrace interrupt.

Before __spin_lock_debug() triggers all_cpu_backtrace() it calls
spin_dump() to printk() the current state of the lock and CPU
backtrace. These printk() calls can take some time to print the
messages to serial console, for instance (we are not talking
about console_unlock() loop and a flood of messages from other
CPUs, but just spin_dump() printk() and serial console).

All those preparation steps can give CPU that caused the lockup
enough time to run away, so when it receives a backtrace interrupt
it can look completely innocent.

The patch extends `struct raw_spinlock' with additional variable
that stores jiffies of successful do_raw_spin_lock() and checks
in debug_spin_unlock() whether the spin_lock has been locked for
too long. So we will have a reliable backtrace from CPU that
locked up and a reliable backtrace from CPU that caused the
lockup.

Missed spin_lock unlock deadline report (example):

BUG: spinlock missed unlock deadline on CPU#0, bash/327
lock: lock.25562+0x0/0x60, .magic: dead4ead, .owner: bash/327, .owner_cpu: 0
CPU: 0 PID: 327 Comm: bash
Call Trace:
dump_stack+0x4f/0x65
spin_dump+0x8a/0x8f
spin_bug+0x2b/0x2d
do_raw_spin_unlock+0x92/0xa3
_raw_spin_unlock+0x27/0x44
...

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx>
---
include/linux/spinlock_types.h | 4 +++-
kernel/locking/spinlock_debug.c | 5 +++++
2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 73548eb13a5d..8972e56eeefb 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -25,6 +25,7 @@ typedef struct raw_spinlock {
#ifdef CONFIG_DEBUG_SPINLOCK
unsigned int magic, owner_cpu;
void *owner;
+ unsigned long acquire_tstamp;
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
@@ -45,7 +46,8 @@ typedef struct raw_spinlock {
# define SPIN_DEBUG_INIT(lockname) \
.magic = SPINLOCK_MAGIC, \
.owner_cpu = -1, \
- .owner = SPINLOCK_OWNER_INIT,
+ .owner = SPINLOCK_OWNER_INIT, \
+ .acquire_tstamp = 0,
#else
# define SPIN_DEBUG_INIT(lockname)
#endif
diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
index 0374a596cffa..daeab4bc86ff 100644
--- a/kernel/locking/spinlock_debug.c
+++ b/kernel/locking/spinlock_debug.c
@@ -12,6 +12,7 @@
#include <linux/debug_locks.h>
#include <linux/delay.h>
#include <linux/export.h>
+#include <linux/jiffies.h>

void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
struct lock_class_key *key)
@@ -27,6 +28,7 @@ void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
lock->magic = SPINLOCK_MAGIC;
lock->owner = SPINLOCK_OWNER_INIT;
lock->owner_cpu = -1;
+ lock->acquire_tstamp = 0;
}

EXPORT_SYMBOL(__raw_spin_lock_init);
@@ -90,6 +92,7 @@ static inline void debug_spin_lock_after(raw_spinlock_t *lock)
{
lock->owner_cpu = raw_smp_processor_id();
lock->owner = current;
+ lock->acquire_tstamp = jiffies;
}

static inline void debug_spin_unlock(raw_spinlock_t *lock)
@@ -99,6 +102,8 @@ static inline void debug_spin_unlock(raw_spinlock_t *lock)
SPIN_BUG_ON(lock->owner != current, lock, "wrong owner");
SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
lock, "wrong CPU");
+ SPIN_BUG_ON(time_after_eq(jiffies, lock->acquire_tstamp + HZ),
+ lock, "missed unlock deadline");
lock->owner = SPINLOCK_OWNER_INIT;
lock->owner_cpu = -1;
}
--
2.11.0