[patch] spinlock-debug fix

From: Ingo Molnar
Date: Mon Jun 27 2005 - 04:52:02 EST



* Andrew Morton <akpm@xxxxxxxx> wrote:

> Reuben Farrelly <reuben-lkml@xxxxxxxx> wrote:
> >
> > > Anyway, scary trace. It look like some spinlock is thought to be in the
> > > wrong state in schedule(). Send the .config, please.
> >
> > Now online at http://www.reub.net/kernel/.config
>
> Me too.
>
> BUG: spinlock recursion on CPU#0, swapper/0, c120d520
> [<c01039ed>] dump_stack+0x19/0x20
> [<c01d9af2>] spin_bug+0x42/0x54
> [<c01d9bfa>] _raw_spin_lock+0x3e/0x84
> [<c031d0ad>] _spin_lock+0x9/0x10
> [<c031b9e9>] schedule+0x479/0xbc8
> [<c0100cb4>] cpu_idle+0x88/0x8c
> [<c01002c1>] rest_init+0x21/0x28
> [<c0442899>] start_kernel+0x151/0x158
> [<c010020f>] 0xc010020f
> Kernel panic - not syncing: bad locking
>
> The bug is in the new spinlock debugging code itself. Ingo, can you
> test that .config please?

couldnt reproduce it on an UP box, nor on an SMP/HT 2/4-way box, but it
finally triggered on a 2-way SMP box.

the bug is that current->pid is not a unique identifier on SMP (doh!).

The patch below fixes the bug - which also happens to be a speedup for
the debugging code, as the ->pid dereferencing does not have to be done
anymore. Also, i've disabled the panicing for now.

Ingo

- change owner_pid to owner, to fix bad pid uniqueness assumption on SMP
- some more debug output printed
- dont panic for now

Signed-off-by: Ingo Molnar <mingo@xxxxxxx>

include/linux/spinlock_types.h | 16 ++++++++++------
kernel/sched.c | 2 +-
lib/spinlock_debug.c | 30 +++++++++++++++++++-----------
3 files changed, 30 insertions(+), 18 deletions(-)

Index: linux/include/linux/spinlock_types.h
===================================================================
--- linux.orig/include/linux/spinlock_types.h
+++ linux/include/linux/spinlock_types.h
@@ -21,11 +21,12 @@ typedef struct {
unsigned int break_lock;
#endif
#ifdef CONFIG_DEBUG_SPINLOCK
- unsigned int magic, owner_pid, owner_cpu;
+ unsigned int magic, owner_cpu;
+ void *owner;
#endif
} spinlock_t;

-#define SPINLOCK_MAGIC 0xdead4ead
+#define SPINLOCK_MAGIC 0xdead4ead

typedef struct {
raw_rwlock_t raw_lock;
@@ -33,22 +34,25 @@ typedef struct {
unsigned int break_lock;
#endif
#ifdef CONFIG_DEBUG_SPINLOCK
- unsigned int magic, owner_pid, owner_cpu;
+ unsigned int magic, owner_cpu;
+ void *owner;
#endif
} rwlock_t;

-#define RWLOCK_MAGIC 0xdeaf1eed
+#define RWLOCK_MAGIC 0xdeaf1eed
+
+#define SPINLOCK_OWNER_INIT ((void *)-1L)

#ifdef CONFIG_DEBUG_SPINLOCK
# define SPIN_LOCK_UNLOCKED \
(spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \
.magic = SPINLOCK_MAGIC, \
- .owner_pid = -1, \
+ .owner = SPINLOCK_OWNER_INIT, \
.owner_cpu = -1 }
#define RW_LOCK_UNLOCKED \
(rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \
.magic = RWLOCK_MAGIC, \
- .owner_pid = -1, \
+ .owner = SPINLOCK_OWNER_INIT, \
.owner_cpu = -1 }
#else
# define SPIN_LOCK_UNLOCKED \
Index: linux/kernel/sched.c
===================================================================
--- linux.orig/kernel/sched.c
+++ linux/kernel/sched.c
@@ -1604,7 +1604,7 @@ static inline void finish_task_switch(ru
prev_task_flags = prev->flags;
#ifdef CONFIG_DEBUG_SPINLOCK
/* this is a valid case when another task releases the spinlock */
- rq->lock.owner_pid = current->pid;
+ rq->lock.owner = current;
#endif
finish_arch_switch(prev);
finish_lock_switch(rq, prev);
Index: linux/lib/spinlock_debug.c
===================================================================
--- linux.orig/lib/spinlock_debug.c
+++ linux/lib/spinlock_debug.c
@@ -14,16 +14,24 @@
static void spin_bug(spinlock_t *lock, const char *msg)
{
static long print_once = 1;
+ struct task_struct *owner = NULL;

if (xchg(&print_once, 0)) {
- printk("BUG: spinlock %s on CPU#%d, %s/%d, %p\n", msg,
- smp_processor_id(), current->comm, current->pid, lock);
+ if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
+ owner = lock->owner;
+ printk("BUG: spinlock %s on CPU#%d, %s/%d\n",
+ msg, smp_processor_id(), current->comm, current->pid);
+ printk(" lock: %p, .magic: %08x, .owner: %s/%d, .owner_cpu: %d\n",
+ lock, lock->magic,
+ owner ? owner->comm : "<none>",
+ owner ? owner->pid : -1,
+ lock->owner_cpu);
dump_stack();
#ifdef CONFIG_SMP
/*
* We cannot continue on SMP:
*/
- panic("bad locking");
+// panic("bad locking");
#endif
}
}
@@ -33,7 +41,7 @@ static void spin_bug(spinlock_t *lock, c
static inline void debug_spin_lock_before(spinlock_t *lock)
{
SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
- SPIN_BUG_ON(lock->owner_pid == current->pid, lock, "recursion");
+ SPIN_BUG_ON(lock->owner == current, lock, "recursion");
SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
lock, "cpu recursion");
}
@@ -41,17 +49,17 @@ static inline void debug_spin_lock_befor
static inline void debug_spin_lock_after(spinlock_t *lock)
{
lock->owner_cpu = raw_smp_processor_id();
- lock->owner_pid = current->pid;
+ lock->owner = current;
}

static inline void debug_spin_unlock(spinlock_t *lock)
{
SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
SPIN_BUG_ON(!spin_is_locked(lock), lock, "already unlocked");
- SPIN_BUG_ON(lock->owner_pid != current->pid, lock, "wrong owner");
+ SPIN_BUG_ON(lock->owner != current, lock, "wrong owner");
SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
lock, "wrong CPU");
- lock->owner_pid = -1;
+ lock->owner = SPINLOCK_OWNER_INIT;
lock->owner_cpu = -1;
}

@@ -176,7 +184,7 @@ void _raw_read_unlock(rwlock_t *lock)
static inline void debug_write_lock_before(rwlock_t *lock)
{
RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
- RWLOCK_BUG_ON(lock->owner_pid == current->pid, lock, "recursion");
+ RWLOCK_BUG_ON(lock->owner == current, lock, "recursion");
RWLOCK_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
lock, "cpu recursion");
}
@@ -184,16 +192,16 @@ static inline void debug_write_lock_befo
static inline void debug_write_lock_after(rwlock_t *lock)
{
lock->owner_cpu = raw_smp_processor_id();
- lock->owner_pid = current->pid;
+ lock->owner = current;
}

static inline void debug_write_unlock(rwlock_t *lock)
{
RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
- RWLOCK_BUG_ON(lock->owner_pid != current->pid, lock, "wrong owner");
+ RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner");
RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
lock, "wrong CPU");
- lock->owner_pid = -1;
+ lock->owner = SPINLOCK_OWNER_INIT;
lock->owner_cpu = -1;
}

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/