Re: [v4.6-rc7-183-g1410b74e4061]

From: Sedat Dilek
Date: Mon May 16 2016 - 14:12:03 EST


On 5/16/16, Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
> On Mon, May 16, 2016 at 07:42:35PM +0200, Sedat Dilek wrote:
>
>> Unfortunately, I could not reproduce this again with none of my
>> 183-kernels.
>> When I first hit a "chain_key collision" issue, it was hard to redproduce,
>> so.
>> Any idea, how I can "force" this?
>
> Nope; I wish I knew, that'd be so much easier to work with :/
>
> I'm hoping someone will report a reproducer, even something that
> triggers once every 5-10 runs would be awesome.
>
> In any case, like I've explained before, nothing regressed as such, we
> only added this new warning under DEBUG_LOCKDEP because we want to
> better understand the condition that triggers it.
>
> If it bothers you, just turn off DEBUG_LOCKDEP and know that your kernel
> is as reliable as it was before. OTOH, if you do keep it on, please
> let me know if you can (semi) reliably trigger this, as I'd really like
> to have a better understanding.
>

OK, I keep checking my logs.

I refreshed your patch Ingo pointed me to.

But it fails like this (on top of Linux v4.6 final)...
[...]
if [ "" = "-pg" ]; then if [ kernel/locking/mutex-debug.o !=
"scripts/mod/empty.o" ]; then ./scripts/recordmcount
"kernel/locking/mutex-debug.o"; fi; fi;
mycompiler -Wp,-MD,kernel/locking/.lockdep.o.d -nostdinc -isystem
/usr/lib/gcc/x86_64-linux-gnu/4.9/include -nostdinc -isystem
/usr/lib/gcc/x86_64-linux-gnu/4.9/include -I./arch/x86/include
-Iarch/x86/include/generated/uapi -Iarch/x86/include/generated
-Iinclude -I./arch/x86/include/uapi -Iarch/x86/include/generated/uapi
-I./include/uapi -Iinclude/generated/uapi -include
./include/linux/kconfig.h -D__KERNEL__ -Wall -Wundef
-Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common
-Werror-implicit-function-declaration -Wno-format-security -std=gnu89
-mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -m64 -falign-jumps=1
-falign-loops=1 -mno-80387 -mno-fp-ret-in-387
-mpreferred-stack-boundary=3 -mtune=generic -mno-red-zone
-mcmodel=kernel -funit-at-a-time -maccumulate-outgoing-args
-DCONFIG_X86_X32_ABI -DCONFIG_AS_CFI=1 -DCONFIG_AS_CFI_SIGNAL_FRAME=1
-DCONFIG_AS_CFI_SECTIONS=1 -DCONFIG_AS_FXSAVEQ=1 -DCONFIG_AS_SSSE3=1
-DCONFIG_AS_CRC32=1 -DCONFIG_AS_AVX=1 -DCONFIG_AS_AVX2=1 -pipe
-Wno-sign-compare -fno-asynchronous-unwind-tables
-fno-delete-null-pointer-checks -O2 --param=allow-store-data-races=0
-Wframe-larger-than=1024 -fno-stack-protector
-Wno-unused-but-set-variable -fno-omit-frame-pointer
-fno-optimize-sibling-calls -fno-var-tracking-assignments -mfentry
-DCC_USING_FENTRY -Wdeclaration-after-statement -Wno-pointer-sign
-fno-strict-overflow -fconserve-stack -Werror=implicit-int
-Werror=strict-prototypes -Werror=date-time -DCC_HAVE_ASM_GOTO
-D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(lockdep)"
-D"KBUILD_MODNAME=KBUILD_STR(lockdep)" -c -o
kernel/locking/.tmp_lockdep.o kernel/locking/lockdep.c
kernel/locking/lockdep.c: In function 'print_chain_keys_held_locks':
kernel/locking/lockdep.c:2034:2: error: too few arguments to function
'print_chain_key_iteration'
print_chain_key_iteration(hlock_next->class_idx, chain_key);
^
kernel/locking/lockdep.c:2006:12: note: declared here
static u64 print_chain_key_iteration(int class_idx, u64 chain_key,
u64 prev_key)
^
make[4]: *** [kernel/locking/lockdep.o] Error 1
make[3]: *** [kernel/locking] Error 2
make[2]: *** [kernel] Error 2
[...]

- Sedat -
From b953be255bfb46970c75950e297be836577bc525 Mon Sep 17 00:00:00 2001
From: Sedat Dilek <sedat.dilek@xxxxxxxxx>
Date: Mon, 16 May 2016 15:51:04 +0200
Subject: [PATCH] locking/lockdep: Some more additional chain_key collision
information

From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>

For more details see thread "[v4.6-rc7-183-g1410b74e4061]" at LKML [1].

Patch for testing from Peter Zijlstra see [2] and [3].

[1] http://marc.info/?t=146321784300002&r=1&w=2
[2] http://marc.info/?l=linux-kernel&m=146339587506110&w=2
[3] https://lkml.org/lkml/2016/5/10/214

Cc: Wanpeng Li <wanpeng.li@xxxxxxxxxxxxxxx>
Cc: Alfredo Alvarez Fernandez <alfredoalvarezfernandez@xxxxxxxxx>
Cc: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Sedat Dilek <sedat.dilek@xxxxxxxxx>
Cc: Ted Tso <tytso@xxxxxxx>
Cc: LKML <linux-kernel@xxxxxxxxxxxxxxx>
Cc: the arch/x86 maintainers <x86@xxxxxxxxxx>
Cc: linux-fsdevel@xxxxxxxxxxxxxxx
---
kernel/locking/lockdep.c | 31 +++++++++++++++++++------------
1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 78c1c0ee6dc1..5dc21eb101b0 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2003,13 +2003,14 @@ static inline int get_first_held_lock(struct task_struct *curr,
/*
* Returns the next chain_key iteration
*/
-static u64 print_chain_key_iteration(int class_idx, u64 chain_key)
+static u64 print_chain_key_iteration(int class_idx, u64 chain_key, u64 prev_key)
{
u64 new_chain_key = iterate_chain_key(chain_key, class_idx);

- printk(" class_idx:%d -> chain_key:%016Lx",
+ printk(" class_idx: %d -> chain_key: %016Lx (%016Lx)",
class_idx,
- (unsigned long long)new_chain_key);
+ (unsigned long long)new_chain_key,
+ (unsigned long long)prev_key);
return new_chain_key;
}

@@ -2024,7 +2025,8 @@ print_chain_keys_held_locks(struct task_struct *curr, struct held_lock *hlock_ne
printk("depth: %u\n", depth + 1);
for (i = get_first_held_lock(curr, hlock_next); i < depth; i++) {
hlock = curr->held_locks + i;
- chain_key = print_chain_key_iteration(hlock->class_idx, chain_key);
+ chain_key = print_chain_key_iteration(hlock->class_idx, chain_key,
+ hlock->prev_chain_key);

print_lock(hlock);
}
@@ -2042,7 +2044,7 @@ static void print_chain_keys_chain(struct lock_chain *chain)
printk("depth: %u\n", chain->depth);
for (i = 0; i < chain->depth; i++) {
class_id = chain_hlocks[chain->base + i];
- chain_key = print_chain_key_iteration(class_id + 1, chain_key);
+ chain_key = print_chain_key_iteration(class_id + 1, chain_key, 0);

print_lock_name(lock_classes + class_id);
printk("\n");
@@ -2051,7 +2053,8 @@ static void print_chain_keys_chain(struct lock_chain *chain)

static void print_collision(struct task_struct *curr,
struct held_lock *hlock_next,
- struct lock_chain *chain)
+ struct lock_chain *chain,
+ u64 chain_key)
{
printk("\n");
printk("======================\n");
@@ -2060,11 +2063,14 @@ static void print_collision(struct task_struct *curr,
printk("----------------------\n");
printk("%s/%d: ", current->comm, task_pid_nr(current));
printk("Hash chain already cached but the contents don't match!\n");
+ printk("chain->chain_key: %016Lx chain_key: %016Lx\n",
+ (unsigned long long)chain->chain_key,
+ (unsigned long long)chain_key);

- printk("Held locks:");
+ printk("Held locks: ");
print_chain_keys_held_locks(curr, hlock_next);

- printk("Locks in cached chain:");
+ printk("Locks in cached chain: ");
print_chain_keys_chain(chain);

printk("\nstack backtrace:\n");
@@ -2080,7 +2086,8 @@ static void print_collision(struct task_struct *curr,
*/
static int check_no_collision(struct task_struct *curr,
struct held_lock *hlock,
- struct lock_chain *chain)
+ struct lock_chain *chain,
+ u64 chain_key)
{
#ifdef CONFIG_DEBUG_LOCKDEP
int i, j, id;
@@ -2088,7 +2095,7 @@ static int check_no_collision(struct task_struct *curr,
i = get_first_held_lock(curr, hlock);

if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1))) {
- print_collision(curr, hlock, chain);
+ print_collision(curr, hlock, chain, chain_key);
return 0;
}

@@ -2096,7 +2103,7 @@ static int check_no_collision(struct task_struct *curr,
id = curr->held_locks[i].class_idx - 1;

if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id)) {
- print_collision(curr, hlock, chain);
+ print_collision(curr, hlock, chain, chain_key);
return 0;
}
}
@@ -2134,7 +2141,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
if (chain->chain_key == chain_key) {
cache_hit:
debug_atomic_inc(chain_lookup_hits);
- if (!check_no_collision(curr, hlock, chain))
+ if (!check_no_collision(curr, hlock, chain, chain_key))
return 0;

if (very_verbose(class))
--
2.8.2