[PATCH v2 3/3] lockdep: prevent and detect chain_key collisions

From: Alfredo Alvarez Fernandez
Date: Fri Feb 19 2016 - 01:50:12 EST


From: Alfredo Alvarez Fernandez <alfredoalvarezfernandez@xxxxxxxxx>

The chain_key hashing macro iterate_chain_key(key1, key2) does not
generate a new different value if both key1 and key2 are 0. In that
case the generated value is again 0. This can lead to collisions which
can result in lockdep not detecting deadlocks or circular
dependencies.

Avoid the problem by using class_idx (1-based) instead of class id
(0-based) as an input for the hashing macro 'key2' in
iterate_chain_key(key1, key2).

The use of class id created collisions in cases like the following:

1.- Consider an initial state in which no class has been acquired yet.
Under these circumstances an AA deadlock will not be detected by
lockdep:

lock [key1,key2]->new key (key1=old chain_key, key2=id)
--------------------------
A [0,0]->0
A [0,0]->0 (collision)

The newly generated chain_key collides with the one used before and as
a result the check for a deadlock is skipped

A simple test using liblockdep and a pthread mutex confirms the
problem: (omitting stack traces)

new class 0xe15038: 0x7ffc64950f20
acquire class [0xe15038] 0x7ffc64950f20
acquire class [0xe15038] 0x7ffc64950f20
hash chain already cached, key: 0000000000000000 tail class:
[0xe15038] 0x7ffc64950f20

2.- Consider an ABBA in 2 different tasks and no class yet acquired.

T1 [key1,key2]->new key T2[key1,key2]->new key
-- --
A [0,0]->0

B [0,1]->1

B [0,1]->1 (collision)

A

In this case the collision prevents lockdep from creating the new
dependency A->B. This in turn results in lockdep not detecting the
circular dependency when T2 acquires A.

Add detection for chain_key collision under CONFIG_DEBUG_LOCKDEP.
When a collision is detected the problem is reported and all lock
debugging is turned off.

Signed-off-by: Alfredo Alvarez Fernandez <alfredoalvarezernandez@xxxxxxxxx>
---
Changes in v2:
- Add detection for chain_key collisions under CONFIG_DEBUG_LOCKDEP.
When a collision is detected the problem is reported and all lock
debugging is turned off.

Tested using liblockdep and the added tests before and after
applying the fix, confirming both that the code added for the
detection correctly reports the problem and that the fix actually
fixes it.

Tested tweaking lockdep to generate false collisions and
verified that the problem is reported and that lock debugging is
turned off.

Also tested with lockdep's test suite after applying the patch:
[ 0.000000] Good, all 253 testcases passed! |

kernel/locking/lockdep.c | 73 +++++++++++++++++++++++++++++++++++++-----------
1 file changed, 57 insertions(+), 16 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 60ace56..2c28298 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2007,6 +2007,53 @@ struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i)
}

/*
+ * Returns the index of the first held_lock of the current chain
+ */
+static inline int get_first_held_lock(struct task_struct *curr,
+ struct held_lock *hlock)
+{
+ int i;
+ struct held_lock *hlock_curr;
+
+ for (i = curr->lockdep_depth - 1; i >= 0; i--) {
+ hlock_curr = curr->held_locks + i;
+ if (hlock_curr->irq_context != hlock->irq_context)
+ break;
+
+ }
+
+ return ++i;
+}
+
+/*
+ * Checks whether the chain and the current held locks are consistent
+ * in depth and also in content. If they are not it most likely means
+ * that there was a collision during the calculation of the chain_key.
+ * Returns: 0 not passed, 1 passed
+ */
+static int check_no_collision(struct task_struct *curr,
+ struct held_lock *hlock,
+ struct lock_chain *chain)
+{
+#ifdef CONFIG_DEBUG_LOCKDEP
+ int i, j, id;
+
+ i = get_first_held_lock(curr, hlock);
+
+ if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1)))
+ return 0;
+
+ for (j = 0; j < chain->depth - 1; j++, i++) {
+ id = curr->held_locks[i].class_idx - 1;
+
+ if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id))
+ return 0;
+ }
+#endif
+ return 1;
+}
+
+/*
* Look up a dependency chain. If the key is not present yet then
* add it and return 1 - in this case the new dependency chain is
* validated. If the key is already hashed, return 0.
@@ -2019,7 +2066,6 @@ static inline int lookup_chain_cache(struct task_struct *curr,
struct lock_class *class = hlock_class(hlock);
struct list_head *hash_head = chainhashentry(chain_key);
struct lock_chain *chain;
- struct held_lock *hlock_curr;
int i, j;

/*
@@ -2037,6 +2083,9 @@ static inline int lookup_chain_cache(struct task_struct *curr,
if (chain->chain_key == chain_key) {
cache_hit:
debug_atomic_inc(chain_lookup_hits);
+ if (!check_no_collision(curr, hlock, chain))
+ return 0;
+
if (very_verbose(class))
printk("\nhash chain already cached, key: "
"%016Lx tail class: [%p] %s\n",
@@ -2074,13 +2123,7 @@ cache_hit:
chain = lock_chains + nr_lock_chains++;
chain->chain_key = chain_key;
chain->irq_context = hlock->irq_context;
- /* Find the first held_lock of current chain */
- for (i = curr->lockdep_depth - 1; i >= 0; i--) {
- hlock_curr = curr->held_locks + i;
- if (hlock_curr->irq_context != hlock->irq_context)
- break;
- }
- i++;
+ i = get_first_held_lock(curr, hlock);
chain->depth = curr->lockdep_depth + 1 - i;
if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
chain->base = nr_chain_hlocks;
@@ -2168,7 +2211,7 @@ static void check_chain_key(struct task_struct *curr)
{
#ifdef CONFIG_DEBUG_LOCKDEP
struct held_lock *hlock, *prev_hlock = NULL;
- unsigned int i, id;
+ unsigned int i;
u64 chain_key = 0;

for (i = 0; i < curr->lockdep_depth; i++) {
@@ -2185,17 +2228,16 @@ static void check_chain_key(struct task_struct *curr)
(unsigned long long)hlock->prev_chain_key);
return;
}
- id = hlock->class_idx - 1;
/*
* Whoops ran out of static storage again?
*/
- if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+ if (DEBUG_LOCKS_WARN_ON(hlock->class_idx > MAX_LOCKDEP_KEYS))
return;

if (prev_hlock && (prev_hlock->irq_context !=
hlock->irq_context))
chain_key = 0;
- chain_key = iterate_chain_key(chain_key, id);
+ chain_key = iterate_chain_key(chain_key, hlock->class_idx);
prev_hlock = hlock;
}
if (chain_key != curr->curr_chain_key) {
@@ -3073,7 +3115,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
struct task_struct *curr = current;
struct lock_class *class = NULL;
struct held_lock *hlock;
- unsigned int depth, id;
+ unsigned int depth;
int chain_head = 0;
int class_idx;
u64 chain_key;
@@ -3176,11 +3218,10 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
* The 'key ID' is what is the most compact key value to drive
* the hash, not class->key.
*/
- id = class - lock_classes;
/*
* Whoops, we did it again.. ran straight out of our static allocation.
*/
- if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+ if (DEBUG_LOCKS_WARN_ON(class_idx > MAX_LOCKDEP_KEYS))
return 0;

chain_key = curr->curr_chain_key;
@@ -3198,7 +3239,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
chain_key = 0;
chain_head = 1;
}
- chain_key = iterate_chain_key(chain_key, id);
+ chain_key = iterate_chain_key(chain_key, class_idx);

if (nest_lock && !__lock_is_held(nest_lock))
return print_lock_nested_lock_not_held(curr, hlock, ip);
--
2.5.0