Re: [PATCH 3/4] rhashtable: use bit_spin_locks to protect hash bucket.

From: NeilBrown
Date: Thu Apr 11 2019 - 02:13:33 EST


On Thu, Apr 11 2019, NeilBrown wrote:

> On Wed, Apr 10 2019, Guenter Roeck wrote:
>
>> Hi,
>>
.....
>>
>> This patch causes my qemu q800 boot test to crash reliably.
>>
....
>> Code: 4a89 6604 4280 60ea 2c2b 000c 2748 000c <2869> 000c 082c 0003 0002 6728 4878 0014 7620 4873 3800 486e ffec 4eb9 002e 5b88
>
> Thanks for testing and for the report.
.....
>
> .... and after googling a bit I see that the 68000 requires 2-byte alignment,
> but not 4-byte. Oh..
>
> That means there aren't two spare bits in an address, so I cannot use
> one for the NULLS and one for a lock bit. Bother.
>
> I might be able to find a different way forward, but for now I think we
> need to drop this series.

I have found a way forward that I like. It only requires one bit per
address to be over-loaded.
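
To make the idea concrete: the only overloaded bit is BIT(0) of the bucket
pointer, which is the lock. An empty bucket is stored as plain NULL (possibly
with the lock bit set), and the per-bucket nulls marker is synthesised from
the bucket address when the chain head is read, so it never has to be stored
under the lock bit. Here is a rough user-space sketch of that scheme
(illustration only; bucket_head(), NULLS_MARKER() and struct node are
simplified stand-ins for rht_ptr(), RHT_NULLS_MARKER() and struct rhash_head,
not the code in the patch below):

/* Hypothetical user-space illustration only - not kernel code. */
#include <stdio.h>
#include <stdint.h>

#define BIT(n)		(1UL << (n))
/* A nulls marker is an odd "pointer" derived from the bucket address;
 * it is only ever stored in ->next, never in the bucket itself.
 */
#define NULLS_MARKER(bkt)	((struct node *)(((uintptr_t)(bkt)) | 1UL))

struct node {
	struct node *next;
	int key;
};

static int is_nulls(const struct node *p)
{
	return (uintptr_t)p & 1UL;
}

/* The only overloaded bit is BIT(0) of the bucket pointer, used as the
 * lock.  An empty bucket is stored as NULL (possibly with the lock bit
 * set); the nulls marker is synthesised from the bucket address on read,
 * so no second spare bit is needed in the stored pointer.
 */
static struct node *bucket_head(struct node **bkt)
{
	uintptr_t p = (uintptr_t)*bkt;

	if ((p & ~BIT(0)) == 0)
		return NULLS_MARKER(bkt);
	return (struct node *)(p & ~BIT(0));
}

int main(void)
{
	struct node *bucket = NULL;
	struct node n = { .next = NULLS_MARKER(&bucket), .key = 42 };

	/* Empty bucket: the head we read is the synthesised nulls marker. */
	printf("empty bucket is nulls: %d\n", is_nulls(bucket_head(&bucket)));

	/* Insert n at the head and "lock" the bucket by setting BIT(0). */
	bucket = (struct node *)((uintptr_t)&n | BIT(0));

	/* Walk the chain; it terminates at the nulls marker in n.next. */
	for (struct node *p = bucket_head(&bucket); !is_nulls(p); p = p->next)
		printf("found key %d\n", p->key);
	return 0;
}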

The following patch implements it and works for me.
Could you please confirm that it fixes your problem on m68k ??

I'll break it up into a few reviewable patches and post them separately.

Thanks again,
NeilBrown


diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 01253d809687..214487aaf3eb 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -40,7 +40,7 @@
* the chain. To avoid dereferencing this pointer without clearing
* the bit first, we use an opaque 'struct rhash_lock_head *' for the
* pointer stored in the bucket. This struct needs to be defined so
- * that rcu_derefernce() works on it, but it has no content so a
+ * that rcu_dereference() works on it, but it has no content so a
* cast is needed for it to be useful. This ensures it isn't
* used by mistake with clearing the lock bit first.
*/
@@ -85,70 +85,6 @@ struct bucket_table {
struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp;
};

-/*
- * We lock a bucket by setting BIT(0) in the pointer - this is always
- * zero in real pointers and in the nulls marker.
- * bit_spin_locks do not handle contention well, but the whole point
- * of the hashtable design is to achieve minimum per-bucket contention.
- * A nested hash table might not have a bucket pointer. In that case
- * we cannot get a lock. For remove and replace the bucket cannot be
- * interesting and doesn't need locking.
- * For insert we allocate the bucket if this is the last bucket_table,
- * and then take the lock.
- * Sometimes we unlock a bucket by writing a new pointer there. In that
- * case we don't need to unlock, but we do need to reset state such as
- * local_bh. For that we have rht_assign_unlock(). As rcu_assign_pointer()
- * provides the same release semantics that bit_spin_unlock() provides,
- * this is safe.
- */
-
-static inline void rht_lock(struct rhash_lock_head **bkt)
-{
- local_bh_disable();
- bit_spin_lock(0, (unsigned long *)bkt);
-}
-
-static inline void rht_unlock(struct rhash_lock_head **bkt)
-{
- bit_spin_unlock(0, (unsigned long *)bkt);
- local_bh_enable();
-}
-
-static inline void rht_assign_unlock(struct rhash_lock_head **bkt,
- struct rhash_head *obj)
-{
- struct rhash_head **p = (struct rhash_head **)bkt;
-
- if (obj == RHT_NULLS_MARKER(bkt))
- obj = NULL;
- rcu_assign_pointer(*p, obj);
- preempt_enable();
- __release(bitlock);
- local_bh_enable();
-}
-
-/*
- * If 'p' is a bucket head and might be locked:
- * rht_ptr() returns the address without the lock bit.
- * rht_ptr_locked() returns the address WITH the lock bit.
- */
-static inline struct rhash_head __rcu *rht_ptr(const struct rhash_lock_head **bkt,
- struct bucket_table *tbl,
- unsigned int hash)
-{
- const struct rhash_lock_head *p =
- rht_dereference_bucket(*bkt, tbl, hash);
- if (!p)
- return RHT_NULLS_MARKER(bkt);
- return (void *)(((unsigned long)p) & ~BIT(0));
-}
-
-static inline struct rhash_lock_head __rcu *rht_ptr_locked(const
- struct rhash_head *p)
-{
- return (void *)(((unsigned long)p) | BIT(0));
-}
-
/*
* NULLS_MARKER() expects a hash value with the low
* bits mostly likely to be significant, and it discards
@@ -367,6 +303,93 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
&tbl->buckets[hash];
}

+/*
+ * We lock a bucket by setting BIT(0) in the pointer - this is always
+ * zero in real pointers and in the nulls marker.
+ * bit_spin_locks do not handle contention well, but the whole point
+ * of the hashtable design is to achieve minimum per-bucket contention.
+ * A nested hash table might not have a bucket pointer. In that case
+ * we cannot get a lock. For remove and replace the bucket cannot be
+ * interesting and doesn't need locking.
+ * For insert we allocate the bucket if this is the last bucket_table,
+ * and then take the lock.
+ * Sometimes we unlock a bucket by writing a new pointer there. In that
+ * case we don't need to unlock, but we do need to reset state such as
+ * local_bh. For that we have rht_assign_unlock(). As rcu_assign_pointer()
+ * provides the same release semantics that bit_spin_unlock() provides,
+ * this is safe.
+ */
+
+static inline void rht_lock(struct rhash_lock_head **bkt)
+{
+ local_bh_disable();
+ bit_spin_lock(0, (unsigned long *)bkt);
+}
+
+static inline void rht_unlock(struct rhash_lock_head **bkt)
+{
+ bit_spin_unlock(0, (unsigned long *)bkt);
+ local_bh_enable();
+}
+
+/*
+ * If 'bkt' is a bucket pointer, which might be locked:
+ * rht_ptr() dereferences it and returns the chain head without the lock bit.
+ * rht_ptr_locked() returns the address WITH the lock bit.
+ */
+static inline struct rhash_head __rcu *rht_ptr(struct rhash_lock_head __rcu * const *bkt,
+ struct bucket_table *tbl,
+ unsigned int hash)
+{
+ const struct rhash_lock_head *p =
+ rht_dereference_bucket_rcu(*bkt, tbl, hash);
+ if ((((unsigned long)p) & ~BIT(0)) == 0)
+ return RHT_NULLS_MARKER(bkt);
+ return (void *)(((unsigned long)p) & ~BIT(0));
+}
+
+/*
+ * This can be called when access is known to be exclusive,
+ * such as when destroying an rhashtable
+ */
+static inline struct rhash_head __rcu *rht_ptr_unprotected(
+ struct rhash_lock_head __rcu * const *bkt)
+{
+ const struct rhash_lock_head *p = rcu_dereference_protected(*bkt, true);
+ if (!p)
+ return RHT_NULLS_MARKER(bkt);
+ return (void *)(((unsigned long)p) & ~BIT(0));
+}
+
+static inline struct rhash_lock_head __rcu *rht_ptr_locked(const
+ struct rhash_head *p)
+{
+ return (void *)(((unsigned long)p) | BIT(0));
+}
+
+static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt,
+ struct rhash_head *obj)
+{
+ struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;
+
+ if (rht_is_a_nulls(obj))
+ obj = NULL;
+ rcu_assign_pointer(*p, rht_ptr_locked(obj));
+}
+
+static inline void rht_assign_unlock(struct rhash_lock_head __rcu **bkt,
+ struct rhash_head *obj)
+{
+ struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;
+
+ if (rht_is_a_nulls(obj))
+ obj = NULL;
+ rcu_assign_pointer(*p, obj);
+ preempt_enable();
+ __release(bitlock);
+ local_bh_enable();
+}
+
/**
* rht_for_each_from - iterate over hash chain from given head
* @pos: the &struct rhash_head to use as a loop cursor.
@@ -375,7 +398,7 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
* @hash: the hash value / bucket index
*/
#define rht_for_each_from(pos, head, tbl, hash) \
- for (pos = rht_dereference_bucket(head, tbl, hash); \
+ for (pos = head; \
!rht_is_a_nulls(pos); \
pos = rht_dereference_bucket((pos)->next, tbl, hash))

@@ -386,7 +409,7 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
* @hash: the hash value / bucket index
*/
#define rht_for_each(pos, tbl, hash) \
- rht_for_each_from(pos, rht_ptr(*rht_bucket(tbl, hash)), tbl, hash)
+ rht_for_each_from(pos, rht_ptr(rht_bucket(tbl, hash), tbl, hash), tbl, hash)

/**
* rht_for_each_entry_from - iterate over hash chain from given head
@@ -398,7 +421,7 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
* @member: name of the &struct rhash_head within the hashable struct.
*/
#define rht_for_each_entry_from(tpos, pos, head, tbl, hash, member) \
- for (pos = rht_dereference_bucket(head, tbl, hash); \
+ for (pos = head; \
(!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
pos = rht_dereference_bucket((pos)->next, tbl, hash))

@@ -411,8 +434,8 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
* @member: name of the &struct rhash_head within the hashable struct.
*/
#define rht_for_each_entry(tpos, pos, tbl, hash, member) \
- rht_for_each_entry_from(tpos, pos, rht_ptr(*rht_bucket(tbl, hash)), \
- tbl, hash, member)
+ rht_for_each_entry_from(tpos, pos, rht_ptr(rht_bucket(tbl, hash), \
+ tbl, hash), tbl, hash, member)

/**
* rht_for_each_entry_safe - safely iterate over hash chain of given type
@@ -427,8 +450,7 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
* remove the loop cursor from the list.
*/
#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \
- for (pos = rht_dereference_bucket(rht_ptr(*rht_bucket(tbl, hash)), \
- tbl, hash), \
+ for (pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
next = !rht_is_a_nulls(pos) ? \
rht_dereference_bucket(pos->next, tbl, hash) : NULL; \
(!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
@@ -449,7 +471,7 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
*/
#define rht_for_each_rcu_from(pos, head, tbl, hash) \
for (({barrier(); }), \
- pos = rht_dereference_bucket_rcu(head, tbl, hash); \
+ pos = head; \
!rht_is_a_nulls(pos); \
pos = rcu_dereference_raw(pos->next))

@@ -464,10 +486,9 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
* traversal is guarded by rcu_read_lock().
*/
#define rht_for_each_rcu(pos, tbl, hash) \
- for (({barrier(); }), \
- pos = rht_ptr(rht_dereference_bucket_rcu( \
- *rht_bucket(tbl, hash), tbl, hash)); \
- !rht_is_a_nulls(pos); \
+ for (({barrier(); }), \
+ pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash); \
+ !rht_is_a_nulls(pos); \
pos = rcu_dereference_raw(pos->next))

/**
@@ -485,7 +506,7 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
*/
#define rht_for_each_entry_rcu_from(tpos, pos, head, tbl, hash, member) \
for (({barrier(); }), \
- pos = rht_dereference_bucket_rcu(head, tbl, hash); \
+ pos = head; \
(!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
pos = rht_dereference_bucket_rcu(pos->next, tbl, hash))

@@ -501,10 +522,10 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
* the _rcu mutation primitives such as rhashtable_insert() as long as the
* traversal is guarded by rcu_read_lock().
*/
-#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \
- rht_for_each_entry_rcu_from(tpos, pos, \
- rht_ptr(*rht_bucket(tbl, hash)), \
- tbl, hash, member)
+#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \
+ rht_for_each_entry_rcu_from(tpos, pos, \
+ rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
+ tbl, hash, member)

/**
* rhl_for_each_rcu - iterate over rcu hash table list
@@ -559,8 +580,7 @@ static inline struct rhash_head *__rhashtable_lookup(
hash = rht_key_hashfn(ht, tbl, key, params);
bkt = rht_bucket(tbl, hash);
do {
- he = rht_ptr(rht_dereference_bucket_rcu(*bkt, tbl, hash));
- rht_for_each_rcu_from(he, he, tbl, hash) {
+ rht_for_each_rcu_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
if (params.obj_cmpfn ?
params.obj_cmpfn(&arg, rht_obj(ht, he)) :
rhashtable_compare(&arg, rht_obj(ht, he)))
@@ -693,7 +713,7 @@ static inline void *__rhashtable_insert_fast(
return rhashtable_insert_slow(ht, key, obj);
}

- rht_for_each_from(head, rht_ptr(*bkt), tbl, hash) {
+ rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
struct rhlist_head *plist;
struct rhlist_head *list;

@@ -738,7 +758,7 @@ static inline void *__rhashtable_insert_fast(
goto slow_path;

/* Inserting at head of list makes unlocking free. */
- head = rht_ptr(rht_dereference_bucket(*bkt, tbl, hash));
+ head = rht_ptr(bkt, tbl, hash);

RCU_INIT_POINTER(obj->next, head);
if (rhlist) {
@@ -965,7 +985,7 @@ static inline int __rhashtable_remove_fast_one(
pprev = NULL;
rht_lock(bkt);

- rht_for_each_from(he, rht_ptr(*bkt), tbl, hash) {
+ rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
struct rhlist_head *list;

list = container_of(he, struct rhlist_head, rhead);
@@ -1124,7 +1144,7 @@ static inline int __rhashtable_replace_fast(
pprev = NULL;
rht_lock(bkt);

- rht_for_each_from(he, rht_ptr(*bkt), tbl, hash) {
+ rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
if (he != obj_old) {
pprev = &he->next;
continue;
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index c5d0974467ee..2eafc8463349 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -59,7 +59,7 @@ int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
return 1;
if (unlikely(tbl->nest))
return 1;
- return bit_spin_is_locked(1, (unsigned long *)&tbl->buckets[hash]);
+ return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]);
}
EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
#else
@@ -221,7 +221,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht,
struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl);
int err = -EAGAIN;
struct rhash_head *head, *next, *entry;
- struct rhash_head **pprev = NULL;
+ struct rhash_head __rcu **pprev = NULL;
unsigned int new_hash;

if (new_tbl->nest)
@@ -229,7 +229,8 @@ static int rhashtable_rehash_one(struct rhashtable *ht,

err = -ENOENT;

- rht_for_each_from(entry, rht_ptr(*bkt), old_tbl, old_hash) {
+ rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash),
+ old_tbl, old_hash) {
err = 0;
next = rht_dereference_bucket(entry->next, old_tbl, old_hash);

@@ -246,8 +247,8 @@ static int rhashtable_rehash_one(struct rhashtable *ht,

rht_lock(&new_tbl->buckets[new_hash]);

- head = rht_ptr(rht_dereference_bucket(new_tbl->buckets[new_hash],
- new_tbl, new_hash));
+ head = rht_ptr(new_tbl->buckets + new_hash,
+ new_tbl, new_hash);

RCU_INIT_POINTER(entry->next, head);

@@ -257,7 +258,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht,
rcu_assign_pointer(*pprev, next);
else
/* Need to preserved the bit lock. */
- rcu_assign_pointer(*bkt, rht_ptr_locked(next));
+ rht_assign_locked(bkt, next);

out:
return err;
@@ -484,12 +485,12 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
.ht = ht,
.key = key,
};
- struct rhash_head **pprev = NULL;
+ struct rhash_head __rcu **pprev = NULL;
struct rhash_head *head;
int elasticity;

elasticity = RHT_ELASTICITY;
- rht_for_each_from(head, rht_ptr(*bkt), tbl, hash) {
+ rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
struct rhlist_head *list;
struct rhlist_head *plist;

@@ -515,7 +516,7 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
rcu_assign_pointer(*pprev, obj);
else
/* Need to preserve the bit lock */
- rcu_assign_pointer(*bkt, rht_ptr_locked(obj));
+ rht_assign_locked(bkt, obj);

return NULL;
}
@@ -555,7 +556,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
if (unlikely(rht_grow_above_100(ht, tbl)))
return ERR_PTR(-EAGAIN);

- head = rht_ptr(rht_dereference_bucket(*bkt, tbl, hash));
+ head = rht_ptr(bkt, tbl, hash);

RCU_INIT_POINTER(obj->next, head);
if (ht->rhlist) {
@@ -568,7 +569,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
/* bkt is always the head of the list, so it holds
* the lock, which we need to preserve
*/
- rcu_assign_pointer(*bkt, rht_ptr_locked(obj));
+ rht_assign_locked(bkt, obj);

atomic_inc(&ht->nelems);
if (rht_grow_above_75(ht, tbl))
@@ -1137,7 +1138,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
struct rhash_head *pos, *next;

cond_resched();
- for (pos = rht_ptr(rht_dereference(*rht_bucket(tbl, i), ht)),
+ for (pos = rht_ptr_unprotected(rht_bucket(tbl, i)),
next = !rht_is_a_nulls(pos) ?
rht_dereference(pos->next, ht) : NULL;
!rht_is_a_nulls(pos);
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 02592c2a249c..7b93cfefe195 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -500,7 +500,7 @@ static unsigned int __init print_ht(struct rhltable *rhlt)
struct rhash_head *pos, *next;
struct test_obj_rhl *p;

- pos = rht_ptr(rht_dereference(tbl->buckets[i], ht));
+ pos = rht_ptr_unprotected(tbl->buckets + i);
next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL;

if (!rht_is_a_nulls(pos)) {
