Re: [RFC][PATCH] <linux/stringhash.h>: fix end_name_hash() for 64bit long

From: Amir Goldstein
Date: Sun Feb 11 2018 - 09:52:04 EST


On Fri, Feb 9, 2018 at 8:33 PM, Linus Torvalds
<torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:

>
> I *would* love to get some kind of estimate on how this changes the
> hash distribution. Do you have anything like that? The way this
> function is designed to be used, the upper bits should be used for the
> hash bucket information, so doing some bucket size distribution on
> (say) the upper ~12 bits or something with some real string input
> would be really good.
>

I've added bucket counters and tracing number of entries / number of non
empty buckets and largest bucket to dcache (patch attached).
Increased d_hash_shift to take only 12 bits for hash table offset.
The results vary a bit per run, but below are some outputs from sample runs
of find / of 55K entries right after boot.

The bottom line is that, at least w.r.t this simplistic bucket analysis and this
specific workload, the patch does not seem to move the needle. The bucket
distribution of before and after patch are similar and also similar to the
word-at-a-time results.
Just for comparison, I added a run with noop end_name_hash() which shows
a worse bucket distribution.

Perhaps it would take a different workload, or different analysis to demonstrate
the alleged deficiencies of the current implementation? Not sure which evidence
will be required to justify the change, even if, the patch is correct in theory?
I wonder if we should just let this old code be... maybe add something to the
comment or remove the part about "try to avoid losing bits..."?

Thanks,
Amir.


----------------------------------------------------

x86_64, word-at-a-time, 12 bits:

[ 0.008583] dcache: d_hash_shift = 20, nentries = 1, nbuckets = 1,
maxcount = 1
[ 0.783683] dcache: d_hash_shift = 20, nentries = 130, nbuckets =
129, maxcount = 2
[ 0.785336] dcache: d_hash_shift = 20, nentries = 314, nbuckets =
303, maxcount = 3
[ 0.789104] dcache: d_hash_shift = 20, nentries = 818, nbuckets =
736, maxcount = 4
[ 0.795435] dcache: d_hash_shift = 20, nentries = 1738, nbuckets =
1406, maxcount = 5
[ 0.804935] dcache: d_hash_shift = 20, nentries = 3056, nbuckets =
2134, maxcount = 6
[ 0.814014] dcache: d_hash_shift = 20, nentries = 4158, nbuckets =
2595, maxcount = 7
[ 0.824848] dcache: d_hash_shift = 20, nentries = 5635, nbuckets =
3051, maxcount = 8
[ 0.844848] dcache: d_hash_shift = 20, nentries = 8523, nbuckets =
3584, maxcount = 9
[ 0.852072] dcache: d_hash_shift = 20, nentries = 9571, nbuckets =
3708, maxcount = 10
[ 0.855598] dcache: d_hash_shift = 20, nentries = 10098, nbuckets =
3745, maxcount = 11
[ 2.793453] dcache: d_hash_shift = 20, nentries = 13547, nbuckets =
3937, maxcount = 12
[ 3.097325] dcache: d_hash_shift = 20, nentries = 14394, nbuckets =
3965, maxcount = 13
[ 3.559967] dcache: d_hash_shift = 20, nentries = 16783, nbuckets =
4017, maxcount = 14
[ 10.259225] dcache: d_hash_shift = 20, nentries = 22166, nbuckets =
4070, maxcount = 15
[ 10.326126] dcache: d_hash_shift = 20, nentries = 24960, nbuckets =
4083, maxcount = 16
[ 10.338824] dcache: d_hash_shift = 20, nentries = 25348, nbuckets =
4084, maxcount = 17
[ 10.423085] dcache: d_hash_shift = 20, nentries = 29715, nbuckets =
4095, maxcount = 18
[ 10.746374] dcache: d_hash_shift = 20, nentries = 30771, nbuckets =
4095, maxcount = 19

x86_64, byte at a time, before patch:

[ 0.008624] dcache: d_hash_shift = 20, nentries = 1, nbuckets = 1,
maxcount = 1
[ 0.797948] dcache: d_hash_shift = 20, nentries = 168, nbuckets =
167, maxcount = 2
[ 0.803184] dcache: d_hash_shift = 20, nentries = 953, nbuckets =
858, maxcount = 3
[ 0.809085] dcache: d_hash_shift = 20, nentries = 1690, nbuckets =
1390, maxcount = 4
[ 0.818364] dcache: d_hash_shift = 20, nentries = 2868, nbuckets =
2082, maxcount = 5
[ 0.826448] dcache: d_hash_shift = 20, nentries = 4016, nbuckets =
2609, maxcount = 6
[ 0.844307] dcache: d_hash_shift = 20, nentries = 6525, nbuckets =
3357, maxcount = 7
[ 0.848492] dcache: d_hash_shift = 20, nentries = 7121, nbuckets =
3473, maxcount = 8
[ 0.850470] dcache: d_hash_shift = 20, nentries = 7419, nbuckets =
3514, maxcount = 9
[ 2.781536] dcache: d_hash_shift = 20, nentries = 13484, nbuckets =
3984, maxcount = 10
[ 3.082709] dcache: d_hash_shift = 20, nentries = 14329, nbuckets =
4012, maxcount = 11
[ 3.116017] dcache: d_hash_shift = 20, nentries = 14411, nbuckets =
4011, maxcount = 12
[ 3.736935] dcache: d_hash_shift = 20, nentries = 17509, nbuckets =
4054, maxcount = 13
[ 17.557370] dcache: d_hash_shift = 20, nentries = 19928, nbuckets =
4073, maxcount = 14
[ 17.587867] dcache: d_hash_shift = 20, nentries = 21488, nbuckets =
4083, maxcount = 15
[ 17.622295] dcache: d_hash_shift = 20, nentries = 22798, nbuckets =
4085, maxcount = 16
[ 17.642227] dcache: d_hash_shift = 20, nentries = 23505, nbuckets =
4086, maxcount = 17
[ 17.722103] dcache: d_hash_shift = 20, nentries = 27107, nbuckets =
4092, maxcount = 18
[ 17.749791] dcache: d_hash_shift = 20, nentries = 28353, nbuckets =
4093, maxcount = 19
[ 17.958165] dcache: d_hash_shift = 20, nentries = 30687, nbuckets =
4095, maxcount = 20

x86_64, byte at a time, after patch:

[ 0.008648] dcache: d_hash_shift = 20, nentries = 1, nbuckets = 1,
maxcount = 1
[ 0.795676] dcache: d_hash_shift = 20, nentries = 157, nbuckets =
156, maxcount = 2
[ 0.799493] dcache: d_hash_shift = 20, nentries = 619, nbuckets =
563, maxcount = 3
[ 0.803741] dcache: d_hash_shift = 20, nentries = 1115, nbuckets =
928, maxcount = 4
[ 0.814418] dcache: d_hash_shift = 20, nentries = 2619, nbuckets =
1894, maxcount = 5
[ 0.817449] dcache: d_hash_shift = 20, nentries = 3053, nbuckets =
2095, maxcount = 6
[ 0.838418] dcache: d_hash_shift = 20, nentries = 5981, nbuckets =
3159, maxcount = 7
[ 0.843916] dcache: d_hash_shift = 20, nentries = 6790, nbuckets =
3340, maxcount = 8
[ 0.854101] dcache: d_hash_shift = 20, nentries = 8263, nbuckets =
3565, maxcount = 9
[ 0.864520] dcache: d_hash_shift = 20, nentries = 9745, nbuckets =
3760, maxcount = 10
[ 2.799091] dcache: d_hash_shift = 20, nentries = 13273, nbuckets =
3954, maxcount = 11
[ 3.201936] dcache: d_hash_shift = 20, nentries = 14576, nbuckets =
3988, maxcount = 12
[ 3.996005] dcache: d_hash_shift = 20, nentries = 17902, nbuckets =
4051, maxcount = 13
[ 9.847851] dcache: d_hash_shift = 20, nentries = 19322, nbuckets =
4068, maxcount = 14
[ 9.858100] dcache: d_hash_shift = 20, nentries = 19891, nbuckets =
4074, maxcount = 15
[ 9.889944] dcache: d_hash_shift = 20, nentries = 21424, nbuckets =
4082, maxcount = 16
[ 9.912863] dcache: d_hash_shift = 20, nentries = 22145, nbuckets =
4087, maxcount = 17
[ 9.940020] dcache: d_hash_shift = 20, nentries = 23126, nbuckets =
4089, maxcount = 18
[ 10.016343] dcache: d_hash_shift = 20, nentries = 26093, nbuckets =
4094, maxcount = 19
[ 10.072039] dcache: d_hash_shift = 20, nentries = 28712, nbuckets =
4094, maxcount = 20

x86_64, byte at a time, with NOOP end_name_hash() (i.e. take 32 LSB):

[ 0.008609] dcache: d_hash_shift = 20, nentries = 1, nbuckets = 1,
maxcount = 1
[ 0.081990] dcache: d_hash_shift = 20, nentries = 5, nbuckets = 4,
maxcount = 2
[ 0.802145] dcache: d_hash_shift = 20, nentries = 31, nbuckets =
28, maxcount = 3
[ 0.813503] dcache: d_hash_shift = 20, nentries = 554, nbuckets =
482, maxcount = 4
[ 0.817755] dcache: d_hash_shift = 20, nentries = 1151, nbuckets =
932, maxcount = 5
[ 0.817910] dcache: d_hash_shift = 20, nentries = 1175, nbuckets =
947, maxcount = 6
[ 0.817987] dcache: d_hash_shift = 20, nentries = 1187, nbuckets =
952, maxcount = 7
[ 0.838691] dcache: d_hash_shift = 20, nentries = 4152, nbuckets =
2502, maxcount = 8
[ 0.848732] dcache: d_hash_shift = 20, nentries = 5564, nbuckets =
2959, maxcount = 9
[ 0.848774] dcache: d_hash_shift = 20, nentries = 5570, nbuckets =
2960, maxcount = 10
[ 0.872152] dcache: d_hash_shift = 20, nentries = 8949, nbuckets =
3548, maxcount = 11
[ 0.879572] dcache: d_hash_shift = 20, nentries = 10020, nbuckets =
3647, maxcount = 12
[ 0.893884] dcache: d_hash_shift = 20, nentries = 12009, nbuckets =
3803, maxcount = 13
[ 0.897821] dcache: d_hash_shift = 20, nentries = 12570, nbuckets =
3827, maxcount = 14
[ 0.898154] dcache: d_hash_shift = 20, nentries = 12615, nbuckets =
3827, maxcount = 15
[ 0.898432] dcache: d_hash_shift = 20, nentries = 12654, nbuckets =
3829, maxcount = 16
[ 0.898877] dcache: d_hash_shift = 20, nentries = 12717, nbuckets =
3830, maxcount = 17
[ 0.906711] dcache: d_hash_shift = 20, nentries = 12821, nbuckets =
3838, maxcount = 18
[ 0.906822] dcache: d_hash_shift = 20, nentries = 12822, nbuckets =
3838, maxcount = 19
[ 0.906920] dcache: d_hash_shift = 20, nentries = 12823, nbuckets =
3838, maxcount = 20
[...]
[ 3.743889] dcache: d_hash_shift = 20, nentries = 17112, nbuckets =
3992, maxcount = 91
[ 4.113855] dcache: d_hash_shift = 20, nentries = 17927, nbuckets =
4000, maxcount = 92
[ 11.915833] dcache: d_hash_shift = 20, nentries = 27988, nbuckets =
4084, maxcount = 93
diff --git a/fs/dcache.c b/fs/dcache.c
index 7c38f39958bc..ba11d2047e73 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -103,13 +103,25 @@ EXPORT_SYMBOL(slash_name);

static unsigned int d_hash_shift __read_mostly;

-static struct hlist_bl_head *dentry_hashtable __read_mostly;
+struct _hlist_bl_head {
+ struct hlist_bl_head head;
+ int count;
+};

-static inline struct hlist_bl_head *d_hash(unsigned int hash)
+static int nentries, nbuckets, maxcount;
+
+static struct _hlist_bl_head *dentry_hashtable __read_mostly;
+
+static inline struct _hlist_bl_head *_d_hash(unsigned int hash)
{
return dentry_hashtable + (hash >> d_hash_shift);
}

+static inline struct hlist_bl_head *d_hash(unsigned int hash)
+{
+ return &_d_hash(hash)->head;
+}
+
#define IN_LOOKUP_SHIFT 10
static struct hlist_bl_head in_lookup_hashtable[1 << IN_LOOKUP_SHIFT];

@@ -174,7 +186,7 @@ int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer,
* Compare 2 name strings, return 0 if they match, otherwise non-zero.
* The strings are both count bytes long, and count is non-zero.
*/
-#ifdef CONFIG_DCACHE_WORD_ACCESS
+#if 0//def CONFIG_DCACHE_WORD_ACCESS

#include <asm/word-at-a-time.h>
/*
@@ -471,19 +483,28 @@ static void dentry_lru_add(struct dentry *dentry)
static void ___d_drop(struct dentry *dentry)
{
if (!d_unhashed(dentry)) {
+ struct _hlist_bl_head *_b;
struct hlist_bl_head *b;
/*
* Hashed dentries are normally on the dentry hashtable,
* with the exception of those newly allocated by
* d_obtain_root, which are always IS_ROOT:
*/
- if (unlikely(IS_ROOT(dentry)))
+ if (unlikely(IS_ROOT(dentry))) {
+ _b = NULL;
b = &dentry->d_sb->s_roots;
- else
- b = d_hash(dentry->d_name.hash);
-
+ } else {
+ _b = _d_hash(dentry->d_name.hash);
+ b = &_b->head;
+ }
hlist_bl_lock(b);
__hlist_bl_del(&dentry->d_hash);
+ if (_b) {
+ _b->count--;
+ if (!_b->count)
+ nbuckets--;
+ nentries--;
+ }
hlist_bl_unlock(b);
/* After this call, in-progress rcu-walk path lookup will fail. */
write_seqcount_invalidate(&dentry->d_seq);
@@ -2406,10 +2427,20 @@ EXPORT_SYMBOL(d_delete);

static void __d_rehash(struct dentry *entry)
{
- struct hlist_bl_head *b = d_hash(entry->d_name.hash);
+ struct _hlist_bl_head *_b = _d_hash(entry->d_name.hash);
+ struct hlist_bl_head *b = &_b->head;

hlist_bl_lock(b);
hlist_bl_add_head_rcu(&entry->d_hash, b);
+ if (!_b->count)
+ nbuckets++;
+ _b->count++;
+ nentries++;
+ if (_b->count > maxcount) {
+ maxcount = _b->count;
+ pr_info("dcache: d_hash_shift = %d, nentries = %d, nbuckets = %d, maxcount = %d\n",
+ d_hash_shift, nentries, nbuckets, maxcount);
+ }
hlist_bl_unlock(b);
}

@@ -3610,7 +3641,7 @@ static void __init dcache_init_early(void)

dentry_hashtable =
alloc_large_system_hash("Dentry cache",
- sizeof(struct hlist_bl_head),
+ sizeof(struct _hlist_bl_head),
dhash_entries,
13,
HASH_EARLY | HASH_ZERO,
@@ -3618,7 +3649,7 @@ static void __init dcache_init_early(void)
NULL,
0,
0);
- d_hash_shift = 32 - d_hash_shift;
+ d_hash_shift = 32 - 12;//d_hash_shift;
}

static void __init dcache_init(void)
@@ -3638,7 +3669,7 @@ static void __init dcache_init(void)

dentry_hashtable =
alloc_large_system_hash("Dentry cache",
- sizeof(struct hlist_bl_head),
+ sizeof(struct _hlist_bl_head),
dhash_entries,
13,
HASH_ZERO,
@@ -3646,7 +3677,7 @@ static void __init dcache_init(void)
NULL,
0,
0);
- d_hash_shift = 32 - d_hash_shift;
+ d_hash_shift = 32 - 12;//d_hash_shift;
}

/* SLAB cache for __getname() consumers */
diff --git a/fs/namei.c b/fs/namei.c
index afe2a8af9ce4..e11200c12351 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1798,7 +1798,7 @@ static int walk_component(struct nameidata *nd, int flags)
* the final mask". Again, that could be replaced with a
* efficient population count instruction or similar.
*/
-#ifdef CONFIG_DCACHE_WORD_ACCESS
+#if 0//def CONFIG_DCACHE_WORD_ACCESS

#include <asm/word-at-a-time.h>

@@ -1985,6 +1985,9 @@ unsigned long full_name_long_hash(const void *salt, const char *name,
}
EXPORT_SYMBOL(full_name_long_hash);

+// define end_name_hash to NOOP
+//#define end_name_hash (unsigned int)
+
unsigned int full_name_hash(const void *salt, const char *name,
unsigned int len)
{