[PATCH 05/10] Eliminate bad hash multipliers from hash_32() and hash_64()
From: George Spelvin
Date: Wed May 25 2016  03:29:37 EST
To avoid inefficiency, hash_64() on 32-bit systems is changed
to use a different algorithm. It makes two calls to hash_32()
instead.
Signed-off-by: George Spelvin <linux@xxxxxxxxxxxxxxxxxxx>

include/linux/hash.h  100 ++++++++++++++++++++++
1 file changed, 43 insertions(+), 57 deletions(-)
diff --git a/include/linux/hash.h b/include/linux/hash.h
index b9201c33..8926f369 100644
--- a/include/linux/hash.h
+++ b/include/linux/hash.h
@@ -3,91 +3,76 @@
/* Fast hashing routine for ints, longs and pointers.
(C) 2002 Nadia Yvette Chambers, IBM */
/*
 * Knuth recommends primes in approximately golden ratio to the maximum
 * integer representable by a machine word for multiplicative hashing.
 * Chuck Lever verified the effectiveness of this technique:
 * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
 *
 * These primes are chosen to be bit-sparse, that is operations on
 * them can use shifts and additions instead of multiplications for
 * machines where multiplications are slow.
 */

#include <asm/types.h>
#include <linux/compiler.h>
/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
#define GOLDEN_RATIO_PRIME_32 0x9e370001UL
/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
#define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001UL

+/*
+ * The "GOLDEN_RATIO_PRIME" is used in fs/btrfs/btrfs_inode.h and
+ * fs/inode.c. It's not actually prime any more (the previous primes
+ * were actively bad for hashing), but the name remains.
+ */
#if BITS_PER_LONG == 32
#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_32
+#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_32
#define hash_long(val, bits) hash_32(val, bits)
#elif BITS_PER_LONG == 64
#define hash_long(val, bits) hash_64(val, bits)
#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_64
+#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_64
#else
#error Wordsize not 32 or 64
#endif
/*
 * The above primes are actively bad for hashing, since they are
 * too sparse. The 32-bit one is mostly ok, the 64-bit one causes
 * real problems. Besides, the "prime" part is pointless for the
 * multiplicative hash.
+ * This hash multiplies the input by a large odd number and takes the
+ * high bits. Since multiplication propagates changes to the most
+ * significant end only, it is essential that the high bits of the
+ * product be used for the hash value.
+ *
+ * Chuck Lever verified the effectiveness of this technique:
+ * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
*
* Although a random odd number will do, it turns out that the golden
* ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice
 * properties.
+ * properties. (See Knuth vol 3, section 6.4, exercise 9.)
*
 * These are the negative, (1 - phi) = (phi^2) = (3 - sqrt(5))/2.
 * (See Knuth vol 3, section 6.4, exercise 9.)
+ * These are the negative, (1 - phi) = phi**2 = (3 - sqrt(5))/2,
+ * which is very slightly easier to multiply by and makes no
+ * difference to the hash distribution.
*/
#define GOLDEN_RATIO_32 0x61C88647
#define GOLDEN_RATIO_64 0x61C8864680B583EBull
+static inline u32 __hash_32(u32 val)
+{
+ return val * GOLDEN_RATIO_32;
+}
+
+static inline u32 hash_32(u32 val, unsigned int bits)
+{
+ /* High bits are more random, so use them. */
+ return __hash_32(val) >> (32  bits);
+}
+
static __always_inline u32 hash_64(u64 val, unsigned int bits)
{
 u64 hash = val;

#if BITS_PER_LONG == 64
 hash = hash * GOLDEN_RATIO_64;
#else
 /* Sigh, gcc can't optimise this alone like it does for 32 bits. */
 u64 n = hash;
 n <<= 18;
 hash = n;
 n <<= 33;
 hash = n;
 n <<= 3;
 hash += n;
 n <<= 3;
 hash = n;
 n <<= 4;
 hash += n;
 n <<= 2;
 hash += n;
#endif

if (__builtin_constant_p(bits > 32  bits == 0)) {
BUILD_BUG_ON(bits > 32  bits == 0);
} else {
WARN_ON(bits > 32  bits == 0);
}
 /* High bits are more random, so use them. */
 return (unsigned)(hash >> (64  bits));
}

static inline u32 hash_32(u32 val, unsigned int bits)
{
 /* On some cpus multiply is faster, on others gcc will do shifts */
 u32 hash = val * GOLDEN_RATIO_PRIME_32;

 /* High bits are more random, so use them. */
 return hash >> (32  bits);
+#if BITS_PER_LONG == 64
+ /* 64x64-bit multiply is efficient on all 64-bit processors */
+ return val * GOLDEN_RATIO_64 >> (64  bits);
+#else
+ /*
+ * Hash 64 bits using only 32x32-bit multiply. GOLDEN_RATIO is
+ * phi**2 = 1-phi = 0.38196601. The square of that is phi**4 =
+ * 0.14589803 = 1/6.85, which is starting to have the low bits of
+ * (val >> 32) not affect the high bits of the hash. By subtracting,
+ * we end up with phi**3 = 0.23606798, which is a bit better.
+ */
+ return hash_32((u32)val  __hash_32(val >> 32), bits);
+#endif
}
static inline u32 hash_ptr(const void *ptr, unsigned int bits)
@@ -95,6 +80,7 @@ static inline u32 hash_ptr(const void *ptr, unsigned int bits)
return hash_long((unsigned long)ptr, bits);
}
+/* This really should be called fold32_ptr; it does no hashing to speak of. */
static inline u32 hash32_ptr(const void *ptr)
{
unsigned long val = (unsigned long)ptr;

2.8.1