[PATCH 2/3] bitmap: improve small_const case for find_next() functions

From: Yury Norov
Date: Thu Oct 27 2022 - 00:38:29 EST


find_next_bit() and friends use small_const_nbits() to detect whether a
compile-time optimization is possible. It works well for nbits up to
BITS_PER_LONG, i.e. for 1-word bitmaps. As soon as nbits falls into the
2nd, 3rd or any later word, small_const_nbits() returns false.
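
As an illustration (hypothetical caller, not part of this patch), the
following lookup currently goes through the out-of-line _find_next_bit()
on a 64-bit build, even though bits 100..127 all live in the second word:

    /*
     * size == 128 is a compile-time constant, but 128 > BITS_PER_LONG,
     * so small_const_nbits(128) is false and the call is not inlined.
     */
    DECLARE_BITMAP(map, 128);
    unsigned long bit = find_next_bit(map, 128, 100);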

But when both offset and size lie within the same word, we can still
apply the small_const optimization, because the whole operation boils
down to fetching a single word and masking off the head and tail bits
if needed.
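
Roughly, the inlined path then reduces to the following sketch (it mirrors
the code added below; 'addr', 'off' and 'size' stand for the usual
find_next_bit() arguments):

    /* All bits in [off, size) sit in the word addr[off / BITS_PER_LONG]. */
    unsigned long word = addr[off / BITS_PER_LONG];
    unsigned long mask = GENMASK((size - 1) % BITS_PER_LONG, off % BITS_PER_LONG);
    unsigned long val  = word & mask;

    /* Convert the in-word position back to an absolute bit number. */
    return val ? round_down(off, BITS_PER_LONG) + __ffs(val) : size;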

This patch adds a new small_const_nbits_off() macro that does exactly
that, and uses it in place of small_const_nbits() in the find_next_bit()
family of functions.
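
Illustration only (assumes a 64-bit build, so BITS_PER_LONG == 64); these
checks could be dropped into any function body:

    /* 100/64 == (128 - 1)/64 == 1: offset and last bit share a word. */
    BUILD_BUG_ON(!small_const_nbits_off(128, 100));
    /* 60/64 == 0 but (128 - 1)/64 == 1: different words, no inlining. */
    BUILD_BUG_ON(small_const_nbits_off(128, 60));

With the first check true, find_next_bit(map, 128, 100) from the earlier
example now compiles down to a single load of map[1] plus a mask.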

Signed-off-by: Yury Norov <yury.norov@xxxxxxxxx>
---
include/asm-generic/bitsperlong.h | 12 +++++++++
include/linux/find.h | 45 ++++++++++++-------------------
2 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/include/asm-generic/bitsperlong.h b/include/asm-generic/bitsperlong.h
index 1023e2a4bd37..c294ff798154 100644
--- a/include/asm-generic/bitsperlong.h
+++ b/include/asm-generic/bitsperlong.h
@@ -35,4 +35,16 @@
#define small_const_nbits(nbits) \
(__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0)

+/*
+ * small_const_nbits_off(nbits, off) is true precisely when it is known at
+ * compile-time that all bits in range [off, nbits) belong to the same word.
+ * Bitmaps of size 0 are very rare, and a compile-time-known-size 0 is most
+ * likely a sign of error. They will be handled correctly by the bit/bitmap
+ * APIs using the out-of-line functions, so that the inline implementations
+ * can unconditionally dereference the pointer(s).
+ */
+#define small_const_nbits_off(nbits, off) \
+ (__builtin_constant_p(nbits) && __builtin_constant_p(off) && (nbits) > 0 && \
+ (nbits) > (off) && (off) / BITS_PER_LONG == ((nbits) - 1) / BITS_PER_LONG)
+
#endif /* __ASM_GENERIC_BITS_PER_LONG */
diff --git a/include/linux/find.h b/include/linux/find.h
index db2f2851601d..df5c4d1adf4c 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -7,6 +7,7 @@
#endif

#include <linux/bitops.h>
+#include <linux/math.h>

unsigned long _find_next_bit(const unsigned long *addr1, unsigned long nbits,
unsigned long start);
@@ -49,14 +50,11 @@ static __always_inline
unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
unsigned long offset)
{
- if (small_const_nbits(size)) {
- unsigned long val;
+ if (small_const_nbits_off(size, offset)) {
+ unsigned long val = addr[offset/BITS_PER_LONG] &
+ GENMASK((size - 1) % BITS_PER_LONG, offset % BITS_PER_LONG);

- if (unlikely(offset >= size))
- return size;
-
- val = *addr & GENMASK(size - 1, offset);
- return val ? __ffs(val) : size;
+ return val ? round_down(offset, BITS_PER_LONG) + __ffs(val) : size;
}

return _find_next_bit(addr, size, offset);
@@ -79,14 +77,11 @@ unsigned long find_next_and_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long size,
unsigned long offset)
{
- if (small_const_nbits(size)) {
- unsigned long val;
+ if (small_const_nbits_off(size, offset)) {
+ unsigned long val = addr1[offset/BITS_PER_LONG] & addr2[offset/BITS_PER_LONG] &
+ GENMASK((size - 1) % BITS_PER_LONG, offset % BITS_PER_LONG);

- if (unlikely(offset >= size))
- return size;
-
- val = *addr1 & *addr2 & GENMASK(size - 1, offset);
- return val ? __ffs(val) : size;
+ return val ? round_down(offset, BITS_PER_LONG) + __ffs(val) : size;
}

return _find_next_and_bit(addr1, addr2, size, offset);
@@ -110,14 +105,11 @@ unsigned long find_next_andnot_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long size,
unsigned long offset)
{
- if (small_const_nbits(size)) {
- unsigned long val;
+ if (small_const_nbits_off(size, offset)) {
+ unsigned long val = addr1[offset/BITS_PER_LONG] & ~addr2[offset/BITS_PER_LONG] &
+ GENMASK((size - 1) % BITS_PER_LONG, offset % BITS_PER_LONG);

- if (unlikely(offset >= size))
- return size;
-
- val = *addr1 & ~*addr2 & GENMASK(size - 1, offset);
- return val ? __ffs(val) : size;
+ return val ? round_down(offset, BITS_PER_LONG) + __ffs(val) : size;
}

return _find_next_andnot_bit(addr1, addr2, size, offset);
@@ -138,14 +130,11 @@ static __always_inline
unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
unsigned long offset)
{
- if (small_const_nbits(size)) {
- unsigned long val;
+ if (small_const_nbits_off(size, offset)) {
+ unsigned long val = addr[offset/BITS_PER_LONG] |
+ ~GENMASK((size - 1) % BITS_PER_LONG, offset % BITS_PER_LONG);

- if (unlikely(offset >= size))
- return size;
-
- val = *addr | ~GENMASK(size - 1, offset);
- return val == ~0UL ? size : ffz(val);
+ return val == ~0UL ? size : round_down(offset, BITS_PER_LONG) + ffz(val);
}

return _find_next_zero_bit(addr, size, offset);
--
2.34.1