[PATCH 3/5] lib/find_bit: unify _find_first_{,and,zero}_bit implementations
From: Yury Norov
Date: Thu Jul 28 2022 - 12:12:39 EST
The functions are almost identical, so create a common helper for them so
that the compiler will be able to either inline the helper and optimize out
parameters known at compile time, or save some space by keeping it as a
real function.
On kvm/x86_64, bloat-o-meter reports +9 bytes. Running find_bit_benchmark 5
times before and after the patch doesn't show a significant difference (i.e.
a delta greater than 3 sigma), except for find_next_bit, which is most likely
an outlier (although a lucky one for the patch):
v5.19-rc8 Optimized Difference (more - better)
Random dense bitmap ns ns % sigmas
find_next_bit: 721209 594936 18 3.19
find_next_zero_bit: 738138 638182 14 1.40
find_last_bit: 802393 940846 -17 -0.31
find_first_bit: 3560900 3379983 5 0.65
find_first_and_bit: 38601442 37683449 2 1.00
find_next_and_bit: 335574 300373 10 2.82
Random sparse bitmap
find_next_bit: 15868 13856 13 0.82
find_next_zero_bit: 1311843 1227418 6 0.72
find_last_bit: 13633 14080 -3 -0.74
find_first_bit: 1273625 1253343 2 0.52
find_first_and_bit: 8548 8157 5 0.32
find_next_and_bit: 8828 8437 4 0.52
Signed-off-by: Yury Norov <yury.norov@xxxxxxxxx>
---
lib/find_bit.c | 62 +++++++++++++++++++++++++++-----------------------
1 file changed, 33 insertions(+), 29 deletions(-)
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 4ef3151b3109..d207d1699834 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -20,12 +20,38 @@
#include <linux/swab.h>
/*
- * This is a common helper function for find_next_bit, find_next_zero_bit, and
- * find_next_and_bit. The differences are:
+ * These are common helper functions for find_{first,next}_bit{,_le}.
+ * Internal parameters are:
* - The "invert" argument, which is XORed with each fetched word before
- * searching it for one bits.
- * - The optional "addr2", which is anded with "addr1" if present.
+ * searching it for set bits; to implement find_*_zero_bit().
+ * - The optional "addr2", which is ANDed with "addr1" if present; to
+ * implement find_*_and_bit().
+ * - The "need_swab" flag, which converts words to BE format; to implement
+ * find_*_le() on big-endian machines.
*/
+static inline /* shared body of the _find_first_*() wrappers; returns size when no bit matches */
+unsigned long __find_first_bit(const unsigned long *addr1, const unsigned long *addr2,
+ unsigned long size, unsigned long invert, bool need_swab)
+{
+ unsigned long idx, val;
+
+ for (idx = 0; idx * BITS_PER_LONG < size; idx++) { /* scan one word at a time */
+ val = addr1[idx];
+ if (addr2) /* NULL addr2 means a single-bitmap search */
+ val &= addr2[idx];
+
+ val ^= invert; /* ~0UL makes zero bits the ones searched for; 0UL leaves val as is */
+
+ if (val) { /* tested before swab(): byte order cannot change whether val is zero */
+ if (need_swab)
+ val = swab(val);
+ return min(idx * BITS_PER_LONG + __ffs(val), size); /* clamp a hit at or beyond size to size */
+ }
+ }
+
+ return size; /* no word had a matching bit */
+}
+
static inline unsigned long __find_next_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long nbits,
unsigned long start, unsigned long invert, bool need_swab)
@@ -118,14 +144,7 @@ EXPORT_SYMBOL(_find_next_bit_le);
*/
unsigned long _find_first_bit(const unsigned long *addr, unsigned long size)
{
- unsigned long idx;
-
- for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
- if (addr[idx])
- return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
- }
-
- return size;
+ return __find_first_bit(addr, NULL, size, 0UL, false);
}
EXPORT_SYMBOL(_find_first_bit);
#endif
@@ -138,15 +157,7 @@ unsigned long _find_first_and_bit(const unsigned long *addr1,
const unsigned long *addr2,
unsigned long size)
{
- unsigned long idx, val;
-
- for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
- val = addr1[idx] & addr2[idx];
- if (val)
- return min(idx * BITS_PER_LONG + __ffs(val), size);
- }
-
- return size;
+ return __find_first_bit(addr1, addr2, size, 0UL, false);
}
EXPORT_SYMBOL(_find_first_and_bit);
#endif
@@ -157,14 +168,7 @@ EXPORT_SYMBOL(_find_first_and_bit);
*/
unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size)
{
- unsigned long idx;
-
- for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
- if (addr[idx] != ~0UL)
- return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
- }
-
- return size;
+ return __find_first_bit(addr, NULL, size, ~0UL, false);
}
EXPORT_SYMBOL(_find_first_zero_bit);
#endif
--
2.34.1