Re: [PATCH v3 1/3] lib: find_*_bit reimplementation

From: George Spelvin
Date: Mon Feb 09 2015 - 03:32:19 EST

Next message: Ingo Molnar: "[GIT PULL] RCU tree changes for v3.20"
Previous message: Jeff Kirsher: "Re: [PATCH] net:ethernet:intel:Remove outdated fix me comment in the function,gb_acquire_swfw_sync_i210"
In reply to: George Spelvin: "Re: [PATCH v3 1/3] lib: find_*_bit reimplementation"
Next in thread: Rasmus Villemoes: "Re: [PATCH v3 1/3] lib: find_*_bit reimplementation"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

Two more comments on the code. Two minor, but one that
seems like a bug, so for now, it's

Nacked-by: George Spelvin <linux@xxxxxxxxxxx>

Specifically, it seems like find_last_bit used to ignore trailing
garbage in the bitmap, but now will stop searching if the last word
contains some set bits not within size.

The minor one is that I don't think the first-word masking needs to
be conditional. The general code works fine if the start is aligned
(HIGH_BITS_MASK just generates an all-ones mask), is quite quick, and
saves a test & conditional branch.

A second minor one is that the comment in include/linux/bitops.h has
an obvious typo, and should probably be fixed, too.

Here's a diff on top of yours with all my suggested changes, both
these two and the ones from my previous e-mail.

diff --git a/find_last_bit.c b/find_last_bit.c
index e67e970..106050f 100644
--- a/find_last_bit.c
+++ b/find_last_bit.c
@@ -13,6 +13,7 @@
* 2 of the License, or (at your option) any later version.
*/

+#include <linux/bitops.h>
#include <linux/export.h>
#include <linux/kernel.h>

diff --git a/find_next_bit.c b/find_next_bit.c
index 71aa497..9d01d4a 100644
--- a/find_next_bit.c
+++ b/find_next_bit.c
@@ -16,41 +16,34 @@
#include <linux/export.h>
#include <linux/kernel.h>

-#define HIGH_BITS_MASK(nr) (ULONG_MAX << (nr))
+#define HIGH_BITS_MASK(nr) (~0UL << (nr))

#if !defined(find_next_bit) || !defined(find_next_zero_bit)
+
+/*
+ * This is a common helper function for find_next_bit and
+ * find_next_zero_bit. The difference is the "invert" argument, which
+ * is XORed with each fetched word before searching it for one bits.
+ */
static unsigned long _find_next_bit(const unsigned long *addr,
- unsigned long nbits, unsigned long start, unsigned long mask)
+ unsigned long nbits, unsigned long start, unsigned long invert)
{
unsigned long tmp;

if (start >= nbits)
return nbits;

- tmp = addr[start / BITS_PER_LONG] ^ mask;
+ tmp = addr[start / BITS_PER_LONG] ^ invert;

/* Handle 1st word. */
- if (!IS_ALIGNED(start, BITS_PER_LONG)) {
- tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
- start = round_down(start, BITS_PER_LONG);
- }
+ tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
+ start = round_down(start, BITS_PER_LONG);

while (!tmp) {
start += BITS_PER_LONG;
if (start >= nbits)
return nbits;
-
- /*
- * This is an equvalent for:
- *
- * tmp = find_set ? addr[start / BITS_PER_LONG]
- * : ~addr[start / BITS_PER_LONG];
- *
- * but saves a branch condition.
- *
- * Thanks George Spelvin <linux@xxxxxxxxxxx> for idea.
- */
- tmp = addr[start / BITS_PER_LONG] ^ mask;
+ tmp = addr[start / BITS_PER_LONG] ^ invert;
}

return min(start + __ffs(tmp), nbits);
@@ -105,7 +98,7 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
unsigned long idx;

for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
- if (addr[idx] != ULONG_MAX)
+ if (~addr[idx])
return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
}

@@ -130,14 +123,14 @@ static inline unsigned long ext2_swab(const unsigned long y)

#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
static unsigned long _find_next_bit_le(const unsigned long *addr,
- unsigned long nbits, unsigned long start, unsigned long mask)
+ unsigned long nbits, unsigned long start, unsigned long invert)
{
unsigned long tmp;

if (start >= nbits)
return nbits;

- tmp = addr[start / BITS_PER_LONG] ^ mask;
+ tmp = addr[start / BITS_PER_LONG] ^ invert;

/* Handle 1st word. */
if (!IS_ALIGNED(start, BITS_PER_LONG)) {
@@ -150,7 +143,7 @@ static unsigned long _find_next_bit_le(const unsigned long *addr,
if (start >= nbits)
return nbits;

- tmp = addr[start / BITS_PER_LONG] ^ mask;
+ tmp = addr[start / BITS_PER_LONG] ^ invert;
}

return min(start + __ffs(ext2_swab(tmp)), nbits);
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 5d858e02..4a5e5934 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -218,9 +218,9 @@ static inline unsigned long __ffs64(u64 word)
/**
* find_last_bit - find the last set bit in a memory region
* @addr: The address to start the search at
- * @size: The maximum size to search
+ * @size: The number of bits to search
*
- * Returns the bit number of the first set bit, or size.
+ * Returns the bit number of the last set bit, or size if no bits are set.
*/
extern unsigned long find_last_bit(const unsigned long *addr,
unsigned long size);

Now for the serious issue. I see a function change, fo find_last_bit
with no justifying comments.

diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c
index 91ca09f..106050f 100644
--- a/lib/find_last_bit.c
+++ b/lib/find_last_bit.c
@@ -19,29 +21,13 @@

unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
{
- unsigned long words;
- unsigned long tmp;
-
- /* Start at final word. */
- words = size / BITS_PER_LONG;
-
- /* Partial final word? */
- if (size & (BITS_PER_LONG-1)) {
- tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
- - (size & (BITS_PER_LONG-1)))));
- if (tmp)
- goto found;
- }
+ unsigned long idx = DIV_ROUND_UP(size, BITS_PER_LONG);

- while (words) {
- tmp = addr[--words];
- if (tmp) {
-found:
- return words * BITS_PER_LONG + __fls(tmp);
- }
+ while (idx--) {
+ if (addr[idx])
+ return min(idx * BITS_PER_LONG + __fls(addr[idx]), size);
}

- /* Not found */
return size;
}
EXPORT_SYMBOL(find_last_bit);

Previously, the last word was masked, so bits beyond "size" were ignored.
With the revised code, something like find_last_bit(array, 96) will return 96
if array[1] >> 32 is non-zero, even if array[1] & 0xffffffff is zero.

Looking through the callers, I haven't found a case where this matters yet
so perhaps it's a safe optimization, but this really needs to be more
clearly documented if intentional.

If no change was desired, I'd think a good way to do this would be:

unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
{
size_t idx = DIV_ROUND_UP(size, BITS_PER_LONG);
unsigned long tmp = addr[--idx];

tmp &= (2UL << (size % BITS_PER_LONG)) - 1; /* Mask last word */

while (!tmp) {
if (!idx)
return size;
tmp = addr[--idx];
}
return idx * BITS_PER_LONG + __fls(tmp);
}

Which is not that different from the old code, but cleaned up.

(Feel free to add my Signed-off-by if you think you need it.)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Next message: Ingo Molnar: "[GIT PULL] RCU tree changes for v3.20"
Previous message: Jeff Kirsher: "Re: [PATCH] net:ethernet:intel:Remove outdated fix me comment in the function,gb_acquire_swfw_sync_i210"
In reply to: George Spelvin: "Re: [PATCH v3 1/3] lib: find_*_bit reimplementation"
Next in thread: Rasmus Villemoes: "Re: [PATCH v3 1/3] lib: find_*_bit reimplementation"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]