[patch 11/18] Cleanup & optimize bitops.

From: Martin Schwidefsky
Date: Tue Feb 05 2008 - 10:41:53 EST


From: Martin Schwidefsky <schwidefsky@xxxxxxxxxx>

The bitops header is now a bit shorter and easier to understand since
it uses less inline assembly. It requires some tricks to persuade the
compiler to generate decent code. The ffz/ffs functions now use the
_zb_findmap/_sb_findmap table as well.
With this cleanup the new bitops for ext4 can be implemented with a
few lines, instead of another large inline assembly.

Signed-off-by: Martin Schwidefsky <schwidefsky@xxxxxxxxxx>
---

include/asm-s390/bitops.h | 515 +++++++++++++++++++---------------------------
1 file changed, 219 insertions(+), 296 deletions(-)

Index: quilt-2.6/include/asm-s390/bitops.h
===================================================================
--- quilt-2.6.orig/include/asm-s390/bitops.h
+++ quilt-2.6/include/asm-s390/bitops.h
@@ -440,242 +440,256 @@ __constant_test_bit(unsigned long nr, co
__test_bit((nr),(addr)) )

/*
- * ffz = Find First Zero in word. Undefined if no zero exists,
- * so code should check against ~0UL first..
+ * Optimized find bit helper functions.
*/
-static inline unsigned long ffz(unsigned long word)
+
+/**
+ * __ffz_word_loop - find byte offset of first long != -1UL
+ * @addr: pointer to array of unsigned long
+ * @size: size of the array in bits
+ */
+static inline unsigned long __ffz_word_loop(const unsigned long *addr,
+ unsigned long size)
+{
+ typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
+ unsigned long bytes = 0;
+
+ asm volatile(
+#ifndef __s390x__
+ " ahi %1,31\n"
+ " srl %1,5\n"
+ "0: c %2,0(%0,%3)\n"
+ " jne 1f\n"
+ " la %0,4(%0)\n"
+ " brct %1,0b\n"
+ "1:\n"
+#else
+ " aghi %1,63\n"
+ " srlg %1,%1,6\n"
+ "0: cg %2,0(%0,%3)\n"
+ " jne 1f\n"
+ " la %0,8(%0)\n"
+ " brct %1,0b\n"
+ "1:\n"
+#endif
+ : "+a" (bytes), "+d" (size)
+ : "d" (-1UL), "a" (addr), "m" (*(addrtype *) addr)
+ : "cc" );
+ return bytes;
+}
+
+/**
+ * __ffs_word_loop - find byte offset of first long != 0UL
+ * @addr: pointer to array of unsigned long
+ * @size: size of the array in bits
+ */
+static inline unsigned long __ffs_word_loop(const unsigned long *addr,
+ unsigned long size)
{
- unsigned long bit = 0;
+ typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
+ unsigned long bytes = 0;
+
+ asm volatile(
+#ifndef __s390x__
+ " ahi %1,31\n"
+ " srl %1,5\n"
+ "0: c %2,0(%0,%3)\n"
+ " jne 1f\n"
+ " la %0,4(%0)\n"
+ " brct %1,0b\n"
+ "1:\n"
+#else
+ " aghi %1,63\n"
+ " srlg %1,%1,6\n"
+ "0: cg %2,0(%0,%3)\n"
+ " jne 1f\n"
+ " la %0,8(%0)\n"
+ " brct %1,0b\n"
+ "1:\n"
+#endif
+ : "+a" (bytes), "+a" (size)
+ : "d" (0UL), "a" (addr), "m" (*(addrtype *) addr)
+ : "cc" );
+ return bytes;
+}

+/**
+ * __ffz_word - add number of the first unset bit
+ * @nr: base value the bit number is added to
+ * @word: the word that is searched for unset bits
+ */
+static inline unsigned long __ffz_word(unsigned long nr, unsigned long word)
+{
#ifdef __s390x__
if (likely((word & 0xffffffff) == 0xffffffff)) {
word >>= 32;
- bit += 32;
+ nr += 32;
}
#endif
if (likely((word & 0xffff) == 0xffff)) {
word >>= 16;
- bit += 16;
+ nr += 16;
}
if (likely((word & 0xff) == 0xff)) {
word >>= 8;
- bit += 8;
+ nr += 8;
}
- return bit + _zb_findmap[word & 0xff];
+ return nr + _zb_findmap[(unsigned char) word];
}

-/*
- * __ffs = find first bit in word. Undefined if no bit exists,
- * so code should check against 0UL first..
+/**
+ * __ffs_word - add number of the first set bit
+ * @nr: base value the bit number is added to
+ * @word: the word that is searched for set bits
*/
-static inline unsigned long __ffs (unsigned long word)
+static inline unsigned long __ffs_word(unsigned long nr, unsigned long word)
{
- unsigned long bit = 0;
-
#ifdef __s390x__
if (likely((word & 0xffffffff) == 0)) {
word >>= 32;
- bit += 32;
+ nr += 32;
}
#endif
if (likely((word & 0xffff) == 0)) {
word >>= 16;
- bit += 16;
+ nr += 16;
}
if (likely((word & 0xff) == 0)) {
word >>= 8;
- bit += 8;
+ nr += 8;
}
- return bit + _sb_findmap[word & 0xff];
+ return nr + _sb_findmap[(unsigned char) word];
}

-/*
- * Find-bit routines..
- */

-#ifndef __s390x__
+/**
+ * __load_ulong_be - load big endian unsigned long
+ * @p: pointer to array of unsigned long
+ * @offset: byte offset of source value in the array
+ */
+static inline unsigned long __load_ulong_be(const unsigned long *p,
+ unsigned long offset)
+{
+ p = (unsigned long *)((unsigned long) p + offset);
+ return *p;
+}

-static inline int
-find_first_zero_bit(const unsigned long * addr, unsigned long size)
+/**
+ * __load_ulong_le - load little endian unsigned long
+ * @p: pointer to array of unsigned long
+ * @offset: byte offset of source value in the array
+ */
+static inline unsigned long __load_ulong_le(const unsigned long *p,
+ unsigned long offset)
{
- typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
- unsigned long cmp, count;
- unsigned int res;
+ unsigned long word;

- if (!size)
- return 0;
+ p = (unsigned long *)((unsigned long) p + offset);
+#ifndef __s390x__
asm volatile(
- " lhi %1,-1\n"
- " lr %2,%3\n"
- " slr %0,%0\n"
- " ahi %2,31\n"
- " srl %2,5\n"
- "0: c %1,0(%0,%4)\n"
- " jne 1f\n"
- " la %0,4(%0)\n"
- " brct %2,0b\n"
- " lr %0,%3\n"
- " j 4f\n"
- "1: l %2,0(%0,%4)\n"
- " sll %0,3\n"
- " lhi %1,0xff\n"
- " tml %2,0xffff\n"
- " jno 2f\n"
- " ahi %0,16\n"
- " srl %2,16\n"
- "2: tml %2,0x00ff\n"
- " jno 3f\n"
- " ahi %0,8\n"
- " srl %2,8\n"
- "3: nr %2,%1\n"
- " ic %2,0(%2,%5)\n"
- " alr %0,%2\n"
- "4:"
- : "=&a" (res), "=&d" (cmp), "=&a" (count)
- : "a" (size), "a" (addr), "a" (&_zb_findmap),
- "m" (*(addrtype *) addr) : "cc");
- return (res < size) ? res : size;
+ " ic %0,0(%1)\n"
+ " icm %0,2,1(%1)\n"
+ " icm %0,4,2(%1)\n"
+ " icm %0,8,3(%1)"
+ : "=&d" (word) : "a" (p), "m" (*p) : "cc");
+#else
+ asm volatile(
+ " lrvg %0,%1"
+ : "=d" (word) : "m" (*p) );
+#endif
+ return word;
}

-static inline int
-find_first_bit(const unsigned long * addr, unsigned long size)
+/*
+ * The various find bit functions.
+ */
+
+/*
+ * ffz - find first zero in word.
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+static inline unsigned long ffz(unsigned long word)
{
- typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
- unsigned long cmp, count;
- unsigned int res;
+ return __ffz_word(0, word);
+}

- if (!size)
- return 0;
- asm volatile(
- " slr %1,%1\n"
- " lr %2,%3\n"
- " slr %0,%0\n"
- " ahi %2,31\n"
- " srl %2,5\n"
- "0: c %1,0(%0,%4)\n"
- " jne 1f\n"
- " la %0,4(%0)\n"
- " brct %2,0b\n"
- " lr %0,%3\n"
- " j 4f\n"
- "1: l %2,0(%0,%4)\n"
- " sll %0,3\n"
- " lhi %1,0xff\n"
- " tml %2,0xffff\n"
- " jnz 2f\n"
- " ahi %0,16\n"
- " srl %2,16\n"
- "2: tml %2,0x00ff\n"
- " jnz 3f\n"
- " ahi %0,8\n"
- " srl %2,8\n"
- "3: nr %2,%1\n"
- " ic %2,0(%2,%5)\n"
- " alr %0,%2\n"
- "4:"
- : "=&a" (res), "=&d" (cmp), "=&a" (count)
- : "a" (size), "a" (addr), "a" (&_sb_findmap),
- "m" (*(addrtype *) addr) : "cc");
- return (res < size) ? res : size;
+/**
+ * __ffs - find first bit in word.
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __ffs (unsigned long word)
+{
+ return __ffs_word(0, word);
}

-#else /* __s390x__ */
+/**
+ * ffs - find first bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ */
+static inline int ffs(int x)
+{
+ if (!x)
+ return 0;
+ return __ffs_word(1, x);
+}

-static inline unsigned long
-find_first_zero_bit(const unsigned long * addr, unsigned long size)
+/**
+ * find_first_zero_bit - find the first zero bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit-number of the first zero bit, not the number of the byte
+ * containing a bit.
+ */
+static inline unsigned long find_first_zero_bit(const unsigned long *addr,
+ unsigned long size)
{
- typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
- unsigned long res, cmp, count;
+ unsigned long bytes, bits;

if (!size)
return 0;
- asm volatile(
- " lghi %1,-1\n"
- " lgr %2,%3\n"
- " slgr %0,%0\n"
- " aghi %2,63\n"
- " srlg %2,%2,6\n"
- "0: cg %1,0(%0,%4)\n"
- " jne 1f\n"
- " la %0,8(%0)\n"
- " brct %2,0b\n"
- " lgr %0,%3\n"
- " j 5f\n"
- "1: lg %2,0(%0,%4)\n"
- " sllg %0,%0,3\n"
- " clr %2,%1\n"
- " jne 2f\n"
- " aghi %0,32\n"
- " srlg %2,%2,32\n"
- "2: lghi %1,0xff\n"
- " tmll %2,0xffff\n"
- " jno 3f\n"
- " aghi %0,16\n"
- " srl %2,16\n"
- "3: tmll %2,0x00ff\n"
- " jno 4f\n"
- " aghi %0,8\n"
- " srl %2,8\n"
- "4: ngr %2,%1\n"
- " ic %2,0(%2,%5)\n"
- " algr %0,%2\n"
- "5:"
- : "=&a" (res), "=&d" (cmp), "=&a" (count)
- : "a" (size), "a" (addr), "a" (&_zb_findmap),
- "m" (*(addrtype *) addr) : "cc");
- return (res < size) ? res : size;
+ bytes = __ffz_word_loop(addr, size);
+ bits = __ffz_word(bytes*8, __load_ulong_be(addr, bytes));
+ return (bits < size) ? bits : size;
}

-static inline unsigned long
-find_first_bit(const unsigned long * addr, unsigned long size)
+/**
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit-number of the first set bit, not the number of the byte
+ * containing a bit.
+ */
+static inline unsigned long find_first_bit(const unsigned long * addr,
+ unsigned long size)
{
- typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
- unsigned long res, cmp, count;
+ unsigned long bytes, bits;

if (!size)
return 0;
- asm volatile(
- " slgr %1,%1\n"
- " lgr %2,%3\n"
- " slgr %0,%0\n"
- " aghi %2,63\n"
- " srlg %2,%2,6\n"
- "0: cg %1,0(%0,%4)\n"
- " jne 1f\n"
- " aghi %0,8\n"
- " brct %2,0b\n"
- " lgr %0,%3\n"
- " j 5f\n"
- "1: lg %2,0(%0,%4)\n"
- " sllg %0,%0,3\n"
- " clr %2,%1\n"
- " jne 2f\n"
- " aghi %0,32\n"
- " srlg %2,%2,32\n"
- "2: lghi %1,0xff\n"
- " tmll %2,0xffff\n"
- " jnz 3f\n"
- " aghi %0,16\n"
- " srl %2,16\n"
- "3: tmll %2,0x00ff\n"
- " jnz 4f\n"
- " aghi %0,8\n"
- " srl %2,8\n"
- "4: ngr %2,%1\n"
- " ic %2,0(%2,%5)\n"
- " algr %0,%2\n"
- "5:"
- : "=&a" (res), "=&d" (cmp), "=&a" (count)
- : "a" (size), "a" (addr), "a" (&_sb_findmap),
- "m" (*(addrtype *) addr) : "cc");
- return (res < size) ? res : size;
-}
-
-#endif /* __s390x__ */
-
-static inline int
-find_next_zero_bit (const unsigned long * addr, unsigned long size,
- unsigned long offset)
+ bytes = __ffs_word_loop(addr, size);
+ bits = __ffs_word(bytes*8, __load_ulong_be(addr, bytes));
+ return (bits < size) ? bits : size;
+}
+
+/**
+ * find_next_zero_bit - find the first zero bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
+ */
+static inline int find_next_zero_bit (const unsigned long * addr,
+ unsigned long size,
+ unsigned long offset)
{
const unsigned long *p;
unsigned long bit, set;
@@ -688,10 +702,10 @@ find_next_zero_bit (const unsigned long
p = addr + offset / __BITOPS_WORDSIZE;
if (bit) {
/*
- * s390 version of ffz returns __BITOPS_WORDSIZE
+ * __ffz_word returns __BITOPS_WORDSIZE
* if no zero bit is present in the word.
*/
- set = ffz(*p >> bit) + bit;
+ set = __ffz_word(0, *p >> bit) + bit;
if (set >= size)
return size + offset;
if (set < __BITOPS_WORDSIZE)
@@ -703,9 +717,15 @@ find_next_zero_bit (const unsigned long
return offset + find_first_zero_bit(p, size);
}

-static inline int
-find_next_bit (const unsigned long * addr, unsigned long size,
- unsigned long offset)
+/**
+ * find_next_bit - find the first set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
+ */
+static inline int find_next_bit (const unsigned long * addr,
+ unsigned long size,
+ unsigned long offset)
{
const unsigned long *p;
unsigned long bit, set;
@@ -718,10 +738,10 @@ find_next_bit (const unsigned long * add
p = addr + offset / __BITOPS_WORDSIZE;
if (bit) {
/*
- * s390 version of __ffs returns __BITOPS_WORDSIZE
+ * __ffs_word returns __BITOPS_WORDSIZE
* if no one bit is present in the word.
*/
- set = __ffs(*p & (~0UL << bit));
+ set = __ffs_word(0, *p & (~0UL << bit));
if (set >= size)
return size + offset;
if (set < __BITOPS_WORDSIZE)
@@ -744,8 +764,6 @@ static inline int sched_find_first_bit(u
return find_first_bit(b, 140);
}

-#include <asm-generic/bitops/ffs.h>
-
#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/fls64.h>

@@ -775,105 +793,22 @@ static inline int sched_find_first_bit(u
#define ext2_find_next_bit(addr, size, off) \
generic_find_next_le_bit((unsigned long *)(addr), (size), (off))

-#ifndef __s390x__
-
-static inline int
-ext2_find_first_zero_bit(void *vaddr, unsigned int size)
+static inline int ext2_find_first_zero_bit(void *vaddr, unsigned int size)
{
- typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
- unsigned long cmp, count;
- unsigned int res;
+ unsigned long bytes, bits;

if (!size)
return 0;
- asm volatile(
- " lhi %1,-1\n"
- " lr %2,%3\n"
- " ahi %2,31\n"
- " srl %2,5\n"
- " slr %0,%0\n"
- "0: cl %1,0(%0,%4)\n"
- " jne 1f\n"
- " ahi %0,4\n"
- " brct %2,0b\n"
- " lr %0,%3\n"
- " j 4f\n"
- "1: l %2,0(%0,%4)\n"
- " sll %0,3\n"
- " ahi %0,24\n"
- " lhi %1,0xff\n"
- " tmh %2,0xffff\n"
- " jo 2f\n"
- " ahi %0,-16\n"
- " srl %2,16\n"
- "2: tml %2,0xff00\n"
- " jo 3f\n"
- " ahi %0,-8\n"
- " srl %2,8\n"
- "3: nr %2,%1\n"
- " ic %2,0(%2,%5)\n"
- " alr %0,%2\n"
- "4:"
- : "=&a" (res), "=&d" (cmp), "=&a" (count)
- : "a" (size), "a" (vaddr), "a" (&_zb_findmap),
- "m" (*(addrtype *) vaddr) : "cc");
- return (res < size) ? res : size;
+ bytes = __ffz_word_loop(vaddr, size);
+ bits = __ffz_word(bytes*8, __load_ulong_le(vaddr, bytes));
+ return (bits < size) ? bits : size;
}

-#else /* __s390x__ */
-
-static inline unsigned long
-ext2_find_first_zero_bit(void *vaddr, unsigned long size)
-{
- typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
- unsigned long res, cmp, count;
-
- if (!size)
- return 0;
- asm volatile(
- " lghi %1,-1\n"
- " lgr %2,%3\n"
- " aghi %2,63\n"
- " srlg %2,%2,6\n"
- " slgr %0,%0\n"
- "0: clg %1,0(%0,%4)\n"
- " jne 1f\n"
- " aghi %0,8\n"
- " brct %2,0b\n"
- " lgr %0,%3\n"
- " j 5f\n"
- "1: cl %1,0(%0,%4)\n"
- " jne 2f\n"
- " aghi %0,4\n"
- "2: l %2,0(%0,%4)\n"
- " sllg %0,%0,3\n"
- " aghi %0,24\n"
- " lghi %1,0xff\n"
- " tmlh %2,0xffff\n"
- " jo 3f\n"
- " aghi %0,-16\n"
- " srl %2,16\n"
- "3: tmll %2,0xff00\n"
- " jo 4f\n"
- " aghi %0,-8\n"
- " srl %2,8\n"
- "4: ngr %2,%1\n"
- " ic %2,0(%2,%5)\n"
- " algr %0,%2\n"
- "5:"
- : "=&a" (res), "=&d" (cmp), "=&a" (count)
- : "a" (size), "a" (vaddr), "a" (&_zb_findmap),
- "m" (*(addrtype *) vaddr) : "cc");
- return (res < size) ? res : size;
-}
-
-#endif /* __s390x__ */
-
-static inline int
-ext2_find_next_zero_bit(void *vaddr, unsigned long size, unsigned long offset)
+static inline int ext2_find_next_zero_bit(void *vaddr, unsigned long size,
+ unsigned long offset)
{
unsigned long *addr = vaddr, *p;
- unsigned long word, bit, set;
+ unsigned long bit, set;

if (offset >= size)
return size;
@@ -882,23 +817,11 @@ ext2_find_next_zero_bit(void *vaddr, uns
size -= offset;
p = addr + offset / __BITOPS_WORDSIZE;
if (bit) {
-#ifndef __s390x__
- asm volatile(
- " ic %0,0(%1)\n"
- " icm %0,2,1(%1)\n"
- " icm %0,4,2(%1)\n"
- " icm %0,8,3(%1)"
- : "=&a" (word) : "a" (p), "m" (*p) : "cc");
-#else
- asm volatile(
- " lrvg %0,%1"
- : "=a" (word) : "m" (*p) );
-#endif
/*
* s390 version of ffz returns __BITOPS_WORDSIZE
* if no zero bit is present in the word.
*/
- set = ffz(word >> bit) + bit;
+ set = ffz(__load_ulong_le(p, 0) >> bit) + bit;
if (set >= size)
return size + offset;
if (set < __BITOPS_WORDSIZE)

--
blue skies,
Martin.

"Reality continues to ruin my life." - Calvin.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/