Re: [PATCH] strlen

Brad Proctor (fredlie@sprynet.com)
Fri, 29 Oct 1999 01:05:05 -0400


This is a multi-part message in MIME format.
--------------CCCABBB80542BCC450476460
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

> If you do the start then do aligned loads (always a good idea) on 4-byte
> boundaries you won't cross a page by surprise.

OK, I've fixed it. I tested the code thoroughly and did some
benchmarking, and it is about 35% faster than the original. However, it
is slightly slower (< 10%) when the string is 5 characters or fewer.

Brad Proctor
--------------CCCABBB80542BCC450476460
Content-Type: text/plain; charset=us-ascii;
name="string.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="string.diff"

--- linux-2.3.4/include/asm-i386/string-486.h Fri Oct 29 00:34:47 1999
+++ linux/include/asm-i386/string-486.h Fri Oct 29 00:44:49 1999
@@ -323,23 +323,45 @@
#endif

#define __HAVE_ARCH_STRLEN
-extern inline size_t strlen(const char * s)
+extern inline size_t strlen(const char *s )
{
-/*
- * slightly slower on a 486, but with better chances of
- * register allocation
+/* Brad Proctor (99/10/28)
+ * This function works by testing 4 bytes at a time for a
+ * 0 byte. The C code at the top is to make sure the string
+ * is aligned on a 32-bit boundary.
*/
-register char dummy, *tmp= (char *)s;
-__asm__ __volatile__(
- "\n1:\t"
- "movb\t(%0),%1\n\t"
- "incl\t%0\n\t"
- "testb\t%1,%1\n\t"
- "jne\t1b"
- :"=r" (tmp),"=q" (dummy)
- :"0" (s)
- : "memory" );
-return (tmp-s-1);
+register size_t __res, d0;
+const char *p = s;
+ switch((unsigned long)s & 3)
+ {
+ case 1: if(*p != 0) ++p;
+ case 2: if(*p != 0) ++p;
+ case 3: if(*p != 0) ++p;
+ else return (p-s);
+ case 0:
+ }
+__asm__ __volatile__ (
+ "addl %0,%2\n\t"
+ "movl (%0),%3\n\t"
+ "addl $4,%0\n\t"
+ ".align 2\n\t"
+ "1:leal -0x1010101(%3),%%ecx\n\t"
+ "xorl $-1,%3\n\t"
+ "andl %3,%%ecx\n\t"
+ "movl (%0),%3\n\t"
+ "addl $4,%0\n\t"
+ "andl $0x80808080,%%ecx\n\t"
+ "jz 1b\n\t"
+ "testl $0x8080,%%ecx\n\t"
+ "jnz 1f\n\t"
+ "shrl $16,%%ecx\n\t"
+ "addl $2,%0\n\t"
+ "1:shlb $1,%%cl\n\t"
+ "sbbl %2,%0\n\t"
+ : "=r" (__res)
+ : "0" (p), "r" (7), "r" (d0)
+ : "ecx");
+return (__res);
}

/* Added by Gertjan van Wingerde to make minix and sysv module work */

--------------CCCABBB80542BCC450476460--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/