Hi to all,
I made this patch because some people requested 486-optimized
string routines for older (486 and 586) machines.
I actually did two things:
- replaced the buggy macro definitions for memset and memcpy,
  which broke the compile; also added a paranoia check for
  (counter == 0);
- rewrote these functions to speed them up and reduce the
  code size.
Anyone who is interested - please send me comments, bug reports
or ideas.
The goal is to make the kernel use these routines when running
on a 486 or 586 processor, so please test...
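
For the curious, the dispatch idea behind the new memcpy/memset
macros looks roughly like this in plain C. This is a stand-alone
user-space sketch with made-up names (my_memcpy etc.), not the
actual kernel code from the patch below:

#include <stdio.h>

/* stand-alone sketch; the my_memcpy_* names are invented here,
 * they are not the routines from the patch */
static void *my_memcpy_generic(void *to, const void *from, size_t len)
{
	/* run-time length: a byte loop stands in for "rep; movsl" */
	unsigned char *d = to;
	const unsigned char *s = from;

	while (len--)
		*d++ = *s++;
	return to;
}

static void *my_memcpy_constant(void *to, const void *from, size_t len)
{
	/* compile-time length: gcc may unroll or otherwise tune this */
	return my_memcpy_generic(to, from, len);
}

/*
 * like the patch: only constant *and* non-zero counts take the
 * tuned path; everything else (including count == 0) goes to
 * the generic version
 */
#define my_memcpy(d, s, count)					\
	((__builtin_constant_p(count) && (count)) ?		\
		my_memcpy_constant((d), (s), (count)) :		\
		my_memcpy_generic((d), (s), (count)))

int main(void)
{
	char dst[16];
	size_t n = 0;

	my_memcpy(dst, "constant", 9);	/* constant count -> tuned path */
	my_memcpy(dst, "generic", n);	/* run-time count -> generic path */
	printf("%s\n", dst);
	return 0;
}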
best,
Petkan
--- linux-2.4.0-test7/include/asm-i386/string-486.h.orig Tue Aug 29 11:10:51 2000
+++ linux/include/asm-i386/string-486.h Thu Aug 31 16:10:11 2000
@@ -16,10 +16,11 @@
* Split into 2 CPU specific files by Alan Cox to keep #ifdef noise down.
*
* 1999/10/5 Proper register args for newer GCCs and minor bugs
- * fixed - Petko Manolov (petkan@spct.net)
+ * fixed - Petko Manolov (petkan@dce.bg)
* 1999/10/14 3DNow memscpy() added - Petkan
* 2000/05/09 extern changed to static in function definitions
* and a few cleanups - Petkan
+ * 2000/08/29 memset and memcpy rewritten - Petkan
*/
#define __HAVE_ARCH_STRCPY
@@ -273,79 +274,60 @@
/* end of additional stuff */
-/*
- * These ought to get tweaked to do some cache priming.
- */
-
-static inline void * __memcpy_by4(void * to, const void * from, size_t n)
+static inline void *__memcpy(void * to, const void *from, size_t len)
{
-register void *tmp = (void *)to;
-register int dummy1,dummy2;
-__asm__ __volatile__ (
- "\n1:\tmovl (%2),%0\n\t"
- "addl $4,%2\n\t"
- "movl %0,(%1)\n\t"
- "addl $4,%1\n\t"
- "decl %3\n\t"
- "jnz 1b"
- :"=r" (dummy1), "=r" (tmp), "=r" (from), "=r" (dummy2)
- :"1" (tmp), "2" (from), "3" (n/4)
- :"memory");
-return (to);
-}
+ int d0,d1,d2;
-static inline void * __memcpy_by2(void * to, const void * from, size_t n)
-{
-register void *tmp = (void *)to;
-register int dummy1,dummy2;
-__asm__ __volatile__ (
- "shrl $1,%3\n\t"
- "jz 2f\n" /* only a word */
- "1:\tmovl (%2),%0\n\t"
- "addl $4,%2\n\t"
- "movl %0,(%1)\n\t"
- "addl $4,%1\n\t"
- "decl %3\n\t"
- "jnz 1b\n"
- "2:\tmovw (%2),%w0\n\t"
- "movw %w0,(%1)"
- :"=r" (dummy1), "=r" (tmp), "=r" (from), "=r" (dummy2)
- :"1" (tmp), "2" (from), "3" (n/2)
- :"memory");
-return (to);
-}
-
-static inline void * __memcpy_g(void * to, const void * from, size_t n)
-{
-int d0, d1, d2;
-register void *tmp = (void *)to;
-__asm__ __volatile__ (
- "shrl $1,%%ecx\n\t"
- "jnc 1f\n\t"
- "movsb\n"
- "1:\tshrl $1,%%ecx\n\t"
- "jnc 2f\n\t"
+ __asm__ __volatile__ (
+ "rep; movsl\n\t"
+ "testb $2,%b3\n\t"
+ "jz 1f\n\t"
"movsw\n"
- "2:\trep\n\t"
- "movsl"
+ "1:\t"
+ "testb $1,%b3\n\t"
+ "jz 2f\n\t"
+ "movsb\n"
+ "2:"
:"=&c" (d0), "=&D" (d1), "=&S" (d2)
- :"0" (n), "1" ((long) tmp), "2" ((long) from)
- :"memory");
-return (to);
+ :"q" (len), "0" (len/4), "1" (to), "2" (from)
+ :"memory"
+ );
+
+ return to;
}
-#define __memcpy_c(d,s,count) \
-((count%4==0) ? \
- __memcpy_by4((d),(s),(count)) : \
- ((count%2==0) ? \
- __memcpy_by2((d),(s),(count)) : \
- __memcpy_g((d),(s),(count))))
-
-#define __memcpy(d,s,count) \
-(__builtin_constant_p(count) ? \
- __memcpy_c((d),(s),(count)) : \
- __memcpy_g((d),(s),(count)))
-
+
+static inline void *__constant_memcpy(void * to, const void *from, size_t len)
+{
+ int d0,d1,d2;
+ register int tmp;
+
+#define MEMCP(x) \
+ __asm__ volatile ( \
+ "\n1:\t" \
+ "movl (%0),%3\n\t" \
+ "movl %3,(%1)\n\t" \
+ "addl $4,%1\n\t" \
+ "addl $4,%0\n\t" \
+ "decl %2\n\t" \
+ "jnz 1b\n\t" \
+ x \
+ :"=r" (d0), "=r" (d1), "=r" (d2),"=q" (tmp)\
+ :"0" (from), "1" (to), "2" (len/4) \
+ :"memory" \
+ )
+
+ switch ( len % 4 ) {
+ case 0: MEMCP(""); return to;
+ case 1: MEMCP("movb (%0),%b3; movb %b3,(%1)"); return to;
+ case 2: MEMCP("movw (%0),%w3; movw %w3,(%1)"); return to;
+ default: MEMCP("movw (%0),%w3; movw %w3,(%1)\n\t"
+ "movb 2(%0),%b3; movb %b3,2(%1)"); return to;
+ }
+#undef MEMCP
+}
+
+
#define __HAVE_ARCH_MEMCPY
#include <linux/config.h>
@@ -363,24 +345,26 @@
** This CPU favours 3DNow strongly (eg AMD K6-II, K6-III, Athlon)
*/
-static inline void * __constant_memcpy3d(void * to, const void * from, size_t len)
+static inline void *__memcpy3d(void *to, const void *from, size_t len)
{
if(len<512 || in_interrupt())
- return __memcpy_c(to, from, len);
+ return __memcpy(to, from, len);
return _mmx_memcpy(to, from, len);
}
-static inline void *__memcpy3d(void *to, const void *from, size_t len)
+
+static inline void *__constant_memcpy3d(void *to, const void *from, size_t len)
{
if(len<512 || in_interrupt())
- return __memcpy_g(to, from, len);
+ return __constant_memcpy(to, from, len);
return _mmx_memcpy(to, from, len);
}
-#define memcpy(d, s, count) \
-(__builtin_constant_p(count) ? \
- __constant_memcpy3d((d),(s),(count)) : \
- __memcpy3d((d),(s),(count)))
+
+#define memcpy(d, s, count) \
+ (__builtin_constant_p(count) && count) ?\
+ __constant_memcpy3d( d, s, count ) : \
+ __memcpy3d( d, s, count )
#else /* CONFIG_X86_USE_3DNOW */
@@ -389,7 +373,10 @@
*/
-#define memcpy(d, s, count) __memcpy(d, s, count)
+#define memcpy(d, s, count) \
+ (__builtin_constant_p(count) && count) ?\
+ __constant_memcpy( d, s, count ) : \
+ __memcpy( d, s, count )
#endif /* CONFIG_X86_USE_3DNOW */
@@ -429,22 +416,7 @@
}
-#define __HAVE_ARCH_MEMCMP
-static inline int memcmp(const void * cs,const void * ct,size_t count)
-{
-int d0, d1, d2;
-register int __res;
-__asm__ __volatile__(
- "repe\n\t"
- "cmpsb\n\t"
- "je 1f\n\t"
- "sbbl %0,%0\n\t"
- "orb $1,%b0\n"
- "1:"
- :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
- :"0" (0), "1" (cs), "2" (ct), "3" (count));
-return __res;
-}
+#define memcmp __builtin_memcmp
#define __HAVE_ARCH_MEMCHR
@@ -465,141 +437,57 @@
return __res;
}
-#define __memset_cc(s,c,count) \
-((count%4==0) ? \
- __memset_cc_by4((s),(c),(count)) : \
- ((count%2==0) ? \
- __memset_cc_by2((s),(c),(count)) : \
- __memset_cg((s),(c),(count))))
-
-#define __memset_gc(s,c,count) \
-((count%4==0) ? \
- __memset_gc_by4((s),(c),(count)) : \
- ((count%2==0) ? \
- __memset_gc_by2((s),(c),(count)) : \
- __memset_gg((s),(c),(count))))
-
-#define __HAVE_ARCH_MEMSET
-#define memset(s,c,count) \
-(__builtin_constant_p(c) ? \
- (__builtin_constant_p(count) ? \
- __memset_cc((s),(c),(count)) : \
- __memset_cg((s),(c),(count))) : \
- (__builtin_constant_p(count) ? \
- __memset_gc((s),(c),(count)) : \
- __memset_gg((s),(c),(count))))
-static inline void * __memset_cc_by4(void * s, char c, size_t count)
+static inline void *__memset_generic( void *s, char c, size_t count )
{
-/*
- * register char *tmp = s;
- */
-register char *tmp = (char *)s;
-register int dummy;
-__asm__ __volatile__ (
- "\n1:\tmovl %2,(%0)\n\t"
- "addl $4,%0\n\t"
- "decl %1\n\t"
- "jnz 1b"
- :"=r" (tmp), "=r" (dummy)
- :"q" (0x01010101UL * (unsigned char) c), "0" (tmp), "1" (count/4)
- :"memory");
-return s;
-}
+ int d0,d1;
-static inline void * __memset_cc_by2(void * s, char c, size_t count)
-{
-register void *tmp = (void *)s;
-register int dummy;
-__asm__ __volatile__ (
- "shrl $1,%1\n\t" /* may be divisible also by 4 */
- "jz 2f\n"
- "\n1:\tmovl %2,(%0)\n\t"
- "addl $4,%0\n\t"
- "decl %1\n\t"
- "jnz 1b\n"
- "2:\tmovw %w2,(%0)"
- :"=r" (tmp), "=r" (dummy)
- :"q" (0x01010101UL * (unsigned char) c), "0" (tmp), "1" (count/2)
- :"memory");
-return s;
+ __asm__ volatile (
+ "rep\n\t"
+ "stosb\n\t"
+ :"=&c" (d0), "=&D" (d1)
+ :"a" (c), "0" (count), "1" (s)
+ :"memory"
+ );
+
+ return s;
}
-static inline void * __memset_gc_by4(void * s, char c, size_t count)
-{
-register void *tmp = (void *)s;
-register int dummy;
-__asm__ __volatile__ (
- "movb %b0,%h0\n"
- "pushw %w0\n\t"
- "shll $16,%0\n\t"
- "popw %w0\n"
- "1:\tmovl %0,(%1)\n\t"
- "addl $4,%1\n\t"
- "decl %2\n\t"
- "jnz 1b\n"
- :"=q" (c), "=r" (tmp), "=r" (dummy)
- :"0" ((unsigned) c), "1" (tmp), "2" (count/4)
- :"memory");
-return s;
-}
-static inline void * __memset_gc_by2(void * s, char c, size_t count)
-{
-register void *tmp = (void *)s;
-register int dummy1,dummy2;
-__asm__ __volatile__ (
- "movb %b0,%h0\n\t"
- "shrl $1,%2\n\t" /* may be divisible also by 4 */
- "jz 2f\n\t"
- "pushw %w0\n\t"
- "shll $16,%0\n\t"
- "popw %w0\n"
- "1:\tmovl %0,(%1)\n\t"
- "addl $4,%1\n\t"
- "decl %2\n\t"
- "jnz 1b\n"
- "2:\tmovw %w0,(%1)"
- :"=q" (dummy1), "=r" (tmp), "=r" (dummy2)
- :"0" ((unsigned) c), "1" (tmp), "2" (count/2)
- :"memory");
-return s;
+static inline void *__memset_constant( void *s, char c, size_t count )
+{
+ int d0,d1;
+
+#define MEMST(x) \
+ __asm__ volatile ( \
+ "\n1:\t" \
+ "movl %2,(%0)\n\t" \
+ "addl $4,%0\n\t" \
+ "decl %1\n\t" \
+ "jnz 1b\n" \
+ x \
+ :"=r" (d0), "=r" (d1) \
+ :"q" (0x01010101UL * ((unsigned char)c)), "0" (s), "1" (count/4) \
+ :"memory" \
+ )
+
+ switch ( count % 4 ) {
+ case 0: MEMST(""); return s;
+ case 1: MEMST("\tmovb %b2,(%0)"); return s;
+ case 2: MEMST("\tmovw %w2,(%0)"); return s;
+ default: MEMST("\tmovw %w2,(%0)\n\tmovb %b2,2(%0)"); return s;
+ }
}
-static inline void * __memset_cg(void * s, char c, size_t count)
-{
-int d0, d1;
-register void *tmp = (void *)s;
-__asm__ __volatile__ (
- "shrl $1,%%ecx\n\t"
- "rep\n\t"
- "stosw\n\t"
- "jnc 1f\n\t"
- "movb %%al,(%%edi)\n"
- "1:"
- :"=&c" (d0), "=&D" (d1)
- :"a" (0x0101U * (unsigned char) c), "0" (count), "1" (tmp)
- :"memory");
-return s;
-}
-static inline void * __memset_gg(void * s,char c,size_t count)
-{
-int d0, d1, d2;
-register void *tmp = (void *)s;
-__asm__ __volatile__ (
- "movb %%al,%%ah\n\t"
- "shrl $1,%%ecx\n\t"
- "rep\n\t"
- "stosw\n\t"
- "jnc 1f\n\t"
- "movb %%al,(%%edi)\n"
- "1:"
- :"=&c" (d0), "=&D" (d1), "=&D" (d2)
- :"0" (count), "1" (tmp), "2" (c)
- :"memory");
-return s;
-}
+#define __memset( s, c, count ) \
+ (__builtin_constant_p( count ) && count ) ? \
+ __memset_constant( s, c, count ) : \
+ __memset_generic( s, c, count )
+
+#define __HAVE_ARCH_MEMSET
+#define memset( s, c, count ) __memset( s, c, count )
+
/*