[PATCH] optimised memmove() for alpha

Martin Lucina (mato@kotelna.sk)
Sat, 22 May 1999 21:40:22 +1200


On Wed, May 19, 1999 at 10:36:51PM +0100, Tim Waugh wrote:

> Here's a patch against 2.3.3. This should get rid of the stalling when
> doing ncurses stuff on a TGA console.
>
> It's untested, but it compiles. I'm compiling a kernel with it now, but
> it's compiling over NFS, with 32Mb and no swap, so things are progressing
> slowly.

Here's a different version. This one is based on adding a bunch of
conditionals to the __memcpy_xxx functions, and in theory it should be just
as fast as Linus's original memcpy. I had a look at the compiled assembly,
and since the dir parameter is a constant, gcc (egcs-1.1.1 in my case) will
optimise out all the conditionals, bless its optimiser.
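
In miniature, the trick looks something like this (a userspace sketch with
made-up names, not the actual kernel code):

	/*
	 * Because dir is a compile-time constant at every call site,
	 * gcc can inline copy_bytes(), discard the untaken branch and
	 * drop the parameter entirely, leaving a pure forward or pure
	 * backward byte loop.
	 */
	static inline void copy_bytes(char *d, const char *s, long n,
				      const long dir)
	{
		while (n > 0) {
			n--;
			*d = *s;
			if (dir > 0) { d++; s++; } else { d--; s--; }
		}
	}

	void copy_up(char *d, const char *s, long n)
	{
		copy_bytes(d, s, n, 1);		/* forward loop only */
	}

	void copy_down(char *d, const char *s, long n)
	{
		copy_bytes(d, s, n, -1);	/* backward loop only */
	}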

This is also largely untested. It hasn't crashed my machine yet, and I hope
I haven't done anything silly :-)

Cheers,

mato

--
Martin Lucina http://www.kotelna.sk/mato/ Wellington, New Zealand
I've always been mad I know I've been mad like the most of us are 
Pretty hard to explain why you're a madman even if you're not mad

diff -urN --exclude=.* --exclude=*.o --exclude=*.a --exclude=RCS linux-2.2.9/arch/alpha/lib/memcpy.c linux/arch/alpha/lib/memcpy.c
--- linux-2.2.9/arch/alpha/lib/memcpy.c	Wed Apr 8 03:05:05 1998
+++ linux/arch/alpha/lib/memcpy.c	Sat May 22 21:06:59 1999
@@ -2,6 +2,8 @@
  * linux/arch/alpha/lib/memcpy.c
  *
  * Copyright (C) 1995 Linus Torvalds
+ *
+ * Optimised memmove() implementation by Martin Lucina, 1999.
  */
 
 /*
@@ -26,7 +28,7 @@
 		if (n <= 0) return; \
 		n--; \
 		*(char *) d = *(char *) s; \
-		d++; s++; \
+		if (dir > 0) { d++; s++; } else { d--; s--; } \
 	}
 
 /*
@@ -37,7 +39,7 @@
 	while (n > 0) { \
 		n--; \
 		*(char *) d = *(char *) s; \
-		d++; s++; \
+		if (dir > 0) { d++; s++; } else { d--; s--; } \
 	}
 
 /*
@@ -53,7 +55,8 @@
  *
  * Note the ordering to try to avoid load (and address generation) latencies.
  */
-static inline void __memcpy_unaligned(unsigned long d, unsigned long s, long n)
+static inline void __memcpy_unaligned(unsigned long d, unsigned long s, long n,
+				      const long dir)
 {
 	ALIGN_DEST_TO8(d,s,n);
 	n -= 8;	/* to avoid compare against 8 in the loop */
@@ -62,7 +65,15 @@
 		__asm__("ldq_u %0,%1":"=r" (low_word):"m" (*(unsigned long *) s));
 		do {
 			unsigned long tmp;
-			__asm__("ldq_u %0,%1":"=r" (high_word):"m" (*(unsigned long *)(s+8)));
+			if (dir > 0) {
+				__asm__("ldq_u %0,%1"
+					:"=r" (high_word)
+					:"m" (*(unsigned long *)(s+8)));
+			} else {
+				__asm__("ldq_u %0,%1"
+					:"=r" (high_word)
+					:"m" (*(unsigned long *)(s-8)));
+			}
 			n -= 8;
 			__asm__("extql %1,%2,%0"
 				:"=r" (low_word)
@@ -70,9 +81,9 @@
 				:"r" (low_word), "r" (s));
 			__asm__("extqh %1,%2,%0"
 				:"=r" (tmp)
 				:"r" (high_word), "r" (s));
-			s += 8;
+			(dir > 0) ? (s += 8) : (s -= 8);
 			*(unsigned long *) d = low_word | tmp;
-			d += 8;
+			(dir > 0) ? (d += 8) : (d -= 8);
 			low_word = high_word;
 		} while (n >= 0);
 	}
@@ -88,7 +99,8 @@
  *
  * Note the ordering to try to avoid load (and address generation) latencies.
  */
-static inline void __memcpy_aligned(unsigned long d, unsigned long s, long n)
+static inline void __memcpy_aligned(unsigned long d, unsigned long s, long n,
+				    const long dir)
 {
 	ALIGN_DEST_TO8(d,s,n);
 	n -= 8;
@@ -96,9 +108,9 @@
 		unsigned long tmp;
 		__asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
 		n -= 8;
-		s += 8;
+		(dir > 0) ? (s += 8) : (s -= 8);
 		*(unsigned long *) d = tmp;
-		d += 8;
+		(dir > 0) ? (d += 8) : (d -= 8);
 	}
 	n += 8;
 	DO_REST_ALIGNED(d,s,n);
@@ -107,10 +119,35 @@
 
 void * memcpy(void * dest, const void *src, size_t n)
 {
 	if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) {
-		__memcpy_aligned((unsigned long) dest, (unsigned long) src, n);
+		__memcpy_aligned((unsigned long) dest,
+				 (unsigned long) src, n, 1);
 		return dest;
 	}
-	__memcpy_unaligned((unsigned long) dest, (unsigned long) src, n);
+	__memcpy_unaligned((unsigned long) dest,
+			   (unsigned long) src, n, 1);
+	return dest;
+}
+
+void * memmove(void * dest, const void *src, size_t n)
+{
+	if ((unsigned long) dest < (unsigned long) src) {	/* copy up */
+		if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) {
+			__memcpy_aligned((unsigned long) dest,
+					 (unsigned long) src, n, 1);
+			return dest;
+		}
+		__memcpy_unaligned((unsigned long) dest,
+				   (unsigned long) src, n, 1);
+	} else {						/* copy down */
+		if (!((((n - 1) + (unsigned long) dest) ^
+		       ((n - 1) + (unsigned long) src)) & 7)) {
+			__memcpy_aligned((n - 1) + (unsigned long) dest,
+					 (n - 1) + (unsigned long) src, n, -1);
+			return dest;
+		}
+		__memcpy_unaligned((n - 1) + (unsigned long) dest,
+				   (n - 1) + (unsigned long) src, n, -1);
+	}
 	return dest;
 }
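
If you want to sanity-check the direction logic from userspace before
trusting it in a kernel, a quick test along these lines (a made-up harness,
not part of the patch) exercises the semantics memmove() has to preserve
for overlapping buffers in both directions:

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char buf[16];

		/* dest < src: the patch's "copy up" (forward) path. */
		memcpy(buf, "XXabcdefgh", 11);	/* 11 includes the '\0' */
		memmove(buf, buf + 2, 9);
		printf("%s\n", buf);		/* expect: abcdefgh */

		/* dest > src: the patch's "copy down" (backward) path. */
		memcpy(buf, "abcdefghXX", 11);
		memmove(buf + 2, buf, 8);
		printf("%s\n", buf);		/* expect: ababcdefgh */

		return 0;
	}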
