memeq() memory comparing function proporsal

jb@heva.net.pl
Thu, 22 Apr 1999 19:38:48 +0200 (CEST)


Hi

memcmp() is commonly used for checking memory areas equality rather than
finding out which one is bigger. Unfortunately due to expectations
about a return value's sign it is hard to optimize (especially as far i386
concerned).

A new function memeq() may test for equality only, which is much easier
to code. It will default to memcmp()==0 if __HAVE_ARCH_MEMEQ is not defined.

Following patch adds memeq() to asm-i386/string.h. It doesn't replaces
memcmp() with memeq() so no optimization will be observed. (I'll try
to find places when such replacement is possible)

There's small improvement in memcpy() function which now tries to
interleave data reads and writes to minimize register aliasing (this
only appears when count is small constant). And the big bug in memset() was
fixed. ;-)

This code has been tested on one computer - I hope it is ok (check it!
it's simple).

http://wizard.ae.krakow.pl/~jb/patches/memeq.diff.gz

Jan Bobrowski
jb@wizard.ae.krakow.pl

PS. What are d0, d1, d2 variables needed for in memcpy() assembly?

----------------------------------------------------------- memeq.diff
diff -u --recursive --new-file linux-2.2.6/include/asm-i386/string.h linux/include/asm-i386/string.h
--- linux-2.2.6/include/asm-i386/string.h Thu Mar 4 04:16:20 1999
+++ linux/include/asm-i386/string.h Thu Apr 22 16:29:59 1999
@@ -226,77 +226,77 @@
* This looks horribly ugly, but the compiler can optimize it totally,
* as the count is constant.
*/
+
+#define _COPY1(t,f,A) (*(A*)(t) = *(A*)(f))
+#define _COPY2(t,f,A,B) \
+{ \
+ A a = *(A*)(f); \
+ B b = *(B*)((f)+sizeof a); \
+ *(A*)(t) = a; \
+ *(B*)((t) + sizeof a) = b; \
+}
+
extern inline void * __constant_memcpy(void * to, const void * from, size_t n)
{
- switch (n) {
- case 0:
- return to;
- case 1:
- *(unsigned char *)to = *(const unsigned char *)from;
- return to;
- case 2:
- *(unsigned short *)to = *(const unsigned short *)from;
- return to;
- case 3:
- *(unsigned short *)to = *(const unsigned short *)from;
- *(2+(unsigned char *)to) = *(2+(const unsigned char *)from);
- return to;
- case 4:
- *(unsigned long *)to = *(const unsigned long *)from;
- return to;
- case 6: /* for Ethernet addresses */
- *(unsigned long *)to = *(const unsigned long *)from;
- *(2+(unsigned short *)to) = *(2+(const unsigned short *)from);
- return to;
- case 8:
- *(unsigned long *)to = *(const unsigned long *)from;
- *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
+ __u8* t = (__u8*)to;
+ const __u8* f = (const __u8*)from;
+
+ switch(n) {
+ case 0: return to;
+ case 1: _COPY1(t,f,__u8); return to;
+ case 2: _COPY1(t,f,__u16); return to;
+ case 3: _COPY2(t,f,__u16,__u8); return to;
+ case 4: _COPY1(t,f,__u32); return to;
+ case 5: _COPY2(t,f,__u32,__u8); return to;
+ case 6: _COPY2(t,f,__u32,__u16); return to;
+ case 8: _COPY2(t,f,__u32,__u32); return to;
+ case 9:
+ _COPY2(t,f,__u32,__u32);
+ _COPY1(t+8,f+8,__u8);
return to;
case 12:
- *(unsigned long *)to = *(const unsigned long *)from;
- *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
- *(2+(unsigned long *)to) = *(2+(const unsigned long *)from);
+ _COPY2(t,f,__u32,__u32);
+ _COPY1(t+8,f+8,__u32);
return to;
case 16:
- *(unsigned long *)to = *(const unsigned long *)from;
- *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
- *(2+(unsigned long *)to) = *(2+(const unsigned long *)from);
- *(3+(unsigned long *)to) = *(3+(const unsigned long *)from);
+ _COPY2(t,f,__u32,__u32);
+ _COPY2(t+8,f+8,__u32,__u32);
return to;
case 20:
- *(unsigned long *)to = *(const unsigned long *)from;
- *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
- *(2+(unsigned long *)to) = *(2+(const unsigned long *)from);
- *(3+(unsigned long *)to) = *(3+(const unsigned long *)from);
- *(4+(unsigned long *)to) = *(4+(const unsigned long *)from);
+ _COPY2(t,f,__u32,__u32);
+ _COPY2(t+8,f+8,__u32,__u32);
+ _COPY1(t+16,f+16,__u32);
return to;
}
-#define COMMON(x) \
-__asm__ __volatile__( \
- "cld\n\t" \
- "rep ; movsl" \
- x \
- : "=&c" (d0), "=&D" (d1), "=&S" (d2) \
- : "0" (n/4),"1" ((long) to),"2" ((long) from) \
- : "memory");
-{
- int d0, d1, d2;
- switch (n % 4) {
- case 0: COMMON(""); return to;
- case 1: COMMON("\n\tmovsb"); return to;
- case 2: COMMON("\n\tmovsw"); return to;
- default: COMMON("\n\tmovsw\n\tmovsb"); return to;
+#define COMMON(x) \
+__asm__ __volatile__( \
+ "cld\n\t" \
+ "rep ; movsl" \
+ x \
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2) \
+ : "0" (n/4),"1" ((long) to),"2" ((long) from) \
+ : "memory" \
+)
+ {
+ int d0, d1, d2;
+ switch (n % 4) {
+ case 0: COMMON(""); return to;
+ case 1: COMMON("\n\tmovsb"); return to;
+ case 2: COMMON("\n\tmovsw"); return to;
+ default: COMMON("\n\tmovsw\n\tmovsb"); return to;
+ }
}
}
-
+#undef _COPY1
+#undef _COPY2
#undef COMMON
-}

#define __HAVE_ARCH_MEMCPY
-#define memcpy(t, f, n) \
-(__builtin_constant_p(n) ? \
- __constant_memcpy((t),(f),(n)) : \
- __memcpy((t),(f),(n)))
+#define memcpy(t, f, n) ( \
+ __builtin_constant_p(n) ? \
+ __constant_memcpy((t),(f),(n)) : \
+ __memcpy((t),(f),(n)) \
+)

#define __HAVE_ARCH_MEMMOVE
extern inline void * memmove(void * dest,const void * src, size_t n)
@@ -326,6 +326,55 @@

#define memcmp __builtin_memcmp

+#ifdef __KERNEL__
+
+#define COMMON(L,S) \
+asm volatile ( \
+ "cld;" \
+ "repz; cmps" L ";" \
+ "setne %%cl" \
+: "=S" (a), "=D" (b), "=c" (r) \
+: "S" (a), "D" (b), "c" (n/S) \
+);
+
+#define DIFF(A,B,T) ((T)(*(T*)(A)-*(T*)(B)))
+
+extern inline int __constant_memneq(const void* a, const void* b, unsigned n)
+{
+ int r;
+
+ switch(n) {
+ case 0: return 0;
+ case 1: return DIFF(a,b,u8);
+ case 2: return DIFF(a,b,u16);
+ case 3: return DIFF(a,b,u16) | DIFF(a+2,b+2,u8);
+ case 4: return DIFF(a,b,u32);
+ case 5: return DIFF(a,b,u32) | DIFF(a+4,b+4,u8);
+ case 6: return DIFF(a,b,u32) | DIFF(a+4,b+4,u16);
+ case 7: return DIFF(a,b,u32) ?: DIFF(a+4,b+4,u16) | DIFF(a+6,b+6,u8);
+ case 8: return DIFF(a,b,u32) | DIFF(a+4,b+4,u32);
+ case 9: return DIFF(a,b,u32) ?: DIFF(a+4,b+4,u32) | DIFF(a+8,b+8,u8);
+ }
+
+ switch(n%4) {
+ case 0: COMMON("l",4); return r;
+ case 2: COMMON("w",2); return r;
+ default:
+ COMMON("b",1); return r;
+ }
+}
+#undef COMMON
+#undef DIFF
+
+#define __HAVE_ARCH_MEMEQ
+#define memeq(a,b,n) ( \
+ __builtin_constant_p(n) ? \
+ !__constant_memneq((a),(b),(n)) : \
+ !memcmp((a),(b),(n)) \
+)
+
+#endif /* __KERNEL__ */
+
#define __HAVE_ARCH_MEMCHR
extern inline void * memchr(const void * cs,int c,size_t count)
{
@@ -462,7 +511,7 @@
#define __HAVE_ARCH_MEMSET
#define memset(s, c, count) \
(__builtin_constant_p(c) ? \
- __constant_c_x_memset((s),(0x01010101UL*(unsigned char)c),(count)) : \
+ __constant_c_x_memset((s),(0x01010101UL*(unsigned char)(c)),(count)) : \
__memset((s),(c),(count)))

/*
diff -u --recursive --new-file linux-2.2.6/include/linux/string.h linux/include/linux/string.h
--- linux-2.2.6/include/linux/string.h Thu Mar 4 04:16:20 1999
+++ linux/include/linux/string.h Thu Apr 22 16:31:36 1999
@@ -39,6 +39,10 @@
*/
#include <asm/string.h>

+#if defined(__KERNEL__) && !defined(__HAVE_ARCH_MEMEQ)
+#define memeq(a,b,n) (0==memcmp((a),(b),(n)))
+#endif
+
#ifdef __cplusplus
}
#endif

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/