[PATCH] i386: optimize swab64
From: Denis Vlasenko
Date: Mon Apr 25 2005 - 02:21:47 EST
I noticed that swab64 explicitly swaps the 32-bit halves, but this is
not really needed because the CPU is 32-bit anyway and we can
just tell GCC to treat the registers as being swapped.
Example of resulting code:
mov 0x20(%ecx,%edi,8),%eax
mov 0x24(%ecx,%edi,8),%edx
lea 0x1(%edi),%esi
mov %esi,0xfffffdf4(%ebp)
mov %eax,%ebx
mov %edx,%esi
bswap %ebx
bswap %esi
mov %esi,0xffffff74(%ebp,%edi,8)
mov %ebx,0xffffff78(%ebp,%edi,8)
As you can see, the swap is achieved simply by using the
appropriate registers in the last two insns.
(Why gcc does extra register moves just before the bswaps
is another question. No regression here; the old code had them too.)
Run-tested.
--
vda
diff -urpN linux-2.6.12-rc2.0.orig/include/asm-i386/byteorder.h linux-2.6.12-rc2.z.cur/include/asm-i386/byteorder.h
--- linux-2.6.12-rc2.0.orig/include/asm-i386/byteorder.h Tue Oct 19 00:54:36 2004
+++ linux-2.6.12-rc2.z.cur/include/asm-i386/byteorder.h Sun Apr 24 22:38:14 2005
@@ -25,6 +25,8 @@ static __inline__ __attribute_const__ __
return x;
}
+/* NB: swap of 32-bit halves is achieved by asm constraints.
+** This will save a xchgl in many cases */
static __inline__ __attribute_const__ __u64 ___arch__swab64(__u64 val)
{
union {
@@ -33,13 +35,13 @@ static __inline__ __attribute_const__ __
} v;
v.u = val;
#ifdef CONFIG_X86_BSWAP
- asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1"
- : "=r" (v.s.a), "=r" (v.s.b)
+ asm("bswapl %0 ; bswapl %1"
+ : "=r" (v.s.b), "=r" (v.s.a)
: "0" (v.s.a), "1" (v.s.b));
#else
- v.s.a = ___arch__swab32(v.s.a);
+ v.s.a = ___arch__swab32(v.s.a);
v.s.b = ___arch__swab32(v.s.b);
- asm("xchgl %0,%1" : "=r" (v.s.a), "=r" (v.s.b) : "0" (v.s.a), "1" (v.s.b));
+ asm("" : "=r" (v.s.b), "=r" (v.s.a) : "0" (v.s.a), "1" (v.s.b));
#endif
return v.u;
}
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/