Re: [PATCH] Remove some divide instructions

From: Zachary Amsden
Date: Wed Oct 27 2004 - 18:13:09 EST


Apologies in advance for e-mail difficulties today - I can't reply directly to your message, so threading may be off.

> I'd suggest _only_ changing the i386 version.
>
> For example, your asm-generic changes look senseless, since they only
> make the macros more complex without actually changing anything. And the
> other architectures may want to do other things, since right now at least
> some of them use things like fixed hardware registers etc., which is not
> necessarily appropriate for the non-asm case...
>
> That way you'd also only modify the architecture that you can actually
> verify.
>
> Linus


I backed out everything but the i386 and generic versions. For the generic version, I compiled and tested the code outside of the kernel, and found that, at least for my tool chain, the generic version

# define do_div(n,base) ({ \
	uint32_t __rem; \
	if (__builtin_constant_p(base) && !((base) & ((base)-1))) { \
		__rem = ((uint64_t)(n)) % (base); \
		(n) = ((uint64_t)(n)) / (base); \
	} else { \
		uint32_t __base = (base); \
		__rem = ((uint64_t)(n)) % __base; \
		(n) = ((uint64_t)(n)) / __base; \
	} \
	__rem; \
})

does indeed generate different code for the constant case. Without the __builtin_constant_p branch, the assignment to __base defeats the shift/mask optimization: apparently the constant attribute and the associated optimizations do not propagate through the assignment. Other gccs may behave differently; I also tried making __base const, to no avail. If one were willing to ignore the potential macro side effects of the repeated references to (base), both branches could use the same code, but I'm not the best judge of whether that is a good thing to do.
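If you want to reproduce the experiment, here is a minimal standalone sketch of the kind of test I ran outside the kernel; the function names are invented for illustration, and the results will of course depend on your compiler version:

#include <stdint.h>

/* Dividing by a literal power of two: gcc can reduce the 64-bit
 * modulus to an AND (and the corresponding division to a shift). */
uint32_t rem_direct(uint64_t n)
{
	return n % 1024;
}

/* The same divisor routed through a local variable, as the old macro
 * body did with __base; on the tool chain described above the constant
 * does not propagate through the assignment, so a real divide (or a
 * libgcc call) is emitted instead. */
uint32_t rem_via_var(uint64_t n)
{
	uint32_t base = 1024;
	return n % base;
}

Compiling both with -O2 and diffing the assembly shows whether a given gcc propagates the constant through the assignment.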

Zach
zach@xxxxxxxxxx
diff -ru linux-2.6.10-rc1/include/asm-generic/div64.h linux-2.6.10-rc1-nsz/include/asm-generic/div64.h
--- linux-2.6.10-rc1/include/asm-generic/div64.h 2004-10-27 10:41:40.000000000 -0700
+++ linux-2.6.10-rc1-nsz/include/asm-generic/div64.h 2004-10-27 10:24:06.000000000 -0700
@@ -22,12 +22,17 @@

#if BITS_PER_LONG == 64

-# define do_div(n,base) ({ \
- uint32_t __base = (base); \
- uint32_t __rem; \
- __rem = ((uint64_t)(n)) % __base; \
- (n) = ((uint64_t)(n)) / __base; \
- __rem; \
+# define do_div(n,base) ({ \
+ uint32_t __rem; \
+ if (__builtin_constant_p(base) && !((base) & ((base)-1))) { \
+ __rem = ((uint64_t)(n)) % (base); \
+ (n) = ((uint64_t)(n)) / (base); \
+ } else { \
+ uint32_t __base = (base); \
+ __rem = ((uint64_t)(n)) % __base; \
+ (n) = ((uint64_t)(n)) / __base; \
+ } \
+ __rem; \
})

#elif BITS_PER_LONG == 32
@@ -37,16 +42,21 @@
/* The unnecessary pointer compare is there
* to check for type safety (n must be 64bit)
*/
-# define do_div(n,base) ({ \
- uint32_t __base = (base); \
- uint32_t __rem; \
- (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
- if (likely(((n) >> 32) == 0)) { \
- __rem = (uint32_t)(n) % __base; \
- (n) = (uint32_t)(n) / __base; \
- } else \
- __rem = __div64_32(&(n), __base); \
- __rem; \
+# define do_div(n,base) ({ \
+ uint32_t __rem; \
+ (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
+ if (__builtin_constant_p(base) && !((base) & ((base)-1))) { \
+ __rem = ((uint64_t)(n)) % (base); \
+ (n) = ((uint64_t)(n)) / (base); \
+ } else { \
+ uint32_t __base = (base); \
+ if (likely(((n) >> 32) == 0)) { \
+ __rem = (uint32_t)(n) % __base; \
+ (n) = (uint32_t)(n) / __base; \
+ } else \
+ __rem = __div64_32(&(n), __base); \
+ } \
+ __rem; \
})

#else /* BITS_PER_LONG == ?? */
diff -ru linux-2.6.10-rc1/include/asm-i386/div64.h linux-2.6.10-rc1-nsz/include/asm-i386/div64.h
--- linux-2.6.10-rc1/include/asm-i386/div64.h 2004-10-27 10:41:40.000000000 -0700
+++ linux-2.6.10-rc1-nsz/include/asm-i386/div64.h 2004-10-27 10:15:04.000000000 -0700
@@ -14,16 +14,23 @@
* convention" on x86.
*/
#define do_div(n,base) ({ \
- unsigned long __upper, __low, __high, __mod, __base; \
- __base = (base); \
- asm("":"=a" (__low), "=d" (__high):"A" (n)); \
- __upper = __high; \
- if (__high) { \
- __upper = __high % (__base); \
- __high = __high / (__base); \
+ unsigned long __mod; \
+ if (__builtin_constant_p(base) && !((base) & ((base)-1))) { \
+ __mod = ((uint64_t)(n)) % (base); \
+ (n) = ((uint64_t)(n)) / (base); \
+ } else { \
+ unsigned long __upper, __low, __high, __base; \
+ __base = (base); \
+ asm("":"=a" (__low), "=d" (__high):"A" (n)); \
+ __upper = __high; \
+ if (__high) { \
+ __upper = __high % (__base); \
+ __high = __high / (__base); \
+ } \
+ asm("divl %2": "=a" (__low), "=d" (__mod) : \
+ "rm" (__base), "0" (__low), "1" (__upper)); \
+ asm("":"=A" (n):"a" (__low),"d" (__high)); \
} \
- asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (__base), "0" (__low), "1" (__upper)); \
- asm("":"=A" (n):"a" (__low),"d" (__high)); \
__mod; \
})
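
For reference, a sketch of what callers see; bytes_to_kb() is a hypothetical helper, shown only to illustrate the do_div() calling convention (n is updated in place and the remainder is the macro's value):

#include <linux/types.h>
#include <asm/div64.h>

static inline u64 bytes_to_kb(u64 bytes)
{
	/* 1024 is a compile-time power of two, so with this patch the
	 * constant branch is selected and no divide instruction is
	 * emitted; a runtime divisor still takes the divl path. */
	do_div(bytes, 1024);
	return bytes;
}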