--- arch/x86/lib/delay_32.c-orig	2008-05-31 23:44:34.000000000 +0200
+++ arch/x86/lib/delay_32.c	2008-06-02 11:55:50.000000000 +0200
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 1993 Linus Torvalds
  * Copyright (C) 1997 Martin Mares
+ * Copyright (C) 2008 Jiri Hladky
  *
  * The __delay function must _NOT_ be inlined as its execution time
  * depends wildly on alignment on many x86 processors. The additional
@@ -28,16 +29,31 @@
 /* simple loop based delay: */
 static void delay_loop(unsigned long loops)
 {
-	int d0;
-
-	__asm__ __volatile__(
-		"\tjmp 1f\n"
-		".align 16\n"
-		"1:\tjmp 2f\n"
-		".align 16\n"
-		"2:\tdecl %0\n\tjns 2b"
-		:"=&a" (d0)
-		:"0" (loops));
+	/*
+	 * Translates to:
+	 *
+	 *	test %eax,%eax
+	 *	jz 3f
+	 *	jmp 1f
+	 * .align 16
+	 * 1:	jmp 2f
+	 * .align 16
+	 * 2:	decl %eax
+	 *	jnz 2b
+	 * 3:	decl %eax
+	 *
+	 * A zero count skips the loop; otherwise the loop runs until the
+	 * counter reaches zero.  The counter register is modified, so it
+	 * must be a read-write ("+a") operand, not input-only.
+	 */
+	__asm__ __volatile__(
+		"test %0,%0\n"
+		"\tjz 3f\n"
+		"\tjmp 1f\n"
+		".align 16\n"
+		"1:\tjmp 2f\n"
+		".align 16\n"
+		"2:\tdecl %0\n\tjnz 2b\n"
+		"3:\tdecl %0"
+		: "+a" (loops)
+		: /* no other inputs */);
 }
 
 /* TSC based delay: */