Re: GCC proposal for "@" asm constraint

From: Andrea Arcangeli (andrea@suse.de)
Date: Mon Sep 18 2000 - 20:18:31 EST


On Mon, Sep 18, 2000 at 07:53:04PM -0400, John Wehle wrote:
> What version of gcc? Recently some work was done to improve the handling of
> constant memory.

I'm using 2.95.2 19991024.

Take this small testcase:

#include <linux/spinlock.h>

int * p;
spinlock_t lock = SPIN_LOCK_UNLOCKED;

extern void dummy(int, int);

myfunc() {
        int a, b;
        spin_lock(&lock);
        a = *p;
        spin_unlock(&lock);

        spin_lock(&lock);
        b = *p;
        spin_unlock(&lock);

        dummy(a,b);
}

If I compile it with:

        gcc -O2 -D__SMP__ -I ~/kernel/devel/2.2.18pre9aa1/include/ -S p.c

where 2.2.18pre9aa1 has the current spinlock implementation (without
the "memory" clobber and with the __dummy trick), I get:

        .file "p.c"
        .version "01.01"
gcc2_compiled.:
.globl lock
.data
        .align 4
        .type lock,@object
        .size lock,4
lock:
        .long 0
.text
        .align 16
.globl myfunc
        .type myfunc,@function
myfunc:
        pushl %ebp
        movl %esp,%ebp
        subl $8,%esp
#APP
        
1: lock ; btsl $0,lock
        jc 2f
.section .text.lock,"ax"
2: testb $1,lock
        jne 2b
        jmp 1b
.previous
#NO_APP
        movl p,%eax
        ^^^^^^^^^^^
        movl (%eax),%edx
#APP
        lock ; btrl $0,lock
        
1: lock ; btsl $0,lock
        jc 2f
.section .text.lock,"ax"
2: testb $1,lock
        jne 2b
        jmp 1b
.previous
#NO_APP
        movl (%eax),%eax
#APP
        lock ; btrl $0,lock
#NO_APP
        addl $-8,%esp
        pushl %eax
        pushl %edx
        call dummy
        movl %ebp,%esp
        popl %ebp
        ret
.Lfe1:
        .size myfunc,.Lfe1-myfunc
        .comm p,4,4
        .ident "GCC: (GNU) 2.95.2 19991024 (release)"

If I now repeat the same steps after applying this patch to the
kernel tree whose headers I was including:

--- 2.2.18pre9aa1/include/asm-i386/spinlock.h.~1~ Mon Sep 18 04:56:28 2000
+++ 2.2.18pre9aa1/include/asm-i386/spinlock.h Tue Sep 19 03:04:56 2000
@@ -173,12 +173,12 @@
 #define spin_lock(lock) \
 __asm__ __volatile__( \
         spin_lock_string \
- :"=m" (__dummy_lock(lock)))
+ :"=m" (__dummy_lock(lock)) : : "memory")
 
 #define spin_unlock(lock) \
 __asm__ __volatile__( \
         spin_unlock_string \
- :"=m" (__dummy_lock(lock)))
+ :"=m" (__dummy_lock(lock)) : : "memory")
 
 #define spin_trylock(lock) (!test_and_set_bit(0,(lock)))

I then get this assembler:

        .file "p.c"
        .version "01.01"
gcc2_compiled.:
.globl lock
.data
        .align 4
        .type lock,@object
        .size lock,4
lock:
        .long 0
.text
        .align 16
.globl myfunc
        .type myfunc,@function
myfunc:
        pushl %ebp
        movl %esp,%ebp
        subl $8,%esp
#APP
        
1: lock ; btsl $0,lock
        jc 2f
.section .text.lock,"ax"
2: testb $1,lock
        jne 2b
        jmp 1b
.previous
#NO_APP
        movl p,%eax
        ^^^^^^^^^^^
        movl (%eax),%edx
#APP
        lock ; btrl $0,lock
        
1: lock ; btsl $0,lock
        jc 2f
.section .text.lock,"ax"
2: testb $1,lock
        jne 2b
        jmp 1b
.previous
#NO_APP
        movl p,%eax
        ^^^^^^^^^^^
        movl (%eax),%eax
#APP
        lock ; btrl $0,lock
#NO_APP
        addl $-8,%esp
        pushl %eax
        pushl %edx
        call dummy
        movl %ebp,%esp
        popl %ebp
        ret
.Lfe1:
        .size myfunc,.Lfe1-myfunc
        .comm p,4,4
        .ident "GCC: (GNU) 2.95.2 19991024 (release)"

The diff between the generated asms:

--- p.s.default-spinlocks Tue Sep 19 03:10:14 2000
+++ p.s.memory Tue Sep 19 03:10:29 2000
@@ -39,6 +39,7 @@
         jmp 1b
 .previous
 #NO_APP
+ movl p,%eax
         movl (%eax),%eax
 #APP
         lock ; btrl $0,lock

The reload of the address of `p' isn't necessary and gcc is wrong to generate
it. p is a constant embedded into the .text section and set at link time; the
only way to change it would be for the asm that declares "memory" as a
clobber to modify the code itself, and gcc should not assume anything about
self-modifying code (no part of IA32 Linux is self-modifying).

The above reload is just wasted CPU cycles, which we're a little worried about
wasting.

Andrea
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sat Sep 23 2000 - 21:00:19 EST