Andrea Arcangeli wrote:
>
> That's what I understood by reading your previous posts. It seems we don't
> need to enforce any ordering in IA32 as the hardware is doing that for us.
> Right? Of course the current code can't hurt, it's only slower (like what
> we have with spin_unlock right now).
>
[Either my test program is completely buggy, or ]
We definitely need the memory barrier during set_current_state():
I've attached a sample program that mimics __wait_inode(), and it always
locks up with the normal "mov" and the correct delay between the wake-up
and the sleep functions.
[it should automagically find a suitable delay, if not, then increase
delay2]
Tested with Dual Pentium II/350.
-- Manfred --------------0203B9AAC99D16DD2019F99D Content-Type: text/plain; charset=us-ascii; name="movopt.cpp" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="movopt.cpp"/* * movopt: test for Intel memory ordering. * Copyright (C) 1999 by Manfred Spraul. * * Redistribution of this file is permitted under the terms of the GNU * Public License (GPL) * $Header: /pub/cvs/ms/movopt/movopt.cpp,v 1.3 1999/11/25 23:38:44 manfreds Exp $ */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <pthread.h> #include <assert.h>
/*
 * Shared state of the test. Everything is volatile because both worker
 * threads and main() read and write these variables without any locking.
 */

/* Start gate: both workers spin in threadfnc() until main() sets this to 1. */
volatile int start = 0;

/*
 * Number of busy-wait iterations cpu1 executes between setting 'go' and
 * running its sleep sequence. cpu1 increments it each time its loop counter
 * is a multiple of 5000, so the program sweeps through delays by itself.
 *   1:       always sleep
 *   10 000:  never sleep
 *   PII/350: lock-up at delay=217
 */
volatile int delay = 1;

/*
 * Number of busy-wait iterations cpu2 executes before its wake-up stores.
 * Increase this value if no lock-up occurs, eg: 2000
 */
volatile int delay2 = 300;

/*
 * The two words the experiment is about. cpu1 stores 1 to current_state and
 * then loads lock; cpu2 stores 0 to lock and then 0 to current_state.
 */
volatile int current_state = 0;
volatile int lock = 1;

/*
 * Per-round handshake between the workers: cpu2 raises 'ready', cpu1 answers
 * by raising 'go', then both run their delay loops and their asm sequences.
 */
volatile int ready = 0;
volatile int go = 0;
/*
 * threadfnc - body of both worker threads.
 *
 * param carries the role as an integer smuggled through the void pointer:
 *   1 -> cpu1, the "sleeper": stores current_state = 1, then checks lock and,
 *        if lock is still 1, spins until current_state reads 0 again.
 *   2 -> cpu2, the "waker": stores lock = 0, then current_state = 0.
 * Any other value trips assert(0).
 *
 * Neither role returns to the caller: cpu1 ends the whole process with
 * exit(0) after 50000000 rounds, cpu2 loops forever.
 *
 * According to the accompanying message this mimics __wait_inode(): with the
 * plain "mov" variant the sleeper is expected to hang in its retry loop once
 * 'delay' reaches the right value, which shows up as the "delay N: ok" output
 * stopping.
 */
void* threadfnc(void* param) {
	int i=0,j;

	/* Hold both threads at the gate until main() releases them. */
	while(start==0)
		;
	if(1 == (int)param) goto cpu1;
	if(2 == (int)param) goto cpu2;
	assert(0);

cpu1:
	for(;i<50000000;i++) {
		/* Re-arm the experiment for this round. */
		lock = 1;
		current_state = 0;
		go = 0;
		/* Rendezvous: wait for cpu2 to announce itself, then release it. */
		while(!ready)
			;
		go = 1;
		/* Tunable skew between the two CPUs; 'delay' is volatile, so the
		 * loop re-reads it on every iteration. */
		for(j=0;j<delay;j++)
			;
		/*
		 * Sleep sequence. %0 is lock, %1 is current_state.
		 * NOTE(review): current_state is listed as an input operand
		 * although the template stores to it; only the "memory" clobber
		 * tells the compiler about that store.
		 * NOTE(review): "retry" and "dont_sleep" are ordinary (non-local)
		 * assembler labels and would collide if the compiler ever
		 * emitted this asm statement more than once.
		 */
		__asm__ __volatile(
#if 1
			"movl $1,%1\n\t"	/* set current_state = 1 */
#else
			"lock;bts $0,%1\n\t"
#endif
			"movl %0,%%eax\n\t"	/* lock into %%eax */
			"cmpl $1,%%eax\n\t"
			"jne dont_sleep\n"	/* lock already released: skip the wait */
			"retry:\n\t"
			"movl %1, %%eax\n\t"	/* reload current_state */
			"cmpl $0,%%eax\n\t"
			"jne retry\n"		/* spin until current_state reads 0 */
			"dont_sleep:\n"
			: /* no output */
			: "m" (lock), "m" (current_state)
			: "eax", "cc", "memory");
		/* Progress report and automatic sweep of the delay value. */
		if((i%5000)==0) {
			printf("delay %d: ok\n",delay);
			delay++;
		}
	}
	printf("thread %d finished.\n",(int)param);
	exit(0);

cpu2:
	for(;;) {
		/* Rendezvous: announce readiness, wait for cpu1's go, then reset
		 * both flags for the next round. */
		ready = 1;
		while(!go)
			;
		ready = 0;
		go = 0;
		/* Tunable skew on the waker side. */
		for(j=0;j<delay2;j++)
			;
		/* Wake-up sequence. %0 is lock, %1 is current_state. */
		__asm__ __volatile(
			"movl $0,%0\n\t"	/* lock = 0 */
			"movl $0,%1\n\t"	/* current_state = 0 */
			"lock; addl $0,(%%esp)\n\t" /* flush our write buffers*/
			: /* no output */
			: "m" (lock), "m" (current_state)
			: "memory");
	}
}
/*
 * start_thread - spawn one worker thread running threadfnc().
 *
 * id is the role selector handed to threadfnc() through its void pointer
 * argument (threadfnc() accepts 1 or 2).
 *
 * If the thread cannot be created, a diagnostic is printed to stderr and the
 * process exits with EXIT_FAILURE. The failure is checked explicitly rather
 * than with assert(), which is compiled out under NDEBUG and would leave
 * main() waiting forever for a worker that never started.
 */
void start_thread(int id)
{
	pthread_t thread;
	int res;

	/* Widen through long first so the integer has pointer size on both
	 * ILP32 and LP64 before it is converted to a pointer. */
	res = pthread_create(&thread, NULL, threadfnc, (void*)(long)id);
	if (res != 0) {
		/* pthread_create() returns the error number; it does not set errno. */
		fprintf(stderr, "movopt: cannot create thread %d: %s\n",
			id, strerror(res));
		exit(EXIT_FAILURE);
	}
}
/*
 * Program entry: create the two worker threads, open the start gate, then
 * park the main thread. The process ends only when a worker calls exit().
 */
int main()
{
	printf("movopt:\n");

	/* Role 1 and role 2 of threadfnc(); both spin until 'start' is set. */
	start_thread(1);
	start_thread(2);

	printf(" starting, please wait.\n");
	/* Push the banner out before the workers start printing. */
	fflush(stdout);

	/* Release both workers. */
	start = 1;

	/* Nothing left to do here; sleep in long slices forever. */
	while (1) {
		sleep(1000);
	}
}
--------------0203B9AAC99D16DD2019F99D--
- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu Please read the FAQ at http://www.tux.org/lkml/