#include #include #include #include #include #include #include #include #include #define PER_PROCESS_ALLOC (256*1024*1024) #define NUM_SYNC_WORDS (256/32) typedef struct shared_data_s { int sync[NUM_SYNC_WORDS]; } shared_data_t; typedef shared_data_t *shared_data_p; shared_data_p shared_data; int num_cpus; void *map; int n; #if __x86_64__ #define LOCK_PREFIX "lock ; " int atomic_cas (int *ptr, int old, int new) { int prev; __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" : "=a"(prev) : "q"(new), "m"(*ptr), "0"(old) : "memory"); return prev == old; } #else # error this test must be run on an x86_64 cpu #endif int get_bit (int *bits, int bitnum) { int *word = bits + (bitnum / 32); int bit = (1 << (bitnum % 32)); return (*word & bit) != 0; } void atomic_set_bit (int *bits, int bitnum) { int *word = bits + (bitnum / 32); int bit = (1 << (bitnum % 32)); int old_val, new_val; do { old_val = *word; new_val = old_val | bit; } while (!atomic_cas (word, old_val, new_val)); } void barrier () { int *sync = shared_data->sync; atomic_set_bit (sync, n); /* set my bit */ if (n == 0) { int i, cnt; /* spin until all bits are set */ for (cnt = 0; cnt != num_cpus;) for (i = 0, cnt = 0; i < num_cpus; ++i) if (get_bit(sync, i)) ++cnt; /* all set, open the barrier. */ for (i = 0; i < (num_cpus + 31)/32; ++i) sync[i] = 0; } else while (get_bit (sync, n)) /* spin on my bit set */; } void run_test () { int nxt = (n + 1) % num_cpus; char *s; char *cp; int i; int c; char *buf; barrier (); /* write the data for the next process */ s = map + nxt * PER_PROCESS_ALLOC; c = 'A' + nxt % 26; memset (s, c, PER_PROCESS_ALLOC-1); s[PER_PROCESS_ALLOC-1] = '\0'; barrier (); /* read our data */ s = map + n * PER_PROCESS_ALLOC; c = 'A' + n % 26; buf = malloc (PER_PROCESS_ALLOC); if (!buf) { perror ("malloc"); abort (); } memset (buf, c, PER_PROCESS_ALLOC-1); buf[PER_PROCESS_ALLOC-1] = '\0'; if (strcmp(s, buf)) { fprintf (stderr, "%d: data mismatch\n", n); abort (); } barrier (); exit (0); } int main (int argc, char *argv[]) { char *pgm = argv[0]; int mask; int fd; int pid; off_t alloc_size; int wait_status; num_cpus = (int)sysconf(_SC_NPROCESSORS_ONLN); if (num_cpus <= 0) { perror ("sysconf"); abort (); } for (mask = 1; (num_cpus & ~mask); mask <<= 1) num_cpus = (num_cpus & ~mask); shared_data = mmap((void *)0, 64*1024*1024, PROT_READ|PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, (off_t)0); if (shared_data == MAP_FAILED) { perror ("mmap"); abort (); } memset (shared_data, '\0', sizeof (shared_data_t)); alloc_size = num_cpus * PER_PROCESS_ALLOC; map = mmap((void *)0, alloc_size, PROT_READ|PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, (off_t)0); if (map == MAP_FAILED) { perror ("mmap"); abort (); } for (n = 0; n < num_cpus; ++n) { pid = fork (); if (pid == 0) run_test (n); /* no return */ else if (pid < 0) { perror ("fork"); abort (); } } while ((pid = wait (&wait_status)) > 0) { if (WIFEXITED (wait_status)) { int child_exit = WEXITSTATUS (wait_status); if (child_exit) { fprintf (stderr, "non-zero child exit status\n"); abort ();} } else if (WIFSIGNALED (wait_status)) { int child_sig = WTERMSIG (wait_status); fprintf (stderr, "child caught signal\n"); abort (); } } exit (0); }