Shared memory not SMP safe to user-mode code.

Richard B. Johnson (root@chaos.analogic.com)
Tue, 30 Nov 1999 10:50:56 -0500 (EST)


Hello world,

The following program shows that shared memory is not SMP safe.
It is my understanding that, from the user's code point-of-view,
the fact that a machine may have two (or more) processors should
be invisible.

If you compile and run this on a uniprocessor (UP) machine, it will
print 'good' forever, with an occasional message about 'spinning' that
gets resolved immediately.

If you compile and run this on an SMP machine, it will get stuck with
both the parent and the child spinning on the same (impossible) value.
If you look at the code, you will see that it is impossible for
both the child and the parent to have acquired the same key-value.

Further, the 'pad' values in the shared memory structure sometimes
get overwritten with strange values when neither the parent nor the
child ever writes to them! This only happens on an SMP machine.

Pedants take note. It is well known that one is to use kernel-provided
semaphores to synchronize access to shared memory. In this case, the
parent process emulates what hardware would do to a shared memory
segment, and the child emulates what software would have to do to
maintain the state of the hardware, i.e., "why shared-memory network
cards get dorked????".

It appears as though, on an SMP machine, shared memory is not the same
page shared amongst tasks. It appears (emphasis upon appears) as though
shared memory is a copy of something that does not get updated properly
after a write (like it's in one CPU's cache, but not in another). I
think one has to invalidate the cache after a write to shared memory??

Linux version 2.3.13 (root@chaos) (gcc version 2.7.2.3) #48 SMP Thu Oct 21 16:41:57 EDT 1999

processor : 0
vendor_id : GenuineIntel
cpu family : 6
model : 5
model name : Pentium II (Deschutes)
stepping : 1
cpu MHz : 400.914605
cache size : 512 KB
fdiv_bug : no
hlt_bug : no
sep_bug : no
f00f_bug : no
fpu : yes
fpu_exception : yes
cpuid level : 2
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 mmx osfxsr
bogomips : 400.59

processor : 1
vendor_id : GenuineIntel
cpu family : 6
model : 5
model name : Pentium II (Deschutes)
stepping : 1
cpu MHz : 400.914605
cache size : 512 KB
fdiv_bug : no
hlt_bug : no
sep_bug : no
f00f_bug : no
fpu : yes
fpu_exception : yes
cpuid level : 2
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 mmx osfxsr
bogomips : 399.77

Character devices:
1 mem
2 pty
3 ttyp
4 ttyS
5 cua
7 vcs
10 misc
36 netlink
128 ptm
136 pts

Block devices:
1 ramdisk
2 fd
8 sd
4: cascade
ext2
msdos
nodev proc
nodev devpts
CPU0 CPU1
0: 439207 434984 IO-APIC-edge timer
1: 22116 22127 IO-APIC-edge keyboard
2: 0 0 XT-PIC cascade
10: 92087 91768 IO-APIC-level eth0
11: 3641 3608 IO-APIC-level BusLogic BT-958
13: 1 0 XT-PIC fpu
NMI: 0
ERR: 0

0000-001f : dma1
0020-003f : pic1
0040-005f : timer
0060-006f : keyboard
0080-008f : dma page reg
00a0-00bf : pic2
00c0-00df : dma2
00f0-00ff : fpu
02f8-02ff : serial(auto)
03c0-03df : vga+
03f8-03ff : serial(auto)
b800-b803 : BusLogic BT-958
d000-d03f : eth0

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <signal.h>
#include <sys/wait.h>
#include <netinet/in.h>
#include <fcntl.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/mman.h>

/*
 * Hand-written prototype for iopl(), which main() calls with level 3.
 * NOTE(review): glibc's <sys/io.h> declares this as `int iopl(int level)`;
 * with a void return here the callers cannot detect a failed call -- confirm
 * against the target libc.
 */
extern void iopl(int);

/*
 * Record that main() places at the start of the shared memory segment.
 * main() zeroes the whole page once before fork(); afterwards the parent
 * and the child both read and write these fields through their own
 * attachment of the segment.
 *
 * NOTE(review): the packed attribute follows the typedef name rather than
 * the struct body; every member is an unsigned int, so it presumably does
 * not change the member offsets -- confirm with the compiler in use.
 */
typedef struct {
unsigned int pad0; /* pad0..pad2: never assigned after the initial memset; printed by the child when it backs off */
unsigned int pad1;
unsigned int pad2;
unsigned int i; /* incremented by the parent, before j */
unsigned int j; /* incremented by the parent, after i; the child reports when it sees i < j */
unsigned int key; /* post-incremented by both processes to obtain a ticket value */
unsigned int spin; /* each process adds its ticket here and proceeds only if spin then equals that ticket */
unsigned int pad3; /* never assigned after the initial memset; printed by the child when it backs off */
unsigned int pad4; /* never assigned after the initial memset; not printed anywhere */
} PARS __attribute__((packed));

/* SysV IPC key passed to shmget() by init_shmem(). */
#define KEY 0xdeadface
/* Segment size requested from shmget() and the byte count zeroed by main(). */
#define PAGE_SIZE 0x1000
/* shmget() flags for the first attempt: create the segment, failing if it already exists. */
#define FIRST_FLAGS (IPC_EXCL|IPC_CREAT|SHM_R|SHM_W)
/* shmget() flags for the fallback attempt, made when the first attempt fails: no IPC_CREAT. */
#define NEXT_FLAGS (IPC_EXCL|SHM_R|SHM_W)
/* mmap()-style protection and mapping flags; not referenced by the code shown in this file. */
#define PROT (PROT_READ|PROT_WRITE)
#define FLAGS (MAP_FIXED|MAP_SHARED)
/* Fallback definition for systems whose headers lack MAP_FAILED; init_shmem() compares shmat()'s result against it. */
#if !defined(MAP_FAILED)
#define MAP_FAILED (void *) -1
#endif

/*
 * Emit the x86 `cli` / `sti` instructions inline (clear / set the
 * interrupt flag).  main() brackets its accesses to the shared record
 * with them.
 * NOTE(review): these instructions are privileged; main() calls iopl(3)
 * first, presumably to make them legal from user mode -- confirm.
 */
#define cli __asm__ __volatile__("cli\n")
#define sti __asm__ __volatile__("sti\n")

/*
 * Report a failed call on stderr and terminate the process.
 *
 * s - name of the call that failed, as a string; it is printed followed
 *     by "()".
 *
 * The message carries the source line and file of the invocation, the
 * call name and strerror(errno); the process then exits with
 * EXIT_FAILURE, so the macro never returns to its caller.
 *
 * The body is wrapped in do { } while (0) so that `ERRORS("x");` forms
 * exactly one statement and stays correct inside an unbraced if/else.
 */
#define ERRORS(s) \
do { \
    fprintf(stderr, "Error from line %d, file %s, call %s() (%s)\n", \
            __LINE__, __FILE__, (s), strerror(errno)); \
    exit(EXIT_FAILURE); \
} while (0)

/*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
/*
 * Attach the one-page shared memory segment identified by KEY.
 *
 * pars - in/out: when *pars is NULL it receives the attached address;
 *        when *pars is already non-NULL the function does nothing.
 *
 * The segment is first requested with FIRST_FLAGS (create it); if that
 * fails it is requested again with NEXT_FLAGS (use the existing one).
 * A failure of the second shmget() or of shmat() is reported through
 * ERRORS(), which terminates the process.
 */
void init_shmem(PARS **pars)
{
    int shmid;
    void *addr;

    /* Already attached: nothing to do. */
    if (*pars != NULL)
        return;

    shmid = shmget(KEY, PAGE_SIZE, FIRST_FLAGS);
    if (shmid < 0) {
        /* Creation failed; fall back to the non-creating flags. */
        shmid = shmget(KEY, PAGE_SIZE, NEXT_FLAGS);
        if (shmid < 0) {
            ERRORS("shmget");
        }
    }

    addr = shmat(shmid, NULL, 0);
    if (addr == MAP_FAILED) {
        ERRORS("shmat");
    }
    *pars = (PARS *) addr;
}
/*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
/*
 * Detach the shared memory segment that *pars points at.
 *
 * pars - in/out: address of the attached segment on entry; set to NULL
 *        once the segment has been detached.
 *
 * A shmdt() failure is reported through ERRORS(), which terminates the
 * process.
 */
void quit_shmem(PARS **pars)
{
    const void *segment = (const void *) *pars;
    int rc = shmdt(segment);

    if (rc < 0) {
        ERRORS("shmdt");
    }
    *pars = NULL;
}
/*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/

/*
 * Test driver.  Attaches the shared page, zeroes it, prints the initial
 * field values, then forks.  Parent and child each loop forever trying to
 * claim the shared record through the key/spin fields: the parent bumps
 * i and j while it believes it holds the record, and the child checks
 * that it never observes i < j.
 *
 * NOTE(review): `pars->key++`, `pars->spin += key` and `pars->spin -= key`
 * are ordinary C read-modify-write expressions on memory that both
 * processes access; nothing in this file makes them atomic.  `cli` is
 * documented by Intel as affecting only the executing processor's
 * interrupt flag, so it presumably does not keep the other process out
 * when the two run on different CPUs -- confirm.
 */
int main()
{
PARS *pars = NULL;
volatile unsigned int key;
init_shmem(&pars);
/* NOTE(review): presumably raises the I/O privilege level so that cli/sti may be executed below; the result is not checked -- confirm. */
iopl(3);
/* Zero the whole page, not just the PARS record at its start. */
memset(pars, 0x00, PAGE_SIZE);
fprintf(stderr, "Spin = %u\n", pars->spin);
fprintf(stderr, "Key = %u\n", pars->key);
fprintf(stderr, "J = %u\n", pars->j);
fprintf(stderr, "I = %u\n", pars->i);

switch(fork())
{
case 0:
/* Child: drop the attachment inherited across fork() and attach the segment again. */
quit_shmem(&pars);
init_shmem(&pars);
iopl(3);
fprintf(stderr, "CHILD\n");
fprintf(stderr, "Spin = %u\n", pars->spin);
fprintf(stderr, "Key = %u\n", pars->key);
fprintf(stderr, "J = %u\n", pars->j);
fprintf(stderr, "I = %u\n", pars->i);
for(;;)
{
cli;
/* Take a ticket: the value of pars->key before the increment.  A ticket of 0 is skipped. */
while((key = pars->key++) == 0)
;
/* Add the ticket to spin; if spin is then exactly the ticket, no other ticket was in it. */
pars->spin += key;
if(pars->spin != key)
{
/* Something else is in spin: withdraw the ticket, report, sleep a random time under 10000 us, retry. */
pars->spin -= key;
sti;
fprintf(stderr, " Child spinning with %u\n", pars->spin);
fprintf(stderr, "Pad0 = %u\n", pars->pad0);
fprintf(stderr, "Pad1 = %u\n", pars->pad1);
fprintf(stderr, "Pad2 = %u\n", pars->pad2);
fprintf(stderr, "Pad3 = %u\n", pars->pad3);
usleep(rand() % 10000);
continue;
}
sti;
/* The parent increments i before j, so the child treats i < j as the failure case and prints both values. */
if(pars->i < pars->j)
fprintf(stderr, "i = %08x, j = %08x\n", pars->i, pars->j);
else
fprintf(stderr, "good\n");
/* Release: take the ticket back out of spin. */
cli;
pars->spin -= key;
sti;
}
/* The child loop above never exits, so control does not fall through into this case. */
case -1:
ERRORS("fork");
default:
/* Parent: keeps using the attachment made before fork(). */
fprintf(stderr, "PARENT\n");
fprintf(stderr, "Spin = %u\n", pars->spin);
fprintf(stderr, "Key = %u\n", pars->key);
fprintf(stderr, "J = %u\n", pars->j);
fprintf(stderr, "I = %u\n", pars->i);

for(;;)
{
cli;
/* Same ticket scheme as the child: take a nonzero ticket and add it to spin. */
while((key = pars->key++) == 0)
;
pars->spin += key;
if(pars->spin != key)
{
/* Back off: withdraw the ticket, report, sleep a random time under 10000 us, retry. */
pars->spin -= key;
sti;
fprintf(stderr, "Parent spinning with %u\n", pars->spin);
usleep(rand() % 10000);
continue;
}
/* Update i then j, and release, all before sti. */
pars->i++;
pars->j++;
pars->spin -= key;
sti;
}
}
/* Not reached: every switch arm above either loops forever or exits. */
return 0;
}

Cheers,
Dick Johnson

Penguin : Linux version 2.3.13 on an i686 machine (400.59 BogoMips).
Warning : It's hard to remain at the trailing edge of technology.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/