NFS problem + example program

Jim Nance (jlnance@avanticorp.com)
Fri, 11 Dec 1998 10:06:07 -0500


Hello All,
I am trying to get an application that works under Solaris to work under
Linux. It's a distributed processing application and it shares data through
a common file system and synchronizes itself using locks in the file system.
It works under Solaris but I am having some problems under Linux. The
initial problem I was working on is that host1 writes a file, and removes
a lock. host2 opens the file and gets an empty file even though host1 has
written data into the file. I wrote this program to try to demonstrate that
problem but ended up finding another problem. This program does:

lock using hardlink
do trivial file op
unlock
goto start

If you run it on 2 Solaris machines you never see any "Still waiting for lock"
messages. If you run it on 2 Linux machines you will eventually see one
process get stuck waiting for the lock. It remains stuck even if you kill
the other process and remove the lock file by hand. It appears that the
client has cached the results of the stat() call and is not actually sending
the request to the remote machine. After a minute or so it realizes that the lock
is gone and it starts working again. I am using 2.1.131+ac7 as my kernel
and I am using HJ's knfs nfs server. I have also tried running the program
from directories served off Solaris and Digital Unix NFS servers and the
problem is still there.

Thanks,

Jim

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>

#define LOCKFILE "my_lock_file"
#define DATAFILE "my_data_file"
#define TESTSTR "This is a test string to be\nwritten\nto\nthe\nfile"

/*
 * Acquire the lock by hard-linking a per-process file to LOCKFILE.
 *
 * On the first call a file named "<hostname>_<pid>" is created in the
 * current directory and its name is remembered in static storage.  Every
 * call then loops: try link(lname, LOCKFILE), stat() the per-process file,
 * and treat a link count of 2 as "we hold the lock".  The link count, not
 * the return value of link(), decides success; a failed link() combined
 * with a link count of 2 is reported but still counts as holding the lock.
 *
 * Does not return until the lock is held.  Sleeps 1000 microseconds between
 * attempts and prints a progress message on every 32nd failed attempt.
 * Exits with status -1 if the host name cannot be obtained, if the lock
 * file name does not fit in the buffer, or if the per-process file cannot
 * be created.
 */
void lockme(void)
{
    static char lname[1024];
    static size_t llen;   /* non-zero once lname has been initialised */
    unsigned int lcnt = 0;

    if(!llen) {
        int fd;
        int n;
        char hname[1024];

        if(gethostname(hname, sizeof(hname))) {
            perror("lockme:");
            exit(-1);
        }
        /* gethostname() need not NUL-terminate a truncated name. */
        hname[sizeof(hname) - 1] = '\0';

        /* Bounded formatting: hname alone can be as large as lname. */
        n = snprintf(lname, sizeof(lname), "%s_%ld", hname, (long)getpid());
        if(n < 0 || (size_t)n >= sizeof(lname)) {
            fprintf(stderr, "lockme: lock file name too long\n");
            exit(-1);
        }
        llen = (size_t)n;

        /* O_EXCL: the per-process file must not already exist. */
        fd = open(lname, O_CREAT | O_EXCL | O_WRONLY, 0644);
        if(fd<0) {
            perror(lname);
            exit(-1);
        }
        close(fd);
    }

    do {
        struct stat buff;
        int lcode = link(lname, LOCKFILE);
        int scode = stat(lname, &buff);

        /* Two links to our file means LOCKFILE now refers to it. */
        if(!scode && buff.st_nlink==2) {
            if(lcode) {
                printf("lcode was bad and link was good??\n");
            }
            return;
        }

        ++lcnt;
        if(lcnt%32==31) {
            printf("Still waiting for lock on try %u\n", lcnt);
        }
        usleep(1000);
    } while(1);
}

/*
 * Release the lock by removing the LOCKFILE directory entry.
 * The result of unlink() is deliberately not examined.
 */
void unlockme(void)
{
    const char *lock_path = LOCKFILE;

    (void)unlink(lock_path);
}

/*
 * Test driver.  Repeats forever:
 *   1. take the lock,
 *   2. make sure DATAFILE exists (it is created on the first iteration
 *      only; a missing file on any later iteration is fatal),
 *   3. read DATAFILE back and print "Strings differ" if its contents are
 *      not exactly TESTSTR,
 *   4. write TESTSTR to DATAFILE ".tmp" and rename it over DATAFILE,
 *   5. release the lock and sleep 20000 microseconds.
 *
 * Command-line arguments are ignored.  Exits with status -1 when a file
 * cannot be opened or renamed.
 */
int main(int ac, char **av)
{
    struct stat buff;
    size_t nchar = strlen(TESTSTR);
    char tbuff[1024];
    FILE *fp;
    int first;

    (void)ac;
    (void)av;

    for(first=1;;first=0) {
        size_t nread;

        lockme();

        if(stat(DATAFILE, &buff)) {
            if(first) {
                fp = fopen(DATAFILE,"w");
                if(!fp) {
                    fprintf(stderr,"Can not open \" "DATAFILE "\"\n");
                    exit(-1);
                }
                fwrite(TESTSTR, nchar, 1, fp);
                fclose(fp);
            } else {
                fprintf(stderr, "\"" DATAFILE "\" Does not exist\n");
                exit(-1);
            }
        }

        fp=fopen(DATAFILE,"r");
        if(!fp) {
            /* Previously a failed open passed NULL to fread()/fclose(). */
            fprintf(stderr,"Can not open \" "DATAFILE "\"\n");
            exit(-1);
        }
        /*
         * Read byte-wise so the exact count is known, then terminate the
         * buffer: fread() does not add a NUL and strcmp() below needs one.
         * An empty file yields an empty string, which compares as different.
         */
        nread = fread(tbuff, 1, nchar, fp);
        tbuff[nread] = 0;
        fclose(fp);

        if(strcmp(TESTSTR, tbuff)) {
            printf("Strings differ\n");
        }

        fp = fopen(DATAFILE ".tmp" ,"w");
        if(!fp) {
            fprintf(stderr,"Can not open \" "DATAFILE ".tmp" "\"\n");
            exit(-1);
        }
        fwrite(TESTSTR, nchar, 1, fp);
        fclose(fp);

        if(rename(DATAFILE ".tmp", DATAFILE)) {
            fprintf(stderr, "Can not rename files\n");
            exit(-1);
        }

        unlockme();
        usleep(20000); /* Give the other host a chance */
    }
    return 0;
}

-- 
----------------------------------------------------------------------------
Jim Nance                                                 Avant! Corporation
(919) 941-6655    Do you have sweet iced tea?       jim_nance@avanticorp.com
                  No, but there's sugar on the table.

- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu Please read the FAQ at http://www.tux.org/lkml/