Re: Serious locking bug in Linux NFS

H.J. Lu (hjl@lucon.org)
Fri, 16 Oct 1998 19:46:46 -0700 (PDT)


>
>
> I'm testing the knfsd-981014 package with kernel version 2.1.125.
> I have noticed MANY failures and odd problems.
>
> 1) Showmount doesn't always display who is mounted.

I need to know how to reproduce it.

> 2) The log file doesn't always seem to catch mount/unmount
> requests.

That is even stranger. I need to know how to reproduce it.

> 3) I got this message while starting statd:
>
> Oct 16 13:50:15 linuxserver ./statd[23846]: address mismatch: expected
> 240.92.123.21, got 240.92.123.21
>

Here is the relevant code:

if (lp->addr.s_addr != sin->sin_addr.s_addr)
dprintf(L_WARNING, "address mismatch: "
"expected %s, got %s\n",
inet_ntoa(lp->addr),
inet_ntoa(sin->sin_addr));

I don't see how it can happen besides a bad compiler/HW. It looks
like you may have a bad compiler/HW.

> (The numbers and hostname were changed slightly.)
>
> 4) Locking doesn't seem to work correctly at all. I have a small
> C program to test locking. Here it is:
>
> --------------------------------------------
>
> #include <stdio.h>
> #include <unistd.h>
> #include <fcntl.h>
>
> int main (void)
> {
> FILE *k;
> struct flock fl;
> char j[1024];
>
> printf ("Opening 'testlock' for writing.\n");
> k = fopen ("testlock", "a+");
>
> if (!k) {
> printf ("Unable to open testlock\n");
> exit (0);
> }
> printf ("Success!\n");
>
> gets (j);
>
> printf ("Trying to get a read lock for 'testlock'\n");
> fl.l_type = F_RDLCK;
> fl.l_whence = 0;
> fl.l_start = 0;
> fl.l_len = 0;
>
> if ((fcntl (k->_fileno, F_SETLKW, &fl)) == -1) {
> printf ("Unable to get lock for testlock\n");
> exit (0);}
> printf ("Success!\n");
>
> gets (j);
>
> printf ("Trying to get a write lock for 'testlock'\n");
> fl.l_type = F_WRLCK;
> fl.l_whence = 0;
> fl.l_start = 0;
> fl.l_len = 0;
>
> if ((fcntl (k->_fileno, F_SETLKW, &fl)) == -1) {
> printf ("Unable to get lock for testlock\n");
> exit (0);}
> printf ("Success!\n");
>
> gets (j);
>
> printf ("Trying to release the lock for 'testlock'\n");
>
> fl.l_type = F_UNLCK;
> fl.l_whence = 0;
> fl.l_start = 0;
> fl.l_len = 0;
>
> if ((fcntl (k->_fileno, F_SETLKW, &fl)) == -1) {
> printf ("Unable to release lock for testlock\n");
> exit (0);}
> printf ("Success!\n");
>
> gets (j);
>
> printf ("Closing the file.\n");
>
> fclose (k);
> printf ("Success!\n");
>
> return 0;
> }
>
> ----------------------------------------
>
> For these tests, I'm using your linuc-knfs package as the server,
> and the packages which are shipped with RedHat Linux 5.1 patched per their
> errata file.
>
> One machine/two processes on the same file:
>

Here is a kernel patch. Please make sure lockd is running on your client
machine. You should start "portmap" before mounting NFS.

H.J.
----
Index: fs/lockd/clntproc.c
===================================================================
RCS file: /home/work/cvs/linux/linux/fs/lockd/clntproc.c,v
retrieving revision 1.1.1.6
diff -u -r1.1.1.6 clntproc.c
--- fs/lockd/clntproc.c 1998/09/07 02:27:41 1.1.1.6
+++ fs/lockd/clntproc.c 1998/10/16 19:35:11
@@ -125,7 +125,8 @@

/* If we're cleaning up locks because the process is exiting,
* perform the RPC call asynchronously. */
- if (cmd == F_SETLK && fl->fl_type == F_UNLCK
+ if ((cmd == F_SETLK || cmd == F_SETLKW)
+ && fl->fl_type == F_UNLCK
&& (current->flags & PF_EXITING)) {
sigfillset(&current->blocked); /* Mask all signals */
recalc_sigpending(current);
@@ -144,7 +145,8 @@

if (cmd == F_GETLK) {
status = nlmclnt_test(call, fl);
- } else if (cmd == F_SETLK && fl->fl_type == F_UNLCK) {
+ } else if ((cmd == F_SETLK || cmd == F_SETLKW)
+ && fl->fl_type == F_UNLCK) {
status = nlmclnt_unlock(call, fl);
} else if (cmd == F_SETLK || cmd == F_SETLKW) {
call->a_args.block = (cmd == F_SETLKW)? 1 : 0;
Index: fs/nfs/file.c
===================================================================
RCS file: /home/work/cvs/linux/linux/fs/nfs/file.c,v
retrieving revision 1.1.1.11
diff -u -r1.1.1.11 file.c
--- fs/nfs/file.c 1998/09/07 02:26:39 1.1.1.11
+++ fs/nfs/file.c 1998/10/16 19:32:32
@@ -255,7 +255,7 @@

/* If unlocking a file region, flush dirty pages (unless we've
* been killed by a signal, that is). */
- if (cmd == F_SETLK && fl->fl_type == F_UNLCK
+ if ((cmd == F_SETLK || cmd == F_SETLKW) && fl->fl_type == F_UNLCK
&& !signal_pending(current)) {
status = nfs_flush_dirty_pages(inode, current->pid,
fl->fl_start, fl->fl_end == NLM_OFFSET_MAX? 0 :

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/