Patch for 2.1.57 lockd -- please test!

Bill Hawes (whawes@star.net)
Tue, 30 Sep 1997 15:51:22 -0400


This is a multi-part message in MIME format.
--------------1D17B1788AFFDEA91790A2D6
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

I took a look at the lockd code and think I've spotted the problem
leading to multiple lockd processes. Anyone experiencing lockd problems
might want to give this patch a try ... it's untested but hopefully will
work OK :-)

Let me know if any of the printk messages are triggered.

Regards,
Bill
--------------1D17B1788AFFDEA91790A2D6
Content-Type: text/plain; charset=us-ascii; name="lockd_57-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="lockd_57-patch"

--- fs/lockd/svc.c.old Wed Apr 16 00:47:24 1997
+++ fs/lockd/svc.c Tue Sep 30 12:06:17 1997
@@ -42,7 +42,8 @@

extern struct svc_program nlmsvc_program;
struct nlmsvc_binding * nlmsvc_ops = NULL;
-static int nlmsvc_sema = 0;
+static struct semaphore nlmsvc_sema = MUTEX;
+static unsigned int nlmsvc_users = 0;
static int nlmsvc_pid = 0;
unsigned long nlmsvc_grace_period = 0;
unsigned long nlmsvc_timeout = 0;
@@ -99,9 +100,10 @@
/*
* The main request loop. We don't terminate until the last
* NFS mount or NFS daemon has gone away, and we've been sent a
- * signal.
+ * signal, or another process has taken over our job.
*/
- while (nlmsvc_sema || !signalled()) {
+ while ((nlmsvc_users || !signalled()) && nlmsvc_pid == current->pid)
+ {
if (signalled())
current->signal = 0;

@@ -164,9 +166,13 @@
/* release rpciod */
rpciod_down();

+ /*
+ * Don't clear the slot if there's a new lockd process!
+ */
+ if (current->pid == nlmsvc_pid)
+ nlmsvc_pid = 0;
/* Release module */
MOD_DEC_USE_COUNT;
- nlmsvc_pid = 0;
}

/*
@@ -185,42 +191,82 @@
return svc_create_socket(serv, protocol, &sin);
}

+/*
+ * Bring up the lockd thread if it's not already up.
+ */
int
lockd_up(void)
{
struct svc_serv * serv;
- int error;
-
- if (nlmsvc_pid || nlmsvc_sema++)
- return 0;
+ int error = 0;

- dprintk("lockd: creating service\n");
- if ((serv = svc_create(&nlmsvc_program, 0, NLMSVC_XDRSIZE)) == NULL)
- return -ENOMEM;
-
- if ((error = lockd_makesock(serv, IPPROTO_UDP, 0)) < 0
- || (error = lockd_makesock(serv, IPPROTO_TCP, 0)) < 0) {
- svc_destroy(serv);
- return error;
+ down(&nlmsvc_sema);
+ /*
+ * Check whether we're already up and running.
+ */
+ if (nlmsvc_pid) {
+ nlmsvc_users++;
+ goto out;
}
+ /*
+ * Sanity check: if there's no pid,
+ * there shouldn't be any users ...
+ */
+ if (nlmsvc_users)
+ printk("lockd_up: no pid, users=%d\n", nlmsvc_users);

- if ((error = svc_create_thread(lockd, serv)) < 0)
- nlmsvc_sema--;
+ dprintk("lockd: creating service\n");
+ error = -ENOMEM;
+ serv = svc_create(&nlmsvc_program, 0, NLMSVC_XDRSIZE);
+ if (!serv)
+ goto out;
+
+ if ((error = lockd_makesock(serv, IPPROTO_UDP, 0)) < 0
+ || (error = lockd_makesock(serv, IPPROTO_TCP, 0)) < 0)
+ goto destroy_and_out;
+
+ error = svc_create_thread(lockd, serv);
+ if (!error)
+ nlmsvc_users++;

/* Release server */
+destroy_and_out:
svc_destroy(serv);
- return 0;
+out:
+ up(&nlmsvc_sema);
+ return error;
}

void
lockd_down(void)
{
- if (!nlmsvc_pid || --nlmsvc_sema > 0)
- return;
+ down(&nlmsvc_sema);
+ if (!nlmsvc_users) {
+ printk("lockd_down: No users! pid=%d\n", nlmsvc_pid);
+ goto out;
+ }
+ if (--nlmsvc_users)
+ goto out;
+
+ if (!nlmsvc_pid) {
+ printk("lockd_down: nothing to do!\n");
+ goto out;
+ }

kill_proc(nlmsvc_pid, SIGKILL, 1);
- nlmsvc_sema = 0;
- nlmsvc_pid = 0;
+ schedule();
+ if (nlmsvc_pid) {
+ printk("lockd_down: waiting for pid %d to exit\n", nlmsvc_pid);
+ current->timeout = HZ;
+ schedule();
+ current->timeout = 0;
+ if (nlmsvc_pid) {
+ printk("lockd_down: clearing pid\n");
+ nlmsvc_pid = 0;
+ }
+ }
+out:
+ up(&nlmsvc_sema);
}

#ifdef MODULE

--------------1D17B1788AFFDEA91790A2D6--