[NFS] SMP race 1 (patch against 2.4.0-test1)

From: Trond Myklebust (trond.myklebust@fys.uio.no)
Date: Mon Jun 12 2000 - 11:30:32 EST


The following patch fixes a small race in net/sunrpc/xprt.c whereby
the 'xprt->cong' can getting updated in a bh (and thus is not covered
by the big kernel lock).

It also contains a few more fixes in preparation for the lifting of
said big kernel lock in rpciod().

Notably:
        - make cl_users atomic_t
        - protect xid allocation from races using spinlock.
        - protect portmapper code from races

Cheers,
  Trond

diff -u --recursive --new-file linux-2.3.99-pre10-3-read/fs/lockd/host.c linux-2.3.99-pre10-3-sunrpc/fs/lockd/host.c
--- linux-2.3.99-pre10-3-read/fs/lockd/host.c Mon Apr 3 00:31:32 2000
+++ linux-2.3.99-pre10-3-sunrpc/fs/lockd/host.c Thu Jun 1 01:41:34 2000
@@ -328,7 +328,7 @@
                         if (host->h_monitored)
                                 nsm_unmonitor(host);
                         if ((clnt = host->h_rpcclnt) != NULL) {
- if (clnt->cl_users) {
+ if (atomic_read(&clnt->cl_users)) {
                                         printk(KERN_WARNING
                                                 "lockd: active RPC handle\n");
                                         clnt->cl_dead = 1;
diff -u --recursive --new-file linux-2.3.99-pre10-3-read/include/linux/sunrpc/clnt.h linux-2.3.99-pre10-3-sunrpc/include/linux/sunrpc/clnt.h
--- linux-2.3.99-pre10-3-read/include/linux/sunrpc/clnt.h Thu May 25 16:02:13 2000
+++ linux-2.3.99-pre10-3-sunrpc/include/linux/sunrpc/clnt.h Thu Jun 1 13:23:21 2000
@@ -30,7 +30,7 @@
  * The high-level client handle
  */
 struct rpc_clnt {
- unsigned int cl_users; /* number of references */
+ atomic_t cl_users; /* number of references */
         struct rpc_xprt * cl_xprt; /* transport */
         struct rpc_procinfo * cl_procinfo; /* procedure info */
         u32 cl_maxproc; /* max procedure number */
diff -u --recursive --new-file linux-2.3.99-pre10-3-read/net/sunrpc/clnt.c linux-2.3.99-pre10-3-sunrpc/net/sunrpc/clnt.c
--- linux-2.3.99-pre10-3-read/net/sunrpc/clnt.c Mon Apr 3 22:24:06 2000
+++ linux-2.3.99-pre10-3-sunrpc/net/sunrpc/clnt.c Sun Jun 4 00:24:36 2000
@@ -91,6 +91,7 @@
         if (!clnt)
                 goto out_no_clnt;
         memset(clnt, 0, sizeof(*clnt));
+ atomic_set(&clnt->cl_users, 0);
 
         clnt->cl_xprt = xprt;
         clnt->cl_procinfo = version->procs;
@@ -140,16 +141,16 @@
 {
         dprintk("RPC: shutting down %s client for %s\n",
                 clnt->cl_protname, clnt->cl_server);
- while (clnt->cl_users) {
+ while (atomic_read(&clnt->cl_users)) {
 #ifdef RPC_DEBUG
                 dprintk("RPC: rpc_shutdown_client: client %s, tasks=%d\n",
- clnt->cl_protname, clnt->cl_users);
+ clnt->cl_protname, atomic_read(&clnt->cl_users));
 #endif
                 /* Don't let rpc_release_client destroy us */
                 clnt->cl_oneshot = 0;
                 clnt->cl_dead = 0;
                 rpc_killall_tasks(clnt);
- sleep_on(&destroy_wait);
+ sleep_on_timeout(&destroy_wait, 1*HZ);
         }
         return rpc_destroy_client(clnt);
 }
@@ -182,14 +183,10 @@
 rpc_release_client(struct rpc_clnt *clnt)
 {
         dprintk("RPC: rpc_release_client(%p, %d)\n",
- clnt, clnt->cl_users);
- if (clnt->cl_users) {
- if (--(clnt->cl_users) > 0)
- return;
- } else
- printk("rpc_release_client: %s client already free??\n",
- clnt->cl_protname);
+ clnt, atomic_read(&clnt->cl_users));
 
+ if (!atomic_dec_and_test(&clnt->cl_users))
+ return;
         wake_up(&destroy_wait);
         if (clnt->cl_oneshot || clnt->cl_dead)
                 rpc_destroy_client(clnt);
diff -u --recursive --new-file linux-2.3.99-pre10-3-read/net/sunrpc/pmap_clnt.c linux-2.3.99-pre10-3-sunrpc/net/sunrpc/pmap_clnt.c
--- linux-2.3.99-pre10-3-read/net/sunrpc/pmap_clnt.c Fri Apr 14 02:19:57 2000
+++ linux-2.3.99-pre10-3-sunrpc/net/sunrpc/pmap_clnt.c Sun Jun 4 00:12:50 2000
@@ -31,6 +31,7 @@
 static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int);
 static void pmap_getport_done(struct rpc_task *);
 extern struct rpc_program pmap_program;
+spinlock_t pmap_lock = SPIN_LOCK_UNLOCKED;
 
 /*
  * Obtain the port for a given RPC service on a given host. This one can
@@ -49,11 +50,14 @@
                         task->tk_pid, clnt->cl_server,
                         map->pm_prog, map->pm_vers, map->pm_prot);
 
+ spin_lock(&pmap_lock);
         if (clnt->cl_binding) {
                 rpc_sleep_on(&clnt->cl_bindwait, task, NULL, 0);
+ spin_unlock(&pmap_lock);
                 return;
         }
         clnt->cl_binding = 1;
+ spin_unlock(&pmap_lock);
 
         task->tk_status = -EACCES; /* why set this? returns -EIO below */
         if (!(pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot)))
@@ -74,8 +78,10 @@
         return;
 
 bailout:
+ spin_lock(&pmap_lock);
         clnt->cl_binding = 0;
         rpc_wake_up(&clnt->cl_bindwait);
+ spin_unlock(&pmap_lock);
         task->tk_status = -EIO;
         task->tk_action = NULL;
 }
@@ -129,8 +135,10 @@
                 clnt->cl_port = htons(clnt->cl_port);
                 clnt->cl_xprt->addr.sin_port = clnt->cl_port;
         }
+ spin_lock(&pmap_lock);
         clnt->cl_binding = 0;
         rpc_wake_up(&clnt->cl_bindwait);
+ spin_unlock(&pmap_lock);
 }
 
 /*
diff -u --recursive --new-file linux-2.3.99-pre10-3-read/net/sunrpc/sched.c linux-2.3.99-pre10-3-sunrpc/net/sunrpc/sched.c
--- linux-2.3.99-pre10-3-read/net/sunrpc/sched.c Fri May 5 07:40:17 2000
+++ linux-2.3.99-pre10-3-sunrpc/net/sunrpc/sched.c Thu Jun 1 01:00:56 2000
@@ -778,7 +778,7 @@
         spin_unlock(&rpc_sched_lock);
 
         if (clnt)
- clnt->cl_users++;
+ atomic_inc(&clnt->cl_users);
 
 #ifdef RPC_DEBUG
         task->tk_magic = 0xf00baa;
@@ -823,8 +823,8 @@
         /* Check whether to release the client */
         if (clnt) {
                 printk("rpc_new_task: failed, users=%d, oneshot=%d\n",
- clnt->cl_users, clnt->cl_oneshot);
- clnt->cl_users++; /* pretend we were used ... */
+ atomic_read(&clnt->cl_users), clnt->cl_oneshot);
+ atomic_inc(&clnt->cl_users); /* pretend we were used ... */
                 rpc_release_client(clnt);
         }
         goto out;
diff -u --recursive --new-file linux-2.3.99-pre10-3-read/net/sunrpc/xprt.c linux-2.3.99-pre10-3-sunrpc/net/sunrpc/xprt.c
--- linux-2.3.99-pre10-3-read/net/sunrpc/xprt.c Thu Apr 6 01:58:38 2000
+++ linux-2.3.99-pre10-3-sunrpc/net/sunrpc/xprt.c Sat Jun 3 02:36:18 2000
@@ -290,11 +290,12 @@
 {
         unsigned long cwnd = xprt->cwnd;
 
+ spin_lock_bh(&xprt_sock_lock);
         if (xprt->nocong)
- return;
+ goto out;
         if (result >= 0) {
                 if (xprt->cong < cwnd || time_before(jiffies, xprt->congtime))
- return;
+ goto out;
                 /* The (cwnd >> 1) term makes sure
                  * the result gets rounded properly. */
                 cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd;
@@ -317,6 +318,8 @@
         }
 
         xprt->cwnd = cwnd;
+ out:
+ spin_unlock_bh(&xprt_sock_lock);
 }
 
 /*
@@ -1294,15 +1297,18 @@
 
         dprintk("RPC: %4d xprt_reserve cong = %ld cwnd = %ld\n",
                                 task->tk_pid, xprt->cong, xprt->cwnd);
- if (!RPCXPRT_CONGESTED(xprt) && xprt->free) {
- xprt_reserve_status(task);
+ spin_lock_bh(&xprt_sock_lock);
+ xprt_reserve_status(task);
+ if (task->tk_rqstp) {
                 task->tk_timeout = 0;
         } else if (!task->tk_timeout) {
                 task->tk_status = -ENOBUFS;
         } else {
                 dprintk("RPC: xprt_reserve waiting on backlog\n");
- rpc_sleep_on(&xprt->backlog, task, xprt_reserve_status, NULL);
+ task->tk_status = -EAGAIN;
+ rpc_sleep_on(&xprt->backlog, task, NULL, NULL);
         }
+ spin_unlock_bh(&xprt_sock_lock);
         dprintk("RPC: %4d xprt_reserve returns %d\n",
                                 task->tk_pid, task->tk_status);
         return task->tk_status;
@@ -1323,25 +1329,20 @@
                 /* NOP */
         } else if (task->tk_rqstp) {
                 /* We've already been given a request slot: NOP */
- } else if (!RPCXPRT_CONGESTED(xprt) && xprt->free) {
+ } else {
+ if (RPCXPRT_CONGESTED(xprt) || !(req = xprt->free))
+ goto out_nofree;
                 /* OK: There's room for us. Grab a free slot and bump
                  * congestion value */
- spin_lock(&xprt_lock);
- if (!(req = xprt->free)) {
- spin_unlock(&xprt_lock);
- goto out_nofree;
- }
                 xprt->free = req->rq_next;
                 req->rq_next = NULL;
- spin_unlock(&xprt_lock);
                 xprt->cong += RPC_CWNDSCALE;
                 task->tk_rqstp = req;
                 xprt_request_init(task, xprt);
 
                 if (xprt->free)
                         xprt_clear_backlog(xprt);
- } else
- goto out_nofree;
+ }
 
         return;
 
@@ -1388,24 +1389,21 @@
 
         dprintk("RPC: %4d release request %p\n", task->tk_pid, req);
 
- spin_lock(&xprt_lock);
- req->rq_next = xprt->free;
- xprt->free = req;
-
         /* remove slot from queue of pending */
         if (task->tk_rpcwait) {
                 printk("RPC: task of released request still queued!\n");
-#ifdef RPC_DEBUG
- printk("RPC: (task is on %s)\n", rpc_qname(task->tk_rpcwait));
-#endif
                 rpc_remove_wait_queue(task);
         }
- spin_unlock(&xprt_lock);
+
+ spin_lock_bh(&xprt_sock_lock);
+ req->rq_next = xprt->free;
+ xprt->free = req;
 
         /* Decrease congestion value. */
         xprt->cong -= RPC_CWNDSCALE;
 
         xprt_clear_backlog(xprt);
+ spin_unlock_bh(&xprt_sock_lock);
 }
 
 /*

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Thu Jun 15 2000 - 21:00:26 EST