[PATCH] improved RPC statistics

From: Chuck Lever (cel@citi.umich.edu)
Date: Sat Oct 19 2002 - 15:29:24 EST


Add several new RPC statistics counters to help sysadmins more easily
debug client-side NFS performance and reliability problems. Make the RPC
counters longs instead of ints to take advantage of 64-bit counters on
platforms that can support them.

This patch adds some new counter values in /proc/net/rpc/nfs, so it should
be applied before the feature freeze deadline. Please apply this to your
tree.

diff -drN -U3 00-stock/include/linux/sunrpc/stats.h 01-counters/include/linux/sunrpc/stats.h
--- 00-stock/include/linux/sunrpc/stats.h Tue Oct 15 23:28:20 2002
+++ 01-counters/include/linux/sunrpc/stats.h Sat Oct 19 15:48:23 2002
@@ -1,7 +1,7 @@
 /*
  * linux/include/linux/sunrpc/stats.h
  *
- * Client statistics collection for SUN RPC
+ * Statistics collection for SUN RPC
  *
  * Copyright (C) 1996 Olaf Kirch <okir@monad.swb.de>
  */
@@ -11,33 +11,40 @@
 
 #include <linux/config.h>
 #include <linux/proc_fs.h>
+#include <linux/cache.h>
 
 struct rpc_stat {
         struct rpc_program * program;
 
- unsigned int netcnt,
+ unsigned long netcnt,
                                 netudpcnt,
                                 nettcpcnt,
                                 nettcpconn,
                                 netreconn;
- unsigned int rpccnt,
+ unsigned long rpccnt,
                                 rpcretrans,
                                 rpcauthrefresh,
- rpcgarbage;
-};
+ rpcgarbage,
+ rpcnospace,
+ rpcbadxids,
+ rpcbadverfs,
+ rpccantconn,
+ rpcnomem,
+ rpccantsend;
+} ____cacheline_aligned;
 
 struct svc_stat {
         struct svc_program * program;
 
- unsigned int netcnt,
+ unsigned long netcnt,
                                 netudpcnt,
                                 nettcpcnt,
                                 nettcpconn;
- unsigned int rpccnt,
+ unsigned long rpccnt,
                                 rpcbadfmt,
                                 rpcbadauth,
                                 rpcbadclnt;
-};
+} ____cacheline_aligned;
 
 void rpc_proc_init(void);
 void rpc_proc_exit(void);
diff -drN -U3 00-stock/include/linux/sunrpc/xprt.h 01-counters/include/linux/sunrpc/xprt.h
--- 00-stock/include/linux/sunrpc/xprt.h Tue Oct 15 23:28:32 2002
+++ 01-counters/include/linux/sunrpc/xprt.h Sat Oct 19 15:54:19 2002
@@ -146,6 +146,7 @@
         unsigned long sockstate; /* Socket state */
         unsigned char shutdown : 1, /* being shut down */
                                 nocong : 1, /* no congestion control */
+ neverconn : 1, /* no connection attempt yet */
                                 resvport : 1, /* use a reserved port */
                                 stream : 1; /* TCP */
 
diff -drN -U3 00-stock/net/sunrpc/clnt.c 01-counters/net/sunrpc/clnt.c
--- 00-stock/net/sunrpc/clnt.c Tue Oct 15 23:27:53 2002
+++ 01-counters/net/sunrpc/clnt.c Sat Oct 19 15:48:23 2002
@@ -453,6 +453,8 @@
                 xprt_release(task);
         }
 
+ task->tk_client->cl_stats->rpcnomem++;
+
         switch (status) {
         case -EAGAIN: /* woken up; retry */
                 task->tk_action = call_reserve;
@@ -494,6 +496,7 @@
         if (RPC_IS_ASYNC(task) || !(task->tk_client->cl_intr && signalled())) {
                 xprt_release(task);
                 task->tk_action = call_reserve;
+ task->tk_client->cl_stats->rpcnomem++;
                 rpc_delay(task, HZ>>4);
                 return;
         }
@@ -669,6 +672,7 @@
                 task->tk_action = call_bind;
                 break;
         case -EAGAIN:
+ clnt->cl_stats->rpcnospace++;
                 task->tk_action = call_transmit;
                 break;
         case -EIO:
@@ -757,14 +761,16 @@
         }
 
         /* Verify the RPC header */
- if (!(p = call_verify(task)))
+ if (!(p = call_verify(task))) {
+ clnt->cl_stats->rpcbadverfs++;
                 return;
+ }
 
         /*
          * The following is an NFS-specific hack to cater for setuid
          * processes whose uid is mapped to nobody on the server.
          */
- if (task->tk_client->cl_droppriv &&
+ if (clnt->cl_droppriv &&
             (ntohl(*p) == NFSERR_ACCES || ntohl(*p) == NFSERR_PERM)) {
                 if (RPC_IS_SETUID(task) && task->tk_suid_retry) {
                         dprintk("RPC: %4d retry squashed uid\n", task->tk_pid);
diff -drN -U3 00-stock/net/sunrpc/stats.c 01-counters/net/sunrpc/stats.c
--- 00-stock/net/sunrpc/stats.c Tue Oct 15 23:29:07 2002
+++ 01-counters/net/sunrpc/stats.c Sat Oct 19 15:48:23 2002
@@ -40,16 +40,23 @@
         int len, i, j;
 
         len = sprintf(buffer,
- "net %d %d %d %d\n",
+ "net %lu %lu %lu %lu\n",
                         statp->netcnt,
                         statp->netudpcnt,
                         statp->nettcpcnt,
                         statp->nettcpconn);
         len += sprintf(buffer + len,
- "rpc %d %d %d\n",
+ "rpc %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
                         statp->rpccnt,
                         statp->rpcretrans,
- statp->rpcauthrefresh);
+ statp->rpcauthrefresh,
+ statp->rpcgarbage,
+ statp->rpcnospace,
+ statp->rpcbadxids,
+ statp->rpcbadverfs,
+ statp->rpccantconn,
+ statp->rpcnomem,
+ statp->rpccantsend);
 
         for (i = 0; i < prog->nrvers; i++) {
                 if (!(vers = prog->version[i]))
@@ -88,13 +95,13 @@
         int len, i, j;
 
         len = sprintf(buffer,
- "net %d %d %d %d\n",
+ "net %lu %lu %lu %lu\n",
                         statp->netcnt,
                         statp->netudpcnt,
                         statp->nettcpcnt,
                         statp->nettcpconn);
         len += sprintf(buffer + len,
- "rpc %d %d %d %d %d\n",
+ "rpc %lu %lu %lu %lu %lu\n",
                         statp->rpccnt,
                         statp->rpcbadfmt+statp->rpcbadauth+statp->rpcbadclnt,
                         statp->rpcbadfmt,
diff -drN -U3 00-stock/net/sunrpc/xprt.c 01-counters/net/sunrpc/xprt.c
--- 00-stock/net/sunrpc/xprt.c Tue Oct 15 23:28:29 2002
+++ 01-counters/net/sunrpc/xprt.c Sat Oct 19 16:21:26 2002
@@ -213,6 +213,7 @@
 static inline int
 xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
 {
+ struct rpc_stat *stats = req->rq_task->tk_client->cl_stats;
         struct socket *sock = xprt->sock;
         struct msghdr msg;
         struct xdr_buf *xdr = &req->rq_snd_buf;
@@ -224,6 +225,12 @@
         if (!sock)
                 return -ENOTCONN;
 
+ stats->netcnt++;
+ if (xprt->stream)
+ stats->nettcpcnt++;
+ else
+ stats->netudpcnt++;
+
         xprt_pktdump("packet data:",
                                 req->rq_svec->iov_base,
                                 req->rq_svec->iov_len);
@@ -258,16 +265,21 @@
                 /* When the server has died, an ICMP port unreachable message
                  * prompts ECONNREFUSED.
                  */
+ stats->rpccantsend++;
+ break;
         case -EAGAIN:
                 break;
         case -ENOTCONN:
         case -EPIPE:
                 /* connection broken */
+ stats->rpccantsend++;
                 if (xprt->stream)
                         result = -ENOTCONN;
                 break;
         default:
+ stats->rpccantsend++;
                 printk(KERN_NOTICE "RPC: sendmsg returned error %d\n", -result);
+ break;
         }
         return result;
 }
@@ -416,6 +428,7 @@
 void
 xprt_connect(struct rpc_task *task)
 {
+ struct rpc_stat *stats = task->tk_client->cl_stats;
         struct rpc_xprt *xprt = task->tk_xprt;
         struct socket *sock = xprt->sock;
         struct sock *inet;
@@ -487,6 +500,11 @@
                 if (inet->state != TCP_ESTABLISHED) {
                         xprt_close(xprt);
                         task->tk_status = -EAGAIN;
+ stats->nettcpconn++;
+ if (!xprt->neverconn) {
+ stats->netreconn++;
+ xprt->neverconn = 0;
+ }
                         goto out_write;
                 }
 
@@ -497,12 +515,14 @@
         case -EPIPE:
                 xprt_close(xprt);
                 task->tk_status = -ENOTCONN;
+ stats->rpccantconn++;
                 goto out_write;
 
         default:
                 /* Report myriad other possible returns. If this file
                  * system is soft mounted, just error out, like Solaris. */
                 xprt_close(xprt);
+ stats->rpccantconn++;
                 if (task->tk_client->cl_softrtry) {
                         printk(KERN_WARNING
                         "RPC: error %d connecting to server %s, exiting\n",
@@ -528,12 +548,18 @@
 static void
 xprt_conn_status(struct rpc_task *task)
 {
+ struct rpc_stat *stats = task->tk_client->cl_stats;
         struct rpc_xprt *xprt = task->tk_xprt;
 
         switch (task->tk_status) {
         case 0:
                 dprintk("RPC: %4d xprt_conn_status: connection established\n",
                                 task->tk_pid);
+ stats->nettcpconn++;
+ if (!xprt->neverconn) {
+ stats->netreconn++;
+ xprt->neverconn = 0;
+ }
                 goto out;
         case -ETIMEDOUT:
                 dprintk("RPC: %4d xprt_conn_status: timed out\n",
@@ -548,6 +574,8 @@
                 rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
                 break;
         }
+ stats->rpccantconn++;
+
         /* if soft mounted, cause this RPC to fail */
         if (task->tk_client->cl_softrtry)
                 task->tk_status = -EIO;
@@ -1374,6 +1402,7 @@
         if (xprt->stream) {
                 xprt->cwnd = RPC_MAXCWND;
                 xprt->nocong = 1;
+ xprt->neverconn = 1;
         } else
                 xprt->cwnd = RPC_INITCWND;
         spin_lock_init(&xprt->sock_lock);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Wed Oct 23 2002 - 22:00:48 EST