Clean up the Van Jacobson congestion control code in the RPC transport: move the congestion window accounting into a pair of helpers, __xprt_get_cong() and __xprt_put_cong(), and add a per-request rq_cong flag so that each request is charged against the window at most once and always gives its share back when the request slot is released.
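For readers who want the behaviour in a nutshell, here is a small stand-alone model of what the two helpers amount to. It is only an illustration, not the kernel code: the struct stand-ins, the get_cong()/put_cong() names and the printf driver are mine; the constants and the logic mirror the patch below.

#include <stdio.h>

#define RPC_CWNDSCALE	256
#define RPC_INITCWND	RPC_CWNDSCALE

struct xprt { long cong, cwnd; };	/* stand-in for struct rpc_xprt */
struct rqst { int rq_cong; };		/* stand-in for struct rpc_rqst */

/* Charge one request against the window; rq_cong keeps it idempotent. */
static int get_cong(struct xprt *xprt, struct rqst *req)
{
	if (req->rq_cong)
		return 1;
	if (xprt->cong >= xprt->cwnd)	/* RPCXPRT_CONGESTED() */
		return 0;
	req->rq_cong = 1;
	xprt->cong += RPC_CWNDSCALE;
	return 1;
}

/* Give the charge back; safe to call even if none was taken. */
static void put_cong(struct xprt *xprt, struct rqst *req)
{
	if (!req->rq_cong)
		return;
	req->rq_cong = 0;
	xprt->cong -= RPC_CWNDSCALE;
}

int main(void)
{
	struct xprt xprt = { 0, RPC_INITCWND };
	struct rqst a = { 0 }, b = { 0 };

	/* With the initial one-request window only 'a' gets a share;
	 * 'b' has to wait until 'a' gives its share back. */
	printf("a admitted: %d\n", get_cong(&xprt, &a));
	printf("b admitted: %d\n", get_cong(&xprt, &b));
	put_cong(&xprt, &a);
	printf("b after a released: %d\n", get_cong(&xprt, &b));
	return 0;
}

In the kernel version __xprt_put_cong() also kicks __xprt_lock_write_next(), so the next task waiting on the transport gets a chance to claim the share that was just freed.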
Cheers,
Trond
diff -u --recursive --new-file linux-2.5.25-xprt_write/include/linux/sunrpc/xprt.h linux-2.5.25-rpc_cong1/include/linux/sunrpc/xprt.h
--- linux-2.5.25-xprt_write/include/linux/sunrpc/xprt.h Tue Jul 16 15:28:49 2002
+++ linux-2.5.25-rpc_cong1/include/linux/sunrpc/xprt.h Tue Jul 16 15:32:03 2002
@@ -19,7 +19,7 @@
* The transport code maintains an estimate on the maximum number of out-
* standing RPC requests, using a smoothed version of the congestion
* avoidance implemented in 44BSD. This is basically the Van Jacobson
- * slow start algorithm: If a retransmit occurs, the congestion window is
+ * congestion algorithm: If a retransmit occurs, the congestion window is
* halved; otherwise, it is incremented by 1/cwnd when
*
* - a reply is received and
@@ -32,15 +32,13 @@
* Note: on machines with low memory we should probably use a smaller
* MAXREQS value: At 32 outstanding reqs with 8 megs of RAM, fragment
* reassembly will frequently run out of memory.
- * Come Linux 2.3, we'll handle fragments directly.
*/
#define RPC_MAXCONG 16
#define RPC_MAXREQS (RPC_MAXCONG + 1)
#define RPC_CWNDSCALE 256
#define RPC_MAXCWND (RPC_MAXCONG * RPC_CWNDSCALE)
#define RPC_INITCWND RPC_CWNDSCALE
-#define RPCXPRT_CONGESTED(xprt) \
- ((xprt)->cong >= (xprt)->cwnd)
+#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
/* Default timeout values */
#define RPC_MAX_UDP_TIMEOUT (60*HZ)
@@ -83,6 +81,7 @@
struct rpc_task * rq_task; /* RPC task data */
__u32 rq_xid; /* request XID */
struct rpc_rqst * rq_next; /* free list */
+ int rq_cong; /* has incremented xprt->cong */
int rq_received; /* receive completed */
struct list_head rq_list;
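A side note on the arithmetic behind the scaled constants above: cwnd and cong are kept in units of RPC_CWNDSCALE so that the "incremented by 1/cwnd" rule from the header comment can be done with integer arithmetic. The sketch below only illustrates that rule; the adjust_cwnd() name, the rounding and the exact clamping are my simplifications, and as the comment says the real code only grows the window while a full number of requests are outstanding.

#include <stdio.h>

#define RPC_CWNDSCALE	256			/* one request's worth of window */
#define RPC_MAXCONG	16
#define RPC_MAXCWND	(RPC_MAXCONG * RPC_CWNDSCALE)
#define RPC_INITCWND	RPC_CWNDSCALE

/* Grow by 1/cwnd of a request per reply, halve on a retransmit timeout. */
static long adjust_cwnd(long cwnd, int timed_out)
{
	if (timed_out) {
		cwnd >>= 1;
		if (cwnd < RPC_CWNDSCALE)
			cwnd = RPC_CWNDSCALE;
	} else {
		cwnd += RPC_CWNDSCALE * RPC_CWNDSCALE / cwnd;
		if (cwnd > RPC_MAXCWND)
			cwnd = RPC_MAXCWND;
	}
	return cwnd;
}

int main(void)
{
	long cwnd = RPC_INITCWND;
	int i;

	/* Starting from one request's worth of window the growth is
	 * 256 -> 512 -> 640 -> 742 -> ..., i.e. fast at first and then
	 * roughly one extra in-flight request per window's worth of replies. */
	for (i = 1; i <= 5; i++) {
		cwnd = adjust_cwnd(cwnd, 0);
		printf("after reply %d: cwnd = %ld (%ld requests)\n",
		       i, cwnd, cwnd / RPC_CWNDSCALE);
	}
	printf("after a timeout: cwnd = %ld\n", adjust_cwnd(cwnd, 1));
	return 0;
}

Halving on a timeout while growing by roughly one request per window's worth of replies is what keeps the estimate from the heavy oscillation the comment in xprt.c warns about.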
diff -u --recursive --new-file linux-2.5.25-xprt_write/net/sunrpc/xprt.c linux-2.5.25-rpc_cong1/net/sunrpc/xprt.c
--- linux-2.5.25-xprt_write/net/sunrpc/xprt.c Tue Jul 16 15:30:05 2002
+++ linux-2.5.25-rpc_cong1/net/sunrpc/xprt.c Tue Jul 16 15:32:03 2002
@@ -89,6 +89,7 @@
static void xprt_reconn_status(struct rpc_task *task);
static struct socket *xprt_create_socket(int, struct rpc_timeout *);
static int xprt_bind_socket(struct rpc_xprt *, struct socket *);
+static int __xprt_get_cong(struct rpc_xprt *, struct rpc_rqst *);
#ifdef RPC_DEBUG_DATA
/*
@@ -254,6 +255,40 @@
}
/*
+ * Van Jacobson congestion avoidance. Check if the congestion window
+ * overflowed. If it has, the caller will put the task to sleep.
+ */
+static int
+__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+ /* Note: the caller has not attached the request to a task yet,
+ * so only the rpc_rqst is passed in here. */
+ if (req->rq_cong)
+ return 1;
+ dprintk("RPC:      xprt_cwnd_limited cong = %ld cwnd = %ld\n",
+ xprt->cong, xprt->cwnd);
+ if (RPCXPRT_CONGESTED(xprt))
+ return 0;
+ req->rq_cong = 1;
+ xprt->cong += RPC_CWNDSCALE;
+ return 1;
+}
+
+/*
+ * Adjust the congestion window, and wake up the next task
+ * that has been sleeping due to congestion
+ */
+static void
+__xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+ if (!req->rq_cong)
+ return;
+ req->rq_cong = 0;
+ xprt->cong -= RPC_CWNDSCALE;
+ __xprt_lock_write_next(xprt);
+}
+
+/*
* Adjust RPC congestion window
* We use a time-smoothed congestion estimator to avoid heavy oscillation.
*/
@@ -1146,8 +1181,6 @@
if (task->tk_rqstp)
return 0;
- dprintk("RPC: %4d xprt_reserve cong = %ld cwnd = %ld\n",
- task->tk_pid, xprt->cong, xprt->cwnd);
spin_lock_bh(&xprt->xprt_lock);
xprt_reserve_status(task);
if (task->tk_rqstp) {
@@ -1181,13 +1214,14 @@
} else if (task->tk_rqstp) {
/* We've already been given a request slot: NOP */
} else {
- if (RPCXPRT_CONGESTED(xprt) || !(req = xprt->free))
+ if (!(req = xprt->free))
+ goto out_nofree;
+ if (!(xprt->nocong || __xprt_get_cong(xprt, req)))
goto out_nofree;
/* OK: There's room for us. Grab a free slot and bump
* congestion value */
xprt->free = req->rq_next;
req->rq_next = NULL;
- xprt->cong += RPC_CWNDSCALE;
task->tk_rqstp = req;
xprt_request_init(task, xprt);
@@ -1252,9 +1286,7 @@
req->rq_next = xprt->free;
xprt->free = req;
- /* Decrease congestion value. */
- xprt->cong -= RPC_CWNDSCALE;
-
+ __xprt_put_cong(xprt, req);
xprt_clear_backlog(xprt);
spin_unlock_bh(&xprt->xprt_lock);
}