(fwd) NIS/RPC mystery solved!

H.J. Lu (hjl@lucon.org)
Tue, 10 Nov 1998 08:45:35 -0800 (PST)


> All,
>
> I tracked down the cause of my mysterious Sun RPC / NIS failures. These
> appeared with the 2.1.127-pre3 patches, and have been relentless. Turns
> out that there's a conflict between one of HJ's kernel patches (supplied
> with knfsd-1022) and the recent scheduler changes. I've not yet tracked
> down exactly which one, but suspect the mods to sunrpc/sched.c and
> sched.h.
>
> All I can verify at the moment is that 2.1.127-pre7 without knfsd has
> an operational RPC layer.

Can you apply all my relevant knfsd patches and then try

# cd .../linux
# patch -p2 -R < my_patch_here

to back out the rpc-related one? That is the only thing I can think of which
affects rpc.

> HJ, you may want to look over the entire set of kernel patches? I'm a bit
> concerned that none of them have been picked up (or even commented on) by
> Linus. You seem convinced that they are (or were) necessary, so perhaps
> you and he should communicate on this matter prior to 2.2 release?

I need time and I also need a stable kernel. I will do it when 2.1.128
is out and stable.

>
> Since many folks are likely to apply the knfsd kernel patches in their
> efforts at migrating to knfsd, certainly this show-stopper with RPC ought
> be addressed.
>

Please give this a try first.

Thanks.

H.J.
-----
Index: linux/linux/include/linux/sunrpc/sched.h
diff -u linux/linux/include/linux/sunrpc/sched.h:1.1.1.5 linux/linux/include/linux/sunrpc/sched.h:1.2
--- linux/linux/include/linux/sunrpc/sched.h:1.1.1.5 Sun Sep 6 19:29:37 1998
+++ linux/linux/include/linux/sunrpc/sched.h Sun Sep 6 20:08:07 1998
@@ -128,7 +128,7 @@
void rpc_execute(struct rpc_task *);
void rpc_run_child(struct rpc_task *parent, struct rpc_task *child,
rpc_action action);
-void rpc_add_wait_queue(struct rpc_wait_queue *, struct rpc_task *);
+int rpc_add_wait_queue(struct rpc_wait_queue *, struct rpc_task *);
void rpc_remove_wait_queue(struct rpc_task *);
void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
rpc_action action, rpc_action timer);
Index: linux/linux/net/sunrpc/sched.c
diff -u linux/linux/net/sunrpc/sched.c:1.1.1.7 linux/linux/net/sunrpc/sched.c:1.2
--- linux/linux/net/sunrpc/sched.c:1.1.1.7 Sun Sep 6 19:33:01 1998
+++ linux/linux/net/sunrpc/sched.c Sun Sep 6 20:08:09 1998
@@ -79,13 +79,16 @@
* improve overall performance.
* Everyone else gets appended to the queue to ensure proper FIFO behavior.
*/
-void
+int
rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
{
if (task->tk_rpcwait) {
if (task->tk_rpcwait != queue)
+ {
printk(KERN_WARNING "RPC: doubly enqueued task!\n");
- return;
+ return -EWOULDBLOCK;
+ }
+ return 0;
}
if (RPC_IS_SWAPPER(task))
rpc_insert_list(&queue->task, task);
@@ -95,6 +98,8 @@

dprintk("RPC: %4d added to queue %p \"%s\"\n",
task->tk_pid, queue, rpc_qname(queue));
+
+ return 0;
}

/*
@@ -168,7 +173,13 @@
return;
}
if (RPC_IS_ASYNC(task)) {
- rpc_add_wait_queue(&schedq, task);
+ int status;
+ status = rpc_add_wait_queue(&schedq, task);
+ if (status)
+ {
+ printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
+ task->tk_status = status;
+ }
wake_up(&rpciod_idle);
} else {
wake_up(&task->tk_wait);
@@ -202,6 +213,7 @@
rpc_action action, rpc_action timer)
{
unsigned long oldflags;
+ int status;

dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid,
rpc_qname(q), jiffies);
@@ -211,11 +223,20 @@
*/
save_flags(oldflags); cli();

- rpc_add_wait_queue(q, task);
- task->tk_callback = action;
- if (task->tk_timeout)
- rpc_add_timer(task, timer);
- task->tk_flags &= ~RPC_TASK_RUNNING;
+ status = rpc_add_wait_queue(q, task);
+ if (status)
+ {
+ printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
+ task->tk_status = status;
+ task->tk_flags |= RPC_TASK_RUNNING;
+ }
+ else
+ {
+ task->tk_callback = action;
+ if (task->tk_timeout)
+ rpc_add_timer(task, timer);
+ task->tk_flags &= ~RPC_TASK_RUNNING;
+ }

restore_flags(oldflags);
return;
Index: linux/linux/net/sunrpc/xprt.c
diff -u linux/linux/net/sunrpc/xprt.c:1.1.1.8 linux/linux/net/sunrpc/xprt.c:1.2
--- linux/linux/net/sunrpc/xprt.c:1.1.1.8 Sun Sep 6 19:33:03 1998
+++ linux/linux/net/sunrpc/xprt.c Sun Sep 6 20:08:10 1998
@@ -935,6 +935,7 @@
struct rpc_timeout *timeo;
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
+ int status;

/*DEBUG*/int ac_debug=xprt->snd_sent;

@@ -992,9 +993,17 @@
* the pending list now:
*/
start_bh_atomic();
- rpc_add_wait_queue(&xprt->pending, task);
- task->tk_callback = NULL;
+ status = rpc_add_wait_queue(&xprt->pending, task);
+ if (!status)
+ task->tk_callback = NULL;
end_bh_atomic();
+
+ if (status)
+ {
+ printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
+ task->tk_status = status;
+ return;
+ }

/* Continue transmitting the packet/record. We must be careful
* to cope with writespace callbacks arriving _after_ we have

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/