Oleg Drokin wrote:
> 
> Hello!
> 
>    It seems I have found some kind of race, if hotplug is enabled,
>    and one have changing number of network interfaces (observed on ppp).
>    I use 2.4.0-test11. Problem is 100% repeatable.
> 
>    My test case is this: I start ppp over ssh session, then do ifconfig
>    and then kill pppd (by kill -9 pid).
>    Immediatelly I getting: 1. waitpid(<somepid>) failed, -512 2. oops.
>    Inspecting oops, I have found that
>    current->files == NULL in kmod.c::exec_usermodehelper.
>    After some investigations, it appears, that when pppd exits,
>    ppp interface gets destroyed, and /sbin/hotplug net is called,
>    using now dead pppd as parent.
>    Simple (and probably not that good) solution is attached as patch, below.
>    It get gid of oops, but waitpid failed thing is still there.
Yup.  This is due to the kernel trying to fork/exec from within
a half-dead process.
Could you please test this patch?  It was sent to Linus for
test12-pre4, but....
--- linux-2.4.0-test12-pre4/include/linux/sched.h	Mon Dec  4 21:07:13 2000
+++ linux-akpm/include/linux/sched.h	Tue Dec  5 00:36:08 2000
@@ -150,6 +150,7 @@
 asmlinkage void schedule(void);
 
 extern void schedule_task(struct tq_struct *task);
+extern int start_context_thread(void);
 
 /*
  * The default fd array needs to be at least BITS_PER_LONG,
--- linux-2.4.0-test12-pre4/kernel/kmod.c	Tue Nov 21 20:11:21 2000
+++ linux-akpm/kernel/kmod.c	Tue Dec  5 00:36:08 2000
@@ -165,7 +165,7 @@
  
 int request_module(const char * module_name)
 {
-	int pid;
+	pid_t pid;
         int waitpid_result;
         sigset_t tmpsig;
         int i;
@@ -259,40 +259,121 @@
 
 static int exec_helper (void *arg)
 {
+	long ret;
         void **params = (void **) arg;
         char *path = (char *) params [0];
         char **argv = (char **) params [1];
         char **envp = (char **) params [2];
-	return exec_usermodehelper (path, argv, envp);
+
+	ret = exec_usermodehelper (path, argv, envp);
+	if (ret < 0)
+		ret = -ret;
+	do_exit(ret);
 }
 
+struct subprocess_info {
+	struct semaphore *sem;
+	char *path;
+	char **argv;
+	char **envp;
+	int retval;
+};
 
-int call_usermodehelper (char *path, char **argv, char **envp)
+/*
+ * This is a standalone child of keventd.  It forks off another thread which
+ * is the desired usermode helper and then waits for the child to exit.
+ * We return the usermode process's exit code, or some -ve error code.
+ */
+static int ____call_usermodehelper(void *data)
 {
-	void *params [3] = { path, argv, envp };
-	int pid, pid2, retval;
+	struct subprocess_info *sub_info = data;
+	struct task_struct *curtask = current;
+	void *params [3] = { sub_info->path, sub_info->argv, sub_info->envp };
+	pid_t pid, pid2;
         mm_segment_t fs;
+	int retval = 0;
 
-	if ( ! current->fs->root ) {
-		printk(KERN_ERR "call_usermodehelper[%s]: no root fs\n",
-			path);
-		return -EPERM;
-	}
-	if ((pid = kernel_thread (exec_helper, (void *) params, 0)) < 0) {
-		printk(KERN_ERR "failed fork %s, errno = %d", argv [0], -pid);
-		return -1;
-	}
+	if (!curtask->fs->root) {
+		printk(KERN_ERR "call_usermodehelper[%s]: no root fs\n", sub_info->path);
+		retval = -EPERM;
+		goto up_and_out;
+	}
+	if ((pid = kernel_thread(exec_helper, (void *) params, 0)) < 0) {
+		printk(KERN_ERR "failed fork2 %s, errno = %d", sub_info->argv[0], -pid);
+		retval = pid;
+		goto up_and_out;
+	}
+
+	if (retval >= 0) {
+		/* Block everything but SIGKILL/SIGSTOP */
+		spin_lock_irq(&curtask->sigmask_lock);
+		siginitsetinv(&curtask->blocked, sigmask(SIGKILL) | sigmask(SIGSTOP));
+		recalc_sigpending(curtask);
+		spin_unlock_irq(&curtask->sigmask_lock);
+
+		/* Allow the system call to access kernel memory */
+		fs = get_fs();
+		set_fs(KERNEL_DS);
+		pid2 = waitpid(pid, &retval, __WCLONE);
+		if (pid2 == -1 && errno < 0)
+			pid2 = errno;
+		set_fs(fs);
+
+		if (pid2 != pid) {
+			printk(KERN_ERR "waitpid(%d) failed, %d\n", pid, pid2);
+			retval = (pid2 < 0) ? pid2 : -1;
+		}
+	}
+
+up_and_out:
+	sub_info->retval = retval;
+	curtask->exit_signal = SIGCHLD;		/* Wake up parent */
+	up_and_exit(sub_info->sem, retval);
+}
 
-	fs = get_fs ();
-	set_fs (KERNEL_DS);
-	pid2 = waitpid (pid, &retval, __WCLONE);
-	set_fs (fs);
+/*
+ * This is a schedule_task function, so we must not sleep for very long at all.
+ * But the exec'ed process could do anything at all.  So we launch another
+ * kernel thread.
+ */
+static void __call_usermodehelper(void *data)
+{
+	struct subprocess_info *sub_info = data;
+	pid_t pid;
 
-	if (pid2 != pid) {
-		printk(KERN_ERR "waitpid(%d) failed, %d\n", pid, pid2);
-			return -1;
+	if ((pid = kernel_thread (____call_usermodehelper, (void *)sub_info, 0)) < 0) {
+		printk(KERN_ERR "failed fork1 %s, errno = %d", sub_info->argv[0], -pid);
+		sub_info->retval = pid;
+		up(sub_info->sem);
         }
-	return retval;
+}
+
+/*
+ * This function can be called via do_exit->__exit_files, which means that
+ * we're partway through exitting and things break if we fork children.
+ * So we use keventd to parent the usermode helper.
+ * We return the usermode application's exit code or some -ve error.
+ */
+int call_usermodehelper (char *path, char **argv, char **envp)
+{
+	DECLARE_MUTEX_LOCKED(sem);
+	struct subprocess_info sub_info = {
+		sem:	&sem,
+		path:	path,
+		argv:	argv,
+		envp:	envp,
+		retval:	0,
+	};
+	struct tq_struct tqs = {
+		next:		0,
+		sync:		0,
+		routine:	__call_usermodehelper,
+		data:		&sub_info,
+	};
+
+	schedule_task(&tqs);
+	down(&sem);		/* Wait for an error or completion */
+	return sub_info.retval;
 }
 
 EXPORT_SYMBOL(exec_usermodehelper);
--- linux-2.4.0-test12-pre4/kernel/context.c	Tue Nov 21 20:11:21 2000
+++ linux-akpm/kernel/context.c	Tue Dec  5 00:36:08 2000
@@ -6,16 +6,23 @@
  * dwmw2@redhat.com
  */
 
+#define __KERNEL_SYSCALLS__
+
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/init.h>
+#include <linux/unistd.h>
+#include <linux/signal.h>
 
 static DECLARE_TASK_QUEUE(tq_context);
 static DECLARE_WAIT_QUEUE_HEAD(context_task_wq);
+static int keventd_running;
 
 void schedule_task(struct tq_struct *task)
 {
+	if (keventd_running == 0)
+		printk(KERN_ERR "schedule_task(): keventd has not started\n");
         queue_task(task, &tq_context);
         wake_up(&context_task_wq);
 }
@@ -24,18 +31,27 @@
 
 static int context_thread(void *dummy)
 {
-	DECLARE_WAITQUEUE(wait, current);
+	struct task_struct *curtask = current;
+	DECLARE_WAITQUEUE(wait, curtask);
+	struct k_sigaction sa;
 
         daemonize();
-	strcpy(current->comm, "eventd");
+	strcpy(curtask->comm, "keventd");
+	keventd_running = 1;
 
-        spin_lock_irq(¤t->sigmask_lock);
-        sigfillset(¤t->blocked);
-        recalc_sigpending(current);
-        spin_unlock_irq(¤t->sigmask_lock);
+	spin_lock_irq(&curtask->sigmask_lock);
+	siginitsetinv(&curtask->blocked, sigmask(SIGCHLD));
+	recalc_sigpending(curtask);
+	spin_unlock_irq(&curtask->sigmask_lock);
+
+	/* Install a handler so SIGCLD is delivered */
+	sa.sa.sa_handler = SIG_IGN;
+	sa.sa.sa_flags = 0;
+	siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD));
+	do_sigaction(SIGCHLD, &sa, (struct k_sigaction *)0);
 
         for (;;) {
-		current->state = TASK_INTERRUPTIBLE;
+		__set_task_state(curtask, TASK_INTERRUPTIBLE);
                 add_wait_queue(&context_task_wq, &wait);
 
                 /*
@@ -46,15 +62,19 @@
                         schedule();
 
                 remove_wait_queue(&context_task_wq, &wait);
-		current->state = TASK_RUNNING;
+		__set_task_state(curtask, TASK_RUNNING);
                 run_task_queue(&tq_context);
+		if (signal_pending(curtask)) {
+			while (waitpid(-1, (unsigned int *)0, __WALL|WNOHANG) > 0)
+				;
+			flush_signals(curtask);
+			recalc_sigpending(curtask);
+		}
         }
 }
 
-static int __init start_context_thread(void)
+int start_context_thread(void)
 {
         kernel_thread(context_thread, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
         return 0;
 }
-
-module_init(start_context_thread);
--- linux-2.4.0-test12-pre4/init/main.c	Mon Dec  4 21:07:13 2000
+++ linux-akpm/init/main.c	Tue Dec  5 00:36:08 2000
@@ -700,6 +700,7 @@
         else mount_initrd =0;
 #endif
 
+	start_context_thread();
         do_initcalls();
 
         /* .. filesystems .. */
@@ -712,16 +713,16 @@
         init_pcmcia_ds();		/* Do this last */
 #endif
 
-#ifdef CONFIG_HOTPLUG
+	/* Mount the root filesystem.. */
+	mount_root();
+
+#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
         /* do this after other 'do this last' stuff, because we want
          * to minimize spurious executions of /sbin/hotplug
          * during boot-up
          */
         net_notifier_init();
 #endif
-
-	/* Mount the root filesystem.. */
-	mount_root();
 
         mount_devfs_fs ();
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
Please read the FAQ at http://www.tux.org/lkml/
This archive was generated by hypermail 2b29 : Thu Dec 07 2000 - 21:00:11 EST