the attached patch (against BK-curr) fixes the lockup.
The bug happened because reparenting in the CLONE_THREAD case was done in
a fundamentally non-atomic way, which was asking for various races to
happen: eg. the target parent gets reparented to the currently exiting
thread ...
(the non-CLONE_THREAD case is safe because nothing reparents init.)
the solution is to make all of reparenting atomic (including the
forget_original_parent() bit) - this is possible with some reorganization
done in signal.c and exit.c. This also made some of the loops simpler.
(the kernel has now survived 30 minutes of an extreme-threading test,
which would produce a lockup within 1 minute formerly.)
(btw., the lockup is not unique to SMP, it could also have happen on UP
with preempt enabled.)
Ingo
--- linux/include/linux/sched.h.orig Mon Sep 9 23:14:26 2002
+++ linux/include/linux/sched.h Tue Sep 10 11:44:40 2002
@@ -547,6 +547,7 @@
extern void unblock_all_signals(void);
extern int send_sig_info(int, struct siginfo *, struct task_struct *);
extern int force_sig_info(int, struct siginfo *, struct task_struct *);
+extern int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp);
extern int kill_pg_info(int, struct siginfo *, pid_t);
extern int kill_sl_info(int, struct siginfo *, pid_t);
extern int kill_proc_info(int, struct siginfo *, pid_t);
--- linux/kernel/exit.c.orig Mon Sep 9 21:59:24 2002
+++ linux/kernel/exit.c Tue Sep 10 11:47:48 2002
@@ -125,11 +125,10 @@
*
* "I ask you, have you ever known what it is to be an orphan?"
*/
-static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task)
+static int __will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task)
{
struct task_struct *p;
- read_lock(&tasklist_lock);
for_each_task(p) {
if ((p == ignored_task) || (p->pgrp != pgrp) ||
(p->state == TASK_ZOMBIE) ||
@@ -137,25 +136,33 @@
continue;
if ((p->parent->pgrp != pgrp) &&
(p->parent->session == p->session)) {
- read_unlock(&tasklist_lock);
return 0;
}
}
- read_unlock(&tasklist_lock);
return 1; /* (sighing) "Often!" */
}
+static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task)
+{
+ int retval;
+
+ read_lock(&tasklist_lock);
+ retval = __will_become_orphaned_pgrp(pgrp, ignored_task);
+ read_unlock(&tasklist_lock);
+
+ return retval;
+}
+
int is_orphaned_pgrp(int pgrp)
{
return will_become_orphaned_pgrp(pgrp, 0);
}
-static inline int has_stopped_jobs(int pgrp)
+static inline int __has_stopped_jobs(int pgrp)
{
int retval = 0;
struct task_struct * p;
- read_lock(&tasklist_lock);
for_each_task(p) {
if (p->pgrp != pgrp)
continue;
@@ -164,7 +171,17 @@
retval = 1;
break;
}
+ return retval;
+}
+
+static inline int has_stopped_jobs(int pgrp)
+{
+ int retval;
+
+ read_lock(&tasklist_lock);
+ retval = __has_stopped_jobs(pgrp);
read_unlock(&tasklist_lock);
+
return retval;
}
@@ -253,6 +270,8 @@
p->real_parent = child_reaper;
else
p->real_parent = reaper;
+ if (p->parent == p->real_parent)
+ BUG();
if (p->pdeath_signal)
send_sig(p->pdeath_signal, p, 0);
@@ -416,8 +435,6 @@
struct task_struct *p, *reaper = father;
struct list_head *_p;
- write_lock_irq(&tasklist_lock);
-
if (father->exit_signal != -1)
reaper = prev_thread(reaper);
else
@@ -442,7 +459,6 @@
p = list_entry(_p,struct task_struct,ptrace_list);
reparent_thread(p, reaper, child_reaper);
}
- write_unlock_irq(&tasklist_lock);
}
static inline void zap_thread(task_t *p, task_t *father, int traced)
@@ -477,12 +493,10 @@
(p->session == current->session)) {
int pgrp = p->pgrp;
- write_unlock_irq(&tasklist_lock);
- if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) {
- kill_pg(pgrp,SIGHUP,1);
- kill_pg(pgrp,SIGCONT,1);
+ if (__will_become_orphaned_pgrp(pgrp, 0) && __has_stopped_jobs(pgrp)) {
+ __kill_pg_info(SIGHUP, (void *)1, pgrp);
+ __kill_pg_info(SIGCONT, (void *)1, pgrp);
}
- write_lock_irq(&tasklist_lock);
}
}
@@ -493,7 +507,8 @@
static void exit_notify(void)
{
struct task_struct *t;
- struct list_head *_p, *_n;
+
+ write_lock_irq(&tasklist_lock);
forget_original_parent(current);
/*
@@ -510,10 +525,10 @@
if ((t->pgrp != current->pgrp) &&
(t->session == current->session) &&
- will_become_orphaned_pgrp(current->pgrp, current) &&
- has_stopped_jobs(current->pgrp)) {
- kill_pg(current->pgrp,SIGHUP,1);
- kill_pg(current->pgrp,SIGCONT,1);
+ __will_become_orphaned_pgrp(current->pgrp, current) &&
+ __has_stopped_jobs(current->pgrp)) {
+ __kill_pg_info(SIGHUP, (void *)1, current->pgrp);
+ __kill_pg_info(SIGCONT, (void *)1, current->pgrp);
}
/* Let father know we died
@@ -548,24 +563,16 @@
* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
*/
- write_lock_irq(&tasklist_lock);
current->state = TASK_ZOMBIE;
if (current->exit_signal != -1)
do_notify_parent(current, current->exit_signal);
zap_again:
- list_for_each_safe(_p, _n, ¤t->children)
- zap_thread(list_entry(_p,struct task_struct,sibling), current, 0);
- list_for_each_safe(_p, _n, ¤t->ptrace_children)
- zap_thread(list_entry(_p,struct task_struct,ptrace_list), current, 1);
- /*
- * zap_thread might drop the tasklist lock, thus we could
- * have new children queued back from the ptrace list into the
- * child list:
- */
- if (unlikely(!list_empty(¤t->children) ||
- !list_empty(¤t->ptrace_children)))
- goto zap_again;
+ while (!list_empty(¤t->children))
+ zap_thread(list_entry(current->children.next,struct task_struct,sibling), current, 0);
+ while (!list_empty(¤t->ptrace_children))
+ zap_thread(list_entry(current->ptrace_children.next,struct task_struct,sibling), current, 1);
+ BUG_ON(!list_empty(¤t->children));
/*
* No need to unlock IRQs, we'll schedule() immediately
* anyway. In the preemption case this also makes it
--- linux/kernel/signal.c.orig Mon Sep 9 23:09:06 2002
+++ linux/kernel/signal.c Mon Sep 9 23:14:12 2002
@@ -881,15 +881,13 @@
* control characters do (^C, ^Z etc)
*/
-int
-kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
+int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
{
int retval = -EINVAL;
if (pgrp > 0) {
struct task_struct *p;
retval = -ESRCH;
- read_lock(&tasklist_lock);
for_each_task(p) {
if (p->pgrp == pgrp && thread_group_leader(p)) {
int err = send_sig_info(sig, info, p);
@@ -897,8 +895,19 @@
retval = err;
}
}
- read_unlock(&tasklist_lock);
}
+ return retval;
+}
+
+int
+kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
+{
+ int retval;
+
+ read_lock(&tasklist_lock);
+ retval = __kill_pg_info(sig, info, pgrp);
+ read_unlock(&tasklist_lock);
+
return retval;
}
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
This archive was generated by hypermail 2b29 : Sun Sep 15 2002 - 22:00:20 EST