Re: [PATCH] [RFC] fix missed SIGCONT cases
From: Oleg Nesterov
Date: Thu Feb 28 2008 - 10:27:24 EST
On 02/28, Roland McGrath wrote:
>
> -static void handle_stop_signal(int sig, struct task_struct *p)
> +static int prepare_signal(int sig, struct task_struct *p)
> {
> struct task_struct *t;
> + int cont = 0;
>
> if (p->signal->flags & SIGNAL_GROUP_EXIT)
> /*
> * The process is in the middle of dying already.
> */
> - return;
> + return cont;
>
> if (sig_kernel_stop(sig)) {
> /*
> @@ -635,11 +639,9 @@ static void handle_stop_signal(int sig, struct task_struct *p)
> * We were in fact stopped, and are now continued.
> * Notify the parent with CLD_CONTINUED.
> */
> + cont = 1;
> p->signal->flags = SIGNAL_STOP_CONTINUED;
> p->signal->group_exit_code = 0;
> - spin_unlock(&p->sighand->siglock);
> - do_notify_parent_cldstop(p, CLD_CONTINUED);
> - spin_lock(&p->sighand->siglock);
> } else {
Oh, I'd like to suggest something simpler, but I can't :(
BTW, I think we have the same problem when handle_stop_signal() does
do_notify_parent_cldstop(p, CLD_STOPPED) above. The multithreaded app
in the middle of the group stop can resume without actually seeing
SIGCONT, no?
Currently I have the very vagues idea, please see the "patch" below
(it is not right of course, just for illustration).
These unlock(->siglock)'s on the signal sending path are really nasty,
it would be wonderfull to avoid them. Note also sig_needs_tasklist(),
we take tasklist_lock() exactly because we will probably drop siglock.
What do you think about the approach at least?
Oleg.
--- include/linux/sched.h 2008-02-15 16:59:17.000000000 +0300
+++ include/linux/sched.h 2008-02-28 18:12:20.833069408 +0300
@@ -455,6 +455,10 @@ struct signal_struct {
int group_stop_count;
unsigned int flags; /* see SIGNAL_* flags below */
+ #define __CLD_STOPPED 1
+ #define __CLD_CONTINUED 2
+ unsigned int notify_parent; /* can live in fact in ->flags */
+
/* POSIX.1b Interval Timers */
struct list_head posix_timers;
--- kernel/signal.c 2008-02-15 16:59:17.000000000 +0300
+++ kernel/signal.c 2008-02-28 18:19:01.542067371 +0300
@@ -596,9 +596,7 @@ static void handle_stop_signal(int sig,
*/
p->signal->group_stop_count = 0;
p->signal->flags = SIGNAL_STOP_CONTINUED;
- spin_unlock(&p->sighand->siglock);
- do_notify_parent_cldstop(p, CLD_STOPPED);
- spin_lock(&p->sighand->siglock);
+ p->signal->notify_parent |= __CLD_STOPPED;
}
rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending);
t = p;
@@ -637,9 +635,7 @@ static void handle_stop_signal(int sig,
*/
p->signal->flags = SIGNAL_STOP_CONTINUED;
p->signal->group_exit_code = 0;
- spin_unlock(&p->sighand->siglock);
- do_notify_parent_cldstop(p, CLD_CONTINUED);
- spin_lock(&p->sighand->siglock);
+ p->signal->notify_parent |= __CLD_CONTINUED;
} else {
/*
* We are not stopped, but there could be a stop
@@ -1755,6 +1751,7 @@ int get_signal_to_deliver(siginfo_t *inf
struct pt_regs *regs, void *cookie)
{
sigset_t *mask = ¤t->blocked;
+ int notify_parent;
int signr = 0;
try_to_freeze();
@@ -1890,7 +1887,15 @@ relock:
do_group_exit(signr);
/* NOTREACHED */
}
+ notify_parent = current->signal->notify_parent;
+ current->signal->notify_parent = 0;
spin_unlock_irq(¤t->sighand->siglock);
+
+ if (notify_parent & __CLD_STOPPED)
+ do_notify_parent_cldstop(p, CLD_STOPPED);
+ if (notify_parent & __CLD_CONTINUED)
+ do_notify_parent_cldstop(p, CLD_CONTINUED);
+
return signr;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/