[patch][rfc] add PR_DETACH prctl command

From: Stas Sergeev
Date: Wed Feb 23 2011 - 08:55:22 EST


Hi.

The attached patch adds the PR_DETACH prctl command.
It is needed for those rare but unfortunate cases, where
you can't daemonize your process before creating a thread.
The effect of this command is similar to calling fork() and then
exit() in the parent, except that:
1. PID does not change
2. Threads are not destroyed

It would be nice to know what people think about such an
approach.

Signed-off-by: stsp@xxxxxxxx
CC: Oleg Nesterov <oleg@xxxxxxxxxx>
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 7c99c1c..ccccfa8 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -462,7 +462,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
/* convert nsec -> ticks */
start_time = nsec_to_clock_t(start_time);

- seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
+ seq_printf(m, "%d (%s) %c %d %d %d %d %d %llu %lu \
%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
pid_nr_ns(pid, ns),
diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h
index 942d30b..1da9c20 100644
--- a/include/asm-generic/siginfo.h
+++ b/include/asm-generic/siginfo.h
@@ -218,7 +218,8 @@ typedef struct siginfo {
#define CLD_TRAPPED (__SI_CHLD|4) /* traced child has trapped */
#define CLD_STOPPED (__SI_CHLD|5) /* child has stopped */
#define CLD_CONTINUED (__SI_CHLD|6) /* stopped child has continued */
-#define NSIGCHLD 6
+#define CLD_DETACHED (__SI_CHLD|7) /* child has detached */
+#define NSIGCHLD 7

/*
* SIGPOLL si_codes
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index a3baeb2..fbd2451 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -102,4 +102,6 @@

#define PR_MCE_KILL_GET 34

+#define PR_DETACH 35
+
#endif /* _LINUX_PRCTL_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 777d8a5..75c977e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1194,7 +1194,7 @@ struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
void *stack;
atomic_t usage;
- unsigned int flags; /* per process flags, defined below */
+ u64 flags; /* per process flags, defined below */
unsigned int ptrace;

int lock_depth; /* BKL lock depth */
@@ -1746,6 +1746,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */
+#define PF_DETACH 0x100000000ULL /* Detach from parent */

/*
* Only the _current_ task can read/write to tsk->flags, but other
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 156cc55..f11c1ca 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -181,7 +181,7 @@ static inline void check_for_tasks(int cpu)
(!cputime_eq(p->utime, cputime_zero) ||
!cputime_eq(p->stime, cputime_zero)))
printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
- "(state = %ld, flags = %x)\n",
+ "(state = %ld, flags = %llx)\n",
p->comm, task_pid_nr(p), cpu,
p->state, p->flags);
}
diff --git a/kernel/exit.c b/kernel/exit.c
index f9a45eb..2c8f050 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1507,6 +1507,38 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
return retval;
}

+static int wait_task_detached(struct wait_opts *wo, struct task_struct *p)
+{ /* Report a PF_DETACH child @p to the waiter as CLD_DETACHED, then hand it over to init's child reaper. */
+ int retval = 0;
+ pid_t pid = task_pid_vnr(p);
+ uid_t uid = __task_cred(p)->uid;
+
+ get_task_struct(p); /* taken before tasklist_lock is dropped below */
+ if (unlikely(wo->wo_flags & WNOWAIT)) { /* WNOWAIT: report only; PF_DETACH stays set and @p is not reparented */
+ read_unlock(&tasklist_lock);
+ return wait_noreap_copyout(wo, p, pid, uid, CLD_DETACHED,
+ p->exit_code >> 8);
+ }
+
+ p->flags &= ~PF_DETACH; /* NOTE(review): writes another task's flags; the sched.h comment says only current may write tsk->flags - confirm this is safe */
+ if (!ptrace_reparented(p))
+ p->parent = init_pid_ns.child_reaper;
+ p->real_parent = init_pid_ns.child_reaper;
+ p->exit_signal = SIGCHLD;
+ list_move_tail(&p->sibling, &p->real_parent->children); /* NOTE(review): parent pointers and the children list are modified before read_unlock(), i.e. presumably under a read lock only - confirm whether a write lock is required */
+
+ read_unlock(&tasklist_lock);
+ if (wo->wo_stat)
+ retval = put_user(p->exit_code, wo->wo_stat); /* raw exit_code; the PR_DETACH prctl stores it as (arg2 << 8) */
+
+ if (!retval)
+ retval = wait_noreap_copyout(wo, p, pid, uid, CLD_DETACHED, /* presumably drops the task reference itself - confirm */
+ p->exit_code >> 8);
+ else
+ put_task_struct(p); /* put_user() failed: drop the reference taken above */
+ return retval;
+}
+
/*
* Consider @p for a wait by @parent.
*
@@ -1549,6 +1581,9 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
if (p->exit_state == EXIT_DEAD)
return 0;

+ if (p->flags & PF_DETACH)
+ return wait_task_detached(wo, p);
+
/*
* We don't reap group leaders with subthreads.
*/
diff --git a/kernel/signal.c b/kernel/signal.c
index 4e3cff1..2cd495a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1450,10 +1450,10 @@ int do_notify_parent(struct task_struct *tsk, int sig)

BUG_ON(sig == -1);

- /* do_notify_parent_cldstop should have been called instead. */
- BUG_ON(task_is_stopped_or_traced(tsk));
+ /* do_notify_parent_cldstop should have been called instead. */
+ BUG_ON(task_is_stopped_or_traced(tsk));

- BUG_ON(!task_ptrace(tsk) &&
+ BUG_ON(!task_ptrace(tsk) && (tsk->flags & PF_EXITING) &&
(tsk->group_leader != tsk || !thread_group_empty(tsk)));

info.si_signo = sig;
diff --git a/kernel/sys.c b/kernel/sys.c
index 18da702..c09205f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1736,6 +1736,22 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
else
error = PR_MCE_KILL_DEFAULT;
break;
+ case PR_DETACH:
+ error = -EPERM;
+ /* if parent is init, or not a group leader - bail */
+ if (me->real_parent == init_pid_ns.child_reaper)
+ break;
+ if (me->group_leader != me)
+ break;
+ if (arg2 & ~0xff)
+ break;
+ write_lock_irq(&tasklist_lock);
+ me->exit_code = arg2 << 8;
+ me->flags |= PF_DETACH;
+ do_notify_parent(me, me->exit_signal);
+ write_unlock_irq(&tasklist_lock);
+ error = 0;
+ break;
default:
error = -EINVAL;
break;