[PATCH 2/4] ptrace children revamp

From: Roland McGrath
Date: Mon May 05 2008 - 20:33:56 EST


ptrace no longer fiddles with the children/sibling links, and the
old ptrace_children list is gone. Now ptrace, whether of one's own
children or another's via PTRACE_ATTACH, just uses the new ptraced
list instead.

There should be no user-visible difference that matters. The only
change is the order in which do_wait() sees multiple stopped
children and stopped ptrace attachees. Since wait_task_stopped()
was changed earlier so it no longer reorders the children list, we
already know this won't cause any new problems.

Signed-off-by: Roland McGrath <roland@xxxxxxxxxx>
---
include/linux/init_task.h | 4 +-
include/linux/sched.h | 26 +++---
kernel/exit.c | 224 ++++++++++++++++++++++++---------------------
kernel/fork.c | 6 +-
kernel/ptrace.c | 37 +++++---
5 files changed, 159 insertions(+), 138 deletions(-)

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index b24c287..2a53494 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -161,8 +161,8 @@ extern struct group_info init_groups;
.nr_cpus_allowed = NR_CPUS, \
}, \
.tasks = LIST_HEAD_INIT(tsk.tasks), \
- .ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \
- .ptrace_list = LIST_HEAD_INIT(tsk.ptrace_list), \
+ .ptraced = LIST_HEAD_INIT(tsk.ptraced), \
+ .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \
.real_parent = &tsk, \
.parent = &tsk, \
.children = LIST_HEAD_INIT(tsk.children), \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 03c2380..3be5c96 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1074,12 +1074,6 @@ struct task_struct {
#endif

struct list_head tasks;
- /*
- * ptrace_list/ptrace_children forms the list of my children
- * that were stolen by a ptracer.
- */
- struct list_head ptrace_children;
- struct list_head ptrace_list;

struct mm_struct *mm, *active_mm;

@@ -1101,18 +1095,25 @@ struct task_struct {
/*
* pointers to (original) parent process, youngest child, younger sibling,
* older sibling, respectively. (p->father can be replaced with
- * p->parent->pid)
+ * p->real_parent->pid)
*/
- struct task_struct *real_parent; /* real parent process (when being debugged) */
- struct task_struct *parent; /* parent process */
+ struct task_struct *real_parent; /* real parent process */
+ struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */
/*
- * children/sibling forms the list of my children plus the
- * tasks I'm ptracing.
+ * children/sibling forms the list of my natural children
*/
struct list_head children; /* list of my children */
struct list_head sibling; /* linkage in my parent's children list */
struct task_struct *group_leader; /* threadgroup leader */

+ /*
+ * ptraced is the list of tasks this task is using ptrace on.
+ * This includes both natural children and PTRACE_ATTACH targets.
+ * p->ptrace_entry is p's link on the p->parent->ptraced list.
+ */
+ struct list_head ptraced;
+ struct list_head ptrace_entry;
+
/* PID/PID hash table linkage. */
struct pid_link pids[PIDTYPE_MAX];
struct list_head thread_group;
@@ -1838,9 +1839,6 @@ extern void wait_task_inactive(struct task_struct * p);
#define wait_task_inactive(p) do { } while (0)
#endif

-#define remove_parent(p) list_del_init(&(p)->sibling)
-#define add_parent(p) list_add_tail(&(p)->sibling,&(p)->parent->children)
-
#define next_task(p) list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks)

#define for_each_process(p) \
diff --git a/kernel/exit.c b/kernel/exit.c
index 934371a..c95765f 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -70,7 +70,7 @@ static void __unhash_process(struct task_struct *p)
__get_cpu_var(process_counts)--;
}
list_del_rcu(&p->thread_group);
- remove_parent(p);
+ list_del_init(&p->sibling);
}

/*
@@ -146,6 +146,18 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
put_task_struct(container_of(rhp, struct task_struct, rcu));
}

+/*
+ * Do final ptrace-related cleanup of a zombie being reaped.
+ *
+ * Called with write_lock(&tasklist_lock) held.
+ */
+static void ptrace_release_task(struct task_struct *p)
+{
+ BUG_ON(!list_empty(&p->ptraced));
+ ptrace_unlink(p);
+ BUG_ON(!list_empty(&p->ptrace_entry));
+}
+
void release_task(struct task_struct * p)
{
struct task_struct *leader;
@@ -154,8 +166,7 @@ repeat:
atomic_dec(&p->user->processes);
proc_flush_task(p);
write_lock_irq(&tasklist_lock);
- ptrace_unlink(p);
- BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
+ ptrace_release_task(p);
__exit_signal(p);

/*
@@ -309,9 +320,8 @@ static void reparent_to_kthreadd(void)

ptrace_unlink(current);
/* Reparent to init */
- remove_parent(current);
current->real_parent = current->parent = kthreadd_task;
- add_parent(current);
+ list_move_tail(&current->sibling, &current->real_parent->children);

/* Set the exit signal to SIGCHLD so we signal init on exit */
current->exit_signal = SIGCHLD;
@@ -686,37 +696,71 @@ static void exit_mm(struct task_struct * tsk)
mmput(mm);
}

-static void
-reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
+/*
+ * Detach all tasks we were using ptrace on.
+ * Any that need to be release_task'd are put on the @dead list.
+ *
+ * Called with write_lock(&tasklist_lock) held.
+ */
+static void ptrace_exit(struct task_struct *parent, struct list_head *dead)
{
- if (p->pdeath_signal)
- /* We already hold the tasklist_lock here. */
- group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
+ struct task_struct *p, *n;

- /* Move the child from its dying parent to the new one. */
- if (unlikely(traced)) {
- /* Preserve ptrace links if someone else is tracing this child. */
- list_del_init(&p->ptrace_list);
- if (ptrace_reparented(p))
- list_add(&p->ptrace_list, &p->real_parent->ptrace_children);
- } else {
- /* If this child is being traced, then we're the one tracing it
- * anyway, so let go of it.
+ list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) {
+ __ptrace_unlink(p);
+
+ if (p->exit_state != EXIT_ZOMBIE)
+ continue;
+
+ /*
+ * If it's a zombie, our attachedness prevented normal
+ * parent notification or self-reaping. Do notification
+ * now if it would have happened earlier. If it should
+ * reap itself, add it to the @dead list. We can't call
+ * release_task() here because we already hold tasklist_lock.
+ *
+ * If it's our own child, there is no notification to do.
*/
- p->ptrace = 0;
- remove_parent(p);
- p->parent = p->real_parent;
- add_parent(p);
+ if (!task_detached(p) && thread_group_empty(p)) {
+ if (!same_thread_group(p->real_parent, parent))
+ do_notify_parent(p, p->exit_signal);
+ }

- if (task_is_traced(p)) {
+ if (task_detached(p)) {
/*
- * If it was at a trace stop, turn it into
- * a normal stop since it's no longer being
- * traced.
+ * Mark it as in the process of being reaped.
*/
- ptrace_untrace(p);
+ p->exit_state = EXIT_DEAD;
+ list_add(&p->ptrace_entry, dead);
}
}
+}
+
+/*
+ * Finish up exit-time ptrace cleanup.
+ *
+ * Called without locks.
+ */
+static void ptrace_exit_finish(struct task_struct *parent,
+ struct list_head *dead)
+{
+ struct task_struct *p, *n;
+
+ BUG_ON(!list_empty(&parent->ptraced));
+
+ list_for_each_entry_safe(p, n, dead, ptrace_entry) {
+ list_del_init(&p->ptrace_entry);
+ release_task(p);
+ }
+}
+
+static void reparent_thread(struct task_struct *p, struct task_struct *father)
+{
+ if (p->pdeath_signal)
+ /* We already hold the tasklist_lock here. */
+ group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
+
+ list_move_tail(&p->sibling, &p->real_parent->children);

/* If this is a threaded reparent there is no need to
* notify anyone anything has happened.
@@ -731,7 +775,8 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
/* If we'd notified the old parent about this child's death,
* also notify the new parent.
*/
- if (!traced && p->exit_state == EXIT_ZOMBIE &&
+ if (!ptrace_reparented(p) &&
+ p->exit_state == EXIT_ZOMBIE &&
!task_detached(p) && thread_group_empty(p))
do_notify_parent(p, p->exit_signal);

@@ -748,12 +793,15 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
static void forget_original_parent(struct task_struct *father)
{
struct task_struct *p, *n, *reaper = father;
- struct list_head ptrace_dead;
-
- INIT_LIST_HEAD(&ptrace_dead);
+ LIST_HEAD(ptrace_dead);

write_lock_irq(&tasklist_lock);

+ /*
+ * First clean up ptrace if we were using it.
+ */
+ ptrace_exit(father, &ptrace_dead);
+
do {
reaper = next_thread(reaper);
if (reaper == father) {
@@ -762,58 +810,19 @@ static void forget_original_parent(struct task_struct *father)
}
} while (reaper->flags & PF_EXITING);

- /*
- * There are only two places where our children can be:
- *
- * - in our child list
- * - in our ptraced child list
- *
- * Search them and reparent children.
- */
list_for_each_entry_safe(p, n, &father->children, sibling) {
- int ptrace;
-
- ptrace = p->ptrace;
-
- /* if father isn't the real parent, then ptrace must be enabled */
- BUG_ON(father != p->real_parent && !ptrace);
-
- if (father == p->real_parent) {
- /* reparent with a reaper, real father it's us */
- p->real_parent = reaper;
- reparent_thread(p, father, 0);
- } else {
- /* reparent ptraced task to its real parent */
- __ptrace_unlink (p);
- if (p->exit_state == EXIT_ZOMBIE && !task_detached(p) &&
- thread_group_empty(p))
- do_notify_parent(p, p->exit_signal);
- }
-
- /*
- * if the ptraced child is a detached zombie we must collect
- * it before we exit, or it will remain zombie forever since
- * we prevented it from self-reap itself while it was being
- * traced by us, to be able to see it in wait4.
- */
- if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && task_detached(p)))
- list_add(&p->ptrace_list, &ptrace_dead);
- }
-
- list_for_each_entry_safe(p, n, &father->ptrace_children, ptrace_list) {
p->real_parent = reaper;
- reparent_thread(p, father, 1);
+ if (p->parent == father) {
+ BUG_ON(p->ptrace);
+ p->parent = p->real_parent;
+ }
+ reparent_thread(p, father);
}

write_unlock_irq(&tasklist_lock);
BUG_ON(!list_empty(&father->children));
- BUG_ON(!list_empty(&father->ptrace_children));
-
- list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_list) {
- list_del_init(&p->ptrace_list);
- release_task(p);
- }

+ ptrace_exit_finish(father, &ptrace_dead);
}

/*
@@ -1174,13 +1183,6 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
return 0;
}

- /*
- * Do not consider detached threads that are
- * not ptraced:
- */
- if (task_detached(p) && !p->ptrace)
- return 0;
-
/* Wait for all children (clone and not) if __WALL is set;
* otherwise, wait for clone children *only* if __WCLONE is
* set; otherwise, wait for non-clone children *only*. (Note:
@@ -1393,7 +1395,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
* the lock and this task is uninteresting. If we return nonzero, we have
* released the lock and the system call should return.
*/
-static int wait_task_stopped(struct task_struct *p,
+static int wait_task_stopped(int ptrace, struct task_struct *p,
int options, struct siginfo __user *infop,
int __user *stat_addr, struct rusage __user *ru)
{
@@ -1401,7 +1403,7 @@ static int wait_task_stopped(struct task_struct *p,
uid_t uid = 0; /* unneeded, required by compiler */
pid_t pid;

- if (!(p->ptrace & PT_PTRACED) && !(options & WUNTRACED))
+ if (!(options & WUNTRACED))
return 0;

exit_code = 0;
@@ -1410,7 +1412,7 @@ static int wait_task_stopped(struct task_struct *p,
if (unlikely(!task_is_stopped_or_traced(p)))
goto unlock_sig;

- if (!(p->ptrace & PT_PTRACED) && p->signal->group_stop_count > 0)
+ if (!ptrace && p->signal->group_stop_count > 0)
/*
* A group stop is in progress and this is the group leader.
* We won't report until all threads have stopped.
@@ -1439,7 +1441,7 @@ unlock_sig:
*/
get_task_struct(p);
pid = task_pid_vnr(p);
- why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
+ why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
read_unlock(&tasklist_lock);

if (unlikely(options & WNOWAIT))
@@ -1530,7 +1532,7 @@ static int wait_task_continued(struct task_struct *p, int options,
* Returns zero if the search for a child should continue; then *@retval is
* 0 if @p is an eligible child, or still -ECHILD.
*/
-static int wait_consider_task(struct task_struct *parent,
+static int wait_consider_task(struct task_struct *parent, int ptrace,
struct task_struct *p, int *retval,
enum pid_type type, struct pid *pid, int options,
struct siginfo __user *infop,
@@ -1540,6 +1542,15 @@ static int wait_consider_task(struct task_struct *parent,
if (ret <= 0)
return ret;

+ if (likely(!ptrace) && unlikely(p->ptrace)) {
+ /*
+ * This child is hidden by ptrace.
+ * We aren't allowed to see it now, but eventually we will.
+ */
+ *retval = 0;
+ return 0;
+ }
+
if (p->exit_state == EXIT_DEAD)
return 0;

@@ -1556,7 +1567,8 @@ static int wait_consider_task(struct task_struct *parent,
*retval = 0;

if (task_is_stopped_or_traced(p))
- return wait_task_stopped(p, options, infop, stat_addr, ru);
+ return wait_task_stopped(ptrace, p, options,
+ infop, stat_addr, ru);

return wait_task_continued(p, options, infop, stat_addr, ru);
}
@@ -1577,10 +1589,16 @@ static int do_wait_thread(struct task_struct *tsk, int *retval,
struct task_struct *p;

list_for_each_entry(p, &tsk->children, sibling) {
- int ret = wait_consider_task(tsk, p, retval, type, pid, options,
- infop, stat_addr, ru);
- if (ret)
- return ret;
+ /*
+ * Do not consider detached threads.
+ */
+ if (!task_detached(p)) {
+ int ret = wait_consider_task(tsk, 0, p, retval,
+ type, pid, options,
+ infop, stat_addr, ru);
+ if (ret)
+ return ret;
+ }
}

return 0;
@@ -1594,21 +1612,15 @@ static int ptrace_do_wait(struct task_struct *tsk, int *retval,
struct task_struct *p;

/*
- * If we never saw an eligile child, check for children stolen by
- * ptrace. We don't leave -ECHILD in *@retval if there are any,
- * because we will eventually be allowed to wait for them again.
+ * Traditionally we see ptrace'd stopped tasks regardless of options.
*/
- if (!*retval)
- return 0;
+ options |= WUNTRACED;

- list_for_each_entry(p, &tsk->ptrace_children, ptrace_list) {
- int ret = eligible_child(type, pid, options, p);
- if (unlikely(ret < 0))
+ list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
+ int ret = wait_consider_task(tsk, 1, p, retval, type, pid,
+ options, infop, stat_addr, ru);
+ if (ret)
return ret;
- if (ret) {
- *retval = 0;
- return 0;
- }
}

return 0;
diff --git a/kernel/fork.c b/kernel/fork.c
index 933e60e..5ce8f95 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1254,8 +1254,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
*/
p->group_leader = p;
INIT_LIST_HEAD(&p->thread_group);
- INIT_LIST_HEAD(&p->ptrace_children);
- INIT_LIST_HEAD(&p->ptrace_list);
+ INIT_LIST_HEAD(&p->ptrace_entry);
+ INIT_LIST_HEAD(&p->ptraced);

/* Now that the task is set up, run cgroup callbacks if
* necessary. We need to run them before the task is visible
@@ -1327,7 +1327,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
}

if (likely(p->pid)) {
- add_parent(p);
+ list_add_tail(&p->sibling, &p->real_parent->children);
if (unlikely(p->ptrace & PT_PTRACED))
__ptrace_link(p, current->parent);

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 6c19e94..acf80a4 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -33,13 +33,9 @@
*/
void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
{
- BUG_ON(!list_empty(&child->ptrace_list));
- if (child->parent == new_parent)
- return;
- list_add(&child->ptrace_list, &child->parent->ptrace_children);
- remove_parent(child);
+ BUG_ON(!list_empty(&child->ptrace_entry));
+ list_add(&child->ptrace_entry, &new_parent->ptraced);
child->parent = new_parent;
- add_parent(child);
}

/*
@@ -73,12 +69,8 @@ void __ptrace_unlink(struct task_struct *child)
BUG_ON(!child->ptrace);

child->ptrace = 0;
- if (ptrace_reparented(child)) {
- list_del_init(&child->ptrace_list);
- remove_parent(child);
- child->parent = child->real_parent;
- add_parent(child);
- }
+ child->parent = child->real_parent;
+ list_del_init(&child->ptrace_entry);

if (task_is_traced(child))
ptrace_untrace(child);
@@ -492,14 +484,33 @@ int ptrace_traceme(void)
/*
* Are we already being traced?
*/
+repeat:
task_lock(current);
if (!(current->ptrace & PT_PTRACED)) {
+ /*
+ * See ptrace_attach() comments about the locking here.
+ */
+ unsigned long flags;
+ if (!write_trylock_irqsave(&tasklist_lock, flags)) {
+ task_unlock(current);
+ do {
+ cpu_relax();
+ } while (!write_can_lock(&tasklist_lock));
+ goto repeat;
+ }
+
ret = security_ptrace(current->parent, current);
+
/*
* Set the ptrace bit in the process ptrace flags.
+ * Then link us on our parent's ptraced list.
*/
- if (!ret)
+ if (!ret) {
current->ptrace |= PT_PTRACED;
+ __ptrace_link(current, current->real_parent);
+ }
+
+ write_unlock_irqrestore(&tasklist_lock, flags);
}
task_unlock(current);
return ret;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/