[PATCH RFC 03/13] pid_ns: Implement rwlock_t pid_ns::cr_lock for locking child_reaper

From: Kirill Tkhai
Date: Mon May 25 2015 - 13:44:40 EST


Add rwlock_t pid_ns::cr_lock, which protects child_reaper modifications.

Signed-off-by: Kirill Tkhai <ktkhai@xxxxxxxx>
---
include/linux/pid_namespace.h | 1 +
kernel/exit.c | 15 ++++++++++++---
kernel/fork.c | 1 +
kernel/pid.c | 10 +++++++++-
kernel/pid_namespace.c | 5 +++--
5 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 918b117..3e59d2a 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -28,6 +28,7 @@ struct pid_namespace {
int last_pid;
unsigned int nr_hashed;
struct task_struct *child_reaper;
+ rwlock_t cr_lock;
struct kmem_cache *pid_cachep;
unsigned int level;
struct pid_namespace *parent;
diff --git a/kernel/exit.c b/kernel/exit.c
index a29c35d..a1b2bf7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -455,16 +455,23 @@ static void check_pid_ns_reaper_exit(struct task_struct *father)
__acquires(&tasklist_lock)
{
struct pid_namespace *pid_ns = task_active_pid_ns(father);
- struct task_struct *reaper = pid_ns->child_reaper;
+ struct task_struct *reaper;
+
+ read_lock(&pid_ns->cr_lock);
+ reaper = pid_ns->child_reaper;
+ read_unlock(&pid_ns->cr_lock);

if (likely(reaper != father))
return;

+ write_lock(&pid_ns->cr_lock);
reaper = find_alive_thread(father);
- if (reaper) {
+ if (reaper)
pid_ns->child_reaper = reaper;
+ write_unlock(&pid_ns->cr_lock);
+
+ if (reaper)
return;
- }

write_unlock_irq(&tasklist_lock);
if (unlikely(pid_ns == &init_pid_ns)) {
@@ -560,6 +567,7 @@ static void forget_original_parent(struct task_struct *father,
if (list_empty(&father->children))
return;

+ read_lock(&task_active_pid_ns(father)->cr_lock);
reaper = find_new_reaper(father);
list_for_each_entry(p, &father->children, sibling) {
for_each_thread(p, t) {
@@ -579,6 +587,7 @@ static void forget_original_parent(struct task_struct *father,
reparent_leader(father, p, dead);
}
list_splice_tail_init(&father->children, &reaper->children);
+ read_unlock(&task_active_pid_ns(father)->cr_lock);
}

/*
diff --git a/kernel/fork.c b/kernel/fork.c
index 0bb88b5..66e31eb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1563,6 +1563,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
init_task_pid(p, PIDTYPE_SID, task_session(current));

if (is_child_reaper(pid)) {
+ /* Lockless, as we're the only process in ns */
ns_of_pid(pid)->child_reaper = p;
p->signal->flags |= SIGNAL_UNKILLABLE;
}
diff --git a/kernel/pid.c b/kernel/pid.c
index 4fd07d5..39a8b0a 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -78,6 +78,7 @@ struct pid_namespace init_pid_ns = {
.nr_hashed = PIDNS_HASH_ADDING,
.level = 0,
.child_reaper = &init_task,
+ .cr_lock = __RW_LOCK_UNLOCKED(&init_pid_ns.cr_lock),
.user_ns = &init_user_ns,
.ns.inum = PROC_PID_INIT_INO,
#ifdef CONFIG_PID_NS
@@ -259,6 +260,7 @@ static void delayed_put_pid(struct rcu_head *rhp)
void free_pid(struct pid *pid)
{
/* We can be called with write_lock_irq(&tasklist_lock) held */
+ struct task_struct *child_reaper = NULL;
int i;
unsigned long flags;

@@ -274,7 +276,8 @@ void free_pid(struct pid *pid)
* is the reaper wake up the reaper. The reaper
* may be sleeping in zap_pid_ns_processes().
*/
- wake_up_process(ns->child_reaper);
+ child_reaper = ns->child_reaper;
+ get_task_struct(child_reaper);
break;
case PIDNS_HASH_ADDING:
/* Handle a fork failure of the first process */
@@ -288,6 +291,11 @@ void free_pid(struct pid *pid)
}
spin_unlock_irqrestore(&pidmap_lock, flags);

+ if (child_reaper) {
+ wake_up_process(child_reaper);
+ put_task_struct(child_reaper);
+ }
+
for (i = 0; i <= pid->level; i++)
free_pidmap(pid->numbers + i);

diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a65ba13..bbaa072 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -115,6 +115,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
ns->parent = get_pid_ns(parent_pid_ns);
ns->user_ns = get_user_ns(user_ns);
ns->nr_hashed = PIDNS_HASH_ADDING;
+ rwlock_init(&ns->cr_lock);
INIT_WORK(&ns->proc_work, proc_cleanup_work);

set_bit(0, ns->pidmap[0].page);
@@ -324,9 +325,9 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
return -EINVAL;
}

- read_lock(&tasklist_lock);
+ read_lock(&pid_ns->cr_lock);
force_sig(SIGKILL, pid_ns->child_reaper);
- read_unlock(&tasklist_lock);
+ read_unlock(&pid_ns->cr_lock);

do_exit(0);




--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/