[PATCH (draft)] Change task_struct->comm to use RCU.

From: Tetsuo Handa
Date: Mon Feb 10 2014 - 08:44:26 EST


Hello.

Tetsuo Handa wrote:
> I think there is some point in applying the preparatory patches (commcpy() and
> %pT patches) now, for they fix time-to-call-strlen()-to-time-to-call-memcpy()
> problem by making sure that the comm string passed to strlen() etc. is also
> passed to memcpy() etc. by taking a snapshot of the comm string.
>
> What the preparatory patches cannot fix is that the comm string is modified
> while taking a snapshot of the comm string. The RCU work will fix it.
>
> Anyway, you want to see the RCU work before you merge commcpy() and %pT
> patches. OK. I'll start making the RCU work.

This is a draft patch which changes task_struct->comm to use RCU.

This patch currently does not build, for this patch can be applied only after
commcpy() and %pT patches are merged and all direct ->comm users are killed.

This patch sleeps when kmalloc() fails when changing task_struct->comm. But are
we allowed to sleep when changing task_struct->comm ? If yes, there is no need
to rename set_task_comm() to commset() in this patch. If no, I need to find a
different answer.

Regards.
----------
>From c1d907a109a5407c7eaf0e81741f99b4715ba55d Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx>
Date: Mon, 10 Feb 2014 19:25:51 +0900
Subject: [PATCH (draft)] Change task_struct->comm to use RCU.

This patch guarantees that the task_struct->comm is updated atomically.

Signed-off-by: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx>
---
fs/exec.c | 94 ++++++++++++++++++++++++++++++++++++++------
fs/proc/base.c | 2 +-
include/linux/init_task.h | 4 +-
include/linux/sched.h | 35 ++++++++++++++---
kernel/fork.c | 11 +++++
kernel/kthread.c | 6 ++-
kernel/sched/core.c | 7 +++-
kernel/sys.c | 2 +-
8 files changed, 135 insertions(+), 26 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 69c6089..d71fd63 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -263,6 +263,11 @@ extern char ___assert_task_state[1 - 2*!!(

/* Task command name length */
#define TASK_COMM_LEN 16
+struct rcu_comm {
+ char name[TASK_COMM_LEN];
+ struct rcu_head rcu;
+ bool is_static;
+};

#include <linux/spinlock.h>

@@ -1317,10 +1322,11 @@ struct task_struct {
* credentials (COW) */
const struct cred __rcu *cred; /* effective (overridable) subjective task
* credentials (COW) */
- char comm[TASK_COMM_LEN]; /* executable name excluding path
- - access with [gs]et_task_comm (which lock
- it with task_lock())
- - initialized normally by setup_new_exec */
+ struct rcu_comm __rcu *rcu_comm; /* executable name excluding path
+ - update with commset()
+ - read with commcpy() or %pT
+ - initialized normally by
+ setup_new_exec */
/* file system info */
int link_count, total_link_count;
#ifdef CONFIG_SYSVIPC
@@ -1805,6 +1811,23 @@ static inline cputime_t task_gtime(struct task_struct *t)
extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);

+/**
+ * commcpy - Copy task_struct->comm to buffer.
+ *
+ * @buf: Buffer to copy @tsk->comm which must be at least TASK_COMM_LEN bytes.
+ * @tsk: Pointer to "struct task_struct".
+ *
+ * Returns @buf .
+ */
+static inline char *commcpy(char *buf, const struct task_struct *tsk)
+{
+ rcu_read_lock();
+ memcpy(buf, rcu_dereference(p->rcu_comm)->name, TASK_COMM_LEN);
+ rcu_read_unlock();
+ buf[TASK_COMM_LEN - 1] = '\0';
+ return buf;
+}
+
/*
* Per process flags
*/
@@ -2332,8 +2355,8 @@ extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, i
struct task_struct *fork_idle(int);
extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);

-extern void set_task_comm(struct task_struct *tsk, char *from);
-extern char *get_task_comm(char *to, struct task_struct *tsk);
+void do_commset(struct task_struct *tsk, const char *buf);
+void commset(struct task_struct *tsk, const char *buf);

#ifdef CONFIG_SMP
void scheduler_ipi(void);
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 6df7f9f..b217f8c 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -154,7 +154,7 @@ extern struct task_group root_task_group;
# define INIT_VTIME(tsk)
#endif

-#define INIT_TASK_COMM "swapper"
+extern struct rcu_comm init_rcu_comm;

#ifdef CONFIG_RT_MUTEXES
# define INIT_RT_MUTEXES(tsk) \
@@ -201,7 +201,7 @@ extern struct task_group root_task_group;
.group_leader = &tsk, \
RCU_POINTER_INITIALIZER(real_cred, &init_cred), \
RCU_POINTER_INITIALIZER(cred, &init_cred), \
- .comm = INIT_TASK_COMM, \
+ .rcu_comm = &init_rcu_comm, \
.thread = INIT_THREAD, \
.fs = &init_fs, \
.files = &init_files, \
diff --git a/fs/exec.c b/fs/exec.c
index 3d78fcc..8c2b1c3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -67,6 +67,9 @@

int suid_dumpable = 0;

+/* comm name for init_task. */
+struct rcu_comm init_rcu_comm = { .name = "swapper", .is_static = 1 };
+
static LIST_HEAD(formats);
static DEFINE_RWLOCK(binfmt_lock);

@@ -1026,27 +1029,92 @@ killed:
return -EAGAIN;
}

-char *get_task_comm(char *buf, struct task_struct *tsk)
-{
- /* buf must be at least sizeof(tsk->comm) in size */
- task_lock(tsk);
- strncpy(buf, tsk->comm, sizeof(tsk->comm));
- task_unlock(tsk);
- return buf;
-}
-EXPORT_SYMBOL_GPL(get_task_comm);
-
/*
* These functions flushes out all traces of the currently running executable
* so that a new one can be started
*/

-void set_task_comm(struct task_struct *tsk, char *buf)
+/**
+ * do_commset - Change task_struct->comm.
+ *
+ * @tsk: Pointer to "struct task_struct".
+ * @buf: New comm name.
+ *
+ * Returns nothing.
+ *
+ * This function will sleep if kmalloc() failed.
+ *
+ * Question: Are we allowed to sleep?
+ */
+void do_commset(struct task_struct *tsk, const char *buf)
+{
+ struct rcu_comm *new;
+ struct rcu_comm *old;
+ struct rcu_comm tmp;
+
+ new = kzalloc(sizeof(*new), GFP_KERNEL | __GFP_NOWARN);
+ if (likely(new)) {
+ strncpy(new->name, buf, TASK_COMM_LEN - 1);
+ /* No need to wait because the "new" is kmalloc()ed. */
+ smp_wmb();
+ old = xchg(tsk->rcu_comm, new);
+ } else {
+ /* Memory allocation failed. Use slowpath. */
+ static DEFINE_MUTEX(lock);
+
+ memset(&tmp, 0, sizeof(tmp));
+ tmp.is_static = 1;
+ strncpy(tmp.name, buf, TASK_COMM_LEN - 1);
+ mutex_lock(&lock);
+ /*
+ * Publish the "tmp" rcu_comm.
+ * The mutex_lock() above guarantees that the "new" obtained by
+ * the xchg() below refers either kmemdup()ed rcu_comm as of
+ * copy_process() or kmalloc()ed rcu_comm as of commset().
+ */
+ smp_wmb();
+ new = xchg(tsk->rcu_comm, &tmp);
+ /* Wait for readers of the "new" rcu_comm. */
+ synchronize_rcu();
+ /* Reuse the "new" rcu_comm. */
+ memcpy(new->name, tmp.name, TASK_COMM_LEN);
+ /* Publish the "new" rcu_comm. */
+ smp_wmb();
+ old = xchg(tsk->rcu_comm, new);
+ /*
+ * Wait for readers of the "old" rcu_comm. Note that
+ * old == &tmp will be false if kmalloc() above by somebody
+ * else succeeded. Therefore, use the .is_static flag to
+ * determine whether to kfree() or not.
+ */
+ synchronize_rcu();
+ mutex_unlock(&lock);
+ }
+ if (!old->is_static)
+ kfree_rcu(old, rcu);
+}
+
+/**
+ * commset - Change task_struct->comm.
+ *
+ * @tsk: Pointer to "struct task_struct".
+ * @buf: New comm name.
+ *
+ * Returns nothing.
+ *
+ * This function will sleep if kmalloc() failed.
+ *
+ * Question: Are we allowed to sleep?
+ */
+void commset(struct task_struct *tsk, const char *buf)
{
+ /*
+ * Question: Do we need to use task_lock()/task_unlock() ?
+ */
task_lock(tsk);
trace_task_rename(tsk, buf);
- strlcpy(tsk->comm, buf, sizeof(tsk->comm));
task_unlock(tsk);
+ do_commset(tsk, buf);
perf_event_comm(tsk);
}

@@ -1122,7 +1190,7 @@ void setup_new_exec(struct linux_binprm * bprm)
else
set_dumpable(current->mm, suid_dumpable);

- set_task_comm(current, bprm->tcomm);
+ commset(current, bprm->tcomm);

/* Set the new mm task size. We have to do that late because it may
* depend on TIF_32BIT which is only updated in flush_thread() on
diff --git a/kernel/fork.c b/kernel/fork.c
index a17621c..5ce989d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -214,6 +214,12 @@ void free_task(struct task_struct *tsk)
ftrace_graph_exit_task(tsk);
put_seccomp_filter(tsk);
arch_release_task_struct(tsk);
+ /*
+ * It is safe to unconditionally kfree() immediately because this
+ * function must not be called if somebody is doing do_commset(tsk),
+ * and tsk->rcu_comm != &init_rcu_comm because tsk != &init_task .
+ */
+ kfree(tsk->rcu_comm);
free_task_struct(tsk);
}
EXPORT_SYMBOL(free_task);
@@ -1191,6 +1197,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p = dup_task_struct(current);
if (!p)
goto fork_out;
+ /* This is safe because p is not visible yet. */
+ p->rcu_comm = kmemdup(current->rcu_comm, sizeof(current->rcu_comm),
+ GFP_KERNEL);
+ if (!p->rcu_comm)
+ goto bad_fork_free;

ftrace_graph_init_task(p);
get_seccomp_filter(p);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 2c355bf..f73d6e0 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -318,10 +318,12 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
if (!IS_ERR(task)) {
static const struct sched_param param = { .sched_priority = 0 };
va_list args;
+ char name[TASK_COMM_LEN];

va_start(args, namefmt);
- vsnprintf(task->comm, sizeof(task->comm), namefmt, args);
+ vsnprintf(name, TASK_COMM_LEN, namefmt, args);
va_end(args);
+ do_commset(task, name);
/*
* root may have changed our (kthreadd's) priority or CPU mask.
* The kernel thread should not inherit these properties.
@@ -494,7 +496,7 @@ int kthreadd(void *unused)
struct task_struct *tsk = current;

/* Setup a clean context for our children to inherit. */
- set_task_comm(tsk, "kthreadd");
+ commset(tsk, "kthreadd");
ignore_signals(tsk);
set_cpus_allowed_ptr(tsk, cpu_all_mask);
set_mems_allowed(node_states[N_MEMORY]);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index feb6630..81f7b16 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4490,7 +4490,12 @@ void init_idle(struct task_struct *idle, int cpu)
ftrace_graph_init_idle_task(idle, cpu);
vtime_init_idle(idle, cpu);
#if defined(CONFIG_SMP)
- sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
+ {
+ char name[TASK_COMM_LEN];
+
+ snprintf(name, TASK_COMM_LEN, "swapper/%d", cpu);
+ do_commset(idle, name);
+ }
#endif
}

diff --git a/kernel/sys.c b/kernel/sys.c
index c0a58be..d6dcaee 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1897,7 +1897,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
if (strncpy_from_user(comm, (char __user *)arg2,
sizeof(me->comm) - 1) < 0)
return -EFAULT;
- set_task_comm(me, comm);
+ commset(me, comm);
proc_comm_connector(me);
break;
case PR_GET_NAME:
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 5150706..064f6c3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1396,7 +1396,7 @@ static ssize_t comm_write(struct file *file, const char __user *buf,
return -ESRCH;

if (same_thread_group(current, p))
- set_task_comm(p, buffer);
+ commset(p, buffer);
else
count = -EINVAL;

--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/