[PATCH 1/2] kthread: make struct kthread kmalloc'ed

From: Oleg Nesterov
Date: Fri Oct 28 2016 - 12:11:48 EST


I still think we should kill struct kthread in its current form, but this
needs cleanups outside of kthread.c.

So make it kmalloc'ed for now to avoid the problems with stack corruption,
for example the crashed kthread will likely OOPS again because its .exited
was destroyed by rewind_stack_do_exit().

Signed-off-by: Oleg Nesterov <oleg@xxxxxxxxxx>
---
include/linux/kthread.h | 1 +
kernel/fork.c | 2 ++
kernel/kthread.c | 58 ++++++++++++++++++++++++++++++++++++++-----------
3 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index a6e82a6..c1c3e63 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -48,6 +48,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
__k; \
})

+void free_kthread_struct(struct task_struct *k);
void kthread_bind(struct task_struct *k, unsigned int cpu);
void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask);
int kthread_stop(struct task_struct *k);
diff --git a/kernel/fork.c b/kernel/fork.c
index 623259f..663c6a7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -351,6 +351,8 @@ void free_task(struct task_struct *tsk)
ftrace_graph_exit_task(tsk);
put_seccomp_filter(tsk);
arch_release_task_struct(tsk);
+ if (tsk->flags & PF_KTHREAD)
+ free_kthread_struct(tsk);
free_task_struct(tsk);
}
EXPORT_SYMBOL(free_task);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index be2cc1f..9d64b65 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -53,14 +53,38 @@ enum KTHREAD_BITS {
KTHREAD_IS_PARKED,
};

-#define __to_kthread(vfork) \
- container_of(vfork, struct kthread, exited)
+static inline void set_kthread_struct(void *kthread)
+{
+ /*
+ * We abuse ->set_child_tid to avoid the new member and because it
+ * can't be wrongly copied by copy_process(). We also rely on fact
+ * that the caller can't exec, so PF_KTHREAD can't be cleared.
+ */
+ current->set_child_tid = (__force void __user *)kthread;
+}

static inline struct kthread *to_kthread(struct task_struct *k)
{
- return __to_kthread(k->vfork_done);
+ WARN_ON(!(k->flags & PF_KTHREAD));
+ return (__force void *)k->set_child_tid;
+}
+
+void free_kthread_struct(struct task_struct *k)
+{
+ /*
+ * Can be NULL if this kthread was created by kernel_thread()
+ * or if kmalloc() in kthread() failed.
+ */
+ kfree(to_kthread(k));
}

+#define __to_kthread(vfork) \
+ container_of(vfork, struct kthread, exited)
+
+/*
+ * TODO: kill it and use to_kthread(). But we still need the users
+ * like kthread_stop() which has to sync with the exiting kthread.
+ */
static struct kthread *to_live_kthread(struct task_struct *k)
{
struct completion *vfork = ACCESS_ONCE(k->vfork_done);
@@ -181,14 +205,11 @@ static int kthread(void *_create)
int (*threadfn)(void *data) = create->threadfn;
void *data = create->data;
struct completion *done;
- struct kthread self;
+ struct kthread *self;
int ret;

- self.flags = 0;
- self.data = data;
- init_completion(&self.exited);
- init_completion(&self.parked);
- current->vfork_done = &self.exited;
+ self = kmalloc(sizeof(*self), GFP_KERNEL);
+ set_kthread_struct(self);

/* If user was SIGKILLed, I release the structure. */
done = xchg(&create->done, NULL);
@@ -196,6 +217,19 @@ static int kthread(void *_create)
kfree(create);
do_exit(-EINTR);
}
+
+ if (!self) {
+ create->result = ERR_PTR(-ENOMEM);
+ complete(done);
+ do_exit(-ENOMEM);
+ }
+
+ self->flags = 0;
+ self->data = data;
+ init_completion(&self->exited);
+ init_completion(&self->parked);
+ current->vfork_done = &self->exited;
+
/* OK, tell user we're spawned, wait for stop or wakeup */
__set_current_state(TASK_UNINTERRUPTIBLE);
create->result = current;
@@ -203,12 +237,10 @@ static int kthread(void *_create)
schedule();

ret = -EINTR;
-
- if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) {
- __kthread_parkme(&self);
+ if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
+ __kthread_parkme(self);
ret = threadfn(data);
}
- /* we can't just return, we must preserve "self" on stack */
do_exit(ret);
}

--
2.5.0