[PATCH V4 5/8] fork: add helper to clone a process

From: Mike Christie
Date: Thu Oct 07 2021 - 17:45:44 EST


The vhost layer has similar requirements as io_uring where its worker
threads need to access the userspace thread's memory, want to inherit the
parents's cgroups and namespaces, and be checked against the parent's
RLIMITs. Right now, the vhost layer uses the kthread API which has
kthread_use_mm for mem access, and those threads can use
cgroup_attach_task_all for v1 cgroups, but there are no helpers for the
other items.

This adds a helper to clone a process so we can inherit everything we
want in one call. It's a more generic version of create_io_thread which
will be used by the vhost layer and io_uring in later patches in this set.

[added flag validation code from Christian Brauner's patch]
Signed-off-by: Mike Christie <michael.christie@xxxxxxxxxx>
Acked-by: Christian Brauner <christian.brauner@xxxxxxxxxx>
---
include/linux/sched/task.h | 4 +++
kernel/fork.c | 71 ++++++++++++++++++++++++++++++++++++++
2 files changed, 75 insertions(+)

diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 5f3928fe0544..2bfc0629c868 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -89,6 +89,10 @@ extern void exit_itimers(struct signal_struct *);

extern pid_t kernel_clone(struct kernel_clone_args *kargs);
struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
+struct task_struct *kernel_worker(int (*fn)(void *), void *arg, int node,
+ unsigned long clone_flags, u32 worker_flags);
+__printf(2, 3)
+void kernel_worker_start(struct task_struct *tsk, const char namefmt[], ...);
struct task_struct *fork_idle(int);
struct mm_struct *copy_init_mm(void);
extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
diff --git a/kernel/fork.c b/kernel/fork.c
index 07f9e410fb64..b04e61a965e2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2544,6 +2544,77 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
return copy_process(NULL, 0, node, &args);
}

+static bool kernel_worker_flags_valid(struct kernel_clone_args *kargs)
+{
+ /* Verify that no unknown flags are passed along. */
+ if (kargs->worker_flags & ~(KERN_WORKER_IO | KERN_WORKER_USER |
+ KERN_WORKER_NO_FILES | KERN_WORKER_SIG_IGN))
+ return false;
+
+ /*
+ * If we're ignoring all signals don't allow sharing struct sighand and
+ * don't bother clearing signal handlers.
+ */
+ if ((kargs->flags & (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) &&
+ (kargs->worker_flags & KERN_WORKER_SIG_IGN))
+ return false;
+
+ return true;
+}
+
+/**
+ * kernel_worker - create a copy of a process to be used by the kernel
+ * @fn: thread stack
+ * @arg: data to be passed to fn
+ * @node: numa node to allocate task from
+ * @clone_flags: CLONE flags
+ * @worker_flags: KERN_WORKER flags
+ *
+ * This returns a created task, or an error pointer. The returned task is
+ * inactive, and the caller must fire it up through kernel_worker_start(). If
+ * this is an PF_IO_WORKER all singals but KILL and STOP are blocked.
+ */
+struct task_struct *kernel_worker(int (*fn)(void *), void *arg, int node,
+ unsigned long clone_flags, u32 worker_flags)
+{
+ struct kernel_clone_args args = {
+ .flags = ((lower_32_bits(clone_flags) | CLONE_VM |
+ CLONE_UNTRACED) & ~CSIGNAL),
+ .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL),
+ .stack = (unsigned long)fn,
+ .stack_size = (unsigned long)arg,
+ .worker_flags = KERN_WORKER_USER | worker_flags,
+ };
+
+ if (!kernel_worker_flags_valid(&args))
+ return ERR_PTR(-EINVAL);
+
+ return copy_process(NULL, 0, node, &args);
+}
+EXPORT_SYMBOL_GPL(kernel_worker);
+
+/**
+ * kernel_worker_start - Start a task created with kernel_worker
+ * @tsk: task to wake up
+ * @namefmt: printf-style format string for the thread name
+ * @arg: arguments for @namefmt
+ */
+void kernel_worker_start(struct task_struct *tsk, const char namefmt[], ...)
+{
+ char name[TASK_COMM_LEN];
+ va_list args;
+
+ WARN_ON(!(tsk->flags & PF_USER_WORKER));
+
+ va_start(args, namefmt);
+ vsnprintf(name, sizeof(name), namefmt, args);
+ set_task_comm(tsk, name);
+ va_end(args);
+
+ wake_up_new_task(tsk);
+}
+EXPORT_SYMBOL_GPL(kernel_worker_start);
+
/*
* Ok, this is the main fork-routine.
*
--
2.25.1