[PATCH V6 06/10] fork: add helpers to clone a process for kernel use
From: Mike Christie
Date: Mon Nov 29 2021 - 14:50:12 EST
The vhost layer is creating kthreads to execute IO and management
operations. These threads need to share a mm with a userspace thread,
inherit cgroups, and we would like to have the thread accounted for
under the userspace thread's rlimit nproc value so a user can't overwhelm
the system with threads when creating VMs.
We have helpers for cgroups and mm but not for the rlimit nproc and in
the future we will probably want helpers for things like namespaces. For
those two items and to allow future sharing/inheritance, this patch adds
two helpers, user_worker_create and user_worker_start that allow callers
to create threads that copy or inherit the caller's attributes like mm,
cgroups, namespaces, etc, and are accounted for under the callers rlimits
nproc value similar to if the caller did a clone() in userspace. However,
instead of returning to userspace the thread is usable in the kernel for
modules like vhost or layers like io_uring.
[added flag validation code from Christian Brauner's SIG_IGN patch]
Signed-off-by: Mike Christie <michael.christie@xxxxxxxxxx>
Acked-by: Christian Brauner <christian.brauner@xxxxxxxxxx>
Reviewed-by: Christoph Hellwig <hch@xxxxxx>
---
include/linux/sched/task.h | 5 +++
kernel/fork.c | 72 ++++++++++++++++++++++++++++++++++++++
2 files changed, 77 insertions(+)
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index f8a658700075..ecb21c0d95ce 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -95,6 +95,11 @@ struct mm_struct *copy_init_mm(void);
extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
int kernel_wait(pid_t pid, int *stat);
+struct task_struct *user_worker_create(int (*fn)(void *), void *arg, int node,
+ unsigned long clone_flags,
+ u32 worker_flags);
+__printf(2, 3)
+void user_worker_start(struct task_struct *tsk, const char namefmt[], ...);
extern void free_task(struct task_struct *tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index c9152596a285..e72239ae1e08 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2543,6 +2543,78 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
return copy_process(NULL, 0, node, &args);
}
+static bool user_worker_flags_valid(struct kernel_clone_args *kargs)
+{
+ /* Verify that no unknown flags are passed along. */
+ if (kargs->worker_flags & ~(USER_WORKER_IO | USER_WORKER |
+ USER_WORKER_NO_FILES | USER_WORKER_SIG_IGN))
+ return false;
+
+ /*
+ * If we're ignoring all signals don't allow sharing struct sighand and
+ * don't bother clearing signal handlers.
+ */
+ if ((kargs->flags & (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) &&
+ (kargs->worker_flags & USER_WORKER_SIG_IGN))
+ return false;
+
+ return true;
+}
+
+/**
+ * user_worker_create - create a copy of a process to be used by the kernel
+ * @fn: thread stack
+ * @arg: data to be passed to fn
+ * @node: numa node to allocate task from
+ * @clone_flags: CLONE flags
+ * @worker_flags: USER_WORKER flags
+ *
+ * This returns a created task, or an error pointer. The returned task is
+ * inactive, and the caller must fire it up through user_worker_start(). If
+ * this is an PF_IO_WORKER all singals but KILL and STOP are blocked.
+ */
+struct task_struct *user_worker_create(int (*fn)(void *), void *arg, int node,
+ unsigned long clone_flags,
+ u32 worker_flags)
+{
+ struct kernel_clone_args args = {
+ .flags = ((lower_32_bits(clone_flags) | CLONE_VM |
+ CLONE_UNTRACED) & ~CSIGNAL),
+ .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL),
+ .stack = (unsigned long)fn,
+ .stack_size = (unsigned long)arg,
+ .worker_flags = USER_WORKER | worker_flags,
+ };
+
+ if (!user_worker_flags_valid(&args))
+ return ERR_PTR(-EINVAL);
+
+ return copy_process(NULL, 0, node, &args);
+}
+EXPORT_SYMBOL_GPL(user_worker_create);
+
+/**
+ * user_worker_start - Start a task created with user_worker_create
+ * @tsk: task to wake up
+ * @namefmt: printf-style format string for the thread name
+ * @arg: arguments for @namefmt
+ */
+void user_worker_start(struct task_struct *tsk, const char namefmt[], ...)
+{
+ char name[TASK_COMM_LEN];
+ va_list args;
+
+ WARN_ON(!(tsk->flags & PF_USER_WORKER));
+
+ va_start(args, namefmt);
+ vsnprintf(name, sizeof(name), namefmt, args);
+ set_task_comm(tsk, name);
+ va_end(args);
+
+ wake_up_new_task(tsk);
+}
+EXPORT_SYMBOL_GPL(user_worker_start);
+
/*
* Ok, this is the main fork-routine.
*
--
2.25.1