[PATCH 1/8] fork: add helper to clone a process

From: Mike Christie
Date: Thu Sep 16 2021 - 17:21:32 EST


The vhost layer has requirements similar to io_uring's: its worker
threads need to access the userspace thread's memory, want to inherit the
parent's cgroups and namespaces, and must be checked against the parent's
RLIMITs. Right now, the vhost layer uses the kthread API which has
kthread_use_mm for mem access, and those threads can use
cgroup_attach_task_all for v1 cgroups, but there are no helpers for the
other items.

This adds a helper to clone a process so we can inherit everything we
want in one call. It's a more generic version of create_io_thread which
will be used by the vhost layer and io_uring in later patches in this set.

This patch also exports __set_task_comm and wake_up_new_task, which are
needed by modules to use the new helper. io_uring already calls these
functions, but it is always built in so no export was needed before.

Signed-off-by: Mike Christie <michael.christie@xxxxxxxxxx>
---
fs/exec.c | 7 +++++++
include/linux/sched/task.h | 3 +++
kernel/fork.c | 29 +++++++++++++++++++++++++++++
kernel/sched/core.c | 4 +++-
4 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/fs/exec.c b/fs/exec.c
index a098c133d8d7..9fc4bb0c5c7e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1220,6 +1220,12 @@ EXPORT_SYMBOL_GPL(__get_task_comm);
* so that a new one can be started
*/

+/**
+ * __set_task_comm - set the task's executable name
+ * @tsk: task_struct to modify
+ * @buf: executable name
+ * @exec: true if called during a process exec. false for name changes.
+ */
void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
{
task_lock(tsk);
@@ -1228,6 +1234,7 @@ void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
task_unlock(tsk);
perf_event_comm(tsk, exec);
}
+EXPORT_SYMBOL_GPL(__set_task_comm);

/*
* Calling this is the point of no return. None of the failures will be
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index ef02be869cf2..c55f1eb69d41 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -84,6 +84,9 @@ extern void exit_itimers(struct signal_struct *);

extern pid_t kernel_clone(struct kernel_clone_args *kargs);
struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
+struct task_struct *kernel_copy_process(int (*fn)(void *), void *arg, int node,
+ unsigned long clone_flags,
+ int io_thread);
struct task_struct *fork_idle(int);
struct mm_struct *copy_init_mm(void);
extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
diff --git a/kernel/fork.c b/kernel/fork.c
index 38681ad44c76..cec7b6011beb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2532,6 +2532,35 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
return copy_process(NULL, 0, node, &args);
}

+/**
+ * kernel_copy_process - create a copy of a process to be used by the kernel
+ * @fn: function to call with @arg (handed to copy_process() as .stack)
+ * @arg: data to be passed to fn (handed to copy_process() as .stack_size)
+ * @node: numa node to allocate task from
+ * @clone_flags: CLONE flags; CLONE_VM and CLONE_UNTRACED are always set
+ * @io_thread: 1 if this will be a PF_IO_WORKER else 0.
+ *
+ * This returns a created task, or an error pointer. The returned task is
+ * inactive, and the caller must fire it up through wake_up_new_task(p). If
+ * this is a PF_IO_WORKER all signals but KILL and STOP are blocked.
+ */
+struct task_struct *kernel_copy_process(int (*fn)(void *), void *arg, int node,
+ unsigned long clone_flags,
+ int io_thread)
+{
+ struct kernel_clone_args args = {
+ .flags = ((lower_32_bits(clone_flags) | CLONE_VM |
+ CLONE_UNTRACED) & ~CSIGNAL),
+ .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL),
+ .stack = (unsigned long)fn,
+ .stack_size = (unsigned long)arg,
+ .io_thread = io_thread,
+ };
+
+ return copy_process(NULL, 0, node, &args);
+}
+EXPORT_SYMBOL_GPL(kernel_copy_process);
+
/*
* Ok, this is the main fork-routine.
*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1bba4128a3e6..a0b9508ea202 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4429,8 +4429,9 @@ unsigned long to_ratio(u64 period, u64 runtime)
return div64_u64(runtime << BW_SHIFT, period);
}

-/*
+/**
* wake_up_new_task - wake up a newly created task for the first time.
+ * @p: task to wake up
*
* This function will do some initial scheduler statistics housekeeping
* that must be done for every newly created context, then puts the task
@@ -4476,6 +4477,7 @@ void wake_up_new_task(struct task_struct *p)
#endif
task_rq_unlock(rq, p, &rf);
}
+EXPORT_SYMBOL_GPL(wake_up_new_task);

#ifdef CONFIG_PREEMPT_NOTIFIERS

--
2.25.1