[PATCH 7/8] vhost: use kernel_copy_process to check RLIMITs and inherit cgroups

From: Mike Christie
Date: Thu Sep 16 2021 - 17:21:43 EST


For vhost workers we use the kthread API, which inherits its values from,
and checks against, the kthreadd thread. This results in cgroups v2 not
working and the wrong RLIMITs being checked. This patch has us use the
kernel_copy_process function, which will inherit its values/checks from the
thread that owns the device.

Note that this patch only converts us to kernel_copy_process. The next patch
will remove the code that is no longer needed.

Signed-off-by: Mike Christie <michael.christie@xxxxxxxxxx>
---
drivers/vhost/vhost.c | 49 +++++++++++++++++++++++++++----------------
drivers/vhost/vhost.h | 7 ++++++-
2 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index c9a1f706989c..6e58417b13fc 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -344,17 +344,14 @@ static void vhost_vq_reset(struct vhost_dev *dev,
static int vhost_worker(void *data)
{
struct vhost_worker *worker = data;
- struct vhost_dev *dev = worker->dev;
struct vhost_work *work, *work_next;
struct llist_node *node;

- kthread_use_mm(dev->mm);
-
for (;;) {
/* mb paired w/ kthread_stop */
set_current_state(TASK_INTERRUPTIBLE);

- if (kthread_should_stop()) {
+ if (test_bit(VHOST_WORKER_FLAG_STOP, &worker->flags)) {
__set_current_state(TASK_RUNNING);
break;
}
@@ -376,8 +373,9 @@ static int vhost_worker(void *data)
schedule();
}
}
- kthread_unuse_mm(dev->mm);
- return 0;
+
+ complete(worker->exit_done);
+ do_exit(0);
}

static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
@@ -579,6 +577,16 @@ static void vhost_detach_mm(struct vhost_dev *dev)
dev->mm = NULL;
}

+static void vhost_worker_stop(struct vhost_worker *worker)
+{
+ DECLARE_COMPLETION_ONSTACK(exit_done);
+
+ worker->exit_done = &exit_done;
+ set_bit(VHOST_WORKER_FLAG_STOP, &worker->flags);
+ wake_up_process(worker->task);
+ wait_for_completion(worker->exit_done);
+}
+
static void vhost_worker_free(struct vhost_dev *dev)
{
struct vhost_worker *worker = dev->worker;
@@ -588,14 +596,16 @@ static void vhost_worker_free(struct vhost_dev *dev)

dev->worker = NULL;
WARN_ON(!llist_empty(&worker->work_list));
- kthread_stop(worker->task);
+ vhost_worker_stop(worker);
kfree(worker);
}

static int vhost_worker_create(struct vhost_dev *dev)
{
+ DECLARE_COMPLETION_ONSTACK(start_done);
struct vhost_worker *worker;
struct task_struct *task;
+ char buf[TASK_COMM_LEN];
int ret;

worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
@@ -603,27 +613,30 @@ static int vhost_worker_create(struct vhost_dev *dev)
return -ENOMEM;

dev->worker = worker;
- worker->dev = dev;
worker->kcov_handle = kcov_common_handle();
init_llist_head(&worker->work_list);

- task = kthread_create(vhost_worker, worker, "vhost-%d", current->pid);
- if (IS_ERR(task)) {
- ret = PTR_ERR(task);
+ /*
+ * vhost used to use the kthread API which ignores all signals by
+ * default and the drivers expect this behavior. So we do not want to
+ * inherit the parent's signal handlers and set our worker to ignore
+ * everything below.
+ */
+ task = kernel_copy_process(vhost_worker, worker, NUMA_NO_NODE,
+ CLONE_FS|CLONE_CLEAR_SIGHAND, 0, 1);
+ if (IS_ERR(task))
goto free_worker;
- }

worker->task = task;
- wake_up_process(task); /* avoid contributing to loadavg */

- ret = vhost_attach_cgroups(dev);
- if (ret)
- goto stop_worker;
+ snprintf(buf, sizeof(buf), "vhost-%d", current->pid);
+ set_task_comm(task, buf);
+
+ ignore_signals(task);

+ wake_up_new_task(task);
return 0;

-stop_worker:
- kthread_stop(worker->task);
free_worker:
kfree(worker);
dev->worker = NULL;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 102ce25e4e13..09748694cb66 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -25,11 +25,16 @@ struct vhost_work {
unsigned long flags;
};

+enum {
+ VHOST_WORKER_FLAG_STOP,
+};
+
struct vhost_worker {
struct task_struct *task;
+ struct completion *exit_done;
struct llist_head work_list;
- struct vhost_dev *dev;
u64 kcov_handle;
+ unsigned long flags;
};

/* Poll a file (eventfd or socket) */
--
2.25.1