Re: [PATCH v11 19/26] gunyah: vm_mgr: Add framework to add VM Functions

From: Alex Elder
Date: Fri Mar 31 2023 - 10:27:44 EST


On 3/3/23 7:06 PM, Elliot Berman wrote:
Introduce a framework for Gunyah userspace to install VM functions. VM
functions are optional interfaces to the virtual machine. vCPUs,
ioeventfs, and irqfds are examples of such VM functions and are
implemented in subsequent patches.

A generic framework is implemented instead of individual ioctls to
create vCPUs, irqfds, etc., in order to simplify the VM manager core
implementation and allow dynamic loading of VM function modules.

Signed-off-by: Elliot Berman <quic_eberman@xxxxxxxxxxx>

I found two bugs here, and have some suggestions that might
improve code readability.

-Alex

---
Documentation/virt/gunyah/vm-manager.rst | 18 ++
drivers/virt/gunyah/vm_mgr.c | 208 ++++++++++++++++++++++-
drivers/virt/gunyah/vm_mgr.h | 4 +
include/linux/gunyah_vm_mgr.h | 73 ++++++++
include/uapi/linux/gunyah.h | 17 ++
5 files changed, 316 insertions(+), 4 deletions(-)
create mode 100644 include/linux/gunyah_vm_mgr.h

diff --git a/Documentation/virt/gunyah/vm-manager.rst b/Documentation/virt/gunyah/vm-manager.rst
index 1b4aa18670a3..af8ad88a88ab 100644
--- a/Documentation/virt/gunyah/vm-manager.rst
+++ b/Documentation/virt/gunyah/vm-manager.rst
@@ -17,6 +17,24 @@ sharing userspace memory with a VM is done via the GH_VM_SET_USER_MEM_REGION
ioctl. The VM itself is configured to use the memory region via the
devicetree.
+Gunyah Functions
+================
+
+Components of a Gunyah VM's configuration that need kernel configuration are
+called "functions" and are built on top of a framework. Functions are identified
+by a string and have some argument(s) to configure them. They are typically
+created by the `GH_VM_ADD_FUNCTION` ioctl.
+
+Functions typically will always do at least one of these operations:
+
+1. Create resource ticket(s). Resource tickets allow a function to register
+ itself as the client for a Gunyah resource (e.g. doorbell or vCPU) and
+ the function is given the pointer to the `struct gh_resource` when the
+ VM is starting.
+
+2. Register IO handler(s). IO handlers allow a function to handle stage-2 faults
+ from the virtual machine.
+
Sample Userspace VMM
====================
diff --git a/drivers/virt/gunyah/vm_mgr.c b/drivers/virt/gunyah/vm_mgr.c
index 299b9bb81edc..88db011395ec 100644
--- a/drivers/virt/gunyah/vm_mgr.c
+++ b/drivers/virt/gunyah/vm_mgr.c
@@ -6,16 +6,165 @@
#define pr_fmt(fmt) "gh_vm_mgr: " fmt
#include <linux/anon_inodes.h>
+#include <linux/compat.h>
#include <linux/file.h>
#include <linux/gunyah_rsc_mgr.h>
+#include <linux/gunyah_vm_mgr.h>
#include <linux/miscdevice.h>
#include <linux/mm.h>
#include <linux/module.h>
+#include <linux/xarray.h>
#include <uapi/linux/gunyah.h>
#include "vm_mgr.h"
+static DEFINE_XARRAY(functions);
+
+int gh_vm_function_register(struct gh_vm_function *fn)
+{
+ if (!fn->bind || !fn->unbind)
+ return -EINVAL;
+
+ return xa_err(xa_store(&functions, fn->type, fn, GFP_KERNEL));
+}
+EXPORT_SYMBOL_GPL(gh_vm_function_register);
+

I would move gh_vm_remove_function_instance() down, grouping it
more closely with the code that uses it.

+static void gh_vm_remove_function_instance(struct gh_vm_function_instance *inst)
+ __must_hold(&inst->ghvm->fn_lock)
+{
+ inst->fn->unbind(inst);
+ list_del(&inst->vm_list);
+ module_put(inst->fn->mod);
+ kfree(inst->argp);
+ kfree(inst);
+}
+
+void gh_vm_function_unregister(struct gh_vm_function *fn)
+{
+ /* Expecting unregister to only come when unloading a module */
+ WARN_ON(fn->mod && module_refcount(fn->mod));
+ xa_erase(&functions, fn->type);
+}
+EXPORT_SYMBOL_GPL(gh_vm_function_unregister);
+

You define gh_vm_get_function(), but you don't define the matching
gh_vm_put_function() abstraction. Instead, you just expect the
caller to know that they should call module_put(). Even if it's
a simple wrapper, please define gh_vm_put_function().

+static struct gh_vm_function *gh_vm_get_function(u32 type)
+{
+ struct gh_vm_function *fn;
+ int r;
+
+ fn = xa_load(&functions, type);
+ if (!fn) {
+ r = request_module("ghfunc:%d", type);
+ if (r)
+ return ERR_PTR(r);
+
+ fn = xa_load(&functions, type);
+ }
+
+ if (!fn || !try_module_get(fn->mod))
+ fn = ERR_PTR(-ENOENT);
+
+ return fn;
+}
+
+static long gh_vm_add_function(struct gh_vm *ghvm, struct gh_fn_desc *f)

This is adding a function *instance*. Maybe it would be clearer
if you included that in the name.

+{
+ struct gh_vm_function_instance *inst;
+ void __user *argp;
+ long r = 0;
+
+ if (f->arg_size > GH_FN_MAX_ARG_SIZE) {
+ dev_err(ghvm->parent, "%s: arg_size > %d\n", __func__, GH_FN_MAX_ARG_SIZE);
+ return -EINVAL;
+ }
+
+ inst = kzalloc(sizeof(*inst), GFP_KERNEL);
+ if (!inst)
+ return -ENOMEM;
+
+ inst->arg_size = f->arg_size;
+ if (inst->arg_size) {
+ inst->argp = kzalloc(inst->arg_size, GFP_KERNEL);
+ if (!inst->argp) {
+ r = -ENOMEM;
+ goto free;
+ }
+
+ argp = u64_to_user_ptr(f->arg);
+ if (copy_from_user(inst->argp, argp, f->arg_size)) {
+ r = -EFAULT;
+ goto free_arg;
+ }
+ }
+
+ inst->fn = gh_vm_get_function(f->type);
+ if (IS_ERR(inst->fn)) {
+ r = PTR_ERR(inst->fn);
+ goto free_arg;
+ }
+
+ inst->ghvm = ghvm;
+ inst->rm = ghvm->rm;
+
+ mutex_lock(&ghvm->fn_lock);
+ r = inst->fn->bind(inst);
+ if (r < 0) {

You need to unlock the mutex here. This is a BUG.

+ module_put(inst->fn->mod);
+ goto free_arg;

Perhaps you should add a new label in the error path and
unlock the mutex and put the function reference there.

+ }
+
+ list_add(&inst->vm_list, &ghvm->functions);
+ mutex_unlock(&ghvm->fn_lock);
+
+ return r;
+free_arg:
+ kfree(inst->argp);
+free:
+ kfree(inst);
+ return r;
+}
+
+static long gh_vm_rm_function(struct gh_vm *ghvm, struct gh_fn_desc *f)

This is removing a function *instance*, right?

+{
+ struct gh_vm_function_instance *inst, *iter;
+ void __user *user_argp;
+ void *argp;
+ long r = 0;
+
+ r = mutex_lock_interruptible(&ghvm->fn_lock);
+ if (r)
+ return r;
+
+ if (f->arg_size) {

This is a BUG. You' aren't freeing things, you seem
to have just duplicated the allocation code.

Actually I think this is just a block of copied code
that's here by mistake. The loop might be doing
the removal you intend.

+ argp = kzalloc(f->arg_size, GFP_KERNEL);
+ if (!argp) {
+ r = -ENOMEM;
+ goto out;
+ }
+
+ user_argp = u64_to_user_ptr(f->arg);
+ if (copy_from_user(argp, user_argp, f->arg_size)) {
+ r = -EFAULT;
+ kfree(argp);
+ goto out;
+ }

I *think* the for loop and freeing the argument here
still does what you want.

+ list_for_each_entry_safe(inst, iter, &ghvm->functions, vm_list) {
+ if (inst->fn->type == f->type &&
+ f->arg_size == inst->arg_size &&
+ !memcmp(argp, inst->argp, f->arg_size))
+ gh_vm_remove_function_instance(inst);
+ }
+
+ kfree(argp);
+ }
+
+out:
+ mutex_unlock(&ghvm->fn_lock);
+ return r;
+}
+
static int gh_vm_rm_notification_status(struct gh_vm *ghvm, void *data)
{
struct gh_rm_vm_status_payload *payload = data;
@@ -80,6 +229,7 @@ static void gh_vm_stop(struct gh_vm *ghvm)
static void gh_vm_free(struct work_struct *work)
{
struct gh_vm *ghvm = container_of(work, struct gh_vm, free_work);
+ struct gh_vm_function_instance *inst, *iiter;
struct gh_vm_mem *mapping, *tmp;
int ret;
@@ -90,6 +240,12 @@ static void gh_vm_free(struct work_struct *work)
case GH_RM_VM_STATUS_INIT_FAILED:
case GH_RM_VM_STATUS_LOAD:
case GH_RM_VM_STATUS_EXITED:
+ mutex_lock(&ghvm->fn_lock);
+ list_for_each_entry_safe(inst, iiter, &ghvm->functions, vm_list) {
+ gh_vm_remove_function_instance(inst);
+ }
+ mutex_unlock(&ghvm->fn_lock);
+
mutex_lock(&ghvm->mm_lock);
list_for_each_entry_safe(mapping, tmp, &ghvm->memory_mappings, list) {
gh_vm_mem_reclaim(ghvm, mapping);
@@ -117,6 +273,28 @@ static void gh_vm_free(struct work_struct *work)
}
}
+static void _gh_vm_put(struct kref *kref)
+{
+ struct gh_vm *ghvm = container_of(kref, struct gh_vm, kref);
+
+ /* VM will be reset and make RM calls which can interruptible sleep.
+ * Defer to a work so this thread can receive signal.
+ */
+ schedule_work(&ghvm->free_work);
+}
+
+int __must_check gh_vm_get(struct gh_vm *ghvm)
+{
+ return kref_get_unless_zero(&ghvm->kref);
+}
+EXPORT_SYMBOL_GPL(gh_vm_get);
+

Maybe move _gh_vm_put() here.

+void gh_vm_put(struct gh_vm *ghvm)
+{
+ kref_put(&ghvm->kref, _gh_vm_put);
+}
+EXPORT_SYMBOL_GPL(gh_vm_put);
+
static __must_check struct gh_vm *gh_vm_alloc(struct gh_rm *rm)
{
struct gh_vm *ghvm;
@@ -150,6 +328,8 @@ static __must_check struct gh_vm *gh_vm_alloc(struct gh_rm *rm)
INIT_LIST_HEAD(&ghvm->memory_mappings);
init_rwsem(&ghvm->status_lock);
INIT_WORK(&ghvm->free_work, gh_vm_free);
+ kref_init(&ghvm->kref);
+ INIT_LIST_HEAD(&ghvm->functions);
ghvm->vm_status = GH_RM_VM_STATUS_LOAD;
return ghvm;
@@ -302,6 +482,29 @@ static long gh_vm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
r = gh_vm_ensure_started(ghvm);
break;
}
+ case GH_VM_ADD_FUNCTION: {
+ struct gh_fn_desc f;
+
+ if (copy_from_user(&f, argp, sizeof(f)))
+ return -EFAULT;
+
+ r = gh_vm_add_function(ghvm, &f);
+ break;
+ }
+ case GH_VM_REMOVE_FUNCTION: {

To be clear, this is adding a function *instance*.
(I'm not suggesting you change the name.)

+ struct gh_fn_desc *f;
+
+ f = kzalloc(sizeof(*f), GFP_KERNEL);

Why do you allocate a function descriptor here dynamically,
while when adding a function you just define a descriptor
as a local variable (on the stack)? It looks to me like
you should be able to do it the same way here and avoid
the possibility of a kzalloc() failure.

+ if (!f)
+ return -ENOMEM;
+
+ if (copy_from_user(f, argp, sizeof(*f)))
+ return -EFAULT;

If the copy_from_user() fails you will have leaked the
memory allocated for the memory descriptor. This is a BUG.

+
+ r = gh_vm_rm_function(ghvm, f);
+ kfree(f);
+ break;
+ }
default:
r = -ENOTTY;
break;

. . .