[KVM PATCH] KVM: introduce "xinterface" API for external interaction with guests

From: Gregory Haskins
Date: Thu Jul 16 2009 - 11:20:09 EST


What: xinterface is a mechanism that allows kernel modules external to
the kvm.ko proper to interface with a running guest. It accomplishes
this by creating an abstracted interface which does not expose any
private details of the guest or its related KVM structures, and provides
a mechanism to find and bind to this interface at run-time. This
binding mechanism uses a userspace friendly token "u64 vmid" as a
handle. This vmid acts similarly to a file descriptor in the sense that
it can be extracted from a guest, passed to an end-point of interest,
and finally, converted back to a vtable pointer using a stable interface.

Why: There are various subsystems that would like to interact with a KVM
guest which are ideally suited to exist outside the domain of the kvm.ko
core logic. For instance, external pci-passthrough, virtual-bus, and
virtio-net modules are currently under development. In order for these
modules to successfully interact with the guest, they need, at the very
least, various interfaces for signaling IO events, pointer translation,
and possibly memory mapping.

The signaling case is covered by the recent introduction of the
irqfd/ioeventfd mechanisms. This patch provides a mechanism to cover the
other cases. Note that today we only expose pointer-translation related
functions, but more could be added at a future date as needs arise.

Security considerations: This concept is not believed to expose KVM to
any kind of additional security risk. The vmid token itself can only be
acquired via an open handle to the vmfd (i.e. qemu-kvm), and the interface
is only available within the kernel. Therefore the xinterface
admission policy is delegated to the kernel/lkm admission policy, which
must be assumed secure or the system is already compromised independent of
this work.

Additionally, the xinterface design is hardened against malformed vmid
tokens, as well as race conditions against valid tokens (e.g. guest
exiting before the token is redeemed). It is additionally hardened
against races in the kvm.ko module itself by acquiring proper module
references. As a final measure, we link the xinterface code statically
into the kernel so that callers are guaranteed a stable interface to
kvm_xinterface_find() without implicitly pinning kvm.ko or racing against
it.

Example usage: QEMU instantiates a guest, and an external module "foo"
that desires the ability to interface with the guest (say via
open("/dev/foo")). QEMU may then issue a KVM_GET_VMID operation to acquire
the u64-based vmid, and pass it to ioctl(foofd, FOO_SET_VMID, &vmid).
Upon receipt, the foo module can issue kvm_xinterface_find(vmid) to acquire
the proper context. Internally, the struct kvm* and associated
struct module* will remain pinned at least until the foo module calls
kvm_xinterface_put().

Signed-off-by: Gregory Haskins <ghaskins@xxxxxxxxxx>
---

arch/x86/Kbuild | 4 +
arch/x86/kvm/Makefile | 4 +
arch/x86/kvm/x86.c | 1
include/linux/kvm.h | 2 +
include/linux/kvm_host.h | 6 ++
include/linux/kvm_xinterface.h | 58 ++++++++++++++++
virt/kvm/kvm_main.c | 72 ++++++++++++++++++++
virt/kvm/xinterface.c | 147 ++++++++++++++++++++++++++++++++++++++++
8 files changed, 293 insertions(+), 1 deletions(-)
create mode 100644 include/linux/kvm_xinterface.h
create mode 100644 virt/kvm/xinterface.c

diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index ad8ec35..9f50cc3 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -1,5 +1,7 @@

-obj-$(CONFIG_KVM) += kvm/
+ifdef CONFIG_KVM
+obj-y += kvm/
+endif

# Xen paravirtualization support
obj-$(CONFIG_XEN) += xen/
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index afaaa76..80d951d 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -17,3 +17,7 @@ kvm-amd-y += svm.o
obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
obj-$(CONFIG_KVM_AMD) += kvm-amd.o
+
+ifdef CONFIG_KVM
+obj-y += $(addprefix ../../../virt/kvm/, xinterface.o)
+endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 48567fa..5725527 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1208,6 +1208,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_IOEVENTFD:
case KVM_CAP_PIT2:
case KVM_CAP_PIT_STATE2:
+ case KVM_CAP_XINTERFACE:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 230a91a..7790894 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -435,6 +435,7 @@ struct kvm_ioeventfd {
#define KVM_CAP_PIT_STATE2 35
#endif
#define KVM_CAP_IOEVENTFD 36
+#define KVM_CAP_XINTERFACE 37

#ifdef KVM_CAP_IRQ_ROUTING

@@ -544,6 +545,7 @@ struct kvm_irqfd {
#define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config)
#define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78)
#define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd)
+#define KVM_GET_VMID _IOR(KVMIO, 0x7a, __u64)

/*
* ioctls for vcpu fds
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f244f11..0ee95df 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -23,6 +23,7 @@
#include <linux/kvm_para.h>

#include <linux/kvm_types.h>
+#include <linux/kvm_xinterface.h>

#include <asm/kvm_host.h>

@@ -175,6 +176,7 @@ struct kvm {
unsigned long mmu_notifier_seq;
long mmu_notifier_count;
#endif
+ struct kvm_xinterface xinterface; /* interface for external modules */
};

/* The guest did something we don't support. */
@@ -199,6 +201,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
idx < atomic_read(&kvm->online_vcpus) && vcpup; \
vcpup = kvm_get_vcpu(kvm, ++idx))

+void kvm_xinterface_register(struct kvm_xinterface *intf,
+ const struct kvm_xinterface_ops *ops);
+void kvm_xinterface_unregister(struct kvm_xinterface *intf);
+
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);

diff --git a/include/linux/kvm_xinterface.h b/include/linux/kvm_xinterface.h
new file mode 100644
index 0000000..858acfd
--- /dev/null
+++ b/include/linux/kvm_xinterface.h
@@ -0,0 +1,58 @@
+#ifndef __KVM_XINTERFACE_H
+#define __KVM_XINTERFACE_H
+
+/*
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/rbtree.h>
+
+struct kvm_xinterface;
+
+struct kvm_xinterface_ops {
+ struct module *owner; /* pinned at registration, released on the final put */
+ /*
+ * Guest-physical address translation hooks plus the teardown hook.
+ * As implemented by the KVM core in this patch, gpa_to_hva returns 0
+ * and gpa_to_page returns ERR_PTR(-EINVAL) when the address cannot be
+ * translated. release is invoked once the last reference to the
+ * interface has been dropped.
+ */
+ unsigned long (*gpa_to_hva)(struct kvm_xinterface *, unsigned long gpa);
+ struct page* (*gpa_to_page)(struct kvm_xinterface *, unsigned long gpa);
+ void (*release)(struct kvm_xinterface *);
+};
+
+struct kvm_xinterface {
+ struct kref kref; /* reference count; the last put calls ops->release */
+ const struct kvm_xinterface_ops *ops; /* assigned by kvm_xinterface_register() */
+ struct rb_node node; /* registry linkage; KVM_GET_VMID reports its address as the vmid */
+};
+
+/* Take one additional reference on @intf. */
+static inline void kvm_xinterface_get(struct kvm_xinterface *intf)
+{
+	struct kref *ref = &intf->kref;
+
+	kref_get(ref);
+}
+
+static inline void
+_kvm_xinterface_release(struct kref *kref)
+{
+ struct kvm_xinterface *intf;
+ struct module *owner;
+ /* kref release callback passed to kref_put() by kvm_xinterface_put() */
+ intf = container_of(kref, struct kvm_xinterface, kref);
+ /*
+ * Fetch the owner before calling release(): the KVM core's release
+ * hook drops a reference on the kvm object that embeds intf, which
+ * presumably may free it, so intf is not touched after release().
+ */
+ owner = intf->ops->owner;
+ rmb();
+ /* hand the interface back to its provider, then unpin the module */
+ intf->ops->release(intf);
+ module_put(owner); /* balances __module_get() in kvm_xinterface_register() */
+}
+
+/*
+ * Drop one reference on @intf. When the count reaches zero,
+ * _kvm_xinterface_release() runs.
+ */
+static inline void kvm_xinterface_put(struct kvm_xinterface *intf)
+{
+	struct kref *ref = &intf->kref;
+
+	kref_put(ref, _kvm_xinterface_release);
+}
+
+struct kvm_xinterface *kvm_xinterface_find(long vmid);
+
+#endif /* __KVM_XINTERFACE_H */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7cd1c10..058cb6c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -935,6 +935,58 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
};
#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */

+/*
+ * ------------
+ * XINTERFACE (External Interface)
+ * -------------
+ */
+
+/* Map an embedded xinterface back to the struct kvm that contains it. */
+static struct kvm *intf_to_kvm(struct kvm_xinterface *intf)
+{
+	struct kvm *kvm = container_of(intf, struct kvm, xinterface);
+
+	return kvm;
+}
+
+/*
+ * xinterface op: translate guest-physical address @gpa into a host
+ * virtual address, preserving the offset within the page.
+ *
+ * Returns 0 when the containing frame has no valid host mapping.
+ */
+static unsigned long
+xinterface_gpa_to_hva(struct kvm_xinterface *intf, unsigned long gpa)
+{
+	unsigned long hva = gfn_to_hva(intf_to_kvm(intf), gpa >> PAGE_SHIFT);
+
+	return kvm_is_error_hva(hva) ? 0 : hva + offset_in_page(gpa);
+}
+
+/*
+ * xinterface op: resolve guest-physical address @gpa to its backing page.
+ *
+ * On success the page is returned with the reference obtained by
+ * gfn_to_page(); the caller is responsible for releasing it. Returns
+ * ERR_PTR(-EINVAL) when the address has no valid backing page.
+ */
+static struct page *
+xinterface_gpa_to_page(struct kvm_xinterface *intf, unsigned long gpa)
+{
+	struct kvm *kvm = intf_to_kvm(intf);
+	struct page *page;
+
+	page = gfn_to_page(kvm, gpa >> PAGE_SHIFT);
+	if (page == bad_page) {
+		/*
+		 * gfn_to_page() hands back bad_page with a reference held,
+		 * just like a real page; drop it here so failed lookups do
+		 * not leak references on bad_page.
+		 */
+		kvm_release_page_clean(page);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return page;
+}
+
+/*
+ * xinterface op: called when the last interface reference goes away;
+ * gives up the kvm reference that was taken on behalf of the xinterface.
+ */
+static void xinterface_release(struct kvm_xinterface *intf)
+{
+	kvm_put_kvm(intf_to_kvm(intf));
+}
+
+struct kvm_xinterface_ops _kvm_xinterface_ops = { /* .owner is assigned in kvm_init() */
+ .gpa_to_hva = xinterface_gpa_to_hva,
+ .gpa_to_page = xinterface_gpa_to_page,
+ .release = xinterface_release,
+};
+
static struct kvm *kvm_create_vm(void)
{
struct kvm *kvm = kvm_arch_create_vm();
@@ -991,6 +1043,8 @@ static struct kvm *kvm_create_vm(void)
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
kvm_coalesced_mmio_init(kvm);
#endif
+ kvm_get_kvm(kvm); /* the xinterface needs another ref */
+ kvm_xinterface_register(&kvm->xinterface, &_kvm_xinterface_ops);
out:
return kvm;
}
@@ -1073,6 +1127,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
struct kvm *kvm = filp->private_data;

kvm_irqfd_release(kvm);
+ kvm_xinterface_unregister(&kvm->xinterface);

kvm_put_kvm(kvm);
return 0;
@@ -2289,6 +2344,22 @@ static long kvm_vm_ioctl(struct file *filp,
mutex_unlock(&kvm->lock);
break;
#endif
+ case KVM_GET_VMID: {
+ u64 vmid = (u64)&kvm->xinterface.node;
+
+ /*
+ * our vmid is simply the address of our rb_node in the
+ * registry, which is guaranteed unique. This also simplifies
+ * the registry map-lookup since we don't need to do a deep
+ * decode on the pointer to figure out if we have a match
+ */
+
+ r = -EFAULT;
+ if (copy_to_user(argp, &vmid, (sizeof vmid)))
+ goto out;
+ r = 0;
+ break;
+ }
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
}
@@ -2761,6 +2832,7 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
kvm_chardev_ops.owner = module;
kvm_vm_fops.owner = module;
kvm_vcpu_fops.owner = module;
+ _kvm_xinterface_ops.owner = module;

r = misc_register(&kvm_dev);
if (r) {
diff --git a/virt/kvm/xinterface.c b/virt/kvm/xinterface.c
new file mode 100644
index 0000000..fe9a214
--- /dev/null
+++ b/virt/kvm/xinterface.c
@@ -0,0 +1,147 @@
+/*
+ * KVM module interface - Allows external modules to interface with a guest
+ *
+ * This code is designed to be statically linked to the kernel, regardless
+ * of the configuration of kvm.ko. This allows the kvm_xinterface_find
+ * routine to be stably exported without dependencies on, or race conditions
+ * against acquiring the kvm.ko module itself.
+ *
+ * Copyright 2009 Novell. All Rights Reserved.
+ *
+ * Author:
+ * Gregory Haskins <ghaskins@xxxxxxxxxx>
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm_xinterface.h>
+
+struct kvm_registry {
+ struct mutex lock; /* held around every lookup, insertion and removal on root */
+ struct rb_root root; /* rb-tree of kvm_xinterface nodes, keyed by node address */
+};
+
+/* system wide registry of kvm based VMs */
+static struct kvm_registry kvm_registry = {
+ .lock = __MUTEX_INITIALIZER(kvm_registry.lock),
+ .root = RB_ROOT,
+};
+
+/* Convert a registry node into its enclosing interface; NULL maps to NULL. */
+static struct kvm_xinterface *to_intf(struct rb_node *node)
+{
+	if (!node)
+		return NULL;
+
+	return container_of(node, struct kvm_xinterface, node);
+}
+
+/**
+ * kvm_xinterface_find - convert a vmid token back into an interface
+ * @vmid: token previously obtained through the KVM_GET_VMID ioctl
+ *
+ * Looks @vmid up in the system-wide registry while holding the registry
+ * lock. The token is never dereferenced; it is only compared against
+ * the addresses of the nodes currently registered, so a malformed or
+ * stale token simply fails the lookup.
+ *
+ * Returns the matching interface with an extra reference held (drop it
+ * with kvm_xinterface_put()), or NULL if no registered guest matches.
+ */
+struct kvm_xinterface *
+kvm_xinterface_find(long vmid)
+{
+	unsigned long key = (unsigned long)vmid;
+	struct rb_node *node;
+	struct kvm_xinterface *intf;
+
+	mutex_lock(&kvm_registry.lock);
+
+	node = kvm_registry.root.rb_node;
+
+	while (node) {
+		unsigned long addr = (unsigned long)node;
+
+		/*
+		 * Compare the addresses directly rather than testing the
+		 * sign of "vmid - (long)node": the token is caller supplied
+		 * and the signed difference can overflow for arbitrary
+		 * values, sending the walk down the wrong branch.
+		 */
+		if (key < addr)
+			node = node->rb_left;
+		else if (key > addr)
+			node = node->rb_right;
+		else
+			break;
+	}
+
+	/* take the reference while the lock keeps the node registered */
+	intf = to_intf(node);
+	if (intf)
+		kvm_xinterface_get(intf);
+
+	mutex_unlock(&kvm_registry.lock);
+
+	return intf;
+}
+EXPORT_SYMBOL_GPL(kvm_xinterface_find);
+
+/*
+ * ------------------------------------------
+ * register/unregister
+ * ------------------------------------------
+ *
+ * These functions are private to the API and are only to be called
+ * by the KVM core
+ * ------------------------------------------
+ */
+
+/**
+ * kvm_xinterface_register - publish a guest's interface in the registry
+ * @intf: interface embedded in the guest object; fully initialised here
+ * @ops:  operations table; ops->owner names the module providing it
+ *
+ * Initialises @intf with a single reference, inserts it into the
+ * system-wide registry keyed by the address of its rb_node, and takes a
+ * module reference on ops->owner that is dropped again when the last
+ * interface reference is released.
+ *
+ * The caller must hold intf->ops->owner.
+ */
+void
+kvm_xinterface_register(struct kvm_xinterface *intf,
+			const struct kvm_xinterface_ops *ops)
+{
+	struct rb_root *root = &kvm_registry.root;
+	struct rb_node **new, *parent = NULL;
+	struct rb_node *node = &intf->node;
+	unsigned long key = (unsigned long)node;
+
+	memset(intf, 0, sizeof(*intf));
+	kref_init(&intf->kref);
+	intf->ops = ops;
+
+	mutex_lock(&kvm_registry.lock);
+
+	new = &root->rb_node;
+
+	/* Figure out where to put new node */
+	while (*new) {
+		unsigned long addr;
+
+		parent = *new;
+		addr = (unsigned long)parent;
+
+		/*
+		 * Order the tree by raw node address. Subtracting the two
+		 * rb_node pointers is not usable here: they belong to
+		 * unrelated objects, and the result is computed as an exact
+		 * division by sizeof(struct rb_node), so its sign is not
+		 * reliable when the distance is not a multiple of that size.
+		 * The lookup side compares byte addresses, and both sides
+		 * must agree on the ordering.
+		 */
+		if (key < addr)
+			new = &parent->rb_left;
+		else if (key > addr)
+			new = &parent->rb_right;
+		else
+			panic("kvm_xinterface: duplicate entry: %p\n", node);
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(node, parent, new);
+	rb_insert_color(node, root);
+
+	/* released when the last xinterface reference is released */
+	__module_get(intf->ops->owner);
+
+	mutex_unlock(&kvm_registry.lock);
+}
+EXPORT_SYMBOL_GPL(kvm_xinterface_register);
+
+/*
+ * Remove @intf from the registry so that no further lookups can find it,
+ * then drop the initial reference set up by kvm_xinterface_register().
+ *
+ * caller must hold intf->ops->owner
+ */
+void kvm_xinterface_unregister(struct kvm_xinterface *intf)
+{
+	struct rb_root *root = &kvm_registry.root;
+
+	mutex_lock(&kvm_registry.lock);
+	rb_erase(&intf->node, root);
+	mutex_unlock(&kvm_registry.lock);
+
+	kvm_xinterface_put(intf);
+}
+EXPORT_SYMBOL_GPL(kvm_xinterface_unregister);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/