Re: [RFC v5 12/13] KVM: kvm-vfio: generic forwarding control

From: Alex Williamson
Date: Tue Mar 31 2015 - 13:20:57 EST


On Thu, 2015-03-19 at 15:55 +0100, Eric Auger wrote:
> This patch introduces a new KVM_DEV_VFIO_DEVICE group.
>
> This is a new control channel which enables KVM to cooperate with
> viable VFIO devices.
>
> The patch introduces 2 attributes for this group:
> KVM_DEV_VFIO_DEVICE_FORWARD_IRQ, KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ.
> Their purpose is to turn a VFIO device IRQ into a forwarded IRQ and
> respectively unset the feature.
>
> The generic part introduced here interacts with VFIO, genirq, and KVM, while
> the architecture specific part mostly takes care of the virtual interrupt
> controller programming.
>
> Architecture specific implementation is enabled when
> __KVM_HAVE_ARCH_KVM_VFIO_FORWARD is set. When not set those
> functions are void.
>
> Signed-off-by: Eric Auger <eric.auger@xxxxxxxxxx>
>
> ---
>
> v3 -> v4:
> - use new kvm_vfio_dev_irq struct
> - improve error handling according to Alex comments
> - full rework of generic/arch specific split to accommodate
> unforward that never fails
> - kvm_vfio_get_vfio_device and kvm_vfio_put_vfio_device removed from
> that patch file and introduced before (since also used by Feng)
> - guard kvm_vfio_control_irq_forward call with
> __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
>
> v2 -> v3:
> - add API comments in kvm_host.h
> - improve the commit message
> - create a private kvm_vfio_fwd_irq struct
> - fwd_irq_action replaced by a bool and removal of VFIO_IRQ_CLEANUP. This
> latter action will be handled in vgic.
> - add a vfio_device handle argument to kvm_arch_set_fwd_state. The goal is
> to move platform specific stuff in architecture specific code.
> - kvm_arch_set_fwd_state renamed into kvm_arch_vfio_set_forward
> - increment the ref counter each time we do an IRQ forwarding and decrement
> this latter each time one IRQ forward is unset. Simplifies the whole
> ref counting.
> - simplification of list handling: create, search, removal
>
> v1 -> v2:
> - __KVM_HAVE_ARCH_KVM_VFIO renamed into __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
> - original patch file separated into 2 parts: generic part moved in vfio.c
> and ARM specific part(kvm_arch_set_fwd_state)
> ---
> include/linux/kvm_host.h | 47 +++++++
> virt/kvm/vfio.c | 311 ++++++++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 355 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index f4e1829..8f17f87 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -1042,6 +1042,15 @@ struct kvm_device_ops {
> unsigned long arg);
> };
>
> +/* internal self-contained structure describing a forwarded IRQ */
> +struct kvm_fwd_irq {
> + struct kvm *kvm; /* VM to inject the GSI into */
> + struct vfio_device *vdev; /* vfio device the IRQ belongs to */
> + __u32 index; /* VFIO device IRQ index */
> + __u32 subindex; /* VFIO device IRQ subindex */
> + __u32 gsi; /* gsi, ie. virtual IRQ number */
> +};
> +
> void kvm_device_get(struct kvm_device *dev);
> void kvm_device_put(struct kvm_device *dev);
> struct kvm_device *kvm_device_from_filp(struct file *filp);
> @@ -1065,6 +1074,44 @@ inline void kvm_arch_resume_guest(struct kvm *kvm) {}
>
> #endif
>
> +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
> +/**
> + * kvm_arch_set_forward - Sets forwarding for a given IRQ
> + *
> + * @kvm: handle to the VM
> + * @host_irq: physical IRQ number
> + * @guest_irq: virtual IRQ number
> + * returns 0 on success, < 0 on failure
> + */
> +int kvm_arch_set_forward(struct kvm *kvm,
> + unsigned int host_irq, unsigned int guest_irq);
> +
> +/**
> + * kvm_arch_unset_forward - Unsets forwarding for a given IRQ
> + *
> + * @kvm: handle to the VM
> + * @host_irq: physical IRQ number
> + * @guest_irq: virtual IRQ number
> + * @active: returns whether the IRQ is active
> + */
> +void kvm_arch_unset_forward(struct kvm *kvm,
> + unsigned int host_irq,
> + unsigned int guest_irq,
> + bool *active);
> +
> +#else
> +static inline int kvm_arch_set_forward(struct kvm *kvm,
> + unsigned int host_irq,
> + unsigned int guest_irq)
> +{
> + return -ENOENT;
> +}
> +static inline void kvm_arch_unset_forward(struct kvm *kvm,
> + unsigned int host_irq,
> + unsigned int guest_irq,
> + bool *active) {}
> +#endif
> +
> #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
>
> static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
> diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
> index c995e51..4847597 100644
> --- a/virt/kvm/vfio.c
> +++ b/virt/kvm/vfio.c
> @@ -19,14 +19,30 @@
> #include <linux/uaccess.h>
> #include <linux/vfio.h>
> #include "vfio.h"
> +#include <linux/platform_device.h>
> +#include <linux/irq.h>
> +#include <linux/spinlock.h>
> +#include <linux/interrupt.h>
> +#include <linux/delay.h>
> +
> +#define DEBUG_FORWARD
> +#define DEBUG_UNFORWARD


These shouldn't be here

>
> struct kvm_vfio_group {
> struct list_head node;
> struct vfio_group *vfio_group;
> };
>
> +/* private linkable kvm_fwd_irq struct */
> +struct kvm_vfio_fwd_irq_node {
> + struct list_head link;
> + struct kvm_fwd_irq fwd_irq;
> +};
> +
> struct kvm_vfio {
> struct list_head group_list;
> + /* list of registered VFIO forwarded IRQs */
> + struct list_head fwd_node_list;
> struct mutex lock;
> bool noncoherent;
> };
> @@ -320,12 +336,293 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
> return -ENXIO;
> }
>
> +/**
> + * kvm_vfio_find_fwd_irq - checks whether a forwarded IRQ already is
> + * registered in the list of forwarded IRQs
> + *
> + * @kv: handle to the kvm-vfio device
> + * @fwd: handle to the forwarded irq struct
> + * In the positive returns the handle to its node in the kvm-vfio
> + * forwarded IRQ list, returns NULL otherwise.
> + * Must be called with kv->lock hold.
> + */
> +static struct kvm_vfio_fwd_irq_node *kvm_vfio_find_fwd_irq(
> + struct kvm_vfio *kv,
> + struct kvm_fwd_irq *fwd)
> +{
> + struct kvm_vfio_fwd_irq_node *node;
> +
> + list_for_each_entry(node, &kv->fwd_node_list, link) {
> + if ((node->fwd_irq.index == fwd->index) &&
> + (node->fwd_irq.subindex == fwd->subindex) &&
> + (node->fwd_irq.vdev == fwd->vdev))
> + return node;
> + }
> + return NULL;
> +}
> +/**
> + * kvm_vfio_register_fwd_irq - Allocates, populates and registers a
> + * forwarded IRQ
> + *
> + * @kv: handle to the kvm-vfio device
> + * @fwd: handle to the forwarded irq struct
> + * In case of success returns a handle to the new list node,
> + * NULL otherwise.
> + * Must be called with kv->lock hold.
> + */
> +static struct kvm_vfio_fwd_irq_node *kvm_vfio_register_fwd_irq(
> + struct kvm_vfio *kv,
> + struct kvm_fwd_irq *fwd)
> +{
> + struct kvm_vfio_fwd_irq_node *node;
> +
> + node = kmalloc(sizeof(*node), GFP_KERNEL);
> + if (!node)
> + return ERR_PTR(-ENOMEM);
> +
> + node->fwd_irq = *fwd;
> +
> + list_add(&node->link, &kv->fwd_node_list);
> +
> + return node;
> +}
> +
> +/**
> + * kvm_vfio_unregister_fwd_irq - unregisters and frees a forwarded IRQ
> + *
> + * @node: handle to the node struct
> + * Must be called with kv->lock hold.
> + */
> +static void kvm_vfio_unregister_fwd_irq(struct kvm_vfio_fwd_irq_node *node)
> +{
> + list_del(&node->link);
> + kvm_vfio_put_vfio_device(node->fwd_irq.vdev);
> + kfree(node);
> +}
> +
> +static int kvm_vfio_platform_get_irq(struct vfio_device *vdev, int index)
> +{
> + int hwirq;
> + struct platform_device *platdev;
> + struct device *dev = kvm_vfio_external_base_device(vdev);
> +
> + if (dev->bus == &platform_bus_type) {
> + platdev = to_platform_device(dev);
> + hwirq = platform_get_irq(platdev, index);
> + return hwirq;
> + } else
> + return -EINVAL;
> +}

This seems like it was intended to be bus_type agnostic, but it's got
platform in the name.

> +
> +/**
> + * kvm_vfio_set_forward - turns a VFIO device IRQ into a forwarded IRQ
> + * @kv: handle to the kvm-vfio device
> + * @fd: file descriptor of the vfio device the IRQ belongs to
> + * @fwd: handle to the forwarded irq struct
> + *
> + * Registers and turns an IRQ as forwarded. The operation only is allowed
> + * if the IRQ is not in progress (active at interrupt controller level or
> + * auto-masked by the handler). In case the user-space masked the IRQ,
> + * the operation will fail too.
> + * returns:
> + * -EAGAIN if the IRQ is in progress or VFIO masked
> + * -EEXIST if the IRQ is already registered as forwarded
> + * -ENOMEM on memory shortage
> + */
> +static int kvm_vfio_set_forward(struct kvm_vfio *kv, int fd,
> + struct kvm_fwd_irq *fwd)
> +{
> + int ret = -EAGAIN;
> + struct kvm_vfio_fwd_irq_node *node;
> + struct vfio_platform_device *vpdev = vfio_device_data(fwd->vdev);
> + int index = fwd->index;
> + int host_irq = kvm_vfio_platform_get_irq(fwd->vdev, fwd->index);
> + bool active;
> +
> + kvm_arch_halt_guest(fwd->kvm);
> +
> + disable_irq(host_irq);
> +
> + active = kvm_vfio_external_is_active(vpdev, index);
> +
> + if (active)
> + goto out;
> +
> + node = kvm_vfio_register_fwd_irq(kv, fwd);
> + if (IS_ERR(node)) {
> + ret = PTR_ERR(node);
> + goto out;
> + }
> +
> + kvm_vfio_external_set_automasked(vpdev, index, false);
> +
> + ret = kvm_arch_set_forward(fwd->kvm, host_irq, fwd->gsi);
> +
> +out:
> + enable_irq(host_irq);
> +
> + kvm_arch_resume_guest(fwd->kvm);
> +
> + return ret;


If only we were passing around a vfio_device instead of a
vfio_platform_device and we had abstraction in place for the
kvm_vfio_external_foo callbacks...

> +}
> +
> +/**
> + * kvm_vfio_unset_forward - Sets a VFIO device IRQ as non forwarded
> + * @kv: handle to the kvm-vfio device
> + * @node: handle to the node to unset
> + *
> + * Calls the architecture specific implementation of set_forward and
> + * unregisters the IRQ from the forwarded IRQ list.
> + */
> +static void kvm_vfio_unset_forward(struct kvm_vfio *kv,
> + struct kvm_vfio_fwd_irq_node *node)
> +{
> + struct kvm_fwd_irq fwd = node->fwd_irq;
> + int host_irq = kvm_vfio_platform_get_irq(fwd.vdev, fwd.index);
> + int index = fwd.index;
> + struct vfio_platform_device *vpdev = vfio_device_data(fwd.vdev);
> + bool active = false;
> +
> + kvm_arch_halt_guest(fwd.kvm);
> +
> + disable_irq(host_irq);
> +
> + kvm_arch_unset_forward(fwd.kvm, host_irq, fwd.gsi, &active);
> +
> + if (active)
> + kvm_vfio_external_mask(vpdev, index);
> +
> + kvm_vfio_external_set_automasked(vpdev, index, true);
> + enable_irq(host_irq);
> +
> + kvm_arch_resume_guest(fwd.kvm);
> +
> + kvm_vfio_unregister_fwd_irq(node);

Same

> +}
> +
> +static int kvm_vfio_control_irq_forward(struct kvm_device *kdev, long attr,
> + int32_t __user *argp)
> +{
> + struct kvm_vfio_dev_irq user_fwd_irq;
> + struct kvm_fwd_irq fwd;
> + struct vfio_device *vdev;
> + struct kvm_vfio *kv = kdev->private;
> + struct kvm_vfio_fwd_irq_node *node;
> + unsigned long minsz;
> + int ret = 0;
> + u32 *gsi = NULL;
> + size_t size;
> +
> + minsz = offsetofend(struct kvm_vfio_dev_irq, count);
> +
> + if (copy_from_user(&user_fwd_irq, argp, minsz))
> + return -EFAULT;
> +
> + if (user_fwd_irq.argsz < minsz)
> + return -EINVAL;
> +
> + vdev = kvm_vfio_get_vfio_device(user_fwd_irq.fd);
> + if (IS_ERR(vdev))
> + return PTR_ERR(vdev);
> +
> + ret = kvm_vfio_platform_get_irq(vdev, user_fwd_irq.index);
> + if (ret < 0)
> + goto error;
> +
> + size = sizeof(__u32);
> + if (user_fwd_irq.argsz - minsz < size) {
> + ret = -EINVAL;
> + goto error;
> + }
> +
> + gsi = memdup_user((void __user *)((unsigned long)argp + minsz), size);
> + if (IS_ERR(gsi)) {
> + ret = PTR_ERR(gsi);
> + goto error;
> + }
> +
> + fwd.vdev = vdev;
> + fwd.kvm = kdev->kvm;
> + fwd.index = user_fwd_irq.index;
> + fwd.subindex = 0;
> + fwd.gsi = *gsi;
> +
> + node = kvm_vfio_find_fwd_irq(kv, &fwd);
> +
> + switch (attr) {
> + case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ:
> + if (node) {
> + ret = -EEXIST;
> + goto error;
> + }
> + mutex_lock(&kv->lock);
> + ret = kvm_vfio_set_forward(kv, user_fwd_irq.fd, &fwd);
> + mutex_unlock(&kv->lock);
> + break;
> + case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ:
> + if (!node) {
> + ret = -ENOENT;
> + goto error;
> + }
> + mutex_lock(&kv->lock);
> + kvm_vfio_unset_forward(kv, node);
> + mutex_unlock(&kv->lock);
> + kvm_vfio_put_vfio_device(vdev);
> + ret = 0;
> + break;
> + }
> +
> + kfree(gsi);
> +error:
> + if (ret < 0)
> + kvm_vfio_put_vfio_device(vdev);
> + return ret;
> +}
> +
> +static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg)
> +{
> + int32_t __user *argp = (int32_t __user *)(unsigned long)arg;
> + int ret;
> +
> + switch (attr) {
> +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
> + case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ:
> + case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ:
> + ret = kvm_vfio_control_irq_forward(kdev, attr, argp);
> + break;
> +#endif
> + default:
> + ret = -ENXIO;
> + }
> + return ret;
> +}
> +
> +/**
> + * kvm_vfio_clean_fwd_irq - Unset forwarding state of all
> + * registered forwarded IRQs and free their list nodes.
> + * @kv: kvm-vfio device
> + *
> + * Loop on all registered device/IRQ combos, reset the non forwarded state,
> + * void the lists and release the reference
> + */
> +static int kvm_vfio_clean_fwd_irq(struct kvm_vfio *kv)
> +{
> + struct kvm_vfio_fwd_irq_node *node, *tmp;
> +
> + list_for_each_entry_safe(node, tmp, &kv->fwd_node_list, link) {
> + kvm_vfio_unset_forward(kv, node);
> + }
> + return 0;
> +}
> +
> static int kvm_vfio_set_attr(struct kvm_device *dev,
> struct kvm_device_attr *attr)
> {
> switch (attr->group) {
> case KVM_DEV_VFIO_GROUP:
> return kvm_vfio_set_group(dev, attr->attr, attr->addr);
> + case KVM_DEV_VFIO_DEVICE:
> + return kvm_vfio_set_device(dev, attr->attr, attr->addr);
> }
>
> return -ENXIO;
> @@ -341,10 +638,17 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
> case KVM_DEV_VFIO_GROUP_DEL:
> return 0;
> }
> -
> break;
> +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
> + case KVM_DEV_VFIO_DEVICE:
> + switch (attr->attr) {
> + case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ:
> + case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ:
> + return 0;
> + }
> + break;
> +#endif
> }
> -
> return -ENXIO;
> }
>
> @@ -358,7 +662,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
> list_del(&kvg->node);
> kfree(kvg);
> }
> -
> + kvm_vfio_clean_fwd_irq(kv);
> kvm_vfio_update_coherency(dev);
>
> kfree(kv);
> @@ -390,6 +694,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
> return -ENOMEM;
>
> INIT_LIST_HEAD(&kv->group_list);
> + INIT_LIST_HEAD(&kv->fwd_node_list);
> mutex_init(&kv->lock);
>
> dev->private = kv;

It's so close to not tainting the kvm-vfio device with anything platform
specific, can't we provide that last bit of abstraction somehow?
Thanks,

Alex

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/