Re: [RFC PATCH] PPC: KVM: vfio kvm device: support spapr tce
From: Alexey Kardashevskiy
Date: Fri Oct 04 2013 - 21:52:22 EST
On 05.10.2013 2:05, Alex Williamson wrote:
> On Fri, 2013-10-04 at 22:24 +1000, Alexey Kardashevskiy wrote:
>> This is a very rough change set required for ppc64 to use this KVM device.
>>
>> vfio_rm.c is a piece of code which is going to be called from the realmode (MMU off),
>> and I will put everything spapr-related under #ifdef CONFIG_SPAPR_TCE_IOMMU,
>> it is just friday and I have to run :)
>>
>> This is an RFC but it works.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@xxxxxxxxx>
>> ---
>> arch/powerpc/kvm/Kconfig | 1 +
>> arch/powerpc/kvm/Makefile | 4 ++++
>> include/linux/kvm_host.h | 8 ++++---
>> include/linux/vfio.h | 3 +++
>> include/uapi/linux/kvm.h | 1 +
>> virt/kvm/vfio.c | 46 ++++++++++++++++++++++++++++++++++++++++
>> virt/kvm/vfio_rm.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++
>> 7 files changed, 114 insertions(+), 3 deletions(-)
>> create mode 100644 virt/kvm/vfio_rm.c
>>
>> diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
>> index 61b3535..d1b7f64 100644
>> --- a/arch/powerpc/kvm/Kconfig
>> +++ b/arch/powerpc/kvm/Kconfig
>> @@ -60,6 +60,7 @@ config KVM_BOOK3S_64
>> select KVM_BOOK3S_64_HANDLER
>> select KVM
>> select SPAPR_TCE_IOMMU
>> + select KVM_VFIO
>> ---help---
>> Support running unmodified book3s_64 and book3s_32 guest kernels
>> in virtual machines on book3s_64 host processors.
>> diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
>> index 6646c95..fc2878b 100644
>> --- a/arch/powerpc/kvm/Makefile
>> +++ b/arch/powerpc/kvm/Makefile
>> @@ -55,6 +55,8 @@ kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
>>
>> kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
>> $(KVM)/coalesced_mmio.o \
>> + $(KVM)/vfio.o \
>> + $(KVM)/vfio_rm.o \
>> fpu.o \
>> book3s_paired_singles.o \
>> book3s_pr.o \
>> @@ -76,6 +78,7 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
>> kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
>> book3s_hv_rm_xics.o
>> kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
>> + $(KVM)/vfio_rm.o \
>> book3s_hv_rmhandlers.o \
>> book3s_hv_rm_mmu.o \
>> book3s_64_vio_hv.o \
>> @@ -89,6 +92,7 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
>>
>> kvm-book3s_64-module-objs := \
>> $(KVM)/kvm_main.o \
>> + $(KVM)/vfio.o \
>> $(KVM)/eventfd.o \
>> powerpc.o \
>> emulate.o \
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index ad2b581..43c0290 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -407,6 +407,8 @@ struct kvm {
>> #endif
>> long tlbs_dirty;
>> struct list_head devices;
>> +
>> + struct kvm_vfio *vfio;
>
>
> <cringe> can't this be on kvm->arch?
It can, I just thought since it is valid for more than just one
platform, it can go here.
>> };
>>
>> #define kvm_err(fmt, ...) \
>> @@ -677,15 +679,15 @@ void kvm_arch_register_noncoherent_dma(struct kvm *kvm);
>> void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm);
>> bool kvm_arch_has_noncoherent_dma(struct kvm *kvm);
>> #else
>> -static inline void kvm_arch_register_noncoherent_dma(void)
>> +static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
>> {
>> }
>>
>> -static inline void kvm_arch_unregister_noncoherent_dma(void)
>> +static inline void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
>> {
>> }
>>
>> -static inline bool kvm_arch_has_noncoherent_dma(void)
>> +static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
>> {
>> return false;
>> }
>
> Will fix in my series.
>
>> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
>> index 24579a0..681e19b 100644
>> --- a/include/linux/vfio.h
>> +++ b/include/linux/vfio.h
>> @@ -97,4 +97,7 @@ extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
>> extern void vfio_group_put_external_user(struct vfio_group *group);
>> extern int vfio_external_user_iommu_id(struct vfio_group *group);
>>
>> +extern struct iommu_group *vfio_find_group_by_liobn(struct kvm *kvm,
>> + unsigned long liobn);
>> +
>
> Wrong header file.
>
>> #endif /* VFIO_H */
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index 7c1a349..a74ad16 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -847,6 +847,7 @@ struct kvm_device_attr {
>> #define KVM_DEV_VFIO_GROUP 1
>> #define KVM_DEV_VFIO_GROUP_ADD 1
>> #define KVM_DEV_VFIO_GROUP_DEL 2
>> +#define KVM_DEV_VFIO_SPAPR_TCE_LIOBN 2
>>
>> /*
>> * ioctls for VM fds
>> diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
>> index 2e336a7..39dea9f 100644
>> --- a/virt/kvm/vfio.c
>> +++ b/virt/kvm/vfio.c
>> @@ -22,6 +22,7 @@
>> struct kvm_vfio_group {
>> struct list_head node;
>> struct vfio_group *vfio_group;
>> + uint64_t liobn; /* sPAPR */
>
> Perhaps an arch pointer or at least a union.
>
>> };
>>
>> struct kvm_vfio {
>> @@ -188,12 +189,52 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
>> return -ENXIO;
>> }
>>
>> +static int kvm_vfio_set_spapr_tce_liobn(struct kvm_device *dev,
>> + long attr, u64 arg)
>> +{
>> + struct kvm_vfio *kv = dev->private;
>> + struct vfio_group *vfio_group;
>> + struct kvm_vfio_group *kvg;
>> + void __user *argp = (void __user *)arg;
>> + struct fd f;
>> + int32_t fd;
>> + uint64_t liobn = attr;
>> +
>> + if (get_user(fd, (int32_t __user *)argp))
>> + return -EFAULT;
>> +
>> + f = fdget(fd);
>> + if (!f.file)
>> + return -EBADF;
>> +
>> + vfio_group = kvm_vfio_group_get_external_user(f.file);
>> + fdput(f);
>> +
>> + list_for_each_entry(kvg, &kv->group_list, node) {
>> + if (kvg->vfio_group == vfio_group) {
>> + WARN_ON(kvg->liobn);
>
> Users shouldn't be able to trigger WARN_ON so easily, return -EBUSY,
> allow it to be unset and re-set, or just allow the overwrite.
>
>> + kvg->liobn = liobn;
>> + kvm_vfio_group_put_external_user(vfio_group);
>> + return 0;
>> + }
>> + }
>> +
>> + kvm_vfio_group_put_external_user(vfio_group);
>> +
>> + return -ENXIO;
>> +}
>> +
>> static int kvm_vfio_set_attr(struct kvm_device *dev,
>> struct kvm_device_attr *attr)
>> {
>> switch (attr->group) {
>> case KVM_DEV_VFIO_GROUP:
>> return kvm_vfio_set_group(dev, attr->attr, attr->addr);
>> +#ifdef CONFIG_SPAPR_TCE_IOMMU
>> + case KVM_DEV_VFIO_SPAPR_TCE_LIOBN:
>> + return kvm_vfio_set_spapr_tce_liobn(dev, attr->attr,
>> + attr->addr);
>> +#endif
>> }
>>
>> return -ENXIO;
>> @@ -211,6 +252,10 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
>> }
>>
>> break;
>> +#ifdef CONFIG_SPAPR_TCE_IOMMU
>> + case KVM_DEV_VFIO_SPAPR_TCE_LIOBN:
>> + return 0;
>> +#endif
>> }
>>
>> return -ENXIO;
>> @@ -250,6 +295,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
>> mutex_init(&kv->lock);
>>
>> dev->private = kv;
>> + dev->kvm->vfio = kv;
>>
>> return 0;
>> }
>> diff --git a/virt/kvm/vfio_rm.c b/virt/kvm/vfio_rm.c
>> new file mode 100644
>> index 0000000..ee9fd96
>> --- /dev/null
>> +++ b/virt/kvm/vfio_rm.c
>> @@ -0,0 +1,54 @@
>> +#include <linux/errno.h>
>> +#include <linux/file.h>
>> +#include <linux/kvm_host.h>
>> +#include <linux/list.h>
>> +#include <linux/module.h>
>> +#include <linux/mutex.h>
>> +#include <linux/slab.h>
>> +#include <linux/uaccess.h>
>> +#include <linux/vfio.h>
>> +
>> +struct kvm_vfio_group {
>> + struct list_head node;
>> + struct vfio_group *vfio_group;
>> + uint64_t liobn; /* sPAPR */
>> +};
>> +
>> +struct kvm_vfio {
>> + struct list_head group_list;
>> + struct mutex lock;
>> + bool noncoherent;
>> +};
>> +
>> +struct vfio_group {
>> + struct kref kref;
>> + int minor;
>> + atomic_t container_users;
>> + struct iommu_group *iommu_group;
>> + struct vfio_container *container;
>> + struct list_head device_list;
>> + struct mutex device_lock;
>> + struct device *dev;
>> + struct notifier_block nb;
>> + struct list_head vfio_next;
>> + struct list_head container_next;
>> + atomic_t opened;
>> +};
>> +
>> +struct iommu_group *vfio_find_group_by_liobn(struct kvm *kvm, unsigned long liobn)
>> +{
>> + struct kvm_vfio_group *kvg;
>> +
>> + if (!kvm->vfio)
>> + return NULL;
>> +
>> + list_for_each_entry(kvg, &kvm->vfio->group_list, node) {
>> + if (kvg->liobn == liobn)
>> + return kvg->vfio_group->iommu_group;
>> + }
>> +
>> + return NULL;
>> +}
>> +EXPORT_SYMBOL_GPL(vfio_find_group_by_liobn);
>> +
>> +
>
> You're kidding, right? These are intentionally private data structures
> that are blatantly copied so that you can extract what you want. NACK.
> The iommu_group is available off struct device, do you even need vfio or
> this kvm-vfio device to get from liobn to iommu_group? Thanks,
This is an RFC. I am not saying this is what can go to upstream or
anything. I am not kidding (why everyone assumes that?), I am showing
what API I would like to have in the VFIO KVM device. I need the way to
get iommu_table (which is in a private data of iommu_group) by LIOBN and
the VFIO KVM device is the _only_ entity which will know about this
connection (LIOBN is made up by the qemu and told to the guest) and it
cannot go to the kvm.ko - and the patch like this is the best way to
show it as my english obviously sucks.
--
With best regards
Alexey Kardashevskiy -- icq: 52150396
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/