Re: [PATCH 4/4] vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES

From: Alex Williamson
Date: Thu May 16 2019 - 14:42:39 EST


On Fri, 10 May 2019 10:22:35 +0200
Pierre Morel <pmorel@xxxxxxxxxxxxx> wrote:

> We implement a capability intercafe for VFIO_IOMMU_GET_INFO and add the
> first capability: VFIO_IOMMU_INFO_CAPABILITIES.
>
> When calling the ioctl, the user must specify
> VFIO_IOMMU_INFO_CAPABILITIES to retrieve the capabilities and must check
> in the answer if capabilities are supported.
> Older kernel will not check nor set the VFIO_IOMMU_INFO_CAPABILITIES in
> the flags of vfio_iommu_type1_info.
>
> The iommu get_attr callback will be called to retrieve the specific
> attributes and fill the capabilities, VFIO_IOMMU_INFO_CAP_QFN for the
> PCI query function attributes and VFIO_IOMMU_INFO_CAP_QGRP for the
> PCI query function group.
>
> Signed-off-by: Pierre Morel <pmorel@xxxxxxxxxxxxx>
> ---
> drivers/vfio/vfio_iommu_type1.c | 95 ++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 94 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index d0f731c..f7f8120 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -1658,6 +1658,70 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
> return ret;
> }
>
> +int vfio_iommu_type1_caps(struct vfio_iommu *iommu, struct vfio_info_cap *caps,
> + size_t size)
> +{
> + struct vfio_domain *d;
> + struct vfio_iommu_type1_info_block *info_fn;
> + struct vfio_iommu_type1_info_block *info_grp;
> + unsigned long total_size, fn_size, grp_size;
> + int ret;
> +
> + d = list_first_entry(&iommu->domain_list, struct vfio_domain, next);
> + if (!d)
> + return -ENODEV;
> + /* The size of these capabilities are device dependent */
> + fn_size = iommu_domain_get_attr(d->domain,
> + DOMAIN_ATTR_ZPCI_FN_SIZE, NULL);
> + if (fn_size < 0)
> + return fn_size;

What if non-Z archs want to use this? The function is architected
specifically for this one use case, fail if any component is not there
which means it requires a re-write to add further support. If
ZPCI_FN_SIZE isn't support, move on to the next thing.

> + fn_size += sizeof(struct vfio_info_cap_header);
> + total_size = fn_size;

Here too, total_size should be initialized to zero and each section +=
the size they'd like to add.

> +
> + grp_size = iommu_domain_get_attr(d->domain,
> + DOMAIN_ATTR_ZPCI_GRP_SIZE, NULL);
> + if (grp_size < 0)
> + return grp_size;
> + grp_size += sizeof(struct vfio_info_cap_header);
> + total_size += grp_size;
> +
> + /* Tell caller to call us with a greater buffer */
> + if (total_size > size) {
> + caps->size = total_size;
> + return 0;
> + }
> +
> + info_fn = kzalloc(fn_size, GFP_KERNEL);
> + if (!info_fn)
> + return -ENOMEM;

Maybe fn_size was zero because we're not on Z.

> + ret = iommu_domain_get_attr(d->domain,
> + DOMAIN_ATTR_ZPCI_FN, &info_fn->data);

Kernel internal structures != user api. Thanks,

Alex

> + if (ret < 0)
> + return ret;
> +
> + info_fn->header.id = VFIO_IOMMU_INFO_CAP_QFN;
> +
> + ret = vfio_info_add_capability(caps, &info_fn->header, fn_size);
> + if (ret)
> + goto err_fn;
> +
> + info_grp = kzalloc(grp_size, GFP_KERNEL);
> + if (!info_grp)
> + goto err_fn;
> + ret = iommu_domain_get_attr(d->domain,
> + DOMAIN_ATTR_ZPCI_GRP, &info_grp->data);
> + if (ret < 0)
> + goto err_grp;
> + info_grp->header.id = VFIO_IOMMU_INFO_CAP_QGRP;
> + ret = vfio_info_add_capability(caps, &info_grp->header, grp_size);
> +
> +err_grp:
> + kfree(info_grp);
> +err_fn:
> + kfree(info_fn);
> + return ret;
> +}
> +
> static long vfio_iommu_type1_ioctl(void *iommu_data,
> unsigned int cmd, unsigned long arg)
> {
> @@ -1679,6 +1743,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
> }
> } else if (cmd == VFIO_IOMMU_GET_INFO) {
> struct vfio_iommu_type1_info info;
> + struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
> + int ret;
>
> minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
>
> @@ -1688,7 +1754,34 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
> if (info.argsz < minsz)
> return -EINVAL;
>
> - info.flags = VFIO_IOMMU_INFO_PGSIZES;
> + if (info.flags & VFIO_IOMMU_INFO_CAPABILITIES) {
> + minsz = offsetofend(struct vfio_iommu_type1_info,
> + cap_offset);
> + if (info.argsz < minsz)
> + return -EINVAL;
> + ret = vfio_iommu_type1_caps(iommu, &caps,
> + info.argsz - sizeof(info));
> + if (ret)
> + return ret;
> + }
> + if (caps.size) {
> + if (info.argsz < sizeof(info) + caps.size) {
> + info.argsz = sizeof(info) + caps.size;
> + info.cap_offset = 0;
> + } else {
> + if (copy_to_user((void __user *)arg +
> + sizeof(info), caps.buf,
> + caps.size)) {
> + kfree(caps.buf);
> + return -EFAULT;
> + }
> +
> + info.cap_offset = sizeof(info);
> + }
> + kfree(caps.buf);
> + }
> +
> + info.flags |= VFIO_IOMMU_INFO_PGSIZES;
>
> info.iova_pgsizes = vfio_pgsize_bitmap(iommu);
>