RE: [RFC v2 4/5] vfio/type1: Add IOVA range capability support
From: Shameerali Kolothum Thodi
Date: Tue Jan 23 2018 - 07:51:56 EST
> -----Original Message-----
> From: Auger Eric [mailto:eric.auger@xxxxxxxxxx]
> Sent: Tuesday, January 23, 2018 11:17 AM
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@xxxxxxxxxx>;
> alex.williamson@xxxxxxxxxx; pmorel@xxxxxxxxxxxxxxxxxx
> Cc: kvm@xxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx; Linuxarm
> <linuxarm@xxxxxxxxxx>; John Garry <john.garry@xxxxxxxxxx>; xuwei (O)
> <xuwei5@xxxxxxxxxx>
> Subject: Re: [RFC v2 4/5] vfio/type1: Add IOVA range capability support
>
> Hi Shameer,
>
> On 12/01/18 17:45, Shameer Kolothum wrote:
> > This allows the user-space to retrieve the supported IOVA range(s),
> > excluding any reserved regions. The implementation is based on
> > capability chains, added to VFIO_IOMMU_GET_INFO ioctl.
> >
> > Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@xxxxxxxxxx>
> > ---
> > drivers/vfio/vfio_iommu_type1.c | 91 +++++++++++++++++++++++++++++++++++++++++
> > include/uapi/linux/vfio.h | 23 +++++++++++
> > 2 files changed, 114 insertions(+)
> >
> > diff --git a/drivers/vfio/vfio_iommu_type1.c
> > b/drivers/vfio/vfio_iommu_type1.c index 47ea490..dc6ed85 100644
> > --- a/drivers/vfio/vfio_iommu_type1.c
> > +++ b/drivers/vfio/vfio_iommu_type1.c
> > @@ -1893,6 +1893,67 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
> > return ret;
> > }
> >
> > +static int vfio_add_iova_cap(struct vfio_info_cap *caps, void
> > +*cap_type,
> Can't you pass cap_type directly as a struct vfio_iommu_type1_info_cap_iova?
>
> Also, maybe use a more explicit name for cap_type, such as cap_iova_ranges?
> > + size_t size)
> > +{
> > + struct vfio_info_cap_header *header;
> > + struct vfio_iommu_type1_info_cap_iova *iova_cap, *iova = cap_type;
> > +
> > + header = vfio_info_cap_add(caps, size,
> > + VFIO_IOMMU_TYPE1_INFO_CAP_IOVA, 1);
> > + if (IS_ERR(header))
> > + return PTR_ERR(header);
> > +
> > + iova_cap = container_of(header,
> > + struct vfio_iommu_type1_info_cap_iova, header);
> > + iova_cap->nr_iovas = iova->nr_iovas;
> > + memcpy(iova_cap->iova_ranges, iova->iova_ranges,
> > + iova->nr_iovas * sizeof(*iova->iova_ranges));
> > + return 0;
> > +}
> > +
> > +static int vfio_build_iommu_iova_caps(struct vfio_iommu *iommu,
> > + struct vfio_info_cap *caps)
> > +{
> > + struct vfio_iommu_type1_info_cap_iova *iova_cap;
> > + struct vfio_iova *iova;
> > + size_t size;
> > + int iovas = 0, i = 0, ret;
> > +
> > + mutex_lock(&iommu->lock);
> > +
> > + list_for_each_entry(iova, &iommu->iova_list, list)
> > + iovas++;
> > +
> > + if (!iovas) {
> > + ret = EINVAL;
> > + goto out_unlock;
> > + }
> > +
> > + size = sizeof(*iova_cap) + (iovas * sizeof(*iova_cap->iova_ranges));
> > +
> > + iova_cap = kzalloc(size, GFP_KERNEL);
> > + if (!iova_cap) {
> > + ret = -ENOMEM;
> > + goto out_unlock;
> > + }
> > +
> > + iova_cap->nr_iovas = iovas;
> > +
> > + list_for_each_entry(iova, &iommu->iova_list, list) {
> > + iova_cap->iova_ranges[i].start = iova->start;
> > + iova_cap->iova_ranges[i].end = iova->end;
> > + i++;
> > + }
> > +
> > + ret = vfio_add_iova_cap(caps, iova_cap, size);
> > +
> > + kfree(iova_cap);
> > +out_unlock:
> > + mutex_unlock(&iommu->lock);
> > + return ret;
> > +}
> > +
> > static long vfio_iommu_type1_ioctl(void *iommu_data,
> > unsigned int cmd, unsigned long arg)
> > {
> > @@ -1914,6 +1975,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
> > }
> > } else if (cmd == VFIO_IOMMU_GET_INFO) {
> > struct vfio_iommu_type1_info info;
> > + struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
> > + int ret;
> >
> > minsz = offsetofend(struct vfio_iommu_type1_info,
> > iova_pgsizes);
> >
> > @@ -1927,6 +1990,34 @@ static long vfio_iommu_type1_ioctl(void
> > *iommu_data,
> >
> > info.iova_pgsizes = vfio_pgsize_bitmap(iommu);
> >
> > + if (info.argsz == minsz)
> > + goto done;
> > +
> > + ret = vfio_build_iommu_iova_caps(iommu, &caps);
> > + if (ret)
> > + return ret;
> > +
> > + if (caps.size) {
> > + info.flags |= VFIO_IOMMU_INFO_CAPS;
> > + minsz = offsetofend(struct vfio_iommu_type1_info,
> > + cap_offset);
> > + if (info.argsz < sizeof(info) + caps.size) {
> > + info.argsz = sizeof(info) + caps.size;
> > + info.cap_offset = 0;
> > + } else {
> > + vfio_info_cap_shift(&caps, sizeof(info));
> > + if (copy_to_user((void __user *)arg +
> > + sizeof(info), caps.buf,
> > + caps.size)) {
> > + kfree(caps.buf);
> > + return -EFAULT;
> > + }
> > + info.cap_offset = sizeof(info);
> > + }
> > +
> > + kfree(caps.buf);
> > + }
> > +done:
> > return copy_to_user((void __user *)arg, &info, minsz) ?
> > -EFAULT : 0;
> >
> > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > index e3301db..8671448 100644
> > --- a/include/uapi/linux/vfio.h
> > +++ b/include/uapi/linux/vfio.h
> > @@ -517,7 +517,30 @@ struct vfio_iommu_type1_info {
> > __u32 argsz;
> > __u32 flags;
> > #define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */
> > +#define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */
> > __u64 iova_pgsizes; /* Bitmap of supported page sizes */
> > + __u32 cap_offset; /* Offset within info struct of first cap */
> > +};
> > +
> > +/*
> > + * The IOVA capability allows to report the valid IOVA range(s)
> > + * excluding any reserved regions associated with dev group. Any dma
> > + * map attempt outside the valid iova range will return error.
> > + *
> > + * The structures below define version 1 of this capability.
> > + */
> > +#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA 1
> VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE?
> > +
> > +struct vfio_iova_range {
> > + __u64 start;
> > + __u64 end;
> > +};
> > +
> > +struct vfio_iommu_type1_info_cap_iova {
> cap_iova_ranges?
> > + struct vfio_info_cap_header header;
> > + __u32 nr_iovas;
> > + __u32 reserved;
> > + struct vfio_iova_range iova_ranges[];
> > };
> >
> > #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
> >
> Otherwise looks good to me.
Ok. I will take care of them.
Thanks,
Shameer
> Thanks
>
> Eric