RE: [PATCH v11 12/13] vfio/pci: Register a DMA fault response region

From: Shameerali Kolothum Thodi
Date: Thu Feb 18 2021 - 07:12:24 EST


Hi Eric,

> > -----Original Message-----
> > From: Eric Auger [mailto:eric.auger@xxxxxxxxxx]
> > Sent: 16 November 2020 11:00
> > To: eric.auger.pro@xxxxxxxxx; eric.auger@xxxxxxxxxx;
> > iommu@xxxxxxxxxxxxxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx;
> > kvm@xxxxxxxxxxxxxxx; kvmarm@xxxxxxxxxxxxxxxxxxxxx; will@xxxxxxxxxx;
> > joro@xxxxxxxxxx; maz@xxxxxxxxxx; robin.murphy@xxxxxxx;
> > alex.williamson@xxxxxxxxxx
> > Cc: jean-philippe@xxxxxxxxxx; zhangfei.gao@xxxxxxxxxx;
> > zhangfei.gao@xxxxxxxxx; vivek.gautam@xxxxxxx; Shameerali Kolothum
> > Thodi <shameerali.kolothum.thodi@xxxxxxxxxx>;
> > jacob.jun.pan@xxxxxxxxxxxxxxx; yi.l.liu@xxxxxxxxx; tn@xxxxxxxxxxxx;
> > nicoleotsuka@xxxxxxxxx; yuzenghui <yuzenghui@xxxxxxxxxx>
> > Subject: [PATCH v11 12/13] vfio/pci: Register a DMA fault response
> > region
> >
> > In preparation for vSVA, let's register a DMA fault response region,
> > where the userspace will push the page responses and increment the
> > head of the buffer. The kernel will pop those responses and inject
> > them on iommu side.
> >
> > Signed-off-by: Eric Auger <eric.auger@xxxxxxxxxx>
> > ---
> > drivers/vfio/pci/vfio_pci.c | 114 +++++++++++++++++++++++++---
> > drivers/vfio/pci/vfio_pci_private.h | 5 ++
> > drivers/vfio/pci/vfio_pci_rdwr.c | 39 ++++++++++
> > include/uapi/linux/vfio.h | 32 ++++++++
> > 4 files changed, 181 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> > index 65a83fd0e8c0..e9a904ce3f0d 100644
> > --- a/drivers/vfio/pci/vfio_pci.c
> > +++ b/drivers/vfio/pci/vfio_pci.c
> > @@ -318,9 +318,20 @@ static void vfio_pci_dma_fault_release(struct
> > vfio_pci_device *vdev,
> > kfree(vdev->fault_pages);
> > }
> >
> > -static int vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev,
> > - struct vfio_pci_region *region,
> > - struct vm_area_struct *vma)
> > +static void
> > +vfio_pci_dma_fault_response_release(struct vfio_pci_device *vdev,
> > + struct vfio_pci_region *region) {
> > + if (vdev->dma_fault_response_wq)
> > + destroy_workqueue(vdev->dma_fault_response_wq);
> > + kfree(vdev->fault_response_pages);
> > + vdev->fault_response_pages = NULL;
> > +}
> > +
> > +static int __vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev,
> > + struct vfio_pci_region *region,
> > + struct vm_area_struct *vma,
> > + u8 *pages)
> > {
> > u64 phys_len, req_len, pgoff, req_start;
> > unsigned long long addr;
> > @@ -333,14 +344,14 @@ static int vfio_pci_dma_fault_mmap(struct
> > vfio_pci_device *vdev,
> > ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
> > req_start = pgoff << PAGE_SHIFT;
> >
> > - /* only the second page of the producer fault region is mmappable */
> > + /* only the second page of the fault region is mmappable */
> > if (req_start < PAGE_SIZE)
> > return -EINVAL;
> >
> > if (req_start + req_len > phys_len)
> > return -EINVAL;
> >
> > - addr = virt_to_phys(vdev->fault_pages);
> > + addr = virt_to_phys(pages);
> > vma->vm_private_data = vdev;
> > vma->vm_pgoff = (addr >> PAGE_SHIFT) + pgoff;
> >
> > @@ -349,13 +360,29 @@ static int vfio_pci_dma_fault_mmap(struct
> > vfio_pci_device *vdev,
> > return ret;
> > }
> >
> > -static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
> > - struct vfio_pci_region *region,
> > - struct vfio_info_cap *caps)
> > +static int vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev,
> > + struct vfio_pci_region *region,
> > + struct vm_area_struct *vma)
> > +{
> > + return __vfio_pci_dma_fault_mmap(vdev, region, vma,
> > vdev->fault_pages);
> > +}
> > +
> > +static int
> > +vfio_pci_dma_fault_response_mmap(struct vfio_pci_device *vdev,
> > + struct vfio_pci_region *region,
> > + struct vm_area_struct *vma)
> > +{
> > + return __vfio_pci_dma_fault_mmap(vdev, region, vma,
> > vdev->fault_response_pages);
> > +}
> > +
> > +static int __vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
> > + struct vfio_pci_region *region,
> > + struct vfio_info_cap *caps,
> > + u32 cap_id)
> > {
> > struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
> > struct vfio_region_info_cap_fault cap = {
> > - .header.id = VFIO_REGION_INFO_CAP_DMA_FAULT,
> > + .header.id = cap_id,
> > .header.version = 1,
> > .version = 1,
> > };
> > @@ -383,6 +410,14 @@ static int
> > vfio_pci_dma_fault_add_capability(struct
> > vfio_pci_device *vdev,
> > return ret;
> > }
> >
> > +static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
> > + struct vfio_pci_region *region,
> > + struct vfio_info_cap *caps) {
> > + return __vfio_pci_dma_fault_add_capability(vdev, region, caps,
> > + VFIO_REGION_INFO_CAP_DMA_FAULT); }
> > +
> > static const struct vfio_pci_regops vfio_pci_dma_fault_regops = {
> > .rw = vfio_pci_dma_fault_rw,
> > .release = vfio_pci_dma_fault_release,
> > @@ -390,6 +425,13 @@ static const struct vfio_pci_regops
> > vfio_pci_dma_fault_regops = {
> > .add_capability = vfio_pci_dma_fault_add_capability,
> > };
> >
> > +static const struct vfio_pci_regops vfio_pci_dma_fault_response_regops = {
> > + .rw = vfio_pci_dma_fault_response_rw,
> > + .release = vfio_pci_dma_fault_response_release,
> > + .mmap = vfio_pci_dma_fault_response_mmap,
> > + .add_capability = vfio_pci_dma_fault_add_capability,

As I mentioned on the QEMU patch ([RFC v7 26/26] vfio/pci: Implement
return_page_response page response callback), it looks like we are using the
VFIO_REGION_INFO_CAP_DMA_FAULT cap id for the dma_fault_response region here
as well: its regops reuse vfio_pci_dma_fault_add_capability(), which always
passes VFIO_REGION_INFO_CAP_DMA_FAULT. Is that intentional?
(I was wondering how it worked in the first place and noticed this.)

Please check.

Thanks,
Shameer