Re: [PATCH v3] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

From: Stefano Garzarella
Date: Wed Mar 20 2024 - 07:23:39 EST


On Wed, Mar 20, 2024 at 06:19:12PM +0800, Wang Rong wrote:
From: Rong Wang <w_angrong@xxxxxxx>

Once an iommu domain is enabled for a device, the MSI
translation tables have to be there for software-managed MSI.
Otherwise, a platform with software-managed MSI and without an
irq bypass function cannot get a correct memory write event
from pcie and will not get irqs.
The solution is to obtain the MSI phy base address from the
iommu reserved region and set it in the iommu MSI cookie;
the translation tables will then be created when the irq is requested.

Change log
----------

v1->v2:
- add resv iotlb to avoid overlap mapping.
v2->v3:
- there is no need to export the iommu symbol anymore.

Signed-off-by: Rong Wang <w_angrong@xxxxxxx>
---
drivers/vhost/vdpa.c | 59 +++++++++++++++++++++++++++++++++++++++++---
1 file changed, 56 insertions(+), 3 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index ba52d128aeb7..28b56b10372b 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -49,6 +49,7 @@ struct vhost_vdpa {
struct completion completion;
struct vdpa_device *vdpa;
struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
+ struct vhost_iotlb resv_iotlb;
struct device dev;
struct cdev cdev;
atomic_t opened;
@@ -247,6 +248,7 @@ static int _compat_vdpa_reset(struct vhost_vdpa *v)
static int vhost_vdpa_reset(struct vhost_vdpa *v)
{
v->in_batch = 0;
+ vhost_iotlb_reset(&v->resv_iotlb);
return _compat_vdpa_reset(v);
}

@@ -1219,10 +1221,15 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
msg->iova + msg->size - 1 > v->range.last)
return -EINVAL;

+ if (vhost_iotlb_itree_first(&v->resv_iotlb, msg->iova,
+ msg->iova + msg->size - 1))
+ return -EINVAL;
+
if (vhost_iotlb_itree_first(iotlb, msg->iova,
msg->iova + msg->size - 1))
return -EEXIST;

+

Unnecessary new line here.

if (vdpa->use_va)
return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size,
msg->uaddr, msg->perm);
@@ -1307,6 +1314,45 @@ static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
return vhost_chr_write_iter(dev, from);
}

+static int vhost_vdpa_resv_iommu_region(struct iommu_domain *domain, struct device *dma_dev,
+ struct vhost_iotlb *resv_iotlb)
+{
+ struct list_head dev_resv_regions;
+ phys_addr_t resv_msi_base = 0;
+ struct iommu_resv_region *region;
+ int ret = 0;
+ bool with_sw_msi = false;
+ bool with_hw_msi = false;
+
+ INIT_LIST_HEAD(&dev_resv_regions);
+ iommu_get_resv_regions(dma_dev, &dev_resv_regions);
+
+ list_for_each_entry(region, &dev_resv_regions, list) {
+ ret = vhost_iotlb_add_range_ctx(resv_iotlb, region->start,
+ region->start + region->length - 1,
+ 0, 0, NULL);
+ if (ret) {
+ vhost_iotlb_reset(resv_iotlb);
+ break;
+ }
+
+ if (region->type == IOMMU_RESV_MSI)
+ with_hw_msi = true;
+
+ if (region->type == IOMMU_RESV_SW_MSI) {
+ resv_msi_base = region->start;

Can it happen that there are multiple regions of the IOMMU_RESV_SW_MSI type?

In this case, is it correct to overwrite `resv_msi_base`?

+ with_sw_msi = true;
+ }
+ }
+
+ if (!ret && !with_hw_msi && with_sw_msi)
+ ret = iommu_get_msi_cookie(domain, resv_msi_base);

If `iommu_get_msi_cookie()` fails:
- Should we avoid calling iommu_put_resv_regions()?
- Should we also call `vhost_iotlb_reset(resv_iotlb)`, as is done for the
`vhost_iotlb_add_range_ctx()` failure?

If that is the case, maybe it's better to add an error label where we do the cleanup.

+
+ iommu_put_resv_regions(dma_dev, &dev_resv_regions);
+
+ return ret;
+}
+
static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
@@ -1335,11 +1381,16 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)

ret = iommu_attach_device(v->domain, dma_dev);
if (ret)
- goto err_attach;
+ goto err_alloc_domain;

- return 0;
+ ret = vhost_vdpa_resv_iommu_region(v->domain, dma_dev, &v->resv_iotlb);
+ if (ret)
+ goto err_attach_device;

-err_attach:
+ return 0;

I suggest adding a new line here to separate the error path from the success path.

+err_attach_device:
+ iommu_detach_device(v->domain, dma_dev);
+err_alloc_domain:
iommu_domain_free(v->domain);
v->domain = NULL;
return ret;
@@ -1595,6 +1646,8 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
goto err;
}

+ vhost_iotlb_init(&v->resv_iotlb, 0, 0);
+

If I understand the lifetime of v->resv_iotlb correctly, we initialize it here in the vdpa_driver.probe() and we fill it during the `open` of the vhost-vdpa character device.

So, should we reset it in the `release` of the vhost-vdpa character device?

Thanks,
Stefano

r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
if (r)
goto err;
--
2.27.0