[PATCH] iommufd: Enforce IOMMU_RESV_SW_MSI upon hwpt_paging allocation

From: Nicolin Chen
Date: Sun Jul 28 2024 - 19:51:37 EST


IOMMU_RESV_SW_MSI is a unique region defined by an IOMMU driver. Though it
is eventually used by a device for address translation to an MSI location
(including nested cases), practically it is a universal region across all
domains allocated for the IOMMU that defines it.

Currently the IOMMUFD core fetches and reserves the region when a device
attaches to a hwpt_paging. This works for the hwpt_paging-only case, but
might not work for a nested case where a device attaches directly to a
hwpt_nested, bypassing the hwpt_paging attachment.

Move the enforcement earlier, to the hwpt_paging allocation function. Then
clean up all the SW_MSI-related code in the attach/replace routines.

Signed-off-by: Nicolin Chen <nicolinc@xxxxxxxxxx>
---
drivers/iommu/iommufd/device.c | 67 ++-----------------------
drivers/iommu/iommufd/hw_pagetable.c | 45 +++++++++++++++++
drivers/iommu/iommufd/io_pagetable.c | 13 +++--
drivers/iommu/iommufd/iommufd_private.h | 5 +-
4 files changed, 55 insertions(+), 75 deletions(-)

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 9a7ec5997c61c..bc8baee32a9da 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -90,7 +90,6 @@ static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
kref_init(&new_igroup->ref);
mutex_init(&new_igroup->lock);
INIT_LIST_HEAD(&new_igroup->device_list);
- new_igroup->sw_msi_start = PHYS_ADDR_MAX;
/* group reference moves into new_igroup */
new_igroup->group = group;

@@ -293,64 +292,6 @@ u32 iommufd_device_to_id(struct iommufd_device *idev)
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD);

-static int iommufd_group_setup_msi(struct iommufd_group *igroup,
- struct iommufd_hwpt_paging *hwpt_paging)
-{
- phys_addr_t sw_msi_start = igroup->sw_msi_start;
- int rc;
-
- /*
- * If the IOMMU driver gives a IOMMU_RESV_SW_MSI then it is asking us to
- * call iommu_get_msi_cookie() on its behalf. This is necessary to setup
- * the MSI window so iommu_dma_prepare_msi() can install pages into our
- * domain after request_irq(). If it is not done interrupts will not
- * work on this domain.
- *
- * FIXME: This is conceptually broken for iommufd since we want to allow
- * userspace to change the domains, eg switch from an identity IOAS to a
- * DMA IOAS. There is currently no way to create a MSI window that
- * matches what the IRQ layer actually expects in a newly created
- * domain.
- */
- if (sw_msi_start != PHYS_ADDR_MAX && !hwpt_paging->msi_cookie) {
- rc = iommu_get_msi_cookie(hwpt_paging->common.domain,
- sw_msi_start);
- if (rc)
- return rc;
-
- /*
- * iommu_get_msi_cookie() can only be called once per domain,
- * it returns -EBUSY on later calls.
- */
- hwpt_paging->msi_cookie = true;
- }
- return 0;
-}
-
-static int iommufd_hwpt_paging_attach(struct iommufd_hwpt_paging *hwpt_paging,
- struct iommufd_device *idev)
-{
- int rc;
-
- lockdep_assert_held(&idev->igroup->lock);
-
- rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt,
- idev->dev,
- &idev->igroup->sw_msi_start);
- if (rc)
- return rc;
-
- if (list_empty(&idev->igroup->device_list)) {
- rc = iommufd_group_setup_msi(idev->igroup, hwpt_paging);
- if (rc) {
- iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt,
- idev->dev);
- return rc;
- }
- }
- return 0;
-}
-
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
struct iommufd_device *idev)
{
@@ -364,7 +305,8 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
}

if (hwpt_is_paging(hwpt)) {
- rc = iommufd_hwpt_paging_attach(to_hwpt_paging(hwpt), idev);
+ rc = iopt_table_enforce_dev_resv_regions(
+ &to_hwpt_paging(hwpt)->ioas->iopt, idev->dev);
if (rc)
goto err_unlock;
}
@@ -453,15 +395,12 @@ iommufd_group_do_replace_paging(struct iommufd_group *igroup,
hwpt_paging->ioas != to_hwpt_paging(old_hwpt)->ioas) {
list_for_each_entry(cur, &igroup->device_list, group_item) {
rc = iopt_table_enforce_dev_resv_regions(
- &hwpt_paging->ioas->iopt, cur->dev, NULL);
+ &hwpt_paging->ioas->iopt, cur->dev);
if (rc)
goto err_unresv;
}
}

- rc = iommufd_group_setup_msi(igroup, hwpt_paging);
- if (rc)
- goto err_unresv;
return 0;

err_unresv:
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index aefde4443671e..dfb132e4dfbd2 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -82,6 +82,42 @@ iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging)
return 0;
}

+static int
+iommufd_hwpt_paging_enforce_sw_msi(struct iommufd_hwpt_paging *hwpt_paging,
+ struct iommufd_device *idev)
+{
+ struct iommu_domain *domain = hwpt_paging->common.domain;
+ struct io_pagetable *iopt = &hwpt_paging->ioas->iopt;
+ phys_addr_t sw_msi_start = PHYS_ADDR_MAX;
+ struct iommu_resv_region *resv;
+ LIST_HEAD(resv_regions);
+ int rc = 0;
+
+ if (iommufd_should_fail())
+ return -EINVAL;
+
+ /* FIXME: drivers allocate memory but there is no failure propagated */
+ iommu_get_resv_regions(idev->dev, &resv_regions);
+ list_for_each_entry(resv, &resv_regions, list) {
+ if (resv->type != IOMMU_RESV_SW_MSI)
+ continue;
+ down_write(&iopt->iova_rwsem);
+ /* owner=domain so that abort/destroy() can clean it up */
+ rc = iopt_reserve_iova(iopt, resv->start,
+ resv->length - 1 + resv->start, domain);
+ up_write(&iopt->iova_rwsem);
+ if (!rc)
+ sw_msi_start = resv->start;
+ break;
+ }
+ iommu_put_resv_regions(idev->dev, &resv_regions);
+
+ if (sw_msi_start == PHYS_ADDR_MAX)
+ return rc;
+
+ return iommu_get_msi_cookie(domain, sw_msi_start);
+}
+
/**
* iommufd_hwpt_paging_alloc() - Get a PAGING iommu_domain for a device
* @ictx: iommufd context
@@ -173,6 +209,15 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
goto out_abort;
}

+ /*
+ * IOMMU_RESV_SW_MSI is a universal per-IOMMU IOVA region arbitrarily
+ * defined by a driver. Any hw_pagetable that is allocated for such an
+ * IOMMU must enforce the region in its reserved space.
+ */
+ rc = iommufd_hwpt_paging_enforce_sw_msi(hwpt_paging, idev);
+ if (rc)
+ goto out_abort;
+
/*
* immediate_attach exists only to accommodate iommu drivers that cannot
* directly allocate a domain. These drivers do not finish creating the
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index 05fd9d3abf1b8..c9b7c7f6e046b 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -1368,8 +1368,7 @@ void iopt_remove_access(struct io_pagetable *iopt,

/* Narrow the valid_iova_itree to include reserved ranges from a device. */
int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
- struct device *dev,
- phys_addr_t *sw_msi_start)
+ struct device *dev)
{
struct iommu_resv_region *resv;
LIST_HEAD(resv_regions);
@@ -1387,14 +1386,14 @@ int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
list_for_each_entry(resv, &resv_regions, list) {
if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
continue;
-
- if (sw_msi_start && resv->type == IOMMU_RESV_MSI)
- num_hw_msi++;
- if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) {
- *sw_msi_start = resv->start;
+ if (resv->type == IOMMU_RESV_SW_MSI) {
num_sw_msi++;
+ continue;
}

+ if (resv->type == IOMMU_RESV_MSI)
+ num_hw_msi++;
+
rc = iopt_reserve_iova(iopt, resv->start,
resv->length - 1 + resv->start, dev);
if (rc)
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 92efe30a8f0d0..d61ea73776261 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -89,8 +89,7 @@ int iopt_table_add_domain(struct io_pagetable *iopt,
void iopt_table_remove_domain(struct io_pagetable *iopt,
struct iommu_domain *domain);
int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
- struct device *dev,
- phys_addr_t *sw_msi_start);
+ struct device *dev);
int iopt_set_allow_iova(struct io_pagetable *iopt,
struct rb_root_cached *allowed_iova);
int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
@@ -302,7 +301,6 @@ struct iommufd_hwpt_paging {
struct iommufd_ioas *ioas;
bool auto_domain : 1;
bool enforce_cache_coherency : 1;
- bool msi_cookie : 1;
bool nest_parent : 1;
/* Head at iommufd_ioas::hwpt_list */
struct list_head hwpt_item;
@@ -382,7 +380,6 @@ struct iommufd_group {
struct iommu_group *group;
struct iommufd_hw_pagetable *hwpt;
struct list_head device_list;
- phys_addr_t sw_msi_start;
};

/*
--
2.43.0