[PATCH v3 2/3] iommufd: Move iommufd_sw_msi and related functions to driver.c

From: Nicolin Chen
Date: Mon Mar 03 2025 - 15:53:59 EST


To make iommufd_sw_msi() available to the iommu core, which is built under
a different Kconfig, move it and its related functions to driver.c, and
declare it in the iommu-priv header with an inline stub that returns
-EOPNOTSUPP when it is compiled out. iommufd_sw_msi_install() continues to
be used internally by iommufd, so declare it in the private header.

Signed-off-by: Nicolin Chen <nicolinc@xxxxxxxxxx>
---
drivers/iommu/iommu-priv.h | 13 +++
drivers/iommu/iommufd/iommufd_private.h | 7 +-
drivers/iommu/iommufd/device.c | 127 +-----------------------
drivers/iommu/iommufd/driver.c | 125 +++++++++++++++++++++++
4 files changed, 148 insertions(+), 124 deletions(-)
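
For review context, not part of the commit itself: a minimal sketch of the
kind of iommu-core caller this declaration-plus-stub arrangement enables.
The helper below is hypothetical; the point is that code built outside
CONFIG_IOMMUFD_DRIVER_CORE can reference iommufd_sw_msi() unconditionally
and gets -EOPNOTSUPP from the inline stub when iommufd is compiled out:

	/* Hypothetical caller sketch -- not part of this patch */
	static int example_prepare_msi(struct iommu_domain *domain,
				       struct msi_desc *desc,
				       phys_addr_t msi_addr)
	{
		int rc = iommufd_sw_msi(domain, desc, msi_addr);

		/* Compiled-out stub: nothing for iommufd to translate */
		if (rc == -EOPNOTSUPP)
			return 0;
		return rc;
	}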

diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h
index b4508423e13b..c74fff25be78 100644
--- a/drivers/iommu/iommu-priv.h
+++ b/drivers/iommu/iommu-priv.h
@@ -5,6 +5,7 @@
#define __LINUX_IOMMU_PRIV_H

#include <linux/iommu.h>
+#include <linux/msi.h>

static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
{
@@ -43,4 +44,16 @@ void iommu_detach_group_handle(struct iommu_domain *domain,
int iommu_replace_group_handle(struct iommu_group *group,
struct iommu_domain *new_domain,
struct iommu_attach_handle *handle);
+
+#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER_CORE) && IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
+int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
+ phys_addr_t msi_addr);
+#else /* !CONFIG_IOMMUFD_DRIVER_CORE || !CONFIG_IRQ_MSI_IOMMU */
+static inline int iommufd_sw_msi(struct iommu_domain *domain,
+ struct msi_desc *desc, phys_addr_t msi_addr)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_IOMMUFD_DRIVER_CORE && CONFIG_IRQ_MSI_IOMMU */
+
#endif /* __LINUX_IOMMU_PRIV_H */
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 246297452a44..b6897a9991ee 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -32,8 +32,11 @@ struct iommufd_sw_msi_maps {
DECLARE_BITMAP(bitmap, 64);
};

-int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
- phys_addr_t msi_addr);
+#ifdef CONFIG_IRQ_MSI_IOMMU
+int iommufd_sw_msi_install(struct iommufd_ctx *ictx,
+ struct iommufd_hwpt_paging *hwpt_paging,
+ struct iommufd_sw_msi_map *msi_map);
+#endif

struct iommufd_ctx {
struct file *file;
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 4e107f69f951..ca17c2b7f455 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -294,129 +294,7 @@ u32 iommufd_device_to_id(struct iommufd_device *idev)
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, "IOMMUFD");

-/*
- * Get a iommufd_sw_msi_map for the msi physical address requested by the irq
- * layer. The mapping to IOVA is global to the iommufd file descriptor, every
- * domain that is attached to a device using the same MSI parameters will use
- * the same IOVA.
- */
-static __maybe_unused struct iommufd_sw_msi_map *
-iommufd_sw_msi_get_map(struct iommufd_ctx *ictx, phys_addr_t msi_addr,
- phys_addr_t sw_msi_start)
-{
- struct iommufd_sw_msi_map *cur;
- unsigned int max_pgoff = 0;
-
- lockdep_assert_held(&ictx->sw_msi_lock);
-
- list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) {
- if (cur->sw_msi_start != sw_msi_start)
- continue;
- max_pgoff = max(max_pgoff, cur->pgoff + 1);
- if (cur->msi_addr == msi_addr)
- return cur;
- }
-
- if (ictx->sw_msi_id >=
- BITS_PER_BYTE * sizeof_field(struct iommufd_sw_msi_maps, bitmap))
- return ERR_PTR(-EOVERFLOW);
-
- cur = kzalloc(sizeof(*cur), GFP_KERNEL);
- if (!cur)
- return ERR_PTR(-ENOMEM);
-
- cur->sw_msi_start = sw_msi_start;
- cur->msi_addr = msi_addr;
- cur->pgoff = max_pgoff;
- cur->id = ictx->sw_msi_id++;
- list_add_tail(&cur->sw_msi_item, &ictx->sw_msi_list);
- return cur;
-}
-
-static int iommufd_sw_msi_install(struct iommufd_ctx *ictx,
- struct iommufd_hwpt_paging *hwpt_paging,
- struct iommufd_sw_msi_map *msi_map)
-{
- unsigned long iova;
-
- lockdep_assert_held(&ictx->sw_msi_lock);
-
- iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE;
- if (!test_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap)) {
- int rc;
-
- rc = iommu_map(hwpt_paging->common.domain, iova,
- msi_map->msi_addr, PAGE_SIZE,
- IOMMU_WRITE | IOMMU_READ | IOMMU_MMIO,
- GFP_KERNEL_ACCOUNT);
- if (rc)
- return rc;
- __set_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap);
- }
- return 0;
-}
-
-/*
- * Called by the irq code if the platform translates the MSI address through the
- * IOMMU. msi_addr is the physical address of the MSI page. iommufd will
- * allocate a fd global iova for the physical page that is the same on all
- * domains and devices.
- */
#ifdef CONFIG_IRQ_MSI_IOMMU
-int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
- phys_addr_t msi_addr)
-{
- struct device *dev = msi_desc_to_dev(desc);
- struct iommufd_hwpt_paging *hwpt_paging;
- struct iommu_attach_handle *raw_handle;
- struct iommufd_attach_handle *handle;
- struct iommufd_sw_msi_map *msi_map;
- struct iommufd_ctx *ictx;
- unsigned long iova;
- int rc;
-
- /*
- * It is safe to call iommu_attach_handle_get() here because the iommu
- * core code invokes this under the group mutex which also prevents any
- * change of the attach handle for the duration of this function.
- */
- iommu_group_mutex_assert(dev);
-
- raw_handle =
- iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0);
- if (IS_ERR(raw_handle))
- return 0;
- hwpt_paging = find_hwpt_paging(domain->iommufd_hwpt);
-
- handle = to_iommufd_handle(raw_handle);
- /* No IOMMU_RESV_SW_MSI means no change to the msi_msg */
- if (handle->idev->igroup->sw_msi_start == PHYS_ADDR_MAX)
- return 0;
-
- ictx = handle->idev->ictx;
- guard(mutex)(&ictx->sw_msi_lock);
- /*
- * The input msi_addr is the exact byte offset of the MSI doorbell, we
- * assume the caller has checked that it is contained with a MMIO region
- * that is secure to map at PAGE_SIZE.
- */
- msi_map = iommufd_sw_msi_get_map(handle->idev->ictx,
- msi_addr & PAGE_MASK,
- handle->idev->igroup->sw_msi_start);
- if (IS_ERR(msi_map))
- return PTR_ERR(msi_map);
-
- rc = iommufd_sw_msi_install(ictx, hwpt_paging, msi_map);
- if (rc)
- return rc;
- __set_bit(msi_map->id, handle->idev->igroup->required_sw_msi.bitmap);
-
- iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE;
- msi_desc_set_iommu_msi_iova(desc, iova, PAGE_SHIFT);
- return 0;
-}
-#endif
-
static int iommufd_group_setup_msi(struct iommufd_group *igroup,
struct iommufd_hwpt_paging *hwpt_paging)
{
@@ -443,6 +321,7 @@ static int iommufd_group_setup_msi(struct iommufd_group *igroup,
}
return 0;
}
+#endif

static int
iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
@@ -458,6 +337,7 @@ iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
if (rc)
return rc;

+#ifdef CONFIG_IRQ_MSI_IOMMU
if (list_empty(&idev->igroup->device_list)) {
rc = iommufd_group_setup_msi(idev->igroup, hwpt_paging);
if (rc) {
@@ -466,6 +346,7 @@ iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
return rc;
}
}
+#endif
return 0;
}

@@ -682,9 +563,11 @@ iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup,
}
}

+#ifdef CONFIG_IRQ_MSI_IOMMU
rc = iommufd_group_setup_msi(igroup, hwpt_paging);
if (rc)
goto err_unresv;
+#endif
return 0;

err_unresv:
diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c
index 2d98b04ff1cb..386953c510b3 100644
--- a/drivers/iommu/iommufd/driver.c
+++ b/drivers/iommu/iommufd/driver.c
@@ -49,5 +49,130 @@ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
}
EXPORT_SYMBOL_NS_GPL(iommufd_viommu_find_dev, "IOMMUFD");

+#ifdef CONFIG_IRQ_MSI_IOMMU
+/*
+ * Get an iommufd_sw_msi_map for the msi physical address requested by the irq
+ * layer. The mapping to IOVA is global to the iommufd file descriptor; every
+ * domain that is attached to a device using the same MSI parameters will use
+ * the same IOVA.
+ */
+static struct iommufd_sw_msi_map *
+iommufd_sw_msi_get_map(struct iommufd_ctx *ictx, phys_addr_t msi_addr,
+ phys_addr_t sw_msi_start)
+{
+ struct iommufd_sw_msi_map *cur;
+ unsigned int max_pgoff = 0;
+
+ lockdep_assert_held(&ictx->sw_msi_lock);
+
+ list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) {
+ if (cur->sw_msi_start != sw_msi_start)
+ continue;
+ max_pgoff = max(max_pgoff, cur->pgoff + 1);
+ if (cur->msi_addr == msi_addr)
+ return cur;
+ }
+
+ if (ictx->sw_msi_id >=
+ BITS_PER_BYTE * sizeof_field(struct iommufd_sw_msi_maps, bitmap))
+ return ERR_PTR(-EOVERFLOW);
+
+ cur = kzalloc(sizeof(*cur), GFP_KERNEL);
+ if (!cur)
+ return ERR_PTR(-ENOMEM);
+
+ cur->sw_msi_start = sw_msi_start;
+ cur->msi_addr = msi_addr;
+ cur->pgoff = max_pgoff;
+ cur->id = ictx->sw_msi_id++;
+ list_add_tail(&cur->sw_msi_item, &ictx->sw_msi_list);
+ return cur;
+}
+
+int iommufd_sw_msi_install(struct iommufd_ctx *ictx,
+ struct iommufd_hwpt_paging *hwpt_paging,
+ struct iommufd_sw_msi_map *msi_map)
+{
+ unsigned long iova;
+
+ lockdep_assert_held(&ictx->sw_msi_lock);
+
+ iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE;
+ if (!test_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap)) {
+ int rc;
+
+ rc = iommu_map(hwpt_paging->common.domain, iova,
+ msi_map->msi_addr, PAGE_SIZE,
+ IOMMU_WRITE | IOMMU_READ | IOMMU_MMIO,
+ GFP_KERNEL_ACCOUNT);
+ if (rc)
+ return rc;
+ __set_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap);
+ }
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_sw_msi_install, "IOMMUFD");
+
+/*
+ * Called by the irq code if the platform translates the MSI address through the
+ * IOMMU. msi_addr is the physical address of the MSI page. iommufd will
+ * allocate an fd-global iova for the physical page that is the same on all
+ * domains and devices.
+ */
+int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
+ phys_addr_t msi_addr)
+{
+ struct device *dev = msi_desc_to_dev(desc);
+ struct iommufd_hwpt_paging *hwpt_paging;
+ struct iommu_attach_handle *raw_handle;
+ struct iommufd_attach_handle *handle;
+ struct iommufd_sw_msi_map *msi_map;
+ struct iommufd_ctx *ictx;
+ unsigned long iova;
+ int rc;
+
+ /*
+ * It is safe to call iommu_attach_handle_get() here because the iommu
+ * core code invokes this under the group mutex which also prevents any
+ * change of the attach handle for the duration of this function.
+ */
+ iommu_group_mutex_assert(dev);
+
+ raw_handle =
+ iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0);
+ if (IS_ERR(raw_handle))
+ return 0;
+ hwpt_paging = find_hwpt_paging(domain->iommufd_hwpt);
+
+ handle = to_iommufd_handle(raw_handle);
+ /* No IOMMU_RESV_SW_MSI means no change to the msi_msg */
+ if (handle->idev->igroup->sw_msi_start == PHYS_ADDR_MAX)
+ return 0;
+
+ ictx = handle->idev->ictx;
+ guard(mutex)(&ictx->sw_msi_lock);
+ /*
+ * The input msi_addr is the exact byte offset of the MSI doorbell; we
+ * assume the caller has checked that it is contained within an MMIO region
+ * that is secure to map at PAGE_SIZE.
+ */
+ msi_map = iommufd_sw_msi_get_map(handle->idev->ictx,
+ msi_addr & PAGE_MASK,
+ handle->idev->igroup->sw_msi_start);
+ if (IS_ERR(msi_map))
+ return PTR_ERR(msi_map);
+
+ rc = iommufd_sw_msi_install(ictx, hwpt_paging, msi_map);
+ if (rc)
+ return rc;
+ __set_bit(msi_map->id, handle->idev->igroup->required_sw_msi.bitmap);
+
+ iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE;
+ msi_desc_set_iommu_msi_iova(desc, iova, PAGE_SHIFT);
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_sw_msi, "IOMMUFD");
+#endif
+
MODULE_DESCRIPTION("iommufd code shared with builtin modules");
MODULE_LICENSE("GPL");
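
A note on the IOVA layout the moved helpers implement: each distinct MSI
doorbell page seen on the same iommufd gets a stable page-slot offset, so
every domain on that fd maps a given doorbell at the same IOVA. A rough
illustration with invented example values (the window base comes from the
group's IOMMU_RESV_SW_MSI reservation):

	/*
	 * Illustration only -- example values, not taken from this patch:
	 *
	 *   sw_msi_start = 0x8000000  (IOMMU_RESV_SW_MSI window base)
	 *   doorbell A at phys 0xfee00000 -> pgoff 0 -> IOVA 0x8000000
	 *   doorbell B at phys 0xfee01000 -> pgoff 1 -> IOVA 0x8001000
	 */
	iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE;

iommufd_sw_msi_get_map() hands out the pgoff slots per (sw_msi_start,
msi_addr) pair, and iommufd_sw_msi_install() maps each page into a domain
at most once, tracking it in present_sw_msi.bitmap.
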
--
2.43.0