[PATCH] iommu/s390: Implement blocking domain
From: Niklas Schnelle
Date: Tue Aug 06 2024 - 09:59:32 EST
This fixes a crash when surprise hot-unplugging a PCI device. The crash
happens because, during hot-unplug, attaching the default domain in
__iommu_group_set_domain_nofail() fails when the platform no longer
recognizes the device, as it has already been removed, and we end up
with a NULL domain pointer and a UAF. This is exactly the case referred
to in the second comment in __iommu_device_set_domain() and, just as
stated there, if we can instead attach the blocking domain the UAF is
prevented, as this can handle the already removed device. Implement the
blocking domain to use this handling. This would still leave us with a
warning for a failed attach. As failing to attach when the device is no
longer present is expected behavior, turn this into an explicit -ENODEV
error and don't warn for it. Also change the error return for a NULL
zdev to -EIO, as we don't want to ignore this case, which would be a
serious bug.
Fixes: c76c067e488c ("s390/pci: Use dma-iommu layer")
Signed-off-by: Niklas Schnelle <schnelle@xxxxxxxxxxxxx>
---
Note: I somewhat suspect this to be related to the following discussion
or at least we have seen the same backtraces in reports that we suspect
to be caused by the issue fixed with this patch. In the case I was able
to reproduce with vfio-pci pass-through to a KVM guest I got a different
trace though.
Organizational note: I'll be on vacation starting Thursday. Matt will
then take over and send new revisions as necessary.
---
drivers/iommu/iommu.c | 7 ++++--
drivers/iommu/s390-iommu.c | 55 ++++++++++++++++++++++++++++++++++++++--------
2 files changed, 51 insertions(+), 11 deletions(-)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index ed6c5cb60c5a..91b3b23bf55c 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -119,8 +119,11 @@ static int __iommu_group_set_domain(struct iommu_group *group,
static void __iommu_group_set_domain_nofail(struct iommu_group *group,
struct iommu_domain *new_domain)
{
- WARN_ON(__iommu_group_set_domain_internal(
- group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED));
+ int ret = __iommu_group_set_domain_internal(
+ group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED);
+
+ /* Allow attach to fail when the device is gone */
+ WARN_ON(ret && ret != -ENODEV);
}
static int iommu_setup_default_domain(struct iommu_group *group,
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index d8eaa7ea380b..3d8b5fe9a555 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -33,6 +33,8 @@ struct s390_domain {
struct rcu_head rcu;
};
+static struct s390_domain s390_blocking_domain;
+
static inline unsigned int calc_rtx(dma_addr_t ptr)
{
return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
@@ -376,12 +378,21 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
struct zpci_dev *zdev = to_zpci_dev(dev);
unsigned long flags;
+ /*
+ * The static blocking domain doesn't need to track devices nor
+ * does it have an IOAT registered. As there is no harm
+ * in keeping zdev->s390_domain set to blocking until
+ * it is overwritten detach is a no-op.
+ */
+ if (s390_domain->domain.type == IOMMU_DOMAIN_BLOCKED)
+ return;
+
spin_lock_irqsave(&s390_domain->list_lock, flags);
list_del_rcu(&zdev->iommu_list);
spin_unlock_irqrestore(&s390_domain->list_lock, flags);
zpci_unregister_ioat(zdev, 0);
- zdev->s390_domain = NULL;
+ zdev->s390_domain = &s390_blocking_domain;
zdev->dma_table = NULL;
}
@@ -395,7 +406,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
int cc;
if (!zdev)
- return -ENODEV;
+ return -EIO;
if (WARN_ON(domain->geometry.aperture_start > zdev->end_dma ||
domain->geometry.aperture_end < zdev->start_dma))
@@ -403,16 +414,16 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
if (zdev->s390_domain)
s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
-
+ /*
+ * Detach set the blocking domain. If we fail now DMA remains blocked
+ * and the blocking domain attached.
+ */
cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
virt_to_phys(s390_domain->dma_table), &status);
- /*
- * If the device is undergoing error recovery the reset code
- * will re-establish the new domain.
- */
- if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
+ if (cc == 3 || status == ZPCI_PCI_ST_FUNC_NOT_AVAIL)
+ return -ENODEV;
+ else if (cc)
return -EIO;
-
zdev->dma_table = s390_domain->dma_table;
zdev->s390_domain = s390_domain;
@@ -702,6 +713,30 @@ struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
return &zdev->s390_domain->ctrs;
}
+/* Attach @dev to the blocking domain; always reports success (returns 0). */
+static int blocking_domain_attach_device(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct zpci_dev *zdev = to_zpci_dev(dev);
+
+ /* Without a zdev there is nothing to detach, report success */
+ if (!zdev)
+ return 0;
+
+ /* Detach sets the blocking domain */
+ if (zdev->s390_domain)
+ s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
+ return 0;
+}
+
+static struct s390_domain s390_blocking_domain = {
+ .domain = {
+ .type = IOMMU_DOMAIN_BLOCKED,
+ .ops = &(const struct iommu_domain_ops) {
+ .attach_dev = blocking_domain_attach_device,
+ }}
+};
+
int zpci_init_iommu(struct zpci_dev *zdev)
{
u64 aperture_size;
@@ -777,6 +812,8 @@ static int __init s390_iommu_init(void)
subsys_initcall(s390_iommu_init);
static const struct iommu_ops s390_iommu_ops = {
+ .blocked_domain = &s390_blocking_domain.domain,
+ .release_domain = &s390_blocking_domain.domain,
.capable = s390_iommu_capable,
.domain_alloc_paging = s390_domain_alloc_paging,
.probe_device = s390_iommu_probe_device,
---
base-commit: de9c2c66ad8e787abec7c9d7eff4f8c3cdd28aed
change-id: 20240806-blocking_domain-25b6c2fc6a62
Best regards,
--
Niklas Schnelle