[PATCH v8 3/6] iommu/s390: Fix potential s390_domain aperture shrinking

From: Niklas Schnelle
Date: Tue Oct 25 2022 - 07:57:42 EST


The s390 IOMMU driver currently sets the IOMMU domain's aperture to
match the device specific DMA address range of the device that is first
attached. This is not ideal. For one, if the domain has no device
attached in the meantime, the aperture could be shrunk, allowing
translations outside the aperture to exist in the translation tables.
Also this is a bit of a misuse of the aperture which really should
describe what addresses can be translated and not some device specific
limitations.

Instead of misusing the aperture like this we can instead create
reserved ranges for the ranges inaccessible to the attached devices
allowing devices with overlapping ranges to still share an IOMMU domain.
This also significantly simplifies s390_iommu_attach_device() allowing
us to move the aperture check to the beginning of the function and
removing the need to hold the device list's lock to check the aperture.

As we then use the same aperture for all domains and it only depends on
the table properties we can already check zdev->start_dma/end_dma at
probe time and turn the check on attach into a WARN_ON().

Suggested-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
Reviewed-by: Matthew Rosato <mjrosato@xxxxxxxxxxxxx>
Reviewed-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
Signed-off-by: Niklas Schnelle <schnelle@xxxxxxxxxxxxx>
---
v7->v8:
- Added Jason's R-b
- Added GFP_KERNEL param to iommu_alloc_resv_region()
v5->v6:
- Return -EINVAL after WARN_ON() in attach
v4->v5:
- Make aperture check in attach a WARN_ON() and fail in probe if
zdev->start_dma/end_dma doesn't fit in aperture (Jason)

drivers/iommu/s390-iommu.c | 63 ++++++++++++++++++++++++++------------
1 file changed, 43 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index af83ccde16a4..9b3adc61005c 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -62,6 +62,9 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
kfree(s390_domain);
return NULL;
}
+ s390_domain->domain.geometry.force_aperture = true;
+ s390_domain->domain.geometry.aperture_start = 0;
+ s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;

spin_lock_init(&s390_domain->dma_table_lock);
spin_lock_init(&s390_domain->list_lock);
@@ -102,11 +105,15 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
struct s390_domain *s390_domain = to_s390_domain(domain);
struct zpci_dev *zdev = to_zpci_dev(dev);
unsigned long flags;
- int cc, rc = 0;
+ int cc;

if (!zdev)
return -ENODEV;

+ if (WARN_ON(domain->geometry.aperture_start > zdev->end_dma ||
+ domain->geometry.aperture_end < zdev->start_dma))
+ return -EINVAL;
+
if (zdev->s390_domain)
__s390_iommu_detach_device(zdev);
else if (zdev->dma_table)
@@ -118,30 +125,14 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
return -EIO;
zdev->dma_table = s390_domain->dma_table;

- spin_lock_irqsave(&s390_domain->list_lock, flags);
- /* First device defines the DMA range limits */
- if (list_empty(&s390_domain->devices)) {
- domain->geometry.aperture_start = zdev->start_dma;
- domain->geometry.aperture_end = zdev->end_dma;
- domain->geometry.force_aperture = true;
- /* Allow only devices with identical DMA range limits */
- } else if (domain->geometry.aperture_start != zdev->start_dma ||
- domain->geometry.aperture_end != zdev->end_dma) {
- spin_unlock_irqrestore(&s390_domain->list_lock, flags);
- rc = -EINVAL;
- goto out_unregister;
- }
+ zdev->dma_table = s390_domain->dma_table;
zdev->s390_domain = s390_domain;
+
+ spin_lock_irqsave(&s390_domain->list_lock, flags);
list_add(&zdev->iommu_list, &s390_domain->devices);
spin_unlock_irqrestore(&s390_domain->list_lock, flags);

return 0;
-
-out_unregister:
- zpci_unregister_ioat(zdev, 0);
- zdev->dma_table = NULL;
-
- return rc;
}

static void s390_iommu_detach_device(struct iommu_domain *domain,
@@ -155,6 +146,30 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
zpci_dma_init_device(zdev);
}

+static void s390_iommu_get_resv_regions(struct device *dev,
+ struct list_head *list)
+{
+ struct zpci_dev *zdev = to_zpci_dev(dev);
+ struct iommu_resv_region *region;
+
+ if (zdev->start_dma) {
+ region = iommu_alloc_resv_region(0, zdev->start_dma, 0,
+ IOMMU_RESV_RESERVED, GFP_KERNEL);
+ if (!region)
+ return;
+ list_add_tail(&region->list, list);
+ }
+
+ if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) {
+ region = iommu_alloc_resv_region(zdev->end_dma + 1,
+ ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1,
+ 0, IOMMU_RESV_RESERVED, GFP_KERNEL);
+ if (!region)
+ return;
+ list_add_tail(&region->list, list);
+ }
+}
+
static struct iommu_device *s390_iommu_probe_device(struct device *dev)
{
struct zpci_dev *zdev;
@@ -164,6 +179,13 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev)

zdev = to_zpci_dev(dev);

+ if (zdev->start_dma > zdev->end_dma ||
+ zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1)
+ return ERR_PTR(-EINVAL);
+
+ if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
+ zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;
+
return &zdev->iommu_dev;
}

@@ -342,6 +364,7 @@ static const struct iommu_ops s390_iommu_ops = {
.release_device = s390_iommu_release_device,
.device_group = generic_device_group,
.pgsize_bitmap = S390_IOMMU_PGSIZES,
+ .get_resv_regions = s390_iommu_get_resv_regions,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = s390_iommu_attach_device,
.detach_dev = s390_iommu_detach_device,
--
2.34.1