Re: [PATCH v7 3/6] iommu/s390: Fix potential s390_domain aperture shrinking
From: Niklas Schnelle
Date: Mon Oct 24 2022 - 12:42:48 EST
On Mon, 2022-10-17 at 14:45 +0200, Niklas Schnelle wrote:
> The s390 IOMMU driver currently sets the IOMMU domain's aperture to
> match the device specific DMA address range of the device that is first
> attached. This is not ideal. For one, if the domain has no device
> attached in the meantime, the aperture could be shrunk, allowing
> translations outside the aperture to exist in the translation tables.
> Also this is a bit of a misuse of the aperture which really should
> describe what addresses can be translated and not some device specific
> limitations.
>
> Instead of misusing the aperture like this we can instead create
> reserved ranges for the ranges inaccessible to the attached devices
> allowing devices with overlapping ranges to still share an IOMMU domain.
> This also significantly simplifies s390_iommu_attach_device() allowing
> us to move the aperture check to the beginning of the function and
> removing the need to hold the device list's lock to check the aperture.
>
> As we then use the same aperture for all domains and it only depends on
> the table properties we can already check zdev->start_dma/end_dma at
> probe time and turn the check on attach into a WARN_ON().
>
> Suggested-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
> Reviewed-by: Matthew Rosato <mjrosato@xxxxxxxxxxxxx>
> Signed-off-by: Niklas Schnelle <schnelle@xxxxxxxxxxxxx>
> ---
> v5->v6:
> - Return -EINVAL after WARN_ON() in attach
> v4->v5:
> - Make aperture check in attach a WARN_ON() and fail in probe if
> zdev->start_dma/end_dma doesn't fit in aperture (Jason)
>
> drivers/iommu/s390-iommu.c | 63 ++++++++++++++++++++++++++------------
> 1 file changed, 43 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index af83ccde16a4..c4203a37faa4 100644
> --- a/drivers/iommu/s390-iommu.c
> +++ b/drivers/iommu/s390-iommu.c
> @@ -62,6 +62,9 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
> kfree(s390_domain);
> return NULL;
> }
> + s390_domain->domain.geometry.force_aperture = true;
> + s390_domain->domain.geometry.aperture_start = 0;
> + s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;
>
> spin_lock_init(&s390_domain->dma_table_lock);
> spin_lock_init(&s390_domain->list_lock);
> @@ -102,11 +105,15 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
> struct s390_domain *s390_domain = to_s390_domain(domain);
> struct zpci_dev *zdev = to_zpci_dev(dev);
> unsigned long flags;
> - int cc, rc = 0;
> + int cc;
>
> if (!zdev)
> return -ENODEV;
>
> + if (WARN_ON(domain->geometry.aperture_start > zdev->end_dma ||
> + domain->geometry.aperture_end < zdev->start_dma))
> + return -EINVAL;
> +
> if (zdev->s390_domain)
> __s390_iommu_detach_device(zdev);
> else if (zdev->dma_table)
> @@ -118,30 +125,14 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
> return -EIO;
> zdev->dma_table = s390_domain->dma_table;
>
> - spin_lock_irqsave(&s390_domain->list_lock, flags);
> - /* First device defines the DMA range limits */
> - if (list_empty(&s390_domain->devices)) {
> - domain->geometry.aperture_start = zdev->start_dma;
> - domain->geometry.aperture_end = zdev->end_dma;
> - domain->geometry.force_aperture = true;
> - /* Allow only devices with identical DMA range limits */
> - } else if (domain->geometry.aperture_start != zdev->start_dma ||
> - domain->geometry.aperture_end != zdev->end_dma) {
> - spin_unlock_irqrestore(&s390_domain->list_lock, flags);
> - rc = -EINVAL;
> - goto out_unregister;
> - }
> + zdev->dma_table = s390_domain->dma_table;
> zdev->s390_domain = s390_domain;
> +
> + spin_lock_irqsave(&s390_domain->list_lock, flags);
> list_add(&zdev->iommu_list, &s390_domain->devices);
> spin_unlock_irqrestore(&s390_domain->list_lock, flags);
>
> return 0;
> -
> -out_unregister:
> - zpci_unregister_ioat(zdev, 0);
> - zdev->dma_table = NULL;
> -
> - return rc;
> }
>
> static void s390_iommu_detach_device(struct iommu_domain *domain,
> @@ -155,6 +146,30 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
> zpci_dma_init_device(zdev);
> }
>
> +static void s390_iommu_get_resv_regions(struct device *dev,
> + struct list_head *list)
> +{
> + struct zpci_dev *zdev = to_zpci_dev(dev);
> + struct iommu_resv_region *region;
> +
> + if (zdev->start_dma) {
> + region = iommu_alloc_resv_region(0, zdev->start_dma, 0,
> + IOMMU_RESV_RESERVED);
Heads up! The iommu_alloc_resv_region() function gained a gfp parameter
between v6.1-rc1 and v6.1-rc2, so the call above needs an additional
trailing argument: "…, GFP_KERNEL);".
@Joerg, @Will if you don't mind I'll rebase on v6.1-rc2 and resend.
@Jason if you want to re-add your R-b this would be a good time.
> + if (!region)
> + return;
> list_add_tail(&region->list, list);
> + }
> +
> + if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) {
> + region = iommu_alloc_resv_region(zdev->end_dma + 1,
> + ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1,
> + 0, IOMMU_RESV_RESERVED);
Same as above.
> + if (!region)
> + return;
> list_add_tail(&region->list, list);
> + }
> +}
> +
> static struct iommu_device *s390_iommu_probe_device(struct device *dev)
> {
> struct zpci_dev *zdev;
> @@ -164,6 +179,13 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev)
>
> zdev = to_zpci_dev(dev);
>
> + if (zdev->start_dma > zdev->end_dma ||
> + zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1)
> + return ERR_PTR(-EINVAL);
> +
> + if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
> + zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;
> +
> return &zdev->iommu_dev;
> }
>
> @@ -342,6 +364,7 @@ static const struct iommu_ops s390_iommu_ops = {
> .release_device = s390_iommu_release_device,
> .device_group = generic_device_group,
> .pgsize_bitmap = S390_IOMMU_PGSIZES,
> + .get_resv_regions = s390_iommu_get_resv_regions,
> .default_domain_ops = &(const struct iommu_domain_ops) {
> .attach_dev = s390_iommu_attach_device,
> .detach_dev = s390_iommu_detach_device,