Re: [PATCH v1 01/14] iommu: Add iommu_get_unmanaged_domain helper

From: Eric Auger
Date: Fri Mar 10 2023 - 05:16:07 EST


Hi Nicolin,

On 3/9/23 11:53, Nicolin Chen wrote:
> The nature of ITS virtualization on ARM is done via hypercalls, so kernel
> handles all IOVA mappings for the MSI doorbell in iommu_dma_prepare_msi()
> and iommu_dma_compose_msi_msg(). The current virtualization solution with
> a 2-stage nested translation setup is to do 1:1 IOVA mappings at stage-1
Note that if we still intend to use that trick, there is a known issue on
the kernel side that needs to be fixed.

ARM DEN 0049E.b IORT specification mandates that when
RMRs are present, the OS must preserve PCIe configuration
performed by the boot FW.

As discussed in the past, enforcing this causes issues with PCI devices
with IO ports. See qemu commit
40c3472a29c9 ("Revert "acpi/gpex: Inform os to keep firmware resource
map""). This seemed to require a fix at the kernel level. I am not sure
this fix has been worked on.

Thanks

Eric

> guest-level IO page table via a RMR region in guest-level IORT, aligning
> with an IOVA region that's predefined and mapped in the host kernel:
>
> [stage-2 host level]
> #define MSI_IOVA_BASE 0x8000000
> #define MSI_IOVA_LENGTH 0x100000
> ...
> iommu_get_msi_cookie():
> cookie->msi_iova = MSI_IOVA_BASE;
> ...
> iommu_dma_prepare_msi(its_pa):
> domain = iommu_get_domain_for_dev(dev);
> iommu_dma_get_msi_page(its_pa, domain):
> cookie = domain->iova_cookie;
> iova = iommu_dma_alloc_iova():
> return cookie->msi_iova - size;
> iommu_map(iova, its_pa, ...);
>
> [stage-1 guest level]
> // Define in IORT a RMR [MSI_IOVA_BASE, MSI_IOVA_LENGTH]
> ...
> iommu_create_device_direct_mappings():
> iommu_map(iova=MSI_IOVA_BASE, pa=MSI_IOVA_BASE, len=MSI_IOVA_LENGTH);
>
> This solution calling iommu_get_domain_for_dev() needs the device to get
> attached to a host-level iommu_domain that has the msi_cookie.
>
> On the other hand, IOMMUFD designs two iommu_domain objects to represent
> the two stages: a stage-1 domain (IOMMU_DOMAIN_NESTED type) and a stage-2
> domain (IOMMU_DOMAIN_UNMANAGED type). In this design, the device will be
> attached to the stage-1 domain representing a guest-level IO page table,
> or a Context Descriptor Table in SMMU's term.
>
> This is obviously a mismatch, as the iommu_get_domain_for_dev() does not
> return the correct domain pointer in iommu_dma_prepare_msi().
>
> Add an iommu_get_unmanaged_domain helper to allow drivers to return the
> correct IOMMU_DOMAIN_UNMANAGED iommu_domain having the IOVA mappings for
> the msi_cookie. Keep it in the iommu-priv header for internal use only.
>
> Suggested-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
> Signed-off-by: Nicolin Chen <nicolinc@xxxxxxxxxx>
> ---
> drivers/iommu/dma-iommu.c | 5 +++--
> drivers/iommu/iommu-priv.h | 15 +++++++++++++++
> include/linux/iommu.h | 2 ++
> 3 files changed, 20 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 99b2646cb5c7..6b0409d0ff85 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -31,6 +31,7 @@
> #include <linux/vmalloc.h>
>
> #include "dma-iommu.h"
> +#include "iommu-priv.h"
>
> struct iommu_dma_msi_page {
> struct list_head list;
> @@ -1652,7 +1653,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
> int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
> {
> struct device *dev = msi_desc_to_dev(desc);
> - struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> + struct iommu_domain *domain = iommu_get_unmanaged_domain(dev);
> struct iommu_dma_msi_page *msi_page;
> static DEFINE_MUTEX(msi_prepare_lock); /* see below */
>
> @@ -1685,7 +1686,7 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
> void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
> {
> struct device *dev = msi_desc_to_dev(desc);
> - const struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> + const struct iommu_domain *domain = iommu_get_unmanaged_domain(dev);
> const struct iommu_dma_msi_page *msi_page;
>
> msi_page = msi_desc_get_iommu_cookie(desc);
> diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h
> index a6e694f59f64..da8044da9ad8 100644
> --- a/drivers/iommu/iommu-priv.h
> +++ b/drivers/iommu/iommu-priv.h
> @@ -15,6 +15,21 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
> return dev->iommu->iommu_dev->ops;
> }
>
> +static inline struct iommu_domain *iommu_get_unmanaged_domain(struct device *dev)
> +{
> + const struct iommu_ops *ops;
> +
> + if (!dev->iommu || !dev->iommu->iommu_dev)
> + goto attached_domain;
> +
> + ops = dev_iommu_ops(dev);
> + if (ops->get_unmanaged_domain)
> + return ops->get_unmanaged_domain(dev);
> +
> +attached_domain:
> + return iommu_get_domain_for_dev(dev);
> +}
> +
> int iommu_group_replace_domain(struct iommu_group *group,
> struct iommu_domain *new_domain);
>
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 080278c8154d..76c65cc4fc15 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -275,6 +275,8 @@ struct iommu_ops {
> struct iommu_domain *parent,
> const void *user_data);
>
> + struct iommu_domain *(*get_unmanaged_domain)(struct device *dev);
> +
> struct iommu_device *(*probe_device)(struct device *dev);
> void (*release_device)(struct device *dev);
> void (*probe_finalize)(struct device *dev);