RE: [RFC PATCH 3/4] iommu/vt-d: Map/unmap domain with mmmap/mmunmap

From: Tian, Kevin
Date: Wed Sep 25 2019 - 01:00:47 EST


> From: Lu Baolu [mailto:baolu.lu@xxxxxxxxxxxxxxx]
> Sent: Monday, September 23, 2019 8:25 PM
>
> If a dmar domain has the DOMAIN_FLAG_FIRST_LEVEL_TRANS bit set
> in its flags, the IOMMU will use the first level page table for
> translation. Hence, we need to map or unmap addresses in the
> first level page table.
>
> Cc: Ashok Raj <ashok.raj@xxxxxxxxx>
> Cc: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx>
> Cc: Kevin Tian <kevin.tian@xxxxxxxxx>
> Cc: Liu Yi L <yi.l.liu@xxxxxxxxx>
> Cc: Yi Sun <yi.y.sun@xxxxxxxxxxxxxxx>
> Signed-off-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
> ---
> drivers/iommu/intel-iommu.c | 94 ++++++++++++++++++++++++++++++++-----
> 1 file changed, 82 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 9cfe8098d993..103480016010 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -168,6 +168,11 @@ static inline unsigned long virt_to_dma_pfn(void *p)
>         return page_to_dma_pfn(virt_to_page(p));
> }
>
> +static inline unsigned long dma_pfn_to_addr(unsigned long pfn)
> +{
> +       return pfn << VTD_PAGE_SHIFT;
> +}
> +
> /* global iommu list, set NULL for ignored DMAR units */
> static struct intel_iommu **g_iommus;
>
> @@ -307,6 +312,9 @@ static int hw_pass_through = 1;
> */
> #define DOMAIN_FLAG_LOSE_CHILDREN BIT(1)
>
> +/* Domain uses first level translation for DMA remapping. */
> +#define DOMAIN_FLAG_FIRST_LEVEL_TRANS BIT(2)
> +
> #define for_each_domain_iommu(idx, domain)                     \
>         for (idx = 0; idx < g_num_of_iommus; idx++)            \
>                 if (domain->iommu_refcnt[idx])
> @@ -552,6 +560,11 @@ static inline int domain_type_is_si(struct dmar_domain *domain)
>         return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
> }
>
> +static inline int domain_type_is_flt(struct dmar_domain *domain)
> +{
> +       return domain->flags & DOMAIN_FLAG_FIRST_LEVEL_TRANS;
> +}
> +
> static inline int domain_pfn_supported(struct dmar_domain *domain,
>                                        unsigned long pfn)
> {
> @@ -1147,8 +1160,15 @@ static struct page *domain_unmap(struct dmar_domain *domain,
>         BUG_ON(start_pfn > last_pfn);
>
>         /* we don't need lock here; nobody else touches the iova range */
> -       freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
> -                                      domain->pgd, 0, start_pfn, last_pfn, NULL);
> +       if (domain_type_is_flt(domain))
> +               freelist = intel_mmunmap_range(domain,
> +                                              dma_pfn_to_addr(start_pfn),
> +                                              dma_pfn_to_addr(last_pfn + 1));
> +       else
> +               freelist = dma_pte_clear_level(domain,
> +                                              agaw_to_level(domain->agaw),
> +                                              domain->pgd, 0, start_pfn,
> +                                              last_pfn, NULL);

What about providing a unified interface at the caller side, and having
the translation level differentiated within that interface?
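
For example (just a sketch, with a placeholder name), mirroring what
this patch already does on the map side in __domain_mapping():

static struct page *domain_unmap_range(struct dmar_domain *domain,
                                       unsigned long start_pfn,
                                       unsigned long last_pfn)
{
        if (domain_type_is_flt(domain))
                return intel_mmunmap_range(domain,
                                           dma_pfn_to_addr(start_pfn),
                                           dma_pfn_to_addr(last_pfn + 1));

        return dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
                                   domain->pgd, 0, start_pfn, last_pfn,
                                   NULL);
}

Then domain_unmap() and any future caller wouldn't need to know which
page table format the domain uses.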

>
>         /* free pgd */
>         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
> @@ -2213,9 +2233,10 @@ static inline int hardware_largepage_caps(struct dmar_domain *domain,
>         return level;
> }
>
> -static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
> -                           struct scatterlist *sg, unsigned long phys_pfn,
> -                           unsigned long nr_pages, int prot)
> +static int
> +__domain_mapping_dma(struct dmar_domain *domain, unsigned long iov_pfn,
> +                    struct scatterlist *sg, unsigned long phys_pfn,
> +                    unsigned long nr_pages, int prot)
> {
>         struct dma_pte *first_pte = NULL, *pte = NULL;
>         phys_addr_t uninitialized_var(pteval);
> @@ -2223,13 +2244,6 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
>         unsigned int largepage_lvl = 0;
>         unsigned long lvl_pages = 0;
>
> -       BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
> -
> -       if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
> -               return -EINVAL;
> -
> -       prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
> -
>         if (!sg) {
>                 sg_res = nr_pages;
>                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
> @@ -2328,6 +2342,62 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
>         return 0;
> }
>
> +static int
> +__domain_mapping_mm(struct dmar_domain *domain, unsigned long iov_pfn,
> +                    struct scatterlist *sg, unsigned long phys_pfn,
> +                    unsigned long nr_pages, int prot)
> +{
> +       int ret = 0;
> +
> +       if (!sg)
> +               return intel_mmmap_range(domain, dma_pfn_to_addr(iov_pfn),
> +                                        dma_pfn_to_addr(iov_pfn + nr_pages),
> +                                        dma_pfn_to_addr(phys_pfn), prot);
> +
> +       while (nr_pages > 0) {
> +               unsigned long sg_pages, phys;
> +               unsigned long pgoff = sg->offset & ~PAGE_MASK;
> +
> +               sg_pages = aligned_nrpages(sg->offset, sg->length);
> +               phys = sg_phys(sg) - pgoff;
> +
> +               ret = intel_mmmap_range(domain, dma_pfn_to_addr(iov_pfn),
> +                                       dma_pfn_to_addr(iov_pfn + sg_pages),
> +                                       phys, prot);
> +               if (ret)
> +                       break;
> +
> +               sg->dma_address = ((dma_addr_t)dma_pfn_to_addr(iov_pfn)) + pgoff;
> +               sg->dma_length = sg->length;
> +
> +               nr_pages -= sg_pages;
> +               iov_pfn += sg_pages;
> +               sg = sg_next(sg);
> +       }
> +
> +       return ret;
> +}
> +
> +static int
> +__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
> +                struct scatterlist *sg, unsigned long phys_pfn,
> +                unsigned long nr_pages, int prot)
> +{
> +       BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
> +
> +       if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
> +               return -EINVAL;
> +
> +       prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
> +
> +       if (domain_type_is_flt(domain))
> +               return __domain_mapping_mm(domain, iov_pfn, sg,
> +                                          phys_pfn, nr_pages, prot);
> +       else
> +               return __domain_mapping_dma(domain, iov_pfn, sg,
> +                                           phys_pfn, nr_pages, prot);
> +}
> +
> static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
>                           struct scatterlist *sg, unsigned long phys_pfn,
>                           unsigned long nr_pages, int prot)
> --
> 2.17.1