[RFC PATCH 3/4] iommu/vt-d: Map/unmap domain with mmmap/mmunmap

From: Lu Baolu
Date: Mon Sep 23 2019 - 08:27:30 EST


If a dmar domain has the DOMAIN_FLAG_FIRST_LEVEL_TRANS bit set
in its flags, the IOMMU will use the first level page table for
translation. Hence, we need to map and unmap addresses in the
first level page table, which is done through intel_mmmap_range()
and intel_mmunmap_range() for such domains.

Cc: Ashok Raj <ashok.raj@xxxxxxxxx>
Cc: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx>
Cc: Kevin Tian <kevin.tian@xxxxxxxxx>
Cc: Liu Yi L <yi.l.liu@xxxxxxxxx>
Cc: Yi Sun <yi.y.sun@xxxxxxxxxxxxxxx>
Signed-off-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
---
drivers/iommu/intel-iommu.c | 94 ++++++++++++++++++++++++++++++++-----
1 file changed, 82 insertions(+), 12 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 9cfe8098d993..103480016010 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -168,6 +168,11 @@ static inline unsigned long virt_to_dma_pfn(void *p)
return page_to_dma_pfn(virt_to_page(p));
}

+/* Shift a DMA page frame number left by VTD_PAGE_SHIFT to get its address. */
+static inline unsigned long dma_pfn_to_addr(unsigned long pfn)
+{
+	unsigned long addr = pfn << VTD_PAGE_SHIFT;
+
+	return addr;
+}
+
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

@@ -307,6 +312,9 @@ static int hw_pass_through = 1;
*/
#define DOMAIN_FLAG_LOSE_CHILDREN BIT(1)

+/* Domain uses first level translation for DMA remapping. */
+#define DOMAIN_FLAG_FIRST_LEVEL_TRANS BIT(2)
+
#define for_each_domain_iommu(idx, domain) \
for (idx = 0; idx < g_num_of_iommus; idx++) \
if (domain->iommu_refcnt[idx])
@@ -552,6 +560,11 @@ static inline int domain_type_is_si(struct dmar_domain *domain)
return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}

+/*
+ * Test whether DOMAIN_FLAG_FIRST_LEVEL_TRANS is set in @domain->flags.
+ * The masked flag value itself is returned: nonzero when set, 0 otherwise.
+ */
+static inline int domain_type_is_flt(struct dmar_domain *domain)
+{
+	int flt = domain->flags & DOMAIN_FLAG_FIRST_LEVEL_TRANS;
+
+	return flt;
+}
+
static inline int domain_pfn_supported(struct dmar_domain *domain,
unsigned long pfn)
{
@@ -1147,8 +1160,15 @@ static struct page *domain_unmap(struct dmar_domain *domain,
BUG_ON(start_pfn > last_pfn);

/* we don't need lock here; nobody else touches the iova range */
- freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
- domain->pgd, 0, start_pfn, last_pfn, NULL);
+ if (domain_type_is_flt(domain))
+ freelist = intel_mmunmap_range(domain,
+ dma_pfn_to_addr(start_pfn),
+ dma_pfn_to_addr(last_pfn + 1));
+ else
+ freelist = dma_pte_clear_level(domain,
+ agaw_to_level(domain->agaw),
+ domain->pgd, 0, start_pfn,
+ last_pfn, NULL);

/* free pgd */
if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
@@ -2213,9 +2233,10 @@ static inline int hardware_largepage_caps(struct dmar_domain *domain,
return level;
}

-static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
- struct scatterlist *sg, unsigned long phys_pfn,
- unsigned long nr_pages, int prot)
+static int
+__domain_mapping_dma(struct dmar_domain *domain, unsigned long iov_pfn,
+ struct scatterlist *sg, unsigned long phys_pfn,
+ unsigned long nr_pages, int prot)
{
struct dma_pte *first_pte = NULL, *pte = NULL;
phys_addr_t uninitialized_var(pteval);
@@ -2223,13 +2244,6 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
unsigned int largepage_lvl = 0;
unsigned long lvl_pages = 0;

- BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
-
- if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
- return -EINVAL;
-
- prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
-
if (!sg) {
sg_res = nr_pages;
pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
@@ -2328,6 +2342,62 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
return 0;
}

+/*
+ * Map @nr_pages pages starting at @iov_pfn through intel_mmmap_range().
+ * __domain_mapping() selects this path for domains that have
+ * DOMAIN_FLAG_FIRST_LEVEL_TRANS set.
+ *
+ * Without a scatterlist (@sg == NULL) one range backed by @phys_pfn is
+ * mapped. Otherwise @phys_pfn is unused and the scatterlist entries are
+ * mapped one after another at consecutive IOVA pages; dma_address and
+ * dma_length of an entry are filled in once its range has been mapped.
+ *
+ * Returns 0 on success, the nonzero value returned by intel_mmmap_range()
+ * if a range could not be mapped, or -EINVAL if the scatterlist does not
+ * account for exactly @nr_pages pages.
+ *
+ * NOTE(review): ranges mapped before a failing scatterlist entry are left
+ * in place here; confirm that callers tear them down on error.
+ * NOTE(review): assumes intel_mmmap_range() takes the physical address as
+ * phys_addr_t; confirm against its prototype.
+ */
+static int
+__domain_mapping_mm(struct dmar_domain *domain, unsigned long iov_pfn,
+		    struct scatterlist *sg, unsigned long phys_pfn,
+		    unsigned long nr_pages, int prot)
+{
+	int ret = 0;
+
+	/*
+	 * Widen before shifting, as __domain_mapping_dma() does, so that the
+	 * physical address is not truncated where phys_addr_t is wider than
+	 * unsigned long.
+	 */
+	if (!sg)
+		return intel_mmmap_range(domain, dma_pfn_to_addr(iov_pfn),
+					 dma_pfn_to_addr(iov_pfn + nr_pages),
+					 (phys_addr_t)phys_pfn << VTD_PAGE_SHIFT,
+					 prot);
+
+	while (nr_pages > 0) {
+		unsigned long sg_pages, pgoff;
+		phys_addr_t phys;
+
+		/* The scatterlist ended before @nr_pages pages were mapped. */
+		if (!sg) {
+			ret = -EINVAL;
+			break;
+		}
+
+		pgoff = sg->offset & ~PAGE_MASK;
+		sg_pages = aligned_nrpages(sg->offset, sg->length);
+
+		/* Subtracting more than what is left would wrap nr_pages. */
+		if (sg_pages > nr_pages) {
+			ret = -EINVAL;
+			break;
+		}
+
+		/* Keep the full phys_addr_t width of sg_phys(). */
+		phys = sg_phys(sg) - pgoff;
+
+		ret = intel_mmmap_range(domain, dma_pfn_to_addr(iov_pfn),
+					dma_pfn_to_addr(iov_pfn + sg_pages),
+					phys, prot);
+		if (ret)
+			break;
+
+		sg->dma_address = ((dma_addr_t)dma_pfn_to_addr(iov_pfn)) + pgoff;
+		sg->dma_length = sg->length;
+
+		nr_pages -= sg_pages;
+		iov_pfn += sg_pages;
+		sg = sg_next(sg);
+	}
+
+	return ret;
+}
+
+/*
+ * Validate a mapping request and hand it to the mapping routine that
+ * matches the domain: __domain_mapping_mm() when domain_type_is_flt()
+ * reports the first level translation flag, __domain_mapping_dma()
+ * otherwise.
+ *
+ * Returns -EINVAL when @prot carries neither DMA_PTE_READ nor
+ * DMA_PTE_WRITE; otherwise the result of the selected routine.
+ */
+static int
+__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
+		 struct scatterlist *sg, unsigned long phys_pfn,
+		 unsigned long nr_pages, int prot)
+{
+	unsigned long last_pfn = iov_pfn + nr_pages - 1;
+
+	/* The last pfn of the request must be one the domain supports. */
+	BUG_ON(!domain_pfn_supported(domain, last_pfn));
+
+	if (!(prot & (DMA_PTE_READ | DMA_PTE_WRITE)))
+		return -EINVAL;
+
+	/* Only the read, write and SNP bits are passed further down. */
+	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
+
+	if (!domain_type_is_flt(domain))
+		return __domain_mapping_dma(domain, iov_pfn, sg,
+					    phys_pfn, nr_pages, prot);
+
+	return __domain_mapping_mm(domain, iov_pfn, sg,
+				   phys_pfn, nr_pages, prot);
+}
+
static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
struct scatterlist *sg, unsigned long phys_pfn,
unsigned long nr_pages, int prot)
--
2.17.1