[RFC PATCH 4/7] iommu/amd: Initial support for AMD IOMMU v2 page table

From: Suravee Suthikulpanit
Date: Fri Mar 12 2021 - 03:56:22 EST


Introduce IO page table framework support for AMD IOMMU v2 page table.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
---
drivers/iommu/amd/Makefile | 2 +-
drivers/iommu/amd/amd_iommu_types.h | 2 +
drivers/iommu/amd/io_pgtable_v2.c | 239 ++++++++++++++++++++++++++++
drivers/iommu/io-pgtable.c | 1 +
include/linux/io-pgtable.h | 2 +
5 files changed, 245 insertions(+), 1 deletion(-)
create mode 100644 drivers/iommu/amd/io_pgtable_v2.c

diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index a935f8f4b974..773d8aa00283 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o
+obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 6937e3674a16..25062eb86c8b 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -265,6 +265,7 @@
* 512GB Pages are not supported due to a hardware bug
*/
#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
+/*
+ * Page sizes advertised for the v2 page table.
+ * NOTE(review): where PAGE_SIZE is 4K the (1ULL << 12) term repeats
+ * PAGE_SIZE, so only two sizes (4K and 1G) are actually advertised.
+ * Confirm whether 2M (1ULL << 21) was intended.
+ */
+#define AMD_IOMMU_PGSIZES_V2 (PAGE_SIZE | (1ULL << 12) | (1ULL << 30))

/* Bit value definition for dte irq remapping fields*/
#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
@@ -503,6 +504,7 @@ struct amd_io_pgtable {
int mode;
u64 *root;
atomic64_t pt_root; /* pgtable root and pgtable mode */
+ struct mm_struct v2_mm;
};

/*
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c
new file mode 100644
index 000000000000..b0b6ba2d8d35
--- /dev/null
+++ b/drivers/iommu/amd/io_pgtable_v2.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD IO page table v2 allocator, built on the kernel's CPU page table
+ * helpers (mm_struct, pgd/p4d/pud/pmd/pte).
+ *
+ * Copyright (C) 2020 Advanced Micro Devices, Inc.
+ * Author: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
+ */
+
+#define pr_fmt(fmt) "AMD-Vi: " fmt
+#define dev_fmt(fmt) pr_fmt(fmt)
+
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/io-pgtable.h>
+#include <linux/kernel.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/dma-mapping.h>
+#include <linux/mmu_context.h>
+
+#include <asm/barrier.h>
+#include <asm/pgalloc.h>
+
+#include "amd_iommu_types.h"
+#include "amd_iommu.h"
+
+/*
+ * Find the entry covering @iova in the v2 page table owned by @pgtable.
+ *
+ * Returns the entry reported by lookup_address_in_mm() and stores the page
+ * size derived from its level in @page_size. Returns NULL, leaving
+ * @page_size untouched, when no entry is found, the entry is empty, or the
+ * reported level is PG_LEVEL_NONE.
+ */
+static pte_t *fetch_pte(struct amd_io_pgtable *pgtable,
+			unsigned long iova,
+			unsigned long *page_size)
+{
+	int lvl;
+	pte_t *entry = lookup_address_in_mm(&pgtable->v2_mm, iova, &lvl);
+
+	if (!entry)
+		return NULL;
+	if (pte_none(*entry))
+		return NULL;
+	if (lvl == PG_LEVEL_NONE)
+		return NULL;
+
+	/* The size macro is fed the level reported by the lookup, minus one. */
+	*page_size = PTE_LEVEL_PAGE_SIZE(lvl - 1);
+
+	return entry;
+}
+
+/*
+ * Walk the page table of @mm from the pgd down to the PTE level for @vaddr,
+ * going through the p4d/pud/pmd/pte allocation helpers at each step.
+ *
+ * Returns the PTE slot for @vaddr, or NULL as soon as one of the steps
+ * fails.
+ */
+static pte_t *v2_pte_alloc_map(struct mm_struct *mm, unsigned long vaddr)
+{
+	pgd_t *top = pgd_offset(mm, vaddr);
+	p4d_t *l4;
+	pud_t *l3;
+	pmd_t *l2;
+
+	l4 = p4d_alloc(mm, top, vaddr);
+	if (l4 == NULL)
+		return NULL;
+
+	l3 = pud_alloc(mm, l4, vaddr);
+	if (l3 == NULL)
+		return NULL;
+
+	l2 = pmd_alloc(mm, l3, vaddr);
+	if (l2 == NULL)
+		return NULL;
+
+	return pte_alloc_map(mm, l2, vaddr);
+}
+
+/*
+ * Map @size bytes starting at @iova to @paddr in the v2 page table, one
+ * PAGE_SIZE entry per loop iteration.
+ *
+ * @ops:   io-pgtable ops embedded in the domain's amd_io_pgtable
+ * @iova:  IO virtual address, must be aligned to @size
+ * @paddr: physical address, must be aligned to @size
+ * @size:  size of the mapping
+ * @prot:  IOMMU_PROT_* bits; IOMMU_PROT_IW makes the entries writable
+ * @gfp:   not used: the page table helpers called here take no gfp argument
+ *
+ * Returns 0 on success, -EINVAL if @prot has no bit of IOMMU_PROT_MASK set,
+ * or -ENOMEM if a page table level could not be allocated. On -ENOMEM the
+ * entries installed by earlier iterations are left in place.
+ *
+ * NOTE(review): the iteration count comes from PAGE_SIZE_PTE_COUNT(), which
+ * is defined outside this file. Confirm it yields @size / PAGE_SIZE for
+ * every size in AMD_IOMMU_PGSIZES_V2, since each iteration installs exactly
+ * one PAGE_SIZE entry.
+ */
+static int iommu_v2_map_page(struct io_pgtable_ops *ops, unsigned long iova,
+			     phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+	struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
+	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+	unsigned long o_iova = iova;
+	unsigned long pte_pgsize;
+	bool updated = false;
+	int i, count;
+	int ret = 0;
+	pte_t entry;
+	pte_t *pte;
+
+	BUG_ON(!IS_ALIGNED(iova, size) || !IS_ALIGNED(paddr, size));
+
+	if (!(prot & IOMMU_PROT_MASK))
+		return -EINVAL;
+
+	count = PAGE_SIZE_PTE_COUNT(size);
+
+	for (i = 0; i < count; ++i, iova += PAGE_SIZE, paddr += PAGE_SIZE) {
+		/* fetch_pte() only returns entries that are already populated. */
+		pte = fetch_pte(pgtable, iova, &pte_pgsize);
+		if (pte) {
+			updated = true;
+		} else {
+			pte = v2_pte_alloc_map(&dom->iop.v2_mm, iova);
+			if (!pte) {
+				ret = -ENOMEM;
+				break;
+			}
+		}
+
+		/*
+		 * Build the complete entry first and publish it with a single
+		 * store, so the table never holds a present entry that is
+		 * still missing its write permission.
+		 */
+		entry = __pte((paddr & PAGE_MASK) | _PAGE_PRESENT | _PAGE_USER);
+		if (prot & IOMMU_PROT_IW)
+			entry = pte_mkwrite(entry);
+		set_pte(pte, entry);
+	}
+
+	/*
+	 * Previously populated entries were overwritten, so stale translations
+	 * may be cached. This also runs when the loop stopped early, because
+	 * the entries rewritten before the failure stay in the table.
+	 */
+	if (updated) {
+		if (count > 1)
+			amd_iommu_flush_tlb(&dom->domain, 0);
+		else
+			amd_iommu_flush_page(&dom->domain, 0, o_iova);
+	}
+
+	return ret;
+}
+
+/*
+ * Clear the v2 page table entries covering [@iova, @iova + @size).
+ *
+ * @ops:    io-pgtable ops embedded in the domain's amd_io_pgtable
+ * @iova:   first IO virtual address to unmap
+ * @size:   number of bytes to unmap, must be a power of two
+ * @gather: not used by this function
+ *
+ * Returns the number of bytes covered by the entries that were cleared.
+ * The walk stops at the first address with no populated entry, so the
+ * result is 0 when @iova itself is not mapped and may be smaller than
+ * @size when the range contains a hole. It may be larger than @size when
+ * the entry found covers more than the requested range.
+ *
+ * NOTE(review): no IOTLB invalidation is issued here. Confirm that the
+ * caller invalidates the range after this returns.
+ */
+static unsigned long iommu_v2_unmap_page(struct io_pgtable_ops *ops,
+					 unsigned long iova,
+					 size_t size,
+					 struct iommu_iotlb_gather *gather)
+{
+	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+	unsigned long vaddr_end = iova + size;
+	unsigned long unmapped = 0;
+	unsigned long pte_pgsize;
+	pte_t *ptep;
+
+	BUG_ON(!is_power_of_2(size));
+
+	while (iova < vaddr_end) {
+		ptep = fetch_pte(pgtable, iova, &pte_pgsize);
+		if (!ptep)
+			break;
+
+		/*
+		 * Actually remove the translation. pte_unmap() only drops a
+		 * temporary kernel mapping of the table page and leaves the
+		 * entry itself intact.
+		 */
+		pte_clear(&pgtable->v2_mm, iova, ptep);
+		unmapped += pte_pgsize;
+
+		/* Step to the start of the next entry of this entry's size. */
+		iova = (iova & ~(pte_pgsize - 1)) + pte_pgsize;
+	}
+
+	return unmapped;
+}
+
+/*
+ * Translate @iova through the v2 page table.
+ *
+ * Returns @iova unchanged when the page table mode is PAGE_MODE_NONE, 0 when
+ * no populated entry covers @iova, and otherwise the address stored in the
+ * entry combined with the offset of @iova inside the page.
+ */
+static phys_addr_t iommu_v2_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
+{
+	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+	unsigned long pgsize, in_page_mask;
+	pte_t *entry;
+	u64 frame;
+
+	if (pgtable->mode == PAGE_MODE_NONE)
+		return iova;
+
+	entry = fetch_pte(pgtable, iova, &pgsize);
+	if (entry == NULL || pte_none(*entry))
+		return 0;
+
+	/* Low bits select the byte within the page, the rest the page frame. */
+	in_page_mask = pgsize - 1;
+	frame = __sme_clr(entry->pte & PM_ADDR_MASK);
+
+	return (frame & ~in_page_mask) | (iova & in_page_mask);
+}
+
+/*
+ * TLB flush callbacks handed to the io-pgtable framework through cfg->tlb.
+ * All three are empty: iommu_v2_map_page() calls amd_iommu_flush_tlb() /
+ * amd_iommu_flush_page() directly instead of going through these hooks.
+ */
+
+/* Flush-everything hook; intentionally does nothing. */
+static void v2_tlb_flush_all(void *cookie)
+{
+}
+
+/* Range flush hook; intentionally does nothing. */
+static void v2_tlb_flush_walk(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+}
+
+/* Per-page gather hook; intentionally does nothing. */
+static void v2_tlb_add_page(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
+{
+}
+
+/* Callback table installed as cfg->tlb by v2_alloc_pgtable(). */
+static const struct iommu_flush_ops v2_flush_ops = {
+ .tlb_flush_all = v2_tlb_flush_all,
+ .tlb_flush_walk = v2_tlb_flush_walk,
+ .tlb_add_page = v2_tlb_add_page,
+};
+
+/*
+ * Tear down the v2 page table behind @iop.
+ *
+ * Does nothing unless the owning protection domain has PD_IOMMUV2_MASK set.
+ * Otherwise the domain is updated and the GCR3 entry for PASID 0 is cleared
+ * before the pgd is freed, so the IOMMU is never left pointing at memory
+ * that has already been returned to the allocator.
+ *
+ * NOTE(review): only the top-level pgd is released here. The lower-level
+ * tables allocated by v2_pte_alloc_map() are not freed in this function.
+ * NOTE(review): the return value of amd_iommu_domain_clear_gcr3() is
+ * ignored; confirm that a failure cannot leave the old root installed.
+ */
+static void v2_free_pgtable(struct io_pgtable *iop)
+{
+	struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
+	struct protection_domain *pdom;
+	struct mm_struct *mm;
+
+	pdom = container_of(pgtable, struct protection_domain, iop);
+	if (!(pdom->flags & PD_IOMMUV2_MASK))
+		return;
+
+	/* Make changes visible to IOMMUs and drop the root pointer first. */
+	amd_iommu_domain_update(pdom);
+	amd_iommu_domain_clear_gcr3(&pdom->domain, 0);
+
+	/* Only now is it safe to give the table memory back. */
+	mm = &pdom->iop.v2_mm;
+	pgd_free(mm, mm->pgd);
+	mm->pgd = NULL;
+}
+
+/*
+ * Allocate the v2 page table root for a protection domain and fill in the
+ * io-pgtable ops and configuration.
+ *
+ * @cfg:    io-pgtable configuration embedded in the domain's amd_io_pgtable;
+ *          pgsize_bitmap, ias, oas and tlb are written on success
+ * @cookie: the struct protection_domain that owns the page table; it is
+ *          assumed to be already set up at this point
+ *
+ * Returns the io_pgtable on success. Returns NULL if the pgd cannot be
+ * allocated or if it cannot be installed as the GCR3 entry for PASID 0; in
+ * the latter case the freshly allocated pgd is freed again.
+ */
+static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
+{
+	struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
+	struct protection_domain *pdom = cookie;
+	struct mm_struct *mm = &pdom->iop.v2_mm;
+	int ret;
+
+	mm->pgd = pgd_alloc(mm);
+	if (!mm->pgd)
+		return NULL;
+
+	ret = amd_iommu_domain_set_gcr3(&pdom->domain, 0, __pa(mm->pgd));
+	if (ret) {
+		/* Nothing references the pgd yet; do not leak it. */
+		pgd_free(mm, mm->pgd);
+		mm->pgd = NULL;
+		return NULL;
+	}
+
+	pgtable->iop.ops.map = iommu_v2_map_page;
+	pgtable->iop.ops.unmap = iommu_v2_unmap_page;
+	pgtable->iop.ops.iova_to_phys = iommu_v2_iova_to_phys;
+
+	cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
+	cfg->ias = IOMMU_IN_ADDR_BIT_SIZE;
+	cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE;
+	cfg->tlb = &v2_flush_ops;
+
+	return &pgtable->iop;
+}
+
+/*
+ * Allocation and teardown hooks for the AMD IOMMU v2 page table format.
+ * Registered in io_pgtable_init_table[] under AMD_IOMMU_V2.
+ */
+struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = {
+ .alloc = v2_alloc_pgtable,
+ .free = v2_free_pgtable,
+};
diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
index 6e9917ce980f..6494657e4a34 100644
--- a/drivers/iommu/io-pgtable.c
+++ b/drivers/iommu/io-pgtable.c
@@ -26,6 +26,7 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = {
#endif
#ifdef CONFIG_AMD_IOMMU
[AMD_IOMMU_V1] = &io_pgtable_amd_iommu_v1_init_fns,
+ [AMD_IOMMU_V2] = &io_pgtable_amd_iommu_v2_init_fns,
#endif
};

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index a4c9ca2c31f1..17951204126e 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -16,6 +16,7 @@ enum io_pgtable_fmt {
ARM_V7S,
ARM_MALI_LPAE,
AMD_IOMMU_V1,
+ AMD_IOMMU_V2,
IO_PGTABLE_NUM_FMTS,
};

@@ -250,5 +251,6 @@ extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns;
extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns;
extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns;
extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns;
+extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns;

#endif /* __IO_PGTABLE_H */
--
2.17.1