[PATCH 2/4] iommu/riscv: Use pte_t to represent page table entry
From: Xu Lu
Date: Mon Mar 10 2025 - 09:23:11 EST
Since the RISC-V IOMMU uses the same PTE format and translation process
as the MMU, as specified in the RISC-V Privileged Specification, use
pte_t to represent IOMMU page table entries as well, so that the
existing pte operation helpers can be reused.
Signed-off-by: Xu Lu <luxu.kernel@xxxxxxxxxxxxx>
---
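Note for reviewers (not part of the commit message): the conversion
follows the usual pte_t accessor pattern. pte_t is a struct wrapper
around the raw PTE bits, so raw words are only built and inspected via
__pte()/pte_val(), and entries are read and written via
ptep_get()/set_pte(). Below is a minimal sketch of that pattern,
assuming the RISC-V definitions from asm/pgtable-bits.h; the example_*
helpers are illustrative only and do not appear in this patch:

	#include <linux/types.h>
	#include <linux/pgtable.h>	/* pte_t, ptep_get(), set_pte() */

	/* Build a leaf entry from a pfn and protection bits. */
	static void example_set_leaf(pte_t *ptep, unsigned long pfn,
				     unsigned long prot)
	{
		/* Wrap raw bits into a typed entry with __pte()... */
		pte_t pte = __pte((_PAGE_PFN_MASK & (pfn << _PAGE_PFN_SHIFT)) | prot);

		/* ...and publish it through the typed store helper. */
		set_pte(ptep, pte);
	}

	/* Check whether an entry is a present leaf. */
	static bool example_is_leaf(pte_t *ptep)
	{
		/* Typed read; replaces open-coded READ_ONCE(*ptr). */
		pte_t pte = ptep_get(ptep);

		return (pte_val(pte) & _PAGE_PRESENT) &&
		       (pte_val(pte) & _PAGE_LEAF);
	}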
drivers/iommu/riscv/iommu.c | 66 ++++++++++++++++++-------------------
1 file changed, 33 insertions(+), 33 deletions(-)
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index 8f049d4a0e2cb..f752096989a79 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -812,7 +812,7 @@ struct riscv_iommu_domain {
 	bool amo_enabled;
 	int numa_node;
 	unsigned int pgd_mode;
-	unsigned long *pgd_root;
+	pte_t *pgd_root;
 };
 
 #define iommu_domain_to_riscv(iommu_domain) \
@@ -1081,27 +1081,29 @@ static void riscv_iommu_iotlb_sync(struct iommu_domain *iommu_domain,
 
 #define PT_SHIFT (PAGE_SHIFT - ilog2(sizeof(pte_t)))
 
-#define _io_pte_present(pte)	((pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE))
-#define _io_pte_leaf(pte)	((pte) & _PAGE_LEAF)
-#define _io_pte_none(pte)	((pte) == 0)
-#define _io_pte_entry(pn, prot)	((_PAGE_PFN_MASK & ((pn) << _PAGE_PFN_SHIFT)) | (prot))
+#define _io_pte_present(pte)	(pte_val(pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE))
+#define _io_pte_leaf(pte)	(pte_val(pte) & _PAGE_LEAF)
+#define _io_pte_none(pte)	(pte_val(pte) == 0)
+#define _io_pte_entry(pn, prot)	(__pte((_PAGE_PFN_MASK & ((pn) << _PAGE_PFN_SHIFT)) | (prot)))
 
 static void riscv_iommu_pte_free(struct riscv_iommu_domain *domain,
-				 unsigned long pte, struct list_head *freelist)
+				 pte_t pte, struct list_head *freelist)
 {
-	unsigned long *ptr;
+	pte_t *ptr;
 	int i;
 
 	if (!_io_pte_present(pte) || _io_pte_leaf(pte))
 		return;
 
-	ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
+	ptr = (pte_t *)pfn_to_virt(pte_pfn(pte));
 
 	/* Recursively free all sub page table pages */
 	for (i = 0; i < PTRS_PER_PTE; i++) {
-		pte = READ_ONCE(ptr[i]);
-		if (!_io_pte_none(pte) && cmpxchg_relaxed(ptr + i, pte, 0) == pte)
+		pte = ptr[i];
+		if (!_io_pte_none(pte)) {
+			ptr[i] = __pte(0);
 			riscv_iommu_pte_free(domain, pte, freelist);
+		}
 	}
 
 	if (freelist)
@@ -1110,12 +1112,12 @@ static void riscv_iommu_pte_free(struct riscv_iommu_domain *domain,
 		iommu_free_page(ptr);
 }
 
-static unsigned long *riscv_iommu_pte_alloc(struct riscv_iommu_domain *domain,
+static pte_t *riscv_iommu_pte_alloc(struct riscv_iommu_domain *domain,
 					    unsigned long iova, size_t pgsize,
 					    gfp_t gfp)
 {
-	unsigned long *ptr = domain->pgd_root;
-	unsigned long pte, old;
+	pte_t *ptr = domain->pgd_root;
+	pte_t pte, old;
 	int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2;
 	void *addr;
 
@@ -1131,7 +1133,7 @@ static unsigned long *riscv_iommu_pte_alloc(struct riscv_iommu_domain *domain,
 		if (((size_t)1 << shift) == pgsize)
 			return ptr;
 pte_retry:
-		pte = READ_ONCE(*ptr);
+		pte = ptep_get(ptr);
 		/*
 		 * This is very likely incorrect as we should not be adding
 		 * new mapping with smaller granularity on top
@@ -1154,31 +1156,31 @@ static unsigned long *riscv_iommu_pte_alloc(struct riscv_iommu_domain *domain,
 				goto pte_retry;
 			}
 		}
-		ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
+		ptr = (pte_t *)pfn_to_virt(pte_pfn(pte));
 	} while (level-- > 0);
 
 	return NULL;
 }
 
-static unsigned long *riscv_iommu_pte_fetch(struct riscv_iommu_domain *domain,
-					    unsigned long iova, size_t *pte_pgsize)
+static pte_t *riscv_iommu_pte_fetch(struct riscv_iommu_domain *domain,
+				    unsigned long iova, size_t *pte_pgsize)
 {
-	unsigned long *ptr = domain->pgd_root;
-	unsigned long pte;
+	pte_t *ptr = domain->pgd_root;
+	pte_t pte;
 	int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2;
 
 	do {
 		const int shift = PAGE_SHIFT + PT_SHIFT * level;
 
 		ptr += ((iova >> shift) & (PTRS_PER_PTE - 1));
-		pte = READ_ONCE(*ptr);
+		pte = ptep_get(ptr);
 		if (_io_pte_present(pte) && _io_pte_leaf(pte)) {
 			*pte_pgsize = (size_t)1 << shift;
 			return ptr;
 		}
 		if (_io_pte_none(pte))
 			return NULL;
-		ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
+		ptr = (pte_t *)pfn_to_virt(pte_pfn(pte));
 	} while (level-- > 0);
 
 	return NULL;
@@ -1191,8 +1193,9 @@ static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain,
 {
 	struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
 	size_t size = 0;
-	unsigned long *ptr;
-	unsigned long pte, old, pte_prot;
+	pte_t *ptr;
+	pte_t pte, old;
+	unsigned long pte_prot;
 	int rc = 0;
 	LIST_HEAD(freelist);
 
@@ -1210,10 +1213,9 @@ static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain,
 			break;
 		}
 
-		old = READ_ONCE(*ptr);
+		old = ptep_get(ptr);
 		pte = _io_pte_entry(phys_to_pfn(phys), pte_prot);
-		if (cmpxchg_relaxed(ptr, old, pte) != old)
-			continue;
+		set_pte(ptr, pte);
 
 		riscv_iommu_pte_free(domain, old, &freelist);
 
@@ -1247,7 +1249,7 @@ static size_t riscv_iommu_unmap_pages(struct iommu_domain *iommu_domain,
 {
 	struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
 	size_t size = pgcount << __ffs(pgsize);
-	unsigned long *ptr, old;
+	pte_t *ptr;
 	size_t unmapped = 0;
 	size_t pte_size;
 
@@ -1260,9 +1262,7 @@ static size_t riscv_iommu_unmap_pages(struct iommu_domain *iommu_domain,
 		if (iova & (pte_size - 1))
 			return unmapped;
 
-		old = READ_ONCE(*ptr);
-		if (cmpxchg_relaxed(ptr, old, 0) != old)
-			continue;
+		set_pte(ptr, __pte(0));
 
 		iommu_iotlb_gather_add_page(&domain->domain, gather, iova,
 					    pte_size);
@@ -1279,13 +1279,13 @@ static phys_addr_t riscv_iommu_iova_to_phys(struct iommu_domain *iommu_domain,
 {
 	struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
 	size_t pte_size;
-	unsigned long *ptr;
+	pte_t *ptr;
 
 	ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size);
-	if (_io_pte_none(*ptr) || !_io_pte_present(*ptr))
+	if (_io_pte_none(ptep_get(ptr)) || !_io_pte_present(ptep_get(ptr)))
 		return 0;
 
-	return pfn_to_phys(__page_val_to_pfn(*ptr)) | (iova & (pte_size - 1));
+	return pfn_to_phys(pte_pfn(ptep_get(ptr))) | (iova & (pte_size - 1));
 }
 
 static void riscv_iommu_free_paging_domain(struct iommu_domain *iommu_domain)
--
2.20.1