[PATCH v2 4/4] iommu/riscv: Add support for Svnapot

From: Xu Lu
Date: Tue Mar 11 2025 - 08:33:19 EST


Add the Svnapot contiguous size as a supported page size and apply Svnapot
mappings when possible. Svnapot allows a naturally aligned, power-of-two group
of contiguous base-page PTEs (currently only the 64 KiB size is defined) to be
translated as a single entry.

Signed-off-by: Xu Lu <luxu.kernel@xxxxxxxxxxxxx>
---
drivers/iommu/riscv/iommu.c | 86 +++++++++++++++++++++++++++++++++----
1 file changed, 77 insertions(+), 9 deletions(-)
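
[Reviewer note, not part of the patch] The ratified Svnapot extension defines
only the 64 KiB NAPOT size, i.e. order 4 / 16 contiguous 4 KiB PTEs, which is
why the map/unmap paths below write or clear napot_pte_num() PTEs per mapping.
The standalone sketch below only mirrors the order/size arithmetic behind the
napot_cont_size()/napot_pte_num() uses and the pgsize_bitmap update in this
patch. The names PAGE_SHIFT, NAPOT_ORDER_BASE, NAPOT_ORDER_MAX, napot_size()
and napot_ptes() are made up for illustration and assume 4 KiB base pages;
they are not the kernel's definitions.

/* Illustration only: NAPOT order/size arithmetic (4 KiB base pages assumed). */
#include <stdio.h>

#define PAGE_SHIFT		12UL
#define NAPOT_ORDER_BASE	4UL	/* 64 KiB NAPOT: 16 x 4 KiB */
#define NAPOT_ORDER_MAX		5UL	/* only one order defined today */

static unsigned long napot_size(unsigned long order)
{
	return 1UL << (order + PAGE_SHIFT);	/* bytes covered by one NAPOT leaf */
}

static unsigned long napot_ptes(unsigned long order)
{
	return 1UL << order;			/* contiguous PTEs written per mapping */
}

int main(void)
{
	/* 4 KiB / 2 MiB / 1 GiB, as in the existing pgsize_bitmap */
	unsigned long order, bitmap = (1UL << 12) | (1UL << 21) | (1UL << 30);

	for (order = NAPOT_ORDER_BASE; order < NAPOT_ORDER_MAX; order++) {
		bitmap |= napot_size(order);	/* what the pgsize_bitmap hunk adds */
		printf("order %lu: %lu KiB, %lu PTEs per mapping\n",
		       order, napot_size(order) >> 10, napot_ptes(order));
	}
	printf("pgsize_bitmap: 0x%lx\n", bitmap);

	return 0;
}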

diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index ce4cf6569ffb4..7cc736abd2a61 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -1158,6 +1158,26 @@ static int pgsize_to_level(size_t pgsize)
return level;
}

+static unsigned long napot_size_to_order(unsigned long size)
+{
+ unsigned long order;
+
+ if (!has_svnapot())
+ return 0;
+
+ for_each_napot_order(order) {
+ if (size == napot_cont_size(order))
+ return order;
+ }
+
+ return 0;
+}
+
+static bool is_napot_size(unsigned long size)
+{
+ return napot_size_to_order(size) != 0;
+}
+
static void riscv_iommu_pte_free(struct riscv_iommu_domain *domain,
pte_t pte, int level,
struct list_head *freelist)
@@ -1205,7 +1225,8 @@ static pte_t *riscv_iommu_pte_alloc(struct riscv_iommu_domain *domain,
* existing mapping with smaller granularity. Up to the caller
* to replace and invalidate.
*/
- if (((size_t)1 << shift) == pgsize)
+ if ((((size_t)1 << shift) == pgsize) ||
+ (is_napot_size(pgsize) && pgsize_to_level(pgsize) == level))
return ptr;
pte_retry:
pte = ptep_get(ptr);
@@ -1256,7 +1277,10 @@ static pte_t *riscv_iommu_pte_fetch(struct riscv_iommu_domain *domain,
ptr += ((iova >> shift) & (PTRS_PER_PTE - 1));
pte = ptep_get(ptr);
if (_io_pte_present(pte) && _io_pte_leaf(pte)) {
- *pte_pgsize = (size_t)1 << shift;
+ if (pte_napot(pte))
+ *pte_pgsize = napot_cont_size(napot_cont_order(pte));
+ else
+ *pte_pgsize = (size_t)1 << shift;
return ptr;
}
if (_io_pte_none(pte))
@@ -1274,13 +1298,18 @@ static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain,
{
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
size_t size = 0;
- pte_t *ptr;
- pte_t pte;
- unsigned long pte_prot;
- int rc = 0, level;
+ pte_t *ptr, old, pte;
+ unsigned long pte_prot, order = 0;
+ int rc = 0, level, i;
spinlock_t *ptl; /* page table page lock */
LIST_HEAD(freelist);

+ if (iova & (pgsize - 1))
+ return -EINVAL;
+
+ if (is_napot_size(pgsize))
+ order = napot_size_to_order(pgsize);
+
if (!(prot & IOMMU_WRITE))
pte_prot = _PAGE_BASE | _PAGE_READ;
else if (domain->amo_enabled)
@@ -1297,9 +1326,27 @@ static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain,

level = pgsize_to_level(pgsize);
ptl = riscv_iommu_ptlock(domain, ptr, level);
- riscv_iommu_pte_free(domain, ptep_get(ptr), level, &freelist);
+
+ old = ptep_get(ptr);
+ if (pte_napot(old) && napot_cont_size(napot_cont_order(old)) > pgsize) {
+ spin_unlock(ptl);
+ rc = -EFAULT;
+ break;
+ }
+
pte = _io_pte_entry(phys_to_pfn(phys), pte_prot);
- set_pte(ptr, pte);
+ if (order) {
+ pte = pte_mknapot(pte, order);
+ for (i = 0; i < napot_pte_num(order); i++, ptr++) {
+ old = ptep_get(ptr);
+ riscv_iommu_pte_free(domain, old, level, &freelist);
+ set_pte(ptr, pte);
+ }
+ } else {
+ riscv_iommu_pte_free(domain, old, level, &freelist);
+ set_pte(ptr, pte);
+ }
+
spin_unlock(ptl);

size += pgsize;
@@ -1336,6 +1383,9 @@ static size_t riscv_iommu_unmap_pages(struct iommu_domain *iommu_domain,
size_t unmapped = 0;
size_t pte_size;
spinlock_t *ptl; /* page table page lock */
+ unsigned long pte_num;
+ pte_t pte;
+ int i;

while (unmapped < size) {
ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size);
@@ -1347,7 +1397,21 @@ static size_t riscv_iommu_unmap_pages(struct iommu_domain *iommu_domain,
return unmapped;

ptl = riscv_iommu_ptlock(domain, ptr, pgsize_to_level(pte_size));
- set_pte(ptr, __pte(0));
+ if (is_napot_size(pte_size)) {
+ pte = ptep_get(ptr);
+
+ if (!pte_napot(pte) ||
+ napot_cont_size(napot_cont_order(pte)) != pte_size) {
+ spin_unlock(ptl);
+ return unmapped;
+ }
+
+ pte_num = napot_pte_num(napot_cont_order(pte));
+ for (i = 0; i < pte_num; i++, ptr++)
+ set_pte(ptr, __pte(0));
+ } else {
+ set_pte(ptr, __pte(0));
+ }
spin_unlock(ptl);

iommu_iotlb_gather_add_page(&domain->domain, gather, iova,
@@ -1447,6 +1511,7 @@ static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev)
unsigned int pgd_mode;
dma_addr_t va_mask;
int va_bits, level;
+ size_t order;

iommu = dev_to_iommu(dev);
if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV57) {
@@ -1506,6 +1571,9 @@ static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev)
domain->domain.geometry.aperture_end = va_mask;
domain->domain.geometry.force_aperture = true;
domain->domain.pgsize_bitmap = va_mask & (SZ_4K | SZ_2M | SZ_1G | SZ_512G);
+ if (has_svnapot())
+ for_each_napot_order(order)
+ domain->domain.pgsize_bitmap |= napot_cont_size(order) & va_mask;

domain->domain.ops = &riscv_iommu_paging_domain_ops;

--
2.20.1