[PATCH 1/2] iommu/io-pgtable-arm: Fix stage-2 map/umap for concatenated tables

From: Mostafa Saleh
Date: Thu Oct 24 2024 - 12:26:07 EST


When calculating the max number of entries in a table, Where
RM_LPAE_LVL_IDX() understands the concatenated pgds and can return
an index spanning more than one concatenated table (for ex for 4K
page size > 512).
But then, max_entries is calculated as follows:
max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;

This leads to a negative index in the page table, where for:
- map: do nothing (no OOB) as fortunately all comparisons are signed,
but it would return a negative mapped value.

- unmap: it would leak any child tables as it skips the loop over
“__arm_lpae_free_pgtable”

This bug only happens when map/unmap is requested with a page size
equals to the first level of the concatenated table (for 40 bits input
and 4K granule would be 1GB) and can be triggered from userspace with
VFIO, by choosing a VM IPA in a concatenated table > 0 and using
huge pages to mmap with the first level size.

For example, I was able to reproduce it with the following command
with mainline linux and mainline kvmtool:

./lkvm run --irqchip gicv3 -k {$KERNEL} -p "earlycon" -d {$ROOTFS} --force-pci -c \
`nproc` --debug -m 4096@525312 --vfio-pci 0000:00:03.0 --hugetlbfs /hugepages

Where huge pages with 1GB would be used and using IPA in the second table
(512GB for 40 bits SMMU and 4K granule).

Signed-off-by: Mostafa Saleh <smostafa@xxxxxxxxxx>
---
drivers/iommu/io-pgtable-arm.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 0e67f1721a3d..3ecbc024e440 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -199,6 +199,17 @@ static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
return (paddr | (paddr << (48 - 12))) & (ARM_LPAE_PTE_ADDR_MASK << 4);
}

+/*
+ * Using an index returned from ARM_LPAE_PGD_IDX(), which can point to
+ * concatenated PGD concatenated, get the max entries of a that table.
+ */
+static inline int arm_lpae_max_entries(int i, struct arm_lpae_io_pgtable *data)
+{
+ int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);
+
+ return ptes_per_table - (i & (ptes_per_table - 1));
+}
+
static bool selftest_running = false;

static dma_addr_t __arm_lpae_dma_addr(void *pages)
@@ -390,7 +401,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,

/* If we can install a leaf entry at this level, then do so */
if (size == block_size) {
- max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
+ max_entries = arm_lpae_max_entries(map_idx_start, data);
num_entries = min_t(int, pgcount, max_entries);
ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
if (!ret)
@@ -592,7 +603,7 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,

if (size == split_sz) {
unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
- max_entries = ptes_per_table - unmap_idx_start;
+ max_entries = arm_lpae_max_entries(unmap_idx_start, data);
num_entries = min_t(int, pgcount, max_entries);
}

@@ -650,7 +661,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,

/* If the size matches this level, we're in the right place */
if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
- max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
+ max_entries = arm_lpae_max_entries(unmap_idx_start, data);
num_entries = min_t(int, pgcount, max_entries);

/* Find and handle non-leaf entries */
--
2.47.0.105.g07ac214952-goog