[RFC PATCH v4 03/13] iommu/io-pgtable-arm: Add and realize split_block ops

From: Keqian Zhu
Date: Fri May 07 2021 - 06:23:27 EST


From: Kunkun Jiang <jiangkunkun@xxxxxxxxxx>

Block(largepage) mapping is not a proper granule for dirty log tracking.
Take an extreme example, if DMA writes one byte, under 1G mapping, the
dirty amount reported is 1G, but under 4K mapping, the dirty amount is
just 4K.

This splits block descriptor to an span of page descriptors. BBML1 or
BBML2 feature is required.

Spliting block is designed to be only used by dirty log tracking, which
does not concurrently work with other pgtable ops that access underlying
page table, so race condition does not exist.

Co-developed-by: Keqian Zhu <zhukeqian1@xxxxxxxxxx>
Signed-off-by: Kunkun Jiang <jiangkunkun@xxxxxxxxxx>
---
drivers/iommu/io-pgtable-arm.c | 122 +++++++++++++++++++++++++++++++++
include/linux/io-pgtable.h | 2 +
2 files changed, 124 insertions(+)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 94d790b8ed27..664a9548b199 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -79,6 +79,8 @@
#define ARM_LPAE_PTE_SH_IS (((arm_lpae_iopte)3) << 8)
#define ARM_LPAE_PTE_NS (((arm_lpae_iopte)1) << 5)
#define ARM_LPAE_PTE_VALID (((arm_lpae_iopte)1) << 0)
+/* Block descriptor bits */
+#define ARM_LPAE_PTE_NT (((arm_lpae_iopte)1) << 16)

#define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << 2)
/* Ignore the contiguous bit for block splitting */
@@ -679,6 +681,125 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
return iopte_to_paddr(pte, data) | iova;
}

+static size_t __arm_lpae_split_block(struct arm_lpae_io_pgtable *data,
+ unsigned long iova, size_t size, int lvl,
+ arm_lpae_iopte *ptep);
+
+static size_t arm_lpae_do_split_blk(struct arm_lpae_io_pgtable *data,
+ unsigned long iova, size_t size,
+ arm_lpae_iopte blk_pte, int lvl,
+ arm_lpae_iopte *ptep)
+{
+ struct io_pgtable_cfg *cfg = &data->iop.cfg;
+ arm_lpae_iopte pte, *tablep;
+ phys_addr_t blk_paddr;
+ size_t tablesz = ARM_LPAE_GRANULE(data);
+ size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
+ int i;
+
+ if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
+ return 0;
+
+ tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg);
+ if (!tablep)
+ return 0;
+
+ blk_paddr = iopte_to_paddr(blk_pte, data);
+ pte = iopte_prot(blk_pte);
+ for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz)
+ __arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]);
+
+ if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_BBML1) {
+ /* Race does not exist */
+ blk_pte |= ARM_LPAE_PTE_NT;
+ __arm_lpae_set_pte(ptep, blk_pte, cfg);
+ io_pgtable_tlb_flush_walk(&data->iop, iova, size, size);
+ }
+ /* Race does not exist */
+ pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg);
+
+ /* Have splited it into page? */
+ if (lvl == (ARM_LPAE_MAX_LEVELS - 1))
+ return size;
+
+ /* Go back to lvl - 1 */
+ ptep -= ARM_LPAE_LVL_IDX(iova, lvl - 1, data);
+ return __arm_lpae_split_block(data, iova, size, lvl - 1, ptep);
+}
+
+static size_t __arm_lpae_split_block(struct arm_lpae_io_pgtable *data,
+ unsigned long iova, size_t size, int lvl,
+ arm_lpae_iopte *ptep)
+{
+ arm_lpae_iopte pte;
+ struct io_pgtable *iop = &data->iop;
+ size_t base, next_size, total_size;
+
+ if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
+ return 0;
+
+ ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
+ pte = READ_ONCE(*ptep);
+ if (WARN_ON(!pte))
+ return 0;
+
+ if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
+ if (iopte_leaf(pte, lvl, iop->fmt)) {
+ if (lvl == (ARM_LPAE_MAX_LEVELS - 1) ||
+ (pte & ARM_LPAE_PTE_AP_RDONLY))
+ return size;
+
+ /* We find a writable block, split it. */
+ return arm_lpae_do_split_blk(data, iova, size, pte,
+ lvl + 1, ptep);
+ } else {
+ /* If it is the last table level, then nothing to do */
+ if (lvl == (ARM_LPAE_MAX_LEVELS - 2))
+ return size;
+
+ total_size = 0;
+ next_size = ARM_LPAE_BLOCK_SIZE(lvl + 1, data);
+ ptep = iopte_deref(pte, data);
+ for (base = 0; base < size; base += next_size)
+ total_size += __arm_lpae_split_block(data,
+ iova + base, next_size, lvl + 1,
+ ptep);
+ return total_size;
+ }
+ } else if (iopte_leaf(pte, lvl, iop->fmt)) {
+ WARN(1, "Can't split behind a block.\n");
+ return 0;
+ }
+
+ /* Keep on walkin */
+ ptep = iopte_deref(pte, data);
+ return __arm_lpae_split_block(data, iova, size, lvl + 1, ptep);
+}
+
+static size_t arm_lpae_split_block(struct io_pgtable_ops *ops,
+ unsigned long iova, size_t size)
+{
+ struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+ struct io_pgtable_cfg *cfg = &data->iop.cfg;
+ arm_lpae_iopte *ptep = data->pgd;
+ int lvl = data->start_level;
+ long iaext = (s64)iova >> cfg->ias;
+
+ if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
+ return 0;
+
+ if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
+ iaext = ~iaext;
+ if (WARN_ON(iaext))
+ return 0;
+
+ /* If it is smallest granule, then nothing to do */
+ if (size == ARM_LPAE_BLOCK_SIZE(ARM_LPAE_MAX_LEVELS - 1, data))
+ return size;
+
+ return __arm_lpae_split_block(data, iova, size, lvl, ptep);
+}
+
static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
{
unsigned long granule, page_sizes;
@@ -757,6 +878,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
.map = arm_lpae_map,
.unmap = arm_lpae_unmap,
.iova_to_phys = arm_lpae_iova_to_phys,
+ .split_block = arm_lpae_split_block,
};

return data;
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 92274705b772..eba6c6ccbe49 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -167,6 +167,8 @@ struct io_pgtable_ops {
size_t size, struct iommu_iotlb_gather *gather);
phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
unsigned long iova);
+ size_t (*split_block)(struct io_pgtable_ops *ops, unsigned long iova,
+ size_t size);
};

/**
--
2.19.1