[RFC PATCH v3 3/8] mm: Implement folio_pmd_batch
From: Oscar Salvador
Date: Mon May 25 2026 - 12:57:15 EST
HugeTLB pages can be mapped with contiguous PMDs, so we need a way to
batch those entries, as we already do for contiguous PTEs.
Implement folio_pmd_batch(), together with the pmd_batch_hint() and
pmd_advance_pfn() helpers it relies on, in order to do that.
Signed-off-by: Oscar Salvador <osalvador@xxxxxxx>
---
arch/arm64/include/asm/pgtable.h | 19 ++++++++
include/linux/pgtable.h | 28 ++++++++++++
mm/internal.h | 75 +++++++++++++++++++++++++++++++-
3 files changed, 121 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index ca0f1fcae7e8..08ae4ee7d1da 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -164,6 +164,8 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
(__boundary - 1 < (end) - 1) ? __boundary : (end); \
})
+/* Both pmd_valid() and pmd_cont() hold for @pmd; pmd_batch_hint() keys off this. */
+#define pmd_valid_cont(pmd) (pmd_valid(pmd) && pmd_cont(pmd))
+
#define pte_hw_dirty(pte) (pte_write(pte) && !pte_rdonly(pte))
#define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY))
#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte))
@@ -669,6 +671,12 @@ static inline pgprot_t pmd_pgprot(pmd_t pmd)
return __pgprot(pmd_val(pfn_pmd(pfn, __pgprot(0))) ^ pmd_val(pmd));
}
+#define pmd_advance_pfn pmd_advance_pfn
+/*
+ * Build a new PMD whose PFN is that of @pmd plus @nr, carrying over the
+ * protection bits reported by pmd_pgprot(@pmd).
+ */
+static inline pmd_t pmd_advance_pfn(pmd_t pmd, unsigned long nr)
+{
+	const pgprot_t prot = pmd_pgprot(pmd);
+
+	return pfn_pmd(pmd_pfn(pmd) + nr, prot);
+}
+
#define pud_pgprot pud_pgprot
static inline pgprot_t pud_pgprot(pud_t pud)
{
@@ -1656,6 +1664,17 @@ extern void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long ad
pte_t *ptep, pte_t old_pte, pte_t pte,
unsigned int nr);
+#ifdef CONFIG_HUGETLB_PAGE
+#define pmd_batch_hint pmd_batch_hint
+/*
+ * Batching hint for PMD entries.
+ *
+ * Returns 1 unless @pmd satisfies pmd_valid_cont(). Otherwise returns how many
+ * entries lie between @pmdp (inclusive) and the end of the CONT_PMDS-sized
+ * group it falls in.
+ */
+static inline unsigned int pmd_batch_hint(pmd_t *pmdp, pmd_t pmd)
+{
+	unsigned long idx_in_group;
+
+	if (!pmd_valid_cont(pmd))
+		return 1;
+
+	/*
+	 * ">> 3" turns the table address into an entry index; this assumes
+	 * 8-byte PMD entries (NOTE(review): confirm sizeof(pmd_t) == 8).
+	 */
+	idx_in_group = ((unsigned long)pmdp >> 3) & (CONT_PMDS - 1);
+
+	return CONT_PMDS - idx_in_group;
+}
+#endif
+
#ifdef CONFIG_ARM64_CONTPTE
/*
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 70aae957be5b..f5291f9ce583 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -358,6 +358,34 @@ static inline void lazy_mmu_mode_pause(void) {}
static inline void lazy_mmu_mode_resume(void) {}
#endif
+#ifndef pmd_batch_hint
+/**
+ * pmd_batch_hint - Number of PMD entries that can be added to batch without scanning.
+ * @pmdp: Page table pointer for the entry.
+ * @pmd: Page table entry.
+ *
+ * Some architectures know that a set of contiguous pmds all map the same
+ * contiguous memory with the same permissions. In this case, they can provide
+ * a hint to aid pmd batching without the core code needing to scan every pmd.
+ *
+ * An architecture implementation may ignore the PMD accessed state. Further,
+ * the dirty state must apply atomically to all the PMDs described by the hint.
+ *
+ * May be overridden by the architecture, else pmd_batch_hint is always 1.
+ *
+ * Return: the number of entries, starting at @pmdp, that may be batched
+ * without reading them; this generic version always returns 1.
+ */
+static inline unsigned int pmd_batch_hint(pmd_t *pmdp, pmd_t pmd)
+{
+ return 1;
+}
+#endif
+
+#ifndef pmd_advance_pfn
+/*
+ * Generic fallback, used unless the architecture defines pmd_advance_pfn:
+ * add @nr, shifted left by PFN_PTE_SHIFT, to the raw value of @pmd.
+ *
+ * NOTE(review): presumably this relies on the PFN field of a PMD starting at
+ * bit PFN_PTE_SHIFT, as it does for PTEs -- confirm per architecture.
+ */
+static inline pmd_t pmd_advance_pfn(pmd_t pmd, unsigned long nr)
+{
+	const unsigned long pfn_delta = nr << PFN_PTE_SHIFT;
+
+	return __pmd(pmd_val(pmd) + pfn_delta);
+}
+#endif
+
#ifndef pte_batch_hint
/**
* pte_batch_hint - Number of pages that can be added to batch without scanning.
diff --git a/mm/internal.h b/mm/internal.h
index 5a2ddcf68e0b..9a0f9e89b054 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -270,7 +270,7 @@ static inline int anon_vma_prepare(struct vm_area_struct *vma)
return __anon_vma_prepare(vma);
}
-/* Flags for folio_pte_batch(). */
+/* Flags for folio_{pmd,pte}_batch(). */
typedef int __bitwise fpb_t;
/* Compare PTEs respecting the dirty bit. */
@@ -294,6 +294,79 @@ typedef int __bitwise fpb_t;
*/
#define FPB_MERGE_YOUNG_DIRTY ((__force fpb_t)BIT(4))
+/*
+ * Normalise @pmd before it is compared against another entry of a batch.
+ *
+ * The dirty, soft-dirty and write bits are each cleared unless the matching
+ * FPB_RESPECT_* flag is set in @flags; the entry is always marked old.
+ *
+ * NOTE(review): this uses pmd_mkclean(), pmd_clear_soft_dirty(),
+ * pmd_wrprotect() and pmd_mkold() unconditionally -- confirm they are
+ * available on every architecture/config that includes this header.
+ */
+static inline pmd_t __pmd_batch_clear_ignored(pmd_t pmd, fpb_t flags)
+{
+	pmd_t normalised = pmd;
+
+	if (!(flags & FPB_RESPECT_DIRTY))
+		normalised = pmd_mkclean(normalised);
+
+	if (likely(!(flags & FPB_RESPECT_SOFT_DIRTY)))
+		normalised = pmd_clear_soft_dirty(normalised);
+
+	if (likely(!(flags & FPB_RESPECT_WRITE)))
+		normalised = pmd_wrprotect(normalised);
+
+	/* The accessed state never takes part in the comparison. */
+	normalised = pmd_mkold(normalised);
+
+	return normalised;
+}
+
+/**
+ * folio_pmd_batch - detect a PMD batch for a large folio
+ * @folio: The large folio to detect a PMD batch for.
+ * @pmdp: Page table pointer for the first entry.
+ * @pmdentp: Pointer to the value of the first entry. It is only read.
+ * @max_nr: The maximum number of PMD entries to consider; must be at least 1.
+ * @flags: FPB_RESPECT_DIRTY, FPB_RESPECT_SOFT_DIRTY and FPB_RESPECT_WRITE
+ *	   select which bits take part in the comparison. No other flag is
+ *	   looked at here.
+ * @any_writable: Optional. Seeded from the first entry, then OR-ed with the
+ *		  writable state of every further entry that is read and
+ *		  accepted into the batch.
+ * @any_young: Optional. Same as @any_writable, for the young state.
+ * @any_dirty: Optional. Same as @any_writable, for the dirty state.
+ *
+ * Detect a PMD batch: consecutive PMD entries that map consecutive PMD-sized
+ * ranges of the same large folio. Apart from the PFN, entries are compared
+ * after __pmd_batch_clear_ignored() has dropped the bits that @flags does not
+ * ask to respect.
+ *
+ * Entries covered by pmd_batch_hint() are accepted without being read, so the
+ * any_* outputs do not reflect them individually.
+ *
+ * The only user of this is hugetlb, for contiguous PMDs.
+ *
+ * Return: the number of PMD entries in the batch. This never exceeds @max_nr
+ * after it has been limited to what remains of @folio.
+ */
+static inline unsigned int folio_pmd_batch(struct folio *folio, pmd_t *pmdp, pmd_t *pmdentp,
+		unsigned int max_nr, fpb_t flags, bool *any_writable,
+		bool *any_young, bool *any_dirty)
+{
+	/* One PMD entry spans (1 << pmd_order) base-page PFNs. */
+	const unsigned int pmd_order = PMD_SHIFT - PAGE_SHIFT;
+	/* Only meaningful when the matching any_* pointer is non-NULL. */
+	bool writable = false, young = false, dirty = false;
+	pmd_t expected_pmd, pmd = *pmdentp;
+	unsigned int nr, cur_nr;
+
+	if (any_writable)
+		*any_writable = !!pmd_write(pmd);
+	if (any_young)
+		*any_young = !!pmd_young(pmd);
+	if (any_dirty)
+		*any_dirty = !!pmd_dirty(pmd);
+
+	VM_WARN_ON_FOLIO(!pmd_present(pmd), folio);
+	VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio);
+	VM_WARN_ON_FOLIO(page_folio(pfn_to_page(pmd_pfn(pmd))) != folio, folio);
+
+	/* Limit max_nr to the actual remaining PFNs in the folio we could batch. */
+	max_nr = min_t(unsigned long, max_nr,
+		       (folio_pfn(folio) + folio_nr_pages(folio) -
+			pmd_pfn(pmd)) >> pmd_order);
+
+	/* Entries covered by the first hint are taken without being read. */
+	nr = pmd_batch_hint(pmdp, pmd);
+	expected_pmd = pmd_advance_pfn(pmd, nr << pmd_order);
+	expected_pmd = __pmd_batch_clear_ignored(expected_pmd, flags);
+	pmdp += nr;
+
+	while (nr < max_nr) {
+		pmd = pmdp_get(pmdp);
+		/* Sample the state before the ignored bits are cleared. */
+		if (any_writable)
+			writable = !!pmd_write(pmd);
+		if (any_young)
+			young = !!pmd_young(pmd);
+		if (any_dirty)
+			dirty = !!pmd_dirty(pmd);
+		pmd = __pmd_batch_clear_ignored(pmd, flags);
+
+		if (!pmd_same(pmd, expected_pmd))
+			break;
+
+		if (any_writable)
+			*any_writable |= writable;
+		if (any_young)
+			*any_young |= young;
+		if (any_dirty)
+			*any_dirty |= dirty;
+
+		cur_nr = pmd_batch_hint(pmdp, pmd);
+		expected_pmd = pmd_advance_pfn(expected_pmd, cur_nr << pmd_order);
+		pmdp += cur_nr;
+		nr += cur_nr;
+	}
+
+	/* A hint may step past max_nr; never report more than allowed. */
+	return min(nr, max_nr);
+}
+
static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags)
{
if (!(flags & FPB_RESPECT_DIRTY))
--
2.53.0