[RFC PATCH v1 05/57] mm: Avoid split pmd ptl if pmd level is run-time folded

From: Ryan Roberts
Date: Mon Oct 14 2024 - 07:00:28 EST


If there are only 2 levels of translation, the first level (pgd) may not
be an entire page and so does not have a ptdesc backing it (this may be
true on arm64 depending on the VA size and page size). Even if it is an
entire page, and therefore does have a ptdesc,
pagetable_pmd_ctor() won't be called for that ptdesc (since it is a pgd
table, not a pmd table) and so the per-ptdesc ptl fields won't be
initialised.
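
For context, the split pmd ptl is normally initialised when a real pmd
table is allocated; a rough sketch of the generic allocation path
(approximately what include/asm-generic/pgalloc.h does today; arch
overrides may differ). A folded pmd never goes through this, so its
backing pgd ptdesc never sees the ctor:

  static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
  {
  	struct ptdesc *ptdesc;
  	gfp_t gfp = GFP_PGTABLE_USER;

  	if (mm == &init_mm)
  		gfp = GFP_PGTABLE_KERNEL;
  	ptdesc = pagetable_alloc(gfp, 0);
  	if (!ptdesc)
  		return NULL;
  	/* Initialises the per-ptdesc ptl used for split pmd locking. */
  	if (!pagetable_pmd_ctor(ptdesc)) {
  		pagetable_free(ptdesc);
  		return NULL;
  	}
  	return ptdesc_address(ptdesc);
  }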

To date this has been fine; the arch knows at compile time whether it
needs to fold the pmd level and, in that case, does not select
CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK. However, if the number of levels
is not known at compile time (as is the case for boot-time page size
selection), we want to be able to choose at boot whether to use split
pmd ptls in the pmd's ptdesc or simply fall back to the lock in the
mm_struct.
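
For reference, that compile-time decision is currently baked into
USE_SPLIT_PMD_PTLOCKS, roughly as defined in
include/linux/mm_types_task.h:

  #define USE_SPLIT_PTE_PTLOCKS	(NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
  #define USE_SPLIT_PMD_PTLOCKS	(USE_SPLIT_PTE_PTLOCKS && \
  					 IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK))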

So let's make that change: when CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK is
selected, determine at run-time whether the split pmd ptl should be
used, based on mm_pmd_folded(). When the pmd level is folded, fall back
to mm->page_table_lock.
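
Since callers already resolve the lock through pmd_lockptr() (e.g. via
the pmd_lock() helper, roughly as it exists in include/linux/mm.h), a
run-time check there is enough to transparently switch every locker
between the split ptl and mm->page_table_lock:

  static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
  {
  	spinlock_t *ptl = pmd_lockptr(mm, pmd);

  	spin_lock(ptl);
  	return ptl;
  }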

This sets us up for arm64 to support boot-time page size selection.

Signed-off-by: Ryan Roberts <ryan.roberts@xxxxxxx>
---

***NOTE***
Any confused maintainers may want to read the cover note here for context:
https://lore.kernel.org/all/20241014105514.3206191-1-ryan.roberts@xxxxxxx/

 include/linux/mm.h       | 15 ++++++++++++++-
 include/linux/mm_types.h |  2 +-
 kernel/fork.c            |  4 ++--
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1470736017168..09a840517c23a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3037,6 +3037,8 @@ static inline struct ptdesc *pmd_ptdesc(pmd_t *pmd)
 
 static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
 {
+	if (mm_pmd_folded(mm))
+		return &mm->page_table_lock;
 	return ptlock_ptr(pmd_ptdesc(pmd));
 }
 
@@ -3056,7 +3058,18 @@ static inline void pmd_ptlock_free(struct ptdesc *ptdesc)
 	ptlock_free(ptdesc);
 }
 
-#define pmd_huge_pte(mm, pmd) (pmd_ptdesc(pmd)->pmd_huge_pte)
+static inline pgtable_t *__pmd_huge_pte(struct mm_struct *mm, pmd_t *pmd)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (mm_pmd_folded(mm))
+		return &mm->pmd_huge_pte;
+	return &pmd_ptdesc(pmd)->pmd_huge_pte;
+#else
+	return NULL;
+#endif
+}
+
+#define pmd_huge_pte(mm, pmd) (*__pmd_huge_pte(mm, pmd))
 
 #else
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0844ed7cfaa53..87dc6de7b7baf 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -946,7 +946,7 @@ struct mm_struct {
 #ifdef CONFIG_MMU_NOTIFIER
 		struct mmu_notifier_subscriptions *notifier_subscriptions;
 #endif
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
 		pgtable_t pmd_huge_pte; /* protected by page_table_lock */
 #endif
 #ifdef CONFIG_NUMA_BALANCING
diff --git a/kernel/fork.c b/kernel/fork.c
index cc760491f2012..ea472566d4fcc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -832,7 +832,7 @@ static void check_mm(struct mm_struct *mm)
 		pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n",
 				mm_pgtables_bytes(mm));
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
 #endif
 }
@@ -1276,7 +1276,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	RCU_INIT_POINTER(mm->exe_file, NULL);
 	mmu_notifier_subscriptions_init(mm);
 	init_tlb_flush_pending(mm);
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	mm->pmd_huge_pte = NULL;
 #endif
 	mm_init_uprobes_state(mm);
--
2.43.0