Re: [PATCH v3 00/13] mm/huge_memory: refactor zap_huge_pmd()
From: Andrew Morton
Date: Fri Mar 20 2026 - 14:44:58 EST
On Fri, 20 Mar 2026 18:14:50 +0000 "Lorenzo Stoakes (Oracle)" <ljs@xxxxxxxxxx> wrote:
> The zap_huge_pmd() function is overly complicated, clean it up and also add
> an assert in the case that we encounter a buggy PMD entry that doesn't
> match expectations.
>
> This is motivated by a bug discovered [0] where the PMD entry was none of:
>
> * A non-DAX, PFN or mixed map.
> * The huge zero folio
> * A present PMD entry
> * A softleaf entry
>
> In zap_huge_pmd(), but due to the bug we managed to reach this code.
>
> It is useful to explicitly call this out rather than have an arbitrary NULL
> pointer dereference happen, which also improves understanding of what's
> going on.
>
> The series goes further to make use of vm_normal_folio_pmd() rather than
> implementing custom logic for retrieving the folio, and extends softleaf
> functionality to provide and use an equivalent softleaf function.
>
Thanks, I updated mm-unstable to this version.
> v3:
> * Propagated tags, thanks everybody!
> * Fixed const vma parameter in vma_is_special_huge() in 1/13 as per
> Sashiko.
> * Renamed needs_deposit -> has_deposit as per Kiryl, better describing the
> situation as we're zapping deposited tables, not depositing them.
> * Initialised has_deposit to arch_needs_pgtable_deposit(), and updated huge
> zero page case to account for that as per Kiryl.
> * Dropped separated logic approach as per Baolin.
> * Added 'No functional change intended.' caveats.
> * Removed seemingly superfluous, inconsistent post-folio_remove_rmap_pmd()
>   mapcount sanity checks.
> * De-duplicated tlb->mm's.
> * Separated folio-specific logic into another function.
> * Added softleaf_is_valid_pmd_entry(), pmd_to_softleaf_folio() functions.
> * Add and use normal_or_softleaf_folio_pmd() to make use of
> vm_normal_folio_pmd() and pmd_to_softleaf_folio() for obtaining the
> folio.
> * Add and use has_deposited_pgtable() to figure out deposits.
> * Added a bunch of explanatory comments as per Baolin.
Here's how v3 altered mm.git:
include/linux/leafops.h | 39 +++++++++++-
mm/huge_memory.c | 115 ++++++++++++++++++++++----------------
2 files changed, 102 insertions(+), 52 deletions(-)
--- a/include/linux/leafops.h~b
+++ a/include/linux/leafops.h
@@ -603,7 +603,20 @@ static inline bool pmd_is_migration_entr
}
/**
- * pmd_is_valid_softleaf() - Is this PMD entry a valid leaf entry?
+ * softleaf_is_valid_pmd_entry() - Is the specified softleaf entry obtained from
+ * a PMD one that we support at PMD level?
+ * @entry: Entry to check.
+ * Returns: true if the softleaf entry is valid at PMD, otherwise false.
+ */
+static inline bool softleaf_is_valid_pmd_entry(softleaf_t entry)
+{
+ /* Only device private, migration entries valid for PMD. */
+ return softleaf_is_device_private(entry) ||
+ softleaf_is_migration(entry);
+}
+
+/**
+ * pmd_is_valid_softleaf() - Is this PMD entry a valid softleaf entry?
* @pmd: PMD entry.
*
* PMD leaf entries are valid only if they are device private or migration
@@ -616,9 +629,27 @@ static inline bool pmd_is_valid_softleaf
{
const softleaf_t entry = softleaf_from_pmd(pmd);
- /* Only device private, migration entries valid for PMD. */
- return softleaf_is_device_private(entry) ||
- softleaf_is_migration(entry);
+ return softleaf_is_valid_pmd_entry(entry);
+}
+
+/**
+ * pmd_to_softleaf_folio() - Convert the PMD entry to a folio.
+ * @pmd: PMD entry.
+ *
+ * The PMD entry is expected to be a valid PMD softleaf entry.
+ *
+ * Returns: the folio the softleaf entry references if this is a valid softleaf
+ * entry, otherwise NULL.
+ */
+static inline struct folio *pmd_to_softleaf_folio(pmd_t pmd)
+{
+ const softleaf_t entry = softleaf_from_pmd(pmd);
+
+ if (!softleaf_is_valid_pmd_entry(entry)) {
+ VM_WARN_ON_ONCE(true);
+ return NULL;
+ }
+ return softleaf_to_folio(entry);
}
#endif /* CONFIG_MMU */
--- a/mm/huge_memory.c~b
+++ a/mm/huge_memory.c
@@ -104,7 +104,7 @@ static inline bool file_thp_enabled(stru
}
/* If returns true, we are unable to access the VMA's folios. */
-static bool vma_is_special_huge(struct vm_area_struct *vma)
+static bool vma_is_special_huge(const struct vm_area_struct *vma)
{
if (vma_is_dax(vma))
return false;
@@ -2325,6 +2325,63 @@ static inline void zap_deposited_table(s
mm_dec_nr_ptes(mm);
}
+static void zap_huge_pmd_folio(struct mm_struct *mm, struct vm_area_struct *vma,
+ pmd_t pmdval, struct folio *folio, bool is_present)
+{
+ const bool is_device_private = folio_is_device_private(folio);
+
+ /* Present and device private folios are rmappable. */
+ if (is_present || is_device_private)
+ folio_remove_rmap_pmd(folio, &folio->page, vma);
+
+ if (folio_test_anon(folio)) {
+ add_mm_counter(mm, MM_ANONPAGES, -HPAGE_PMD_NR);
+ } else {
+ add_mm_counter(mm, mm_counter_file(folio),
+ -HPAGE_PMD_NR);
+
+ if (is_present && pmd_young(pmdval) &&
+ likely(vma_has_recency(vma)))
+ folio_mark_accessed(folio);
+ }
+
+ /* Device private folios are pinned. */
+ if (is_device_private)
+ folio_put(folio);
+}
+
+static struct folio *normal_or_softleaf_folio_pmd(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t pmdval, bool is_present)
+{
+ if (is_present)
+ return vm_normal_folio_pmd(vma, addr, pmdval);
+
+ if (!thp_migration_supported())
+ WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!");
+ return pmd_to_softleaf_folio(pmdval);
+}
+
+static bool has_deposited_pgtable(struct vm_area_struct *vma, pmd_t pmdval,
+ struct folio *folio)
+{
+ /* Some architectures require unconditional depositing. */
+ if (arch_needs_pgtable_deposit())
+ return true;
+
+ /*
+ * Huge zero always deposited except for DAX which handles itself, see
+ * set_huge_zero_folio().
+ */
+ if (is_huge_zero_pmd(pmdval))
+ return !vma_is_dax(vma);
+
+ /*
+ * Otherwise, only anonymous folios are deposited, see
+ * __do_huge_pmd_anonymous_page().
+ */
+ return folio && folio_test_anon(folio);
+}
+
/**
* zap_huge_pmd - Zap a huge THP which is of PMD size.
* @tlb: The MMU gather TLB state associated with the operation.
@@ -2337,10 +2394,9 @@ static inline void zap_deposited_table(s
bool zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr)
{
- bool needs_remove_rmap = false;
- bool needs_deposit = false;
+ struct mm_struct *mm = tlb->mm;
+ struct folio *folio = NULL;
bool is_present = false;
- struct folio *folio;
spinlock_t *ptl;
pmd_t orig_pmd;
@@ -2357,56 +2413,19 @@ bool zap_huge_pmd(struct mmu_gather *tlb
*/
orig_pmd = pmdp_huge_get_and_clear_full(vma, addr, pmd,
tlb->fullmm);
-
arch_check_zapped_pmd(vma, orig_pmd);
tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
- if (pmd_present(orig_pmd)) {
- folio = vm_normal_folio_pmd(vma, addr, orig_pmd);
- if (folio) {
- needs_remove_rmap = true;
- is_present = true;
- } else if (is_huge_zero_pmd(orig_pmd)) {
- needs_deposit = !vma_is_dax(vma);
- }
- } else if (pmd_is_valid_softleaf(orig_pmd)) {
- folio = softleaf_to_folio(softleaf_from_pmd(orig_pmd));
- needs_remove_rmap = folio_is_device_private(folio);
- if (!thp_migration_supported())
- WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!");
- } else {
- WARN_ON_ONCE(true);
- folio = NULL;
- }
- if (!folio)
- goto out;
-
- if (folio_test_anon(folio)) {
- needs_deposit = true;
- add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
- } else {
- add_mm_counter(tlb->mm, mm_counter_file(folio),
- -HPAGE_PMD_NR);
-
- if (is_present && pmd_young(orig_pmd) &&
- likely(vma_has_recency(vma)))
- folio_mark_accessed(folio);
- }
+ is_present = pmd_present(orig_pmd);
+ folio = normal_or_softleaf_folio_pmd(vma, addr, orig_pmd, is_present);
+ if (folio)
+ zap_huge_pmd_folio(mm, vma, orig_pmd, folio, is_present);
- if (needs_remove_rmap) {
- folio_remove_rmap_pmd(folio, &folio->page, vma);
- WARN_ON_ONCE(folio_mapcount(folio) < 0);
- }
-
-out:
- if (arch_needs_pgtable_deposit() || needs_deposit)
- zap_deposited_table(tlb->mm, pmd);
-
- if (needs_remove_rmap && !is_present)
- folio_put(folio);
+ if (has_deposited_pgtable(vma, orig_pmd, folio))
+ zap_deposited_table(mm, pmd);
spin_unlock(ptl);
- if (is_present)
+ if (is_present && folio)
tlb_remove_page_size(tlb, &folio->page, HPAGE_PMD_SIZE);
return true;
}
_