Re: [PATCH v5 14/14] mm: thp: always enable mTHP support

From: Luiz Capitulino

Date: Wed Jun 03 2026 - 13:47:13 EST

On 2026-06-03 04:12, Lance Yang wrote:

Would it make sense to call khugepaged_enter_vma() for anon mTHP faults
as well?

vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
{
[...]
if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER))
return VM_FAULT_FALLBACK;
[...]
khugepaged_enter_vma(vma, vma->vm_flags);
[...]
}

Without PMD leaves, do_huge_pmd_anonymous_page() is not reached. Apart
from MADV_HUGEPAGE, AFAIK, the mm has no chance to enter khugepaged
from the fault side :)

Hey Lance,

Wouldn't we also skip do_huge_pmd_anonymous_page() today if
PMD-sized THPs are disabled in sysfs but mTHP is enabled?

In any case, by reading through khugepaged_enter_vma() I see that
khugepaged only supports PMD-size for now. So, shouldn't this case be
addressed in Nico's series [1] ?

[1] https://lore.kernel.org/linux-mm/20260522150009.121603-1-npache@xxxxxxxxxx/

Cheers, Lance

On Fri, May 29, 2026 at 10:55:32AM -0400, Luiz Capitulino wrote:

If PMD-sized pages are not supported on an architecture (i.e., the
arch implements arch_has_pmd_leaves() and it returns false) then the
current code disables all THP, including mTHP.

This commit fixes this by allowing mTHP to be always enabled for all
archs. When PMD-sized pages are not supported, their sysfs entry won't be
created and mapping them will be disallowed at page-fault time.

Similarly, this commit implements the following changes for shmem in
shmem_allowable_huge_orders():

- Drop the pgtable_has_pmd_leaves() check so that mTHP sizes are
considered
- Filter out PMD and PUD orders from allowable orders when
PMD-sized pages are not supported by the CPU

Signed-off-by: Luiz Capitulino <luizcap@xxxxxxxxxx>
---
mm/huge_memory.c | 25 ++++++++++++++++++++-----
mm/shmem.c | 14 +++++++++-----
2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 32254febe097..059901a8c6cb 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -126,6 +126,15 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
else
supported_orders = THP_ORDERS_ALL_FILE_DEFAULT;

+ if (!pgtable_has_pmd_leaves()) {
+ /*
+ * If the CPU does not support PMD leaves, assume for
+ * now that it does not support PUD leaves and disable
+ * both folio orders.
+ */
+ supported_orders &= ~(BIT(PMD_ORDER) | BIT(PUD_ORDER));
+ }
+
orders &= supported_orders;
if (!orders)
return 0;
@@ -133,7 +142,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
if (!vma->vm_mm) /* vdso */
return 0;

- if (!pgtable_has_pmd_leaves() || vma_thp_disabled(vma, vm_flags, forced_collapse))
+ if (vma_thp_disabled(vma, vm_flags, forced_collapse))
return 0;

/* khugepaged doesn't collapse DAX vma, but page fault is fine. */
@@ -848,7 +857,7 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
* disable all other sizes. powerpc's PMD_ORDER isn't a compile-time
* constant so we have to do this here.
*/
- if (!anon_orders_configured)
+ if (!anon_orders_configured && pgtable_has_pmd_leaves())
huge_anon_orders_inherit = BIT(PMD_ORDER);

*hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
@@ -870,6 +879,15 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
}

orders = THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE_DEFAULT;
+ if (!pgtable_has_pmd_leaves()) {
+ /*
+ * If the CPU does not support PMD leaves, assume for
+ * now that it does not support PUD leaves and disable
+ * both folio orders.
+ */
+ orders &= ~(BIT(PMD_ORDER) | BIT(PUD_ORDER));
+ }
+
order = highest_order(orders);
while (orders) {
thpsize = thpsize_create(order, *hugepage_kobj);
@@ -969,9 +987,6 @@ static int __init hugepage_init(void)
int err;
struct kobject *hugepage_kobj;

- if (!pgtable_has_pmd_leaves())
- return -EINVAL;
-
/*
* hugepages can't be allocated by the buddy allocator
*/
diff --git a/mm/shmem.c b/mm/shmem.c
index 079e299ea789..c15dffd0eb41 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1844,16 +1844,19 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
unsigned long mask = READ_ONCE(huge_shmem_orders_always);
unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size);
vm_flags_t vm_flags = vma ? vma->vm_flags : 0;
- unsigned int global_orders;
+ unsigned int global_orders, disabled_orders = 0;

- if (!pgtable_has_pmd_leaves() || (vma && vma_thp_disabled(vma, vm_flags, shmem_huge_force)))
+ if (vma && vma_thp_disabled(vma, vm_flags, shmem_huge_force))
return 0;

+ if (!pgtable_has_pmd_leaves())
+ disabled_orders = BIT(PMD_ORDER);
+
global_orders = shmem_huge_global_enabled(inode, index, write_end,
shmem_huge_force, vma, vm_flags);
/* Tmpfs huge pages allocation */
if (!vma || !vma_is_anon_shmem(vma))
- return global_orders;
+ return global_orders & ~disabled_orders;

/*
* Following the 'deny' semantics of the top level, force the huge
@@ -1867,7 +1870,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
* means non-PMD sized THP can not override 'huge' mount option now.
*/
if (shmem_huge == SHMEM_HUGE_FORCE)
- return READ_ONCE(huge_shmem_orders_inherit);
+ return READ_ONCE(huge_shmem_orders_inherit) & ~disabled_orders;

/* Allow mTHP that will be fully within i_size. */
mask |= shmem_get_orders_within_size(inode, within_size_orders, index, 0);
@@ -1878,6 +1881,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
if (global_orders > 0)
mask |= READ_ONCE(huge_shmem_orders_inherit);

+ mask &= ~disabled_orders;
return THP_ORDERS_ALL_FILE_DEFAULT & mask;
}

@@ -5461,7 +5465,7 @@ void __init shmem_init(void)
* Default to setting PMD-sized THP to inherit the global setting and
* disable all other multi-size THPs.
*/
- if (!shmem_orders_configured)
+ if (!shmem_orders_configured && pgtable_has_pmd_leaves())
huge_shmem_orders_inherit = BIT(HPAGE_PMD_ORDER);
#endif
return;
--
2.54.0