Re: [PATCH v2 20/22] mm/page_alloc: implement __GFP_UNMAPPED|__GFP_ZERO allocations

From: Vlastimil Babka (SUSE)

Date: Wed May 13 2026 - 13:27:58 EST


On 3/20/26 19:23, Brendan Jackman wrote:
> The pages being zeroed here are unmapped, so they can't be zeroed via
> the direct map. Temporarily mapping them in the direct map is not
> possible because:
>
> - In general this requires allocating pagetables,
>
> - Unmapping them would require a TLB shootdown, which can't be done in
> general from the allocator (x86 requires IRQs on).
>
> Therefore, use the new mermap mechanism to zero these pages.
>
> The main mermap API is expected to fail very often. In order to avoid
> needing to fail allocations when that happens, instead fall back to the
> special mermap_get_reserved() variant, which is less efficient.
>
> Signed-off-by: Brendan Jackman <jackmanb@xxxxxxxxxx>
> ---
> arch/x86/include/asm/pgtable_types.h | 2 +
> mm/Kconfig | 11 +++++-
> mm/page_alloc.c | 76 +++++++++++++++++++++++++++++++-----
> 3 files changed, 78 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
> index 2ec250ba467e2..c3d73bdfff1fa 100644
> --- a/arch/x86/include/asm/pgtable_types.h
> +++ b/arch/x86/include/asm/pgtable_types.h
> @@ -223,6 +223,7 @@ enum page_cache_mode {
> #define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX| 0| 0|___G)
> #define __PAGE_KERNEL_ROX (__PP| 0| 0|___A| 0| 0| 0|___G)
> #define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G)
> +#define __PAGE_KERNEL_NOGLOBAL (__PP|__RW| 0|___A|__NX|___D| 0| 0)
> #define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G)
> #define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC)
> #define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX| 0| 0|___G)
> @@ -245,6 +246,7 @@ enum page_cache_mode {
> #define __pgprot_mask(x) __pgprot((x) & __default_kernel_pte_mask)
>
> #define PAGE_KERNEL __pgprot_mask(__PAGE_KERNEL | _ENC)
> +#define PAGE_KERNEL_NOGLOBAL __pgprot_mask(__PAGE_KERNEL_NOGLOBAL | _ENC)
> #define PAGE_KERNEL_NOENC __pgprot_mask(__PAGE_KERNEL | 0)
> #define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC)
> #define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC)

Should this be part of the earlier mermap x86 patches?

> diff --git a/mm/Kconfig b/mm/Kconfig
> index e4cb52149acad..05b2bb841d0e0 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -1506,7 +1506,14 @@ config MERMAP_KUNIT_TEST
> If unsure, say N.
>
> config PAGE_ALLOC_UNMAPPED
> - bool "Support allocating pages that aren't in the direct map" if COMPILE_TEST
> - default COMPILE_TEST
> + bool "Support allocating pages that aren't in the direct map"
> + depends on MERMAP
> +
> +config PAGE_ALLOC_KUNIT_TESTS
> + tristate "KUnit tests for the page allocator" if !KUNIT_ALL_TESTS
> + depends on KUNIT
> + default KUNIT_ALL_TESTS
> + help
> + Builds KUnit tests for the page allocator.

Doesn't this belong in the next patch?

> endmenu
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 710ee9f46d467..7c91dcbe32576 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -14,6 +14,7 @@
> * (lots of bits borrowed from Ingo Molnar & Andrew Morton)
> */
>
> +#include <linux/mermap.h>
> #include <linux/stddef.h>
> #include <linux/mm.h>
> #include <linux/highmem.h>
> @@ -1327,15 +1328,72 @@ static inline bool should_skip_kasan_poison(struct page *page)
> return page_kasan_tag(page) == KASAN_TAG_KERNEL;
> }
>
> -static void kernel_init_pages(struct page *page, int numpages)
> +#ifdef CONFIG_PAGE_ALLOC_UNMAPPED
> +static inline bool pageblock_unmapped(struct page *page)
> {
> - int i;
> + return freetype_flags(get_pageblock_freetype(page)) & FREETYPE_UNMAPPED;
> +}
>
> - /* s390's use of memset() could override KASAN redzones. */
> - kasan_disable_current();
> - for (i = 0; i < numpages; i++)
> - clear_highpage_kasan_tagged(page + i);
> - kasan_enable_current();
> +static inline void clear_page_mermap(struct page *page, unsigned int numpages)
> +{
> + void *mermap;
> +
> + BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHMEM));
> +
> + /* Fast path: single mapping (may fail under preemption). */
> + mermap = mermap_get(page, numpages << PAGE_SHIFT, PAGE_KERNEL_NOGLOBAL);
> + if (mermap) {
> + void *buf = kasan_reset_tag(mermap_addr(mermap));
> +
> + for (int i = 0; i < numpages; i++)
> + clear_page(buf + (i << PAGE_SHIFT));
> + mermap_put(mermap);
> + return;
> + }
> +
> + /* Slow path, map each page individually (always succeeds). */
> + for (int i = 0; i < numpages; i++) {
> + unsigned long flags;
> +
> + local_irq_save(flags);
> + mermap = mermap_get_reserved(page + i, PAGE_KERNEL_NOGLOBAL);
> + clear_page(kasan_reset_tag(mermap_addr(mermap)));
> + mermap_put(mermap);
> + local_irq_restore(flags);
> + }
> +}
> +#else
> +static inline bool pageblock_unmapped(struct page *page)
> +{
> + return false;
> +}
> +
> +static inline void clear_page_mermap(struct page *page, unsigned int numpages)
> +{
> + BUG();
> +}
> +#endif
> +
> +static void kernel_init_pages(struct page *page, unsigned int numpages)
> +{
> + int num_blocks = DIV_ROUND_UP(numpages, pageblock_nr_pages);
> +
> + for (int block = 0; block < num_blocks; block++) {
> + struct page *block_page = page + (block << pageblock_order);
> + bool unmapped = pageblock_unmapped(block_page);
> +
> + /* s390's use of memset() could override KASAN redzones. */
> + kasan_disable_current();
> + if (unmapped) {
> + clear_page_mermap(block_page, numpages);
> + } else {
> + for (int i = 0; i < min(numpages, pageblock_nr_pages); i++)
> + clear_highpage_kasan_tagged(block_page + i);
> + }
> + kasan_enable_current();
> +
> + numpages -= pageblock_nr_pages;
> + }
> }
>
> #ifdef CONFIG_MEM_ALLOC_PROFILING
> @@ -5250,8 +5308,8 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
> ac->nodemask = nodemask;
> ac->freetype = gfp_freetype(gfp_mask);
>
> - /* Not implemented yet. */
> - if (freetype_flags(ac->freetype) & FREETYPE_UNMAPPED && gfp_mask & __GFP_ZERO)
> + if (freetype_flags(ac->freetype) & FREETYPE_UNMAPPED &&
> + WARN_ON(!mermap_ready()))
> return false;
>
> if (cpusets_enabled()) {
>