[PATCH v2 03/13] mm/page_alloc: unify __alloc_frozen_pages[_nolock]_noprof()

From: Brendan Jackman

Date: Mon Jun 22 2026 - 06:03:01 EST


Currently the core allocator code is controlled by ALLOC_NOLOCK, but the
nolock entry point, alloc_frozen_pages_nolock_noprof(), is significantly
different from the normal __alloc_frozen_pages_noprof(). This is tiring
when reading the code.

Plumb the ALLOC_NOLOCK control one layer up in the call stack: create
an alloc_flags argument to __alloc_frozen_pages_noprof() (which is only
exposed to mm/) and then turn the nolock variant into a thin wrapper
that just sets that flag (as well as handling NUMA_NO_NODE, similar to
how some of the wrappers in gfp.h do).

Rationale that this doesn't change anything:

1. Simple bits: A bunch of the nolock-specific handling is just moved to
the new alloc_order_allowed(), alloc_trylock_allowed() and
gfp_nolock.

2. __alloc_frozen_pages_noprof() has some extra logic that wasn't
previously in the nolock variant:

a. Application of gfp_allowed_mask; this only affects early boot, and
only flags that affect the slowpath get changed here.

b. Application of current_gfp_context() - also only affects the
slowpath

3. The slowpath itself: this is now just explicitly skipped under
ALLOC_NOLOCK.

Ulterior motive: adding an alloc_flags arg to the allocator's
mm-internal entrypoint can later be used to do more allocation
customisation without needing to create new GFP flags.

While adding this flag to a bunch of places, create ALLOC_DEFAULT to
avoid a mysterious literal 0 in most places. alloc_frozen_pages_noprof()
is defined above the alloc flags so just leave that as a slightly messy
exception instead of trying to fully reorder mm/internal.h for that one
case.

No functional change intended.

Signed-off-by: Brendan Jackman <jackmanb@xxxxxxxxxx>
---
mm/hugetlb.c | 3 +-
mm/internal.h | 8 ++-
mm/mempolicy.c | 10 ++--
mm/page_alloc.c | 178 +++++++++++++++++++++++++++++---------------------------
mm/slub.c | 6 +-
5 files changed, 110 insertions(+), 95 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 571212b80835e..2ce6169ca0dfd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1806,7 +1806,8 @@ static struct folio *alloc_buddy_frozen_folio(int order, gfp_t gfp_mask,
if (alloc_try_hard)
gfp_mask |= __GFP_RETRY_MAYFAIL;

- folio = (struct folio *)__alloc_frozen_pages(gfp_mask, order, nid, nmask);
+ folio = (struct folio *)__alloc_frozen_pages(gfp_mask, order, nid, nmask,
+ ALLOC_DEFAULT);

/*
* If we did not specify __GFP_RETRY_MAYFAIL, but still got a
diff --git a/mm/internal.h b/mm/internal.h
index 1483a4fcdfce1..6bc89ec62e527 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -913,7 +913,7 @@ extern bool free_pages_prepare(struct page *page, unsigned int order);
extern int user_min_free_kbytes;

struct page *__alloc_frozen_pages_noprof(gfp_t, unsigned int order, int nid,
- nodemask_t *);
+ nodemask_t *, unsigned int alloc_flags);
#define __alloc_frozen_pages(...) \
alloc_hooks(__alloc_frozen_pages_noprof(__VA_ARGS__))
void free_frozen_pages(struct page *page, unsigned int order);
@@ -924,7 +924,8 @@ struct page *alloc_frozen_pages_noprof(gfp_t, unsigned int order);
#else
static inline struct page *alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order)
{
- return __alloc_frozen_pages_noprof(gfp, order, numa_node_id(), NULL);
+ return __alloc_frozen_pages_noprof(gfp, order, numa_node_id(), NULL,
+ 0 /* ALLOC_DEFAULT */);
}
#endif

@@ -1440,6 +1441,9 @@ extern void set_pageblock_order(void);
unsigned long reclaim_pages(struct list_head *folio_list);
unsigned int reclaim_clean_pages_from_list(struct zone *zone,
struct list_head *folio_list);
+
+
+#define ALLOC_DEFAULT 0
/* The ALLOC_WMARK bits are used as an index to zone->watermark */
#define ALLOC_WMARK_MIN WMARK_MIN
#define ALLOC_WMARK_LOW WMARK_LOW
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 36699fabd3c22..40bbea614aced 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2425,9 +2425,11 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order,
*/
preferred_gfp = gfp | __GFP_NOWARN;
preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
- page = __alloc_frozen_pages_noprof(preferred_gfp, order, nid, nodemask);
+ page = __alloc_frozen_pages_noprof(preferred_gfp, order, nid, nodemask,
+ ALLOC_DEFAULT);
if (!page)
- page = __alloc_frozen_pages_noprof(gfp, order, nid, NULL);
+ page = __alloc_frozen_pages_noprof(gfp, order, nid, NULL,
+ ALLOC_DEFAULT);

return page;
}
@@ -2475,7 +2477,7 @@ static struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
*/
page = __alloc_frozen_pages_noprof(
gfp | __GFP_THISNODE | __GFP_NORETRY, order,
- nid, NULL);
+ nid, NULL, ALLOC_DEFAULT);
if (page || !(gfp & __GFP_DIRECT_RECLAIM))
return page;
/*
@@ -2487,7 +2489,7 @@ static struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
}
}

- page = __alloc_frozen_pages_noprof(gfp, order, nid, nodemask);
+ page = __alloc_frozen_pages_noprof(gfp, order, nid, nodemask, ALLOC_DEFAULT);

if (unlikely(pol->mode == MPOL_INTERLEAVE ||
pol->mode == MPOL_WEIGHTED_INTERLEAVE) && page) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bc05d75a41627..e31babe2181a1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5204,7 +5204,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
}
nr_account++;

- prep_new_page(page, 0, gfp, 0);
+ prep_new_page(page, 0, gfp, ALLOC_DEFAULT);
set_page_refcounted(page);
page_array[nr_populated++] = page;
}
@@ -5253,24 +5253,98 @@ void free_pages_bulk(struct page **page_array, unsigned long nr_pages)
}
}

-/*
- * This is the 'heart' of the zoned buddy allocator.
- */
-struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order,
- int preferred_nid, nodemask_t *nodemask)
+static inline bool alloc_order_allowed(gfp_t gfp, unsigned int order,
+ unsigned int alloc_flags)
{
- struct page *page;
- unsigned int fastpath_alloc_flags = ALLOC_WMARK_LOW;
- gfp_t alloc_gfp; /* The gfp_t that was actually used for allocation */
- struct alloc_context ac = { };
+ if (alloc_flags & ALLOC_NOLOCK)
+ return pcp_allowed_order(order);

/*
* There are several places where we assume that the order value is sane
* so bail out early if the request is out of bound.
*/
- if (WARN_ON_ONCE_GFP(order > MAX_PAGE_ORDER, gfp))
+ return !(WARN_ON_ONCE_GFP(order > MAX_PAGE_ORDER, gfp));
+}
+
+static inline bool alloc_trylock_allowed(void)
+{
+ /*
+ * In PREEMPT_RT spin_trylock() will call raw_spin_lock() which is
+ * unsafe in NMI. If spin_trylock() is called from hard IRQ the current
+ * task may be waiting for one rt_spin_lock, but rt_spin_trylock() will
+ * mark the task as the owner of another rt_spin_lock which will
+ * confuse PI logic, so return immediately if called from hard IRQ or
+ * NMI.
+ *
+ * Note, irqs_disabled() case is ok. This function can be called
+ * from raw_spin_lock_irqsave region.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && (in_nmi() || in_hardirq()))
+ return false;
+
+ /* On UP, spin_trylock() always succeeds even when it is locked */
+ if (!IS_ENABLED(CONFIG_SMP) && in_nmi())
+ return false;
+
+ /* Bailout, since _deferred_grow_zone() needs to take a lock */
+ if (deferred_pages_enabled())
+ return false;
+
+ return true;
+}
+
+/*
+ * GFP flags to set for ALLOC_NOLOCK i.e. alloc_pages_nolock().
+ *
+ * Do not specify __GFP_DIRECT_RECLAIM, since direct reclaim is not allowed.
+ * Do not specify __GFP_KSWAPD_RECLAIM either, since wake up of kswapd
+ * is not safe in arbitrary context.
+ *
+ * These two are the conditions for gfpflags_allow_spinning() being true.
+ *
+ * Specify __GFP_NOWARN since failing alloc_pages_nolock() is not a reason
+ * to warn. Also warn would trigger printk() which is unsafe from
+ * various contexts. We cannot use printk_deferred_enter() to mitigate,
+ * since the running context is unknown.
+ *
+ * Specify __GFP_ZERO to make sure that call to kmsan_alloc_page() below
+ * is safe in any context. Also zeroing the page is mandatory for
+ * BPF use cases.
+ *
+ * Though __GFP_NOMEMALLOC is not checked in the code path below,
+ * specify it here to highlight that alloc_pages_nolock()
+ * doesn't want to deplete reserves.
+ */
+static const gfp_t gfp_nolock = __GFP_NOWARN | __GFP_ZERO | __GFP_NOMEMALLOC |
+ __GFP_COMP;
+
+/*
+ * This is the 'heart' of the zoned buddy allocator.
+ */
+struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order,
+ int preferred_nid, nodemask_t *nodemask, unsigned int alloc_flags)
+{
+ struct page *page;
+ gfp_t alloc_gfp; /* The gfp_t that was actually used for allocation */
+ struct alloc_context ac = { };
+ unsigned int fastpath_alloc_flags = alloc_flags;
+
+ /* Other flags could be supported later if needed. */
+ if (WARN_ON(alloc_flags & ~ALLOC_NOLOCK))
return NULL;

+ if (!alloc_order_allowed(gfp, order, alloc_flags))
+ return NULL;
+
+ if (alloc_flags & ALLOC_NOLOCK) {
+ VM_WARN_ON_ONCE(gfp & ~__GFP_ACCOUNT);
+ if (!alloc_trylock_allowed())
+ return NULL;
+ gfp |= gfp_nolock;
+ } else {
+ fastpath_alloc_flags |= ALLOC_WMARK_LOW;
+ }
+
gfp &= gfp_allowed_mask;
/*
* Apply scoped allocation constraints. This is mainly about GFP_NOFS
@@ -5291,9 +5365,9 @@ struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order,
*/
fastpath_alloc_flags |= alloc_flags_nofragment(zonelist_zone(ac.preferred_zoneref), gfp);

- /* First allocation attempt */
+ /* First allocation attempt (or, for nolock, only attempt) */
page = get_page_from_freelist(alloc_gfp, order, fastpath_alloc_flags, &ac);
- if (likely(page))
+ if (likely(page) || (alloc_flags & ALLOC_NOLOCK))
goto out;

alloc_gfp = gfp;
@@ -5310,7 +5384,8 @@ struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order,
out:
if (memcg_kmem_online() && (gfp & __GFP_ACCOUNT) && page &&
unlikely(__memcg_kmem_charge_page(page, gfp, order) != 0)) {
- free_frozen_pages(page, order);
+ __free_frozen_pages(page, order,
+ alloc_flags & ALLOC_NOLOCK ? FPI_TRYLOCK : 0);
page = NULL;
}

@@ -5326,7 +5401,8 @@ struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order,
{
struct page *page;

- page = __alloc_frozen_pages_noprof(gfp, order, preferred_nid, nodemask);
+ page = __alloc_frozen_pages_noprof(gfp, order, preferred_nid, nodemask,
+ ALLOC_DEFAULT);
if (page)
set_page_refcounted(page);
return page;
@@ -7856,80 +7932,10 @@ static bool __free_unaccepted(struct page *page)

struct page *alloc_frozen_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int order)
{
- /*
- * Do not specify __GFP_DIRECT_RECLAIM, since direct claim is not allowed.
- * Do not specify __GFP_KSWAPD_RECLAIM either, since wake up of kswapd
- * is not safe in arbitrary context.
- *
- * These two are the conditions for gfpflags_allow_spinning() being true.
- *
- * Specify __GFP_NOWARN since failing alloc_pages_nolock() is not a reason
- * to warn. Also warn would trigger printk() which is unsafe from
- * various contexts. We cannot use printk_deferred_enter() to mitigate,
- * since the running context is unknown.
- *
- * Specify __GFP_ZERO to make sure that call to kmsan_alloc_page() below
- * is safe in any context. Also zeroing the page is mandatory for
- * BPF use cases.
- *
- * Though __GFP_NOMEMALLOC is not checked in the code path below,
- * specify it here to highlight that alloc_pages_nolock()
- * doesn't want to deplete reserves.
- */
- gfp_t alloc_gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_NOMEMALLOC | __GFP_COMP
- | gfp_flags;
- unsigned int alloc_flags = ALLOC_NOLOCK;
- struct alloc_context ac = { };
- struct page *page;
-
- VM_WARN_ON_ONCE(gfp_flags & ~__GFP_ACCOUNT);
- /*
- * In PREEMPT_RT spin_trylock() will call raw_spin_lock() which is
- * unsafe in NMI. If spin_trylock() is called from hard IRQ the current
- * task may be waiting for one rt_spin_lock, but rt_spin_trylock() will
- * mark the task as the owner of another rt_spin_lock which will
- * confuse PI logic, so return immediately if called from hard IRQ or
- * NMI.
- *
- * Note, irqs_disabled() case is ok. This function can be called
- * from raw_spin_lock_irqsave region.
- */
- if (IS_ENABLED(CONFIG_PREEMPT_RT) && (in_nmi() || in_hardirq()))
- return NULL;
-
- /* On UP, spin_trylock() always succeeds even when it is locked */
- if (!IS_ENABLED(CONFIG_SMP) && in_nmi())
- return NULL;
-
- if (!pcp_allowed_order(order))
- return NULL;
-
- /* Bailout, since _deferred_grow_zone() needs to take a lock */
- if (deferred_pages_enabled())
- return NULL;
-
if (nid == NUMA_NO_NODE)
nid = numa_node_id();

- prepare_alloc_pages(alloc_gfp, order, nid, NULL, &ac,
- &alloc_gfp, &alloc_flags);
-
- /*
- * Best effort allocation from percpu free list.
- * If it's empty attempt to spin_trylock zone->lock.
- */
- page = get_page_from_freelist(alloc_gfp, order, alloc_flags, &ac);
-
- /* Unlike regular alloc_pages() there is no __alloc_pages_slowpath(). */
-
- if (memcg_kmem_online() && page && (gfp_flags & __GFP_ACCOUNT) &&
- unlikely(__memcg_kmem_charge_page(page, alloc_gfp, order) != 0)) {
- __free_frozen_pages(page, order, FPI_TRYLOCK);
- page = NULL;
- }
- trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype);
- kmsan_alloc_page(page, order, alloc_gfp);
- return page;
+ return __alloc_frozen_pages_noprof(gfp_flags, order, nid, NULL, ALLOC_NOLOCK);
}
/**
* alloc_pages_nolock - opportunistic reentrant allocation from any context
diff --git a/mm/slub.c b/mm/slub.c
index a2bf3756ca7d0..b9c1284844a0a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3275,7 +3275,8 @@ static inline struct slab *alloc_slab_page(gfp_t flags, int node,
else if (node == NUMA_NO_NODE)
page = alloc_frozen_pages(flags, order);
else
- page = __alloc_frozen_pages(flags, order, node, NULL);
+ page = __alloc_frozen_pages(flags, order, node, NULL,
+ ALLOC_DEFAULT);

if (!page)
return NULL;
@@ -5236,7 +5237,8 @@ static void *___kmalloc_large_node(size_t size, gfp_t flags, int node)
if (node == NUMA_NO_NODE)
page = alloc_frozen_pages_noprof(flags, order);
else
- page = __alloc_frozen_pages_noprof(flags, order, node, NULL);
+ page = __alloc_frozen_pages_noprof(flags, order, node, NULL,
+ ALLOC_DEFAULT);

if (page) {
ptr = page_address(page);

--
2.54.0