[PATCH 3/4] mm: page_alloc: move capture_control to the page allocator

From: Johannes Weiner

Date: Fri Jun 26 2026 - 14:23:09 EST


The compaction capturing code assumes the allocation request order
and compaction target order are the same. That won't be true once
defrag_mode promotes sub-block allocations to pageblock-order
compaction: compaction will target the larger order, while capture
should remain at the original allocation order.

Move the per-task capture_control to the page allocator, so its
fields can carry alloc-side information that compaction's
compact_control does not. Pass the capture_control through
try_to_compact_pages() / compact_zone_order() instead of a bare
struct page **; compact_zone_order() sets capc->cc for the duration
of the compact_zone() call and clears it again afterwards.

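task_capc() now also checks capc->cc, because current->capture_control
can be installed while no compaction run is active (capc->cc is NULL):
before compact_zone_order() starts on a zone and after it finishes.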

No functional change.

Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>
---
include/linux/compaction.h | 3 ++-
mm/compaction.c | 33 ++++++++++-----------------------
mm/page_alloc.c | 23 +++++++++++++++++++++--
3 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index f29ef0653546..66a2f70e9e01 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -58,6 +58,7 @@ enum compact_result {
};

struct alloc_context; /* in mm/internal.h */
+struct capture_control; /* in mm/internal.h */

/*
* Number of free order-0 pages that should be available above given watermark
@@ -92,7 +93,7 @@ extern int fragmentation_index(struct zone *zone, unsigned int order);
extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
unsigned int order, unsigned int alloc_flags,
const struct alloc_context *ac, enum compact_priority prio,
- struct page **page);
+ struct capture_control *capc);
extern void reset_isolation_suitable(pg_data_t *pgdat);
extern bool compaction_suitable(struct zone *zone, int order,
unsigned long watermark, int highest_zoneidx);
diff --git a/mm/compaction.c b/mm/compaction.c
index 7df3a85d43af..c2701bf1d04e 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2791,7 +2791,7 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
static enum compact_result compact_zone_order(struct zone *zone, int order,
gfp_t gfp_mask, enum compact_priority prio,
unsigned int alloc_flags, int highest_zoneidx,
- struct page **capture)
+ struct capture_control *capc)
{
enum compact_result ret;
struct compact_control cc = {
@@ -2808,35 +2808,22 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
.ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
.ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
};
- struct capture_control capc = {
- .cc = &cc,
- .page = NULL,
- };

- /*
- * Make sure the structs are really initialized before we expose the
- * capture control, in case we are interrupted and the interrupt handler
- * frees a page.
- */
+ /* See the comment in __alloc_pages_direct_compact() */
barrier();
- WRITE_ONCE(current->capture_control, &capc);
+ WRITE_ONCE(capc->cc, &cc);

- ret = compact_zone(&cc, &capc);
+ ret = compact_zone(&cc, capc);
+
+ WRITE_ONCE(capc->cc, NULL);

- /*
- * Make sure we hide capture control first before we read the captured
- * page pointer, otherwise an interrupt could free and capture a page
- * and we would leak it.
- */
- WRITE_ONCE(current->capture_control, NULL);
- *capture = READ_ONCE(capc.page);
/*
* Technically, it is also possible that compaction is skipped but
* the page is still captured out of luck(IRQ came and freed the page).
* Returning COMPACT_SUCCESS in such cases helps in properly accounting
* the COMPACT[STALL|FAIL] when compaction is skipped.
*/
- if (*capture)
+ if (capc->page)
ret = COMPACT_SUCCESS;

return ret;
@@ -2849,13 +2836,13 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
* @alloc_flags: The allocation flags of the current allocation
* @ac: The context of current allocation
* @prio: Determines how hard direct compaction should try to succeed
- * @capture: Pointer to free page created by compaction will be stored here
+ * @capc: The context for capturing pages during freeing
*
* This is the main entry point for direct page compaction.
*/
enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
unsigned int alloc_flags, const struct alloc_context *ac,
- enum compact_priority prio, struct page **capture)
+ enum compact_priority prio, struct capture_control *capc)
{
struct zoneref *z;
struct zone *zone;
@@ -2883,7 +2870,7 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
}

status = compact_zone_order(zone, order, gfp_mask, prio,
- alloc_flags, ac->highest_zoneidx, capture);
+ alloc_flags, ac->highest_zoneidx, capc);
rc = max(status, rc);

/* The allocation should succeed, stop compacting */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cb422505c6ef..9dee1c47e795 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -718,7 +718,7 @@ static inline struct capture_control *task_capc(struct zone *zone)
{
struct capture_control *capc = current->capture_control;

- return unlikely(capc) &&
+ return unlikely(capc && capc->cc) &&
!(current->flags & PF_KTHREAD) &&
!capc->page &&
capc->cc->zone == zone ? capc : NULL;
@@ -4146,23 +4146,42 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
struct page *page = NULL;
unsigned long pflags;
unsigned int noreclaim_flag;
+ struct capture_control capc = {
+ .page = NULL,
+ };

if (!order)
return NULL;

+ /*
+ * Make sure the structs are really initialized before we expose the
+ * capture control, in case we are interrupted and the interrupt handler
+ * frees a page.
+ */
+ barrier();
+ WRITE_ONCE(current->capture_control, &capc);
+
psi_memstall_enter(&pflags);
delayacct_compact_start();
fs_reclaim_acquire(gfp_mask);
noreclaim_flag = memalloc_noreclaim_save();

*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
- prio, &page);
+ prio, &capc);

memalloc_noreclaim_restore(noreclaim_flag);
fs_reclaim_release(gfp_mask);
psi_memstall_leave(&pflags);
delayacct_compact_end();

+ /*
+ * Make sure we hide capture control first before we read the captured
+ * page pointer, otherwise an interrupt could free and capture a page
+ * and we would leak it.
+ */
+ WRITE_ONCE(current->capture_control, NULL);
+ page = READ_ONCE(capc.page);
+
if (*compact_result == COMPACT_SKIPPED ||
*compact_result == COMPACT_DEFERRED)
return NULL;
--
2.54.0