[PATCH 3/4] mm: page_alloc: move capture_control to the page allocator
From: Johannes Weiner
Date: Fri Jun 26 2026 - 14:23:09 EST
The compaction capturing code assumes that the allocation request
order and the compaction target order are the same. That won't be
true once defrag_mode promotes sub-block allocations to
pageblock-order compaction: compaction will target the larger order,
while capture should remain at the original allocation order.
Move the per-task capture_control to the page allocator, so that its
fields can carry alloc-side information that compaction's
compact_control does not. Pass the capture_control through
try_to_compact_pages() / compact_zone_order() instead of a bare
struct page **. compact_zone_order() sets capc->cc only while it is
running and clears it again afterwards. Since the capture_control is
now installed before compaction starts, task_capc() also checks
capc->cc so that no page is captured while compaction is not
running.
No functional change intended.
Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>
---
include/linux/compaction.h | 3 ++-
mm/compaction.c | 33 ++++++++++-----------------------
mm/page_alloc.c | 23 +++++++++++++++++++++--
3 files changed, 33 insertions(+), 26 deletions(-)
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index f29ef0653546..66a2f70e9e01 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -58,6 +58,7 @@ enum compact_result {
};
struct alloc_context; /* in mm/internal.h */
+struct capture_control; /* in mm/internal.h */
/*
* Number of free order-0 pages that should be available above given watermark
@@ -92,7 +93,7 @@ extern int fragmentation_index(struct zone *zone, unsigned int order);
extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
unsigned int order, unsigned int alloc_flags,
const struct alloc_context *ac, enum compact_priority prio,
- struct page **page);
+ struct capture_control *capc);
extern void reset_isolation_suitable(pg_data_t *pgdat);
extern bool compaction_suitable(struct zone *zone, int order,
unsigned long watermark, int highest_zoneidx);
diff --git a/mm/compaction.c b/mm/compaction.c
index 7df3a85d43af..c2701bf1d04e 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2791,7 +2791,7 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
static enum compact_result compact_zone_order(struct zone *zone, int order,
gfp_t gfp_mask, enum compact_priority prio,
unsigned int alloc_flags, int highest_zoneidx,
- struct page **capture)
+ struct capture_control *capc)
{
enum compact_result ret;
struct compact_control cc = {
@@ -2808,35 +2808,22 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
.ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
.ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
};
- struct capture_control capc = {
- .cc = &cc,
- .page = NULL,
- };
- /*
- * Make sure the structs are really initialized before we expose the
- * capture control, in case we are interrupted and the interrupt handler
- * frees a page.
- */
+ /* See the comment in __alloc_pages_direct_compact() */
barrier();
- WRITE_ONCE(current->capture_control, &capc);
+ WRITE_ONCE(capc->cc, &cc);
- ret = compact_zone(&cc, &capc);
+ ret = compact_zone(&cc, capc);
+
+ WRITE_ONCE(capc->cc, NULL);
- /*
- * Make sure we hide capture control first before we read the captured
- * page pointer, otherwise an interrupt could free and capture a page
- * and we would leak it.
- */
- WRITE_ONCE(current->capture_control, NULL);
- *capture = READ_ONCE(capc.page);
/*
* Technically, it is also possible that compaction is skipped but
* the page is still captured out of luck(IRQ came and freed the page).
* Returning COMPACT_SUCCESS in such cases helps in properly accounting
* the COMPACT[STALL|FAIL] when compaction is skipped.
*/
- if (*capture)
+ if (capc->page)
ret = COMPACT_SUCCESS;
return ret;
@@ -2849,13 +2836,13 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
* @alloc_flags: The allocation flags of the current allocation
* @ac: The context of current allocation
* @prio: Determines how hard direct compaction should try to succeed
- * @capture: Pointer to free page created by compaction will be stored here
+ * @capc: The context for capturing pages during freeing
*
* This is the main entry point for direct page compaction.
*/
enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
unsigned int alloc_flags, const struct alloc_context *ac,
- enum compact_priority prio, struct page **capture)
+ enum compact_priority prio, struct capture_control *capc)
{
struct zoneref *z;
struct zone *zone;
@@ -2883,7 +2870,7 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
}
status = compact_zone_order(zone, order, gfp_mask, prio,
- alloc_flags, ac->highest_zoneidx, capture);
+ alloc_flags, ac->highest_zoneidx, capc);
rc = max(status, rc);
/* The allocation should succeed, stop compacting */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cb422505c6ef..9dee1c47e795 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -718,7 +718,7 @@ static inline struct capture_control *task_capc(struct zone *zone)
{
struct capture_control *capc = current->capture_control;
- return unlikely(capc) &&
+ return unlikely(capc && capc->cc) &&
!(current->flags & PF_KTHREAD) &&
!capc->page &&
capc->cc->zone == zone ? capc : NULL;
@@ -4146,23 +4146,42 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
struct page *page = NULL;
unsigned long pflags;
unsigned int noreclaim_flag;
+ struct capture_control capc = {
+ .page = NULL,
+ };
if (!order)
return NULL;
+ /*
+ * Make sure the structs are really initialized before we expose the
+ * capture control, in case we are interrupted and the interrupt handler
+ * frees a page.
+ */
+ barrier();
+ WRITE_ONCE(current->capture_control, &capc);
+
psi_memstall_enter(&pflags);
delayacct_compact_start();
fs_reclaim_acquire(gfp_mask);
noreclaim_flag = memalloc_noreclaim_save();
*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
- prio, &page);
+ prio, &capc);
memalloc_noreclaim_restore(noreclaim_flag);
fs_reclaim_release(gfp_mask);
psi_memstall_leave(&pflags);
delayacct_compact_end();
+ /*
+ * Make sure we hide capture control first before we read the captured
+ * page pointer, otherwise an interrupt could free and capture a page
+ * and we would leak it.
+ */
+ WRITE_ONCE(current->capture_control, NULL);
+ page = READ_ONCE(capc.page);
+
if (*compact_result == COMPACT_SKIPPED ||
*compact_result == COMPACT_DEFERRED)
return NULL;
--
2.54.0