[RFC PATCH 5/7] proactively move pages out of unmovable zones in kcompactd

From: kaiyang2
Date: Tue Mar 19 2024 - 22:43:48 EST


From: Kaiyang Zhao <kaiyang2@xxxxxxxxxx>

Proactively move movable pages out of unmovable zones in kcompactd.

When ZONE_MOVABLE is populated on a node, kcompactd runs a cross-zone
compaction pass over the zones below it: the migrate scanner walks the
whole source zone while the free scanner targets ZONE_MOVABLE
(cc->dst_zone), so movable pages are migrated out of the unmovable zones
instead of being compacted within them.

Debug only: zone start and end PFNs are printed in /proc/zoneinfo.

Counters are added for cross-zone compaction starts and for the pages
scanned by the migrate and free scanners.

Signed-off-by: Kaiyang Zhao <zh_kaiyang@xxxxxxxxxxx>
---
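Notes (not intended for the changelog): a quick way to observe the new
behavior at runtime is to watch the new events in /proc/vmstat and the
debug-only zone bounds added to /proc/zoneinfo, for example on a kernel
built with CONFIG_COMPACTION:

  # cross-zone compaction counters added by this patch
  grep compact_cross_zone /proc/vmstat

  # per-zone start/end PFNs printed alongside spanned/present/managed
  grep -E '^Node| start | end ' /proc/zoneinfo
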
include/linux/vm_event_item.h | 3 +
mm/compaction.c | 101 +++++++++++++++++++++++++++++++---
mm/vmstat.c | 11 +++-
3 files changed, 104 insertions(+), 11 deletions(-)

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index be88819085b6..c9183117c8f7 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -80,6 +80,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
KCOMPACTD_WAKE,
KCOMPACTD_MIGRATE_SCANNED, KCOMPACTD_FREE_SCANNED,
COMPACT_CROSS_ZONE_MIGRATED,
+ KCOMPACTD_CROSS_ZONE_START,
+ COMPACT_CROSS_ZONE_MIGRATE_SCANNED,
+ COMPACT_CROSS_ZONE_FREE_SCANNED,
#endif
#ifdef CONFIG_HUGETLB_PAGE
HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
diff --git a/mm/compaction.c b/mm/compaction.c
index dea10ad8ec64..94ce1282f17b 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1436,7 +1436,10 @@ fast_isolate_freepages(struct compact_control *cc)
* Preferred point is in the top quarter of the scan space but take
* a pfn from the top half if the search is problematic.
*/
- distance = (cc->free_pfn - cc->migrate_pfn);
+ if (cc->zone != dst_zone)
+ distance = (cc->free_pfn - dst_zone->zone_start_pfn) >> 1;
+ else
+ distance = (cc->free_pfn - cc->migrate_pfn);
low_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 2));
min_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 1));

@@ -1602,7 +1605,10 @@ static void isolate_freepages(struct compact_control *cc)
block_start_pfn = pageblock_start_pfn(isolate_start_pfn);
block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
zone_end_pfn(zone));
- low_pfn = pageblock_end_pfn(cc->migrate_pfn);
+ if (cc->dst_zone && cc->zone != cc->dst_zone)
+ low_pfn = pageblock_end_pfn(cc->dst_zone->zone_start_pfn);
+ else
+ low_pfn = pageblock_end_pfn(cc->migrate_pfn);
stride = cc->mode == MIGRATE_ASYNC ? COMPACT_CLUSTER_MAX : 1;

/*
@@ -1822,7 +1828,11 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc)
* within the first eighth to reduce the chances that a migration
* target later becomes a source.
*/
- distance = (cc->free_pfn - cc->migrate_pfn) >> 1;
+ if (cc->dst_zone && cc->zone != cc->dst_zone)
+ distance = (zone_end_pfn(cc->zone) - cc->migrate_pfn) >> 1;
+ else
+ distance = (cc->free_pfn - cc->migrate_pfn) >> 1;
+
if (cc->migrate_pfn != cc->zone->zone_start_pfn)
distance >>= 2;
high_pfn = pageblock_start_pfn(cc->migrate_pfn + distance);
@@ -1897,7 +1907,7 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
{
unsigned long block_start_pfn;
unsigned long block_end_pfn;
- unsigned long low_pfn;
+ unsigned long low_pfn, high_pfn;
struct page *page;
const isolate_mode_t isolate_mode =
(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
@@ -1924,11 +1934,16 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
/* Only scan within a pageblock boundary */
block_end_pfn = pageblock_end_pfn(low_pfn);

+ if (cc->dst_zone && cc->zone != cc->dst_zone)
+ high_pfn = zone_end_pfn(cc->zone);
+ else
+ high_pfn = cc->free_pfn;
+
/*
* Iterate over whole pageblocks until we find the first suitable.
* Do not cross the free scanner.
*/
- for (; block_end_pfn <= cc->free_pfn;
+ for (; block_end_pfn <= high_pfn;
fast_find_block = false,
cc->migrate_pfn = low_pfn = block_end_pfn,
block_start_pfn = block_end_pfn,
@@ -1954,6 +1969,7 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
* before making it "skip" so other compaction instances do
* not scan the same block.
*/
+
if (pageblock_aligned(low_pfn) &&
!fast_find_block && !isolation_suitable(cc, page))
continue;
@@ -1976,6 +1992,10 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
isolate_mode))
return ISOLATE_ABORT;

+ /* free_pfn may have changed; update high_pfn. */
+ if (!cc->dst_zone || cc->zone == cc->dst_zone)
+ high_pfn = cc->free_pfn;
+
/*
* Either we isolated something and proceed with migration. Or
* we failed and compact_zone should decide if we should
@@ -2141,7 +2161,9 @@ static enum compact_result __compact_finished(struct compact_control *cc)
goto out;
}

- if (is_via_compact_memory(cc->order))
+ /* Don't check if a suitable page is free if doing cross zone compaction. */
+ if (is_via_compact_memory(cc->order) ||
+ (cc->dst_zone && cc->dst_zone != cc->zone))
return COMPACT_CONTINUE;

/*
@@ -2224,7 +2246,8 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
* should be no need for compaction at all.
*/
if (zone_watermark_ok(zone, order, watermark, highest_zoneidx,
- alloc_flags))
+ alloc_flags) &&
+ dst_zone == zone)
return COMPACT_SUCCESS;

/*
@@ -2270,6 +2293,11 @@ enum compact_result compaction_suitable(struct zone *zone, int order,

ret = __compaction_suitable(zone, order, alloc_flags, highest_zoneidx,
zone_page_state(dst_zone, NR_FREE_PAGES), dst_zone);
+
+ /* Allow migrating movable pages to ZONE_MOVABLE regardless of frag index */
+ if (ret == COMPACT_CONTINUE && dst_zone != zone)
+ return ret;
+
/*
* fragmentation index determines if allocation failures are due to
* low memory or external fragmentation
@@ -2841,6 +2869,14 @@ void compaction_unregister_node(struct node *node)
}
#endif /* CONFIG_SYSFS && CONFIG_NUMA */

+static inline bool should_compact_unmovable_zones(pg_data_t *pgdat)
+{
+ if (populated_zone(&pgdat->node_zones[ZONE_MOVABLE]))
+ return true;
+ else
+ return false;
+}
+
static inline bool kcompactd_work_requested(pg_data_t *pgdat)
{
return pgdat->kcompactd_max_order > 0 || kthread_should_stop() ||
@@ -2942,6 +2978,48 @@ static void kcompactd_do_work(pg_data_t *pgdat)
pgdat->kcompactd_highest_zoneidx = pgdat->nr_zones - 1;
}

+static void kcompactd_clean_unmovable_zones(pg_data_t *pgdat)
+{
+ int zoneid;
+ struct zone *zone;
+ struct compact_control cc = {
+ .order = 0,
+ .search_order = 0,
+ .highest_zoneidx = ZONE_MOVABLE,
+ .mode = MIGRATE_SYNC,
+ .ignore_skip_hint = true,
+ .gfp_mask = GFP_KERNEL,
+ .dst_zone = &pgdat->node_zones[ZONE_MOVABLE],
+ .whole_zone = true
+ };
+ count_compact_event(KCOMPACTD_CROSS_ZONE_START);
+
+ for (zoneid = 0; zoneid < ZONE_MOVABLE; zoneid++) {
+ int status;
+
+ zone = &pgdat->node_zones[zoneid];
+ if (!populated_zone(zone))
+ continue;
+
+ if (compaction_suitable(zone, cc.order, 0, zoneid, cc.dst_zone) !=
+ COMPACT_CONTINUE)
+ continue;
+
+ if (kthread_should_stop())
+ return;
+
+ /* Not participating in compaction defer. */
+
+ cc.zone = zone;
+ status = compact_zone(&cc, NULL);
+
+ count_compact_events(COMPACT_CROSS_ZONE_MIGRATE_SCANNED,
+ cc.total_migrate_scanned);
+ count_compact_events(COMPACT_CROSS_ZONE_FREE_SCANNED,
+ cc.total_free_scanned);
+ }
+}
+
void wakeup_kcompactd(pg_data_t *pgdat, int order, int highest_zoneidx)
{
if (!order)
@@ -2994,9 +3072,10 @@ static int kcompactd(void *p)

/*
* Avoid the unnecessary wakeup for proactive compaction
- * when it is disabled.
+ * and cleanup of unmovable zones
+ * when they are disabled.
*/
- if (!sysctl_compaction_proactiveness)
+ if (!sysctl_compaction_proactiveness && !should_compact_unmovable_zones(pgdat))
timeout = MAX_SCHEDULE_TIMEOUT;
trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
if (wait_event_freezable_timeout(pgdat->kcompactd_wait,
@@ -3017,6 +3096,10 @@ static int kcompactd(void *p)
continue;
}

+ /* Migrates movable pages out of unmovable zones if ZONE_MOVABLE exists */
+ if (should_compact_unmovable_zones(pgdat))
+ kcompactd_clean_unmovable_zones(pgdat);
+
/*
* Start the proactive work with default timeout. Based
* on the fragmentation score, this timeout is updated.
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 98af82e65ad9..444740605f2f 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1325,6 +1325,9 @@ const char * const vmstat_text[] = {
"compact_daemon_migrate_scanned",
"compact_daemon_free_scanned",
"compact_cross_zone_migrated",
+ "compact_cross_zone_start",
+ "compact_cross_zone_migrate_scanned",
+ "compact_cross_zone_free_scanned",
#endif

#ifdef CONFIG_HUGETLB_PAGE
@@ -1692,7 +1695,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
"\n spanned %lu"
"\n present %lu"
"\n managed %lu"
- "\n cma %lu",
+ "\n cma %lu"
+ "\n start %lu"
+ "\n end %lu",
zone_page_state(zone, NR_FREE_PAGES),
zone->watermark_boost,
min_wmark_pages(zone),
@@ -1701,7 +1706,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
zone->spanned_pages,
zone->present_pages,
zone_managed_pages(zone),
- zone_cma_pages(zone));
+ zone_cma_pages(zone),
+ zone->zone_start_pfn,
+ zone_end_pfn(zone));

seq_printf(m,
"\n protection: (%ld",
--
2.40.1