[PATCH 1/2] mm: vmscan: Split proactive reclaim statistics from direct reclaim statistics

From: Hao Jia
Date: Tue Mar 18 2025 - 04:02:48 EST


From: Hao Jia <jiahao1@xxxxxxxxxxx>

In proactive memory reclaim scenarios, userspace needs
accurate proactive reclaim statistics to dynamically
adjust the frequency and amount of memory it reclaims
proactively. Currently, proactive reclaim is accounted
under direct reclaim, which makes the direct reclaim
statistics misleading.

Therefore, separate proactive reclaim statistics from the
direct reclaim counters by introducing new counters:
pgsteal_proactive, pgdemote_proactive, and pgscan_proactive,
to avoid confusion with direct reclaim.
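
As an illustration only (not part of the kernel change itself): a
userspace proactive-reclaim daemon could poll the new
pgsteal_proactive key in memory.stat around each write to
memory.reclaim to see how many pages its own requests reclaimed,
independent of any concurrent direct reclaim. The cgroup path and
helper name below are made-up examples.

  /* Minimal userspace sketch; error handling trimmed for brevity. */
  #include <stdio.h>
  #include <string.h>

  /* Return the pgsteal_proactive count from <memcg>/memory.stat. */
  static long read_pgsteal_proactive(const char *memcg)
  {
          char path[256], key[64];
          long v;
          FILE *f;

          snprintf(path, sizeof(path), "%s/memory.stat", memcg);
          f = fopen(path, "r");
          if (!f)
                  return -1;
          while (fscanf(f, "%63s %ld", key, &v) == 2) {
                  if (!strcmp(key, "pgsteal_proactive")) {
                          fclose(f);
                          return v;
                  }
          }
          fclose(f);
          return -1;
  }

  int main(void)
  {
          /* Hypothetical cgroup; adjust to the target workload. */
          const char *cg = "/sys/fs/cgroup/workload";
          long before = read_pgsteal_proactive(cg);

          /* ... write a byte count to <cg>/memory.reclaim here ... */

          long after = read_pgsteal_proactive(cg);

          printf("pages reclaimed proactively: %ld\n", after - before);
          return 0;
  }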

Signed-off-by: Hao Jia <jiahao1@xxxxxxxxxxx>
---
Documentation/admin-guide/cgroup-v2.rst | 9 +++++++
include/linux/mmzone.h | 1 +
include/linux/vm_event_item.h | 2 ++
mm/memcontrol.c | 7 +++++
mm/vmscan.c | 35 ++++++++++++++-----------
mm/vmstat.c | 3 +++
6 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index cb1b4e759b7e..d6692607f80a 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1570,6 +1570,9 @@ The following nested keys are defined.
pgscan_khugepaged (npn)
Amount of scanned pages by khugepaged (in an inactive LRU list)

+ pgscan_proactive (npn)
+ Amount of scanned pages proactively (in an inactive LRU list)
+
pgsteal_kswapd (npn)
Amount of reclaimed pages by kswapd

@@ -1579,6 +1582,9 @@ The following nested keys are defined.
pgsteal_khugepaged (npn)
Amount of reclaimed pages by khugepaged

+ pgsteal_proactive (npn)
+ Amount of reclaimed pages proactively
+
pgfault (npn)
Total number of page faults incurred

@@ -1656,6 +1662,9 @@ The following nested keys are defined.
pgdemote_khugepaged
Number of pages demoted by khugepaged.

+ pgdemote_proactive
+ Number of pages demoted proactively.
+
hugetlb
Amount of memory used by hugetlb pages. This metric only shows
up if hugetlb usage is accounted for in memory.current (i.e.
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 9540b41894da..69b4996dadc8 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -220,6 +220,7 @@ enum node_stat_item {
PGDEMOTE_KSWAPD,
PGDEMOTE_DIRECT,
PGDEMOTE_KHUGEPAGED,
+ PGDEMOTE_PROACTIVE,
#ifdef CONFIG_HUGETLB_PAGE
NR_HUGETLB,
#endif
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index f70d0958095c..f11b6fa9c5b3 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -41,9 +41,11 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
PGSTEAL_KSWAPD,
PGSTEAL_DIRECT,
PGSTEAL_KHUGEPAGED,
+ PGSTEAL_PROACTIVE,
PGSCAN_KSWAPD,
PGSCAN_DIRECT,
PGSCAN_KHUGEPAGED,
+ PGSCAN_PROACTIVE,
PGSCAN_DIRECT_THROTTLE,
PGSCAN_ANON,
PGSCAN_FILE,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4de6acb9b8ec..32e28ab90914 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -315,6 +315,7 @@ static const unsigned int memcg_node_stat_items[] = {
PGDEMOTE_KSWAPD,
PGDEMOTE_DIRECT,
PGDEMOTE_KHUGEPAGED,
+ PGDEMOTE_PROACTIVE,
#ifdef CONFIG_HUGETLB_PAGE
NR_HUGETLB,
#endif
@@ -431,9 +432,11 @@ static const unsigned int memcg_vm_event_stat[] = {
PGSCAN_KSWAPD,
PGSCAN_DIRECT,
PGSCAN_KHUGEPAGED,
+ PGSCAN_PROACTIVE,
PGSTEAL_KSWAPD,
PGSTEAL_DIRECT,
PGSTEAL_KHUGEPAGED,
+ PGSTEAL_PROACTIVE,
PGFAULT,
PGMAJFAULT,
PGREFILL,
@@ -1390,6 +1393,7 @@ static const struct memory_stat memory_stats[] = {
{ "pgdemote_kswapd", PGDEMOTE_KSWAPD },
{ "pgdemote_direct", PGDEMOTE_DIRECT },
{ "pgdemote_khugepaged", PGDEMOTE_KHUGEPAGED },
+ { "pgdemote_proactive", PGDEMOTE_PROACTIVE },
#ifdef CONFIG_NUMA_BALANCING
{ "pgpromote_success", PGPROMOTE_SUCCESS },
#endif
@@ -1432,6 +1436,7 @@ static int memcg_page_state_output_unit(int item)
case PGDEMOTE_KSWAPD:
case PGDEMOTE_DIRECT:
case PGDEMOTE_KHUGEPAGED:
+ case PGDEMOTE_PROACTIVE:
#ifdef CONFIG_NUMA_BALANCING
case PGPROMOTE_SUCCESS:
#endif
@@ -1503,10 +1508,12 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
seq_buf_printf(s, "pgscan %lu\n",
memcg_events(memcg, PGSCAN_KSWAPD) +
memcg_events(memcg, PGSCAN_DIRECT) +
+ memcg_events(memcg, PGSCAN_PROACTIVE) +
memcg_events(memcg, PGSCAN_KHUGEPAGED));
seq_buf_printf(s, "pgsteal %lu\n",
memcg_events(memcg, PGSTEAL_KSWAPD) +
memcg_events(memcg, PGSTEAL_DIRECT) +
+ memcg_events(memcg, PGSTEAL_PROACTIVE) +
memcg_events(memcg, PGSTEAL_KHUGEPAGED));

for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c767d71c43d7..fa816cd08ac3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -441,21 +441,26 @@ void drop_slab(void)
} while ((freed >> shift++) > 1);
}

-static int reclaimer_offset(void)
+#define CHECK_RECLAIMER_OFFSET(type) \
+ do { \
+ BUILD_BUG_ON(PGSTEAL_##type - PGSTEAL_KSWAPD != \
+ PGDEMOTE_##type - PGDEMOTE_KSWAPD); \
+ BUILD_BUG_ON(PGSTEAL_##type - PGSTEAL_KSWAPD != \
+ PGSCAN_##type - PGSCAN_KSWAPD); \
+ } while (0)
+
+static int reclaimer_offset(struct scan_control *sc)
{
- BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD !=
- PGDEMOTE_DIRECT - PGDEMOTE_KSWAPD);
- BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD !=
- PGDEMOTE_KHUGEPAGED - PGDEMOTE_KSWAPD);
- BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD !=
- PGSCAN_DIRECT - PGSCAN_KSWAPD);
- BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD !=
- PGSCAN_KHUGEPAGED - PGSCAN_KSWAPD);
+ CHECK_RECLAIMER_OFFSET(DIRECT);
+ CHECK_RECLAIMER_OFFSET(KHUGEPAGED);
+ CHECK_RECLAIMER_OFFSET(PROACTIVE);

if (current_is_kswapd())
return 0;
if (current_is_khugepaged())
return PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD;
+ if (sc->proactive)
+ return PGSTEAL_PROACTIVE - PGSTEAL_KSWAPD;
return PGSTEAL_DIRECT - PGSTEAL_KSWAPD;
}

@@ -1986,7 +1991,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
&nr_scanned, sc, lru);

__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
- item = PGSCAN_KSWAPD + reclaimer_offset();
+ item = PGSCAN_KSWAPD + reclaimer_offset(sc);
if (!cgroup_reclaim(sc))
__count_vm_events(item, nr_scanned);
__count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned);
@@ -2002,10 +2007,10 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
spin_lock_irq(&lruvec->lru_lock);
move_folios_to_lru(lruvec, &folio_list);

- __mod_lruvec_state(lruvec, PGDEMOTE_KSWAPD + reclaimer_offset(),
+ __mod_lruvec_state(lruvec, PGDEMOTE_KSWAPD + reclaimer_offset(sc),
stat.nr_demoted);
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
- item = PGSTEAL_KSWAPD + reclaimer_offset();
+ item = PGSTEAL_KSWAPD + reclaimer_offset(sc);
if (!cgroup_reclaim(sc))
__count_vm_events(item, nr_reclaimed);
__count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed);
@@ -4545,7 +4550,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
break;
}

- item = PGSCAN_KSWAPD + reclaimer_offset();
+ item = PGSCAN_KSWAPD + reclaimer_offset(sc);
if (!cgroup_reclaim(sc)) {
__count_vm_events(item, isolated);
__count_vm_events(PGREFILL, sorted);
@@ -4695,10 +4700,10 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
reset_batch_size(walk);
}

- __mod_lruvec_state(lruvec, PGDEMOTE_KSWAPD + reclaimer_offset(),
+ __mod_lruvec_state(lruvec, PGDEMOTE_KSWAPD + reclaimer_offset(sc),
stat.nr_demoted);

- item = PGSTEAL_KSWAPD + reclaimer_offset();
+ item = PGSTEAL_KSWAPD + reclaimer_offset(sc);
if (!cgroup_reclaim(sc))
__count_vm_events(item, reclaimed);
__count_memcg_events(memcg, item, reclaimed);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 16bfe1c694dd..eff4d833ff8a 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1273,6 +1273,7 @@ const char * const vmstat_text[] = {
"pgdemote_kswapd",
"pgdemote_direct",
"pgdemote_khugepaged",
+ "pgdemote_proactive",
#ifdef CONFIG_HUGETLB_PAGE
"nr_hugetlb",
#endif
@@ -1307,9 +1308,11 @@ const char * const vmstat_text[] = {
"pgsteal_kswapd",
"pgsteal_direct",
"pgsteal_khugepaged",
+ "pgsteal_proactive",
"pgscan_kswapd",
"pgscan_direct",
"pgscan_khugepaged",
+ "pgscan_proactive",
"pgscan_direct_throttle",
"pgscan_anon",
"pgscan_file",
--
2.34.1