[RFC PATCH] mm/mglru: improve positive_ctrl_err for low refault samples
From: zhaoyang.huang
Date: Wed May 27 2026 - 04:57:04 EST
From: Zhaoyang Huang <zhaoyang.huang@xxxxxxxxxx>
The memory.stat under per-pid memcg & MGLRU[1] observed in android
system which means current type selection by judging the refault rate
almost failed since file page's refault count remains 0 which will
shortcut to file by 'pv->refaulted < MIN_LRU_BATCH'. The reason could be
both of small lruvec size and bypass of the file refault count when it
refaulted in another memcg.
Refine the MGLRU PID comparison when lruvecs lack enough refault
statistics, using gain-aware fallbacks which is simply respect the
swappiness and historical integral value without affecting the usual
refault-rate path under sustained reclaim pressure.
[1]
/sys/fs/cgroup/apps/uid_10123/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1123 steal_direct=32 pgactive=58
/sys/fs/cgroup/apps/uid_10136/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=2491 steal_direct=64 pgactive=48
/sys/fs/cgroup/apps/uid_10237/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1355 steal_direct=0 pgactive=1
/sys/fs/cgroup/system/uid_1002/memory.stat: refault_anon=3 active_anon=1 refault_file=0 active_file=0 steal_kswapd=1162 steal_direct=4 pgactive=3987
/sys/fs/cgroup/system/uid_1027/memory.stat: refault_anon=1 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1157 steal_direct=14 pgactive=1517
/sys/fs/cgroup/system/uid_1037/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1461 steal_direct=0 pgactive=247
/sys/fs/cgroup/system/uid_1046/pid_1251/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1643 steal_direct=18 pgactive=2914
/sys/fs/cgroup/system/uid_1047/pid_724/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=8924 steal_direct=449 pgactive=3753
/sys/fs/cgroup/system/uid_1047/pid_789/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=3864 steal_direct=157 pgactive=3062
/sys/fs/cgroup/apps/uid_10090/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1285 steal_direct=32 pgactive=36
/sys/fs/cgroup/apps/uid_10123/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1126 steal_direct=32 pgactive=70
/sys/fs/cgroup/apps/uid_10136/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=2474 steal_direct=69 pgactive=52
/sys/fs/cgroup/apps/uid_10175/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=3264 steal_direct=10 pgactive=861
/sys/fs/cgroup/apps/uid_10211/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1447 steal_direct=32 pgactive=21
/sys/fs/cgroup/apps/uid_10237/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1354 steal_direct=0 pgactive=2
/sys/fs/cgroup/system/uid_1000/pid_471/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1296 steal_direct=4 pgactive=65
/sys/fs/cgroup/system/uid_1002/memory.stat: refault_anon=18 active_anon=3 refault_file=0 active_file=0 steal_kswapd=1286 steal_direct=4 pgactive=3979
/sys/fs/cgroup/system/uid_1037/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1462 steal_direct=0 pgactive=246
/sys/fs/cgroup/system/uid_1041/memory.stat: refault_anon=17 active_anon=5 refault_file=0 active_file=0 steal_kswapd=4429 steal_direct=30 pgactive=5239
/sys/fs/cgroup/system/uid_1041/pid_820/memory.stat: refault_anon=12 active_anon=0 refault_file=0 active_file=0 steal_kswapd=2441 steal_direct=17 pgactive=2034
/sys/fs/cgroup/system/uid_1041/pid_839/memory.stat: refault_anon=5 active_anon=5 refault_file=0 active_file=0 steal_kswapd=1357 steal_direct=10 pgactive=2055
/sys/fs/cgroup/system/uid_1047/pid_1166/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1577 steal_direct=70 pgactive=1130
/sys/fs/cgroup/system/uid_1047/pid_732/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=7661 steal_direct=115 pgactive=3681
/sys/fs/cgroup/system/uid_1047/pid_799/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=2847 steal_direct=62 pgactive=3573
/sys/fs/cgroup/apps/uid_10129/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1153 steal_direct=18 pgactive=42
/sys/fs/cgroup/apps/uid_10136/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=2336 steal_direct=210 pgactive=49
/sys/fs/cgroup/apps/uid_10165/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1491 steal_direct=43 pgactive=525
/sys/fs/cgroup/apps/uid_10175/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=2229 steal_direct=108 pgactive=863
/sys/fs/cgroup/apps/uid_10211/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1299 steal_direct=163 pgactive=23
/sys/fs/cgroup/apps/uid_10237/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1323 steal_direct=32 pgactive=2
/sys/fs/cgroup/system/uid_0/pid_1203/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=2927 steal_direct=12 pgactive=252
/sys/fs/cgroup/system/uid_1037/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1359 steal_direct=34 pgactive=253
/sys/fs/cgroup/system/uid_1047/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=4269 steal_direct=55 pgactive=6308
/sys/fs/cgroup/system/uid_1047/pid_721/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=2745 steal_direct=44 pgactive=2761
Assisted-by: Cursor:GPT-5.x
Signed-off-by: Zhaoyang Huang <zhaoyang.huang@xxxxxxxxxx>
---
mm/vmscan.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 52 insertions(+), 4 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bd1b1aa12581..5fecfaa392d8 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3179,14 +3179,62 @@ static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover)
}
}
+/*
+ * positive_ctrl_err() - return true if SP is hotter than PV
+ *
+ * Drives MGLRU type and tier selection in get_type_to_scan() and
+ * get_tier_idx(). A true return means the setpoint is hotter than the process
+ * variable and reclaim should favor the other type or a higher tier.
+ *
+ * With reliable refault statistics on both sides, compare refault rates. That
+ * path dominates on large lruvecs under sustained memory pressure, including
+ * the node-level lruvec when memcg is disabled.
+ *
+ * Small memcg leaf lruvecs often lack reliable refault statistics: few pages,
+ * infrequent reclaim, and counters reset on generation advance. Treating a
+ * low PV refault count as unconditional evidence that SP is hotter ignores
+ * swappiness and tier gain and confuses "no samples yet" with "cold". Using
+ * the rate comparison when only SP lacks samples skews the ratio. The
+ * low-sample paths below apply gain-aware fallbacks and apply defaulting to SP
+ * is hotter when neither side has eviction history.
+ */
static bool positive_ctrl_err(struct ctrl_pos *sp, struct ctrl_pos *pv)
{
+ unsigned long pv_ref = pv->refaulted;
+ /* Used by the rate comparison (SP only) and the total fallback (both). */
+ unsigned long sp_total = sp->total + MIN_LRU_BATCH;
+ unsigned long pv_total = pv->total + MIN_LRU_BATCH;
+
+ /* Both sides: compare refault rates. */
+ if (sp->refaulted >= MIN_LRU_BATCH && pv->refaulted >= MIN_LRU_BATCH)
+ goto compare_rates;
+
+ /*
+ * SP only: PV refault count is not statistically meaningful; treat PV
+ * refaults as zero and use the rate comparison (yields SP hotter).
+ */
+ if (sp->refaulted >= MIN_LRU_BATCH) {
+ pv_ref = 0;
+ goto compare_rates;
+ }
+
+ /* PV only: trust PV refault signal; SP is not hotter. */
+ if (pv->refaulted >= MIN_LRU_BATCH)
+ return false;
+
+ /* Neither side has eviction history, pv is colder */
+ if (!sp->total && !pv->total)
+ return true;
+
+ /* Neither side has enough refaults: compare gain-weighted totals. */
+ return sp->gain * pv_total <= pv->gain * sp_total;
+
+compare_rates:
/*
- * Return true if the PV has a limited number of refaults or a lower
- * refaulted/total than the SP.
+ * Rate comparison with margin on SP total and refault only; PV total is
+ * not padded so a meaningful PV refault rate is preserved when sampled.
*/
- return pv->refaulted < MIN_LRU_BATCH ||
- pv->refaulted * (sp->total + MIN_LRU_BATCH) * sp->gain <=
+ return pv_ref * sp_total * sp->gain <=
(sp->refaulted + 1) * pv->total * pv->gain;
}
--
2.25.1