Re: [RFC PATCH] mm/mglru: improve positive_ctrl_err for low refault samples
From: Barry Song
Date: Wed May 27 2026 - 05:19:23 EST
On Wed, May 27, 2026 at 4:57 PM zhaoyang.huang
<zhaoyang.huang@xxxxxxxxxx> wrote:
>
> From: Zhaoyang Huang <zhaoyang.huang@xxxxxxxxxx>
>
> The memory.stat under per-pid memcg & MGLRU[1] observed in android
> system which means current type selection by judging the refault rate
> almost failed since file page's refault count remains 0 which will
> shortcut to file by 'pv->refaulted < MIN_LRU_BATCH'. The reason could be
> both of small lruvec size and bypass of the file refault count when it
> refaulted in another memcg.
> Refine the MGLRU PID comparison when lruvecs lack enough refault
> statistics, using gain-aware fallbacks which is simply respect the
> swappiness and historical integral value without affecting the usual
> refault-rate path under sustained reclaim pressure.
>
> [1]
> /sys/fs/cgroup/apps/uid_10123/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1123 steal_direct=32 pgactive=58
> /sys/fs/cgroup/apps/uid_10136/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=2491 steal_direct=64 pgactive=48
> /sys/fs/cgroup/apps/uid_10237/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1355 steal_direct=0 pgactive=1
> /sys/fs/cgroup/system/uid_1002/memory.stat: refault_anon=3 active_anon=1 refault_file=0 active_file=0 steal_kswapd=1162 steal_direct=4 pgactive=3987
> /sys/fs/cgroup/system/uid_1027/memory.stat: refault_anon=1 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1157 steal_direct=14 pgactive=1517
> /sys/fs/cgroup/system/uid_1037/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1461 steal_direct=0 pgactive=247
> /sys/fs/cgroup/system/uid_1046/pid_1251/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1643 steal_direct=18 pgactive=2914
> /sys/fs/cgroup/system/uid_1047/pid_724/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=8924 steal_direct=449 pgactive=3753
> /sys/fs/cgroup/system/uid_1047/pid_789/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=3864 steal_direct=157 pgactive=3062
> /sys/fs/cgroup/apps/uid_10090/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1285 steal_direct=32 pgactive=36
> /sys/fs/cgroup/apps/uid_10123/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1126 steal_direct=32 pgactive=70
> /sys/fs/cgroup/apps/uid_10136/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=2474 steal_direct=69 pgactive=52
> /sys/fs/cgroup/apps/uid_10175/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=3264 steal_direct=10 pgactive=861
> /sys/fs/cgroup/apps/uid_10211/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1447 steal_direct=32 pgactive=21
> /sys/fs/cgroup/apps/uid_10237/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1354 steal_direct=0 pgactive=2
> /sys/fs/cgroup/system/uid_1000/pid_471/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1296 steal_direct=4 pgactive=65
> /sys/fs/cgroup/system/uid_1002/memory.stat: refault_anon=18 active_anon=3 refault_file=0 active_file=0 steal_kswapd=1286 steal_direct=4 pgactive=3979
> /sys/fs/cgroup/system/uid_1037/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1462 steal_direct=0 pgactive=246
> /sys/fs/cgroup/system/uid_1041/memory.stat: refault_anon=17 active_anon=5 refault_file=0 active_file=0 steal_kswapd=4429 steal_direct=30 pgactive=5239
> /sys/fs/cgroup/system/uid_1041/pid_820/memory.stat: refault_anon=12 active_anon=0 refault_file=0 active_file=0 steal_kswapd=2441 steal_direct=17 pgactive=2034
> /sys/fs/cgroup/system/uid_1041/pid_839/memory.stat: refault_anon=5 active_anon=5 refault_file=0 active_file=0 steal_kswapd=1357 steal_direct=10 pgactive=2055
> /sys/fs/cgroup/system/uid_1047/pid_1166/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1577 steal_direct=70 pgactive=1130
> /sys/fs/cgroup/system/uid_1047/pid_732/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=7661 steal_direct=115 pgactive=3681
> /sys/fs/cgroup/system/uid_1047/pid_799/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=2847 steal_direct=62 pgactive=3573
> /sys/fs/cgroup/apps/uid_10129/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1153 steal_direct=18 pgactive=42
> /sys/fs/cgroup/apps/uid_10136/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=2336 steal_direct=210 pgactive=49
> /sys/fs/cgroup/apps/uid_10165/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1491 steal_direct=43 pgactive=525
> /sys/fs/cgroup/apps/uid_10175/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=2229 steal_direct=108 pgactive=863
> /sys/fs/cgroup/apps/uid_10211/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1299 steal_direct=163 pgactive=23
> /sys/fs/cgroup/apps/uid_10237/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1323 steal_direct=32 pgactive=2
> /sys/fs/cgroup/system/uid_0/pid_1203/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=2927 steal_direct=12 pgactive=252
> /sys/fs/cgroup/system/uid_1037/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=1359 steal_direct=34 pgactive=253
> /sys/fs/cgroup/system/uid_1047/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=4269 steal_direct=55 pgactive=6308
> /sys/fs/cgroup/system/uid_1047/pid_721/memory.stat: refault_anon=0 active_anon=0 refault_file=0 active_file=0 steal_kswapd=2745 steal_direct=44 pgactive=2761
Yes.
I agree that the current positive_ctrl_err() has some issues
that could be improved. But could you describe the user-visible
behavior affected by your patch? For example, does it improve
swappiness behavior? Does it eliminate the over-protection of
anon memory?
>
> Assisted-by: Cursor:GPT-5.x
> Signed-off-by: Zhaoyang Huang <zhaoyang.huang@xxxxxxxxxx>
> ---
> mm/vmscan.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 52 insertions(+), 4 deletions(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index bd1b1aa12581..5fecfaa392d8 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -3179,14 +3179,62 @@ static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover)
> }
> }
>
> +/*
> + * positive_ctrl_err() - return true if SP is hotter than PV
> + *
> + * Drives MGLRU type and tier selection in get_type_to_scan() and
> + * get_tier_idx(). A true return means the setpoint is hotter than the process
> + * variable and reclaim should favor the other type or a higher tier.
> + *
> + * With reliable refault statistics on both sides, compare refault rates. That
> + * path dominates on large lruvecs under sustained memory pressure, including
> + * the node-level lruvec when memcg is disabled.
> + *
> + * Small memcg leaf lruvecs often lack reliable refault statistics: few pages,
> + * infrequent reclaim, and counters reset on generation advance. Treating a
> + * low PV refault count as unconditional evidence that SP is hotter ignores
> + * swappiness and tier gain and confuses "no samples yet" with "cold". Using
> + * the rate comparison when only SP lacks samples skews the ratio. The
> + * low-sample paths below apply gain-aware fallbacks and apply defaulting to SP
> + * is hotter when neither side has eviction history.
> + */
I assume the new algorithm and the above comment were generated
by AI. Could you rephrase them in more human-understandable
language?
> static bool positive_ctrl_err(struct ctrl_pos *sp, struct ctrl_pos *pv)
> {
> + unsigned long pv_ref = pv->refaulted;
> + /* Used by the rate comparison (SP only) and the total fallback (both). */
> + unsigned long sp_total = sp->total + MIN_LRU_BATCH;
> + unsigned long pv_total = pv->total + MIN_LRU_BATCH;
> +
> + /* Both sides: compare refault rates. */
> + if (sp->refaulted >= MIN_LRU_BATCH && pv->refaulted >= MIN_LRU_BATCH)
> + goto compare_rates;
> +
> + /*
> + * SP only: PV refault count is not statistically meaningful; treat PV
> + * refaults as zero and use the rate comparison (yields SP hotter).
> + */
> + if (sp->refaulted >= MIN_LRU_BATCH) {
> + pv_ref = 0;
> + goto compare_rates;
> + }
> +
> + /* PV only: trust PV refault signal; SP is not hotter. */
> + if (pv->refaulted >= MIN_LRU_BATCH)
> + return false;
> +
> + /* Neither side has eviction history, pv is colder */
> + if (!sp->total && !pv->total)
> + return true;
> +
> + /* Neither side has enough refaults: compare gain-weighted totals. */
> + return sp->gain * pv_total <= pv->gain * sp_total;
> +
> +compare_rates:
> /*
> - * Return true if the PV has a limited number of refaults or a lower
> - * refaulted/total than the SP.
> + * Rate comparison with margin on SP total and refault only; PV total is
> + * not padded so a meaningful PV refault rate is preserved when sampled.
> */
> - return pv->refaulted < MIN_LRU_BATCH ||
> - pv->refaulted * (sp->total + MIN_LRU_BATCH) * sp->gain <=
> + return pv_ref * sp_total * sp->gain <=
> (sp->refaulted + 1) * pv->total * pv->gain;
> }
>
> --
> 2.25.1
>
>