Re: [PATCH] mm/mglru: fix cgroup OOM during MGLRU state switching
From: Barry Song
Date: Wed Mar 04 2026 - 23:46:20 EST
[...]
>
> --
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 614ccf39fe3f..d7ff7a6ed088 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -2652,6 +2652,43 @@ static bool can_age_anon_pages(struct lruvec *lruvec,
>
> #ifdef CONFIG_LRU_GEN
>
> +DEFINE_STATIC_KEY_FALSE(lru_gen_draining);
> +
> +static inline bool lru_gen_is_draining(void)
> +{
> + return static_branch_unlikely(&lru_gen_draining);
> +}
> +
> +/*
> + * Lazily wait for the draining thread to finish if it's running.
> + *
> + * Return: whether we'd like to reclaim from multi-gen LRU.
> + */
> +static inline bool lru_gen_draining_wait(struct lruvec *lruvec, struct scan_control *sc)
> +{
> + bool global_enabled = lru_gen_enabled();
> +
> + /* Try reclaiming from the current LRU first */
> + if (sc->priority > DEF_PRIORITY / 2)
> + return READ_ONCE(lruvec->lrugen.enabled);
> +
> + /* Oops, try from the other side... */
> + if (sc->priority > 1)
> + return global_enabled;
> +
> + /*
> + * If we see lrugen.enabled is consistent here, when we get the lru
> + * spinlock, the migrating thread will have filled the lruvec with some
> + * pages, so we can continue without waiting.
> + */
> + while (global_enabled ^ READ_ONCE(lruvec->lrugen.enabled)) {
> + /* Not switching this one yet. Wait for a while. */
> + schedule_timeout_uninterruptible(1);
> + }
> +
> + return global_enabled;
> +}
Hi Bingfang,
Thanks very much for sharing. In my humble opinion, the code feels a
bit hacky. If we need to do something to reduce the probability of OOM
without fully fixing the issue, Leon’s approach—trying to reclaim from
both MGLRU and the active/inactive LRU during the transition—seems
less hacky, assuming both approaches show a similar likelihood of
triggering OOM, even though some race conditions would still remain.
> +
> #ifdef CONFIG_LRU_GEN_ENABLED
> DEFINE_STATIC_KEY_ARRAY_TRUE(lru_gen_caps, NR_LRU_GEN_CAPS);
> #define get_cap(cap) static_branch_likely(&lru_gen_caps[cap])
> @@ -5171,6 +5208,8 @@ static void lru_gen_change_state(bool enabled)
> if (enabled == lru_gen_enabled())
> goto unlock;
>
> + static_branch_enable_cpuslocked(&lru_gen_draining);
> +
> if (enabled)
> static_branch_enable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]);
> else
> @@ -5201,6 +5240,9 @@ static void lru_gen_change_state(bool enabled)
>
> cond_resched();
> } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
> +
> + static_branch_disable_cpuslocked(&lru_gen_draining);
> +
> unlock:
> mutex_unlock(&state_mutex);
> put_online_mems();
> @@ -5752,6 +5794,16 @@ late_initcall(init_lru_gen);
>
> #else /* !CONFIG_LRU_GEN */
>
> +static inline bool lru_gen_is_draining(void)
> +{
> + return false;
> +}
> +
> +static inline bool shrink_lruvec_draining(struct lruvec *lruvec, struct scan_control *sc)
> +{
> + return false;
> +}
> +
> static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
> {
> BUILD_BUG();
> @@ -5780,7 +5832,10 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
> bool proportional_reclaim;
> struct blk_plug plug;
>
> - if (lru_gen_enabled() && !root_reclaim(sc)) {
> + if (lru_gen_is_draining() && lru_gen_draining_wait(lruvec, sc)) {
> + lru_gen_shrink_lruvec(lruvec, sc);
> + return;
> + } else if (lru_gen_enabled() && !root_reclaim(sc)) {
> lru_gen_shrink_lruvec(lruvec, sc);
> return;
> }
Thanks
Barry