[PATCH RFC 01/32] mm/memcontrol: make lru_zone_size atomic and simplify sanity check

From: Kairui Song via B4 Relay

Date: Fri May 01 2026 - 17:05:12 EST


From: Kairui Song <kasong@xxxxxxxxxxx>

commit ca707239e8a7 ("mm: update_lru_size warn and reset bad lru_size")
introduced a sanity check to catch memcg counter underflow, which is
more like a workaround for another bug: lru_zone_size is unsigned, so
underflow will wrap it around and return an enormously large number,
then the memcg shrinker will loop almost forever as the calculated
number of folios to shrink is huge. That commit also checks if a zero
value matches the empty LRU list, so we have to hold the LRU lock, and
do the counter adding differently depending on whether the nr_pages is
negative.

But later commit b4536f0c829c ("mm, memcg: fix the active list aging for
lowmem requests when memcg is enabled") already removed the LRU
emptiness check, so doing the adding differently is meaningless now. And
if we just turn it into an atomic long, underflow isn't a big issue
either, and can be checked at the reader side. The reader side is called
much less frequently than the updater.

So let's turn the counter into an atomic long and check at the
reader side instead, which has a smaller overhead. Use an atomic to avoid
potential locking issues. The underflow correction is removed, which
should be fine: if there is a mass leak of the LRU size counter,
something else has likely gone very wrong too, and one should fix the
leaking site instead. Besides, doing the sanity check in the updater is
unlikely to catch the leaking site. E.g. if a folio was removed minutes
ago without updating the counter while there are still other folios on
the LRU, the WARN won't be triggered until other folios are removed from
a likely correct callsite.

For now, still keep the LRU lock context. In theory that can be removed
too since the update is atomic, if we can tolerate a temporarily
inaccurate reading, but currently there is no benefit in doing so.

Signed-off-by: Kairui Song <kasong@xxxxxxxxxxx>
---
include/linux/memcontrol.h | 9 +++++++--
mm/memcontrol.c | 18 +-----------------
mm/vmscan.c | 5 -----
3 files changed, 8 insertions(+), 24 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index dc3fa687759b..345a6ba8a3a7 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -112,7 +112,7 @@ struct mem_cgroup_per_node {
/* Fields which get updated often at the end. */
struct lruvec lruvec;
CACHELINE_PADDING(_pad2_);
- unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
+ atomic_long_t lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
struct mem_cgroup_reclaim_iter iter;

/*
@@ -884,10 +884,15 @@ static inline
unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
enum lru_list lru, int zone_idx)
{
+ long val;
struct mem_cgroup_per_node *mz;

mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
- return READ_ONCE(mz->lru_zone_size[zone_idx][lru]);
+ val = atomic_long_read(&mz->lru_zone_size[zone_idx][lru]);
+ if (WARN_ON_ONCE(val < 0))
+ return 0;
+
+ return val;
}

void __mem_cgroup_handle_over_high(gfp_t gfp_mask);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c03d4787d466..71fad2239973 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1484,28 +1484,12 @@ void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
int zid, long nr_pages)
{
struct mem_cgroup_per_node *mz;
- unsigned long *lru_size;
- long size;

if (mem_cgroup_disabled())
return;

mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
- lru_size = &mz->lru_zone_size[zid][lru];
-
- if (nr_pages < 0)
- *lru_size += nr_pages;
-
- size = *lru_size;
- if (WARN_ONCE(size < 0,
- "%s(%p, %d, %ld): lru_size %ld\n",
- __func__, lruvec, lru, nr_pages, size)) {
- VM_BUG_ON(1);
- *lru_size = 0;
- }
-
- if (nr_pages > 0)
- *lru_size += nr_pages;
+ atomic_long_add(nr_pages, &mz->lru_zone_size[zid][lru]);
}

/**
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8df21364ef71..53b43e3f5795 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1641,10 +1641,6 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
return nr_reclaimed;
}

-/*
- * Update LRU sizes after isolating pages. The LRU size updates must
- * be complete before mem_cgroup_update_lru_size due to a sanity check.
- */
static __always_inline void update_lru_sizes(struct lruvec *lruvec,
enum lru_list lru, unsigned long *nr_zone_taken)
{
@@ -1656,7 +1652,6 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,

update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]);
}
-
}

/*

--
2.54.0