[PATCH] cachestat: do not flush stats in recency check

From: Nhat Pham
Date: Thu Jun 27 2024 - 16:17:53 EST


syzbot detects that cachestat() is flushing stats, which can sleep, in
its RCU read section (see [1]). This is done in the
workingset_test_recent() step (which checks if the folio's eviction is
recent).

Move the stat flushing step to before the RCU read section of cachestat,
and skip stat flushing during the recency check.

[1]: https://lore.kernel.org/cgroups/000000000000f71227061bdf97e0@xxxxxxxxxx/

Reported-by: syzbot+b7f13b2d0cc156edf61a@xxxxxxxxxxxxxxxxxxxxxxxxx
Closes: https://lore.kernel.org/cgroups/000000000000f71227061bdf97e0@xxxxxxxxxx/
Debugged-by: Johannes Weiner <hannes@xxxxxxxxxxx>
Suggested-by: Johannes Weiner <hannes@xxxxxxxxxxx>
Signed-off-by: Nhat Pham <nphamcs@xxxxxxxxx>
Fixes: b00684722262 ("mm: workingset: move the stats flush into workingset_test_recent()")
Cc: stable@xxxxxxxxxxxxxxx # v6.8+
---
include/linux/swap.h | 3 ++-
mm/filemap.c | 5 ++++-
mm/workingset.c | 14 +++++++++++---
3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index bd450023b9a4..e685e93ba354 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -354,7 +354,8 @@ static inline swp_entry_t page_swap_entry(struct page *page)
}

/* linux/mm/workingset.c */
-bool workingset_test_recent(void *shadow, bool file, bool *workingset);
+bool workingset_test_recent(void *shadow, bool file, bool *workingset,
+ bool flush);
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
void workingset_refault(struct folio *folio, void *shadow);
diff --git a/mm/filemap.c b/mm/filemap.c
index fedefb10d947..298485d4b992 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -4248,6 +4248,9 @@ static void filemap_cachestat(struct address_space *mapping,
XA_STATE(xas, &mapping->i_pages, first_index);
struct folio *folio;

+ /* Flush stats (and potentially sleep) outside the RCU read section. */
+ mem_cgroup_flush_stats_ratelimited(NULL);
+
rcu_read_lock();
xas_for_each(&xas, folio, last_index) {
int order;
@@ -4311,7 +4314,7 @@ static void filemap_cachestat(struct address_space *mapping,
goto resched;
}
#endif
- if (workingset_test_recent(shadow, true, &workingset))
+ if (workingset_test_recent(shadow, true, &workingset, false))
cs->nr_recently_evicted += nr_pages;

goto resched;
diff --git a/mm/workingset.c b/mm/workingset.c
index c22adb93622a..a2b28e356e68 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -412,10 +412,12 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg)
* @file: whether the corresponding folio is from the file lru.
* @workingset: where the workingset value unpacked from shadow should
* be stored.
+ * @flush: whether to flush cgroup rstat.
*
* Return: true if the shadow is for a recently evicted folio; false otherwise.
*/
-bool workingset_test_recent(void *shadow, bool file, bool *workingset)
+bool workingset_test_recent(void *shadow, bool file, bool *workingset,
+ bool flush)
{
struct mem_cgroup *eviction_memcg;
struct lruvec *eviction_lruvec;
@@ -467,10 +469,16 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset)

/*
* Flush stats (and potentially sleep) outside the RCU read section.
+ *
+ * Note that workingset_test_recent() itself might be called in RCU read
+ * section (for e.g, in cachestat) - these callers need to skip flushing
+ * stats (via the flush argument).
+ *
* XXX: With per-memcg flushing and thresholding, is ratelimiting
* still needed here?
*/
- mem_cgroup_flush_stats_ratelimited(eviction_memcg);
+ if (flush)
+ mem_cgroup_flush_stats_ratelimited(eviction_memcg);

eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
refault = atomic_long_read(&eviction_lruvec->nonresident_age);
@@ -558,7 +566,7 @@ void workingset_refault(struct folio *folio, void *shadow)

mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);

- if (!workingset_test_recent(shadow, file, &workingset))
+ if (!workingset_test_recent(shadow, file, &workingset, true))
return;

folio_set_active(folio);

base-commit: a5c6fededf806aba1ff9b0f01278f7d089da5725
--
2.43.0