Re: [RFC PATCH] mm: Avoiding split large folios if swap has no space

From: Johannes Weiner

Date: Fri Jun 26 2026 - 06:02:11 EST


On Fri, Jun 26, 2026 at 02:15:58PM +0800, Barry Song wrote:
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -5578,7 +5578,7 @@ int __init mem_cgroup_init(void)
> *
> * Returns 0 on success, -ENOMEM on failure.
> */
> -int __mem_cgroup_try_charge_swap(struct folio *folio)
> +int __mem_cgroup_try_charge_swap(struct folio *folio, long *left_space)
> {
> unsigned int nr_pages = folio_nr_pages(folio);
> struct swap_cluster_info *ci;
> @@ -5611,6 +5611,10 @@ int __mem_cgroup_try_charge_swap(struct folio *folio)
> memcg_memory_event(memcg, MEMCG_SWAP_MAX);
> memcg_memory_event(memcg, MEMCG_SWAP_FAIL);
> mem_cgroup_private_id_put(memcg, nr_pages);
> + if (folio_test_large(folio))
> + *left_space = mem_cgroup_get_nr_swap_pages(memcg);

It's a bit awkward to walk up the whole hierarchy again when we
already have the counter that failed. Please do something like this
(not tested!), then call page_counter_margin() on the failing @counter
here instead of mem_cgroup_get_nr_swap_pages():

---

diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index d649b6bbbc87..07b7cb12249c 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -68,6 +68,7 @@ static inline unsigned long page_counter_read(struct page_counter *counter)
return atomic_long_read(&counter->usage);
}

+long page_counter_margin(struct page_counter *counter);
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages);
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages);
bool page_counter_try_charge(struct page_counter *counter,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 772bac21d155..02472008144f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5275,12 +5275,9 @@ long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg)
{
long nr_swap_pages = get_nr_swap_pages();

- if (mem_cgroup_disabled() || do_memsw_account())
- return nr_swap_pages;
- for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg))
- nr_swap_pages = min_t(long, nr_swap_pages,
- READ_ONCE(memcg->swap.max) -
- page_counter_read(&memcg->swap));
+ if (!mem_cgroup_disabled() && !do_memsw_account())
+ nr_swap_pages = min(nr_swap_pages, page_counter_margin(&memcg->swap));
+
return nr_swap_pages;
}

diff --git a/mm/page_counter.c b/mm/page_counter.c
index 661e0f2a5127..a0874f853ae0 100644
--- a/mm/page_counter.c
+++ b/mm/page_counter.c
@@ -46,6 +46,22 @@ static void propagate_protected_usage(struct page_counter *c,
}
}

+/**
+ * page_counter_margin - remaining usable space within hierarchical limits
+ * @counter: counter
+ */
+long page_counter_margin(struct page_counter *counter)
+{
+ long margin = PAGE_COUNTER_MAX;
+
+ do {
+ long m = READ_ONCE(counter->max) - page_counter_read(counter);
+ margin = min(margin, m);
+ } while ((counter = counter->parent));
+
+ return margin;
+}
+
/**
* page_counter_cancel - take pages out of the local counter
* @counter: counter