[PATCH v2 07/11] mm, swap: support flexible batch freeing of slots in different memcg
From: Kairui Song via B4 Relay
Date: Thu Apr 16 2026 - 14:39:08 EST
From: Kairui Song <kasong@xxxxxxxxxxx>
Instead of requiring the caller to ensure all slots belong to the same
memcg, make the function handle slots from different memcgs in a single
call: consecutive slots sharing the same cgroup id are uncharged
together in batches.
Signed-off-by: Kairui Song <kasong@xxxxxxxxxxx>
---
mm/swapfile.c | 33 +++++++++++++++++++++++++++++----
1 file changed, 29 insertions(+), 4 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2211d290ae95..b0efae57b973 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1872,21 +1872,46 @@ void __swap_cluster_free_entries(struct swap_info_struct *si,
unsigned int ci_start, unsigned int nr_pages)
{
unsigned long old_tb;
+ unsigned int type = si->type;
+ unsigned short id = 0, id_cur;
unsigned int ci_off = ci_start, ci_end = ci_start + nr_pages;
- unsigned long offset = cluster_offset(si, ci) + ci_start;
+ unsigned long offset = cluster_offset(si, ci);
+ unsigned int ci_batch = ci_off;
+ swp_entry_t entry;
VM_WARN_ON(ci->count < nr_pages);
ci->count -= nr_pages;
do {
old_tb = __swap_table_get(ci, ci_off);
- /* Release the last ref, or after swap cache is dropped */
+ /*
+ * Freeing is done after release of the last swap count
+ * ref, or after swap cache is dropped
+ */
VM_WARN_ON(!swp_tb_is_shadow(old_tb) || __swp_tb_get_count(old_tb) > 1);
__swap_table_set(ci, ci_off, null_to_swp_tb());
+
+ /*
+ * Uncharge swap slots by memcg in batches. Consecutive
+ * slots with the same cgroup id are uncharged together.
+ */
+ entry = swp_entry(type, offset + ci_off);
+ id_cur = lookup_swap_cgroup_id(entry);
+ if (id != id_cur) {
+ if (id)
+ mem_cgroup_uncharge_swap(swp_entry(type, offset + ci_batch),
+ ci_off - ci_batch);
+ id = id_cur;
+ ci_batch = ci_off;
+ }
} while (++ci_off < ci_end);
- mem_cgroup_uncharge_swap(swp_entry(si->type, offset), nr_pages);
- swap_range_free(si, offset, nr_pages);
+ if (id) {
+ mem_cgroup_uncharge_swap(swp_entry(type, offset + ci_batch),
+ ci_off - ci_batch);
+ }
+
+ swap_range_free(si, offset + ci_start, nr_pages);
swap_cluster_assert_empty(ci, ci_start, nr_pages, false);
if (!ci->count)
--
2.53.0