Re: [PATCH v3 09/12] mm, swap: consolidate cluster allocation helpers
From: Chris Li
Date: Fri May 08 2026 - 01:03:09 EST
On Tue, Apr 21, 2026 at 2:16 AM Kairui Song via B4 Relay
<devnull+kasong.tencent.com@xxxxxxxxxx> wrote:
>
> From: Kairui Song <kasong@xxxxxxxxxxx>
>
> Swap cluster table management is spread across several narrow
> helpers. As a result, the allocation and fallback sequences are
> open-coded in multiple places.
>
> A few more per-cluster tables will be added soon, so avoid
> duplicating these sequences per table type. Fold the existing
> pairs into cluster-oriented helpers, and rename for consistency.
>
> No functional change, only a few sanity checks are slightly adjusted.
>
> Signed-off-by: Kairui Song <kasong@xxxxxxxxxxx>
Acked-by: Chris Li <chrisl@xxxxxxxxxx>
Chris
> ---
> mm/swapfile.c | 110 ++++++++++++++++++++++++++--------------------------------
> 1 file changed, 49 insertions(+), 61 deletions(-)
>
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index 8d3d22c463f3..2d16aa89a4fd 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -411,20 +411,7 @@ static inline unsigned int cluster_offset(struct swap_info_struct *si,
> return cluster_index(si, ci) * SWAPFILE_CLUSTER;
> }
>
> -static struct swap_table *swap_table_alloc(gfp_t gfp)
> -{
> - struct folio *folio;
> -
> - if (!SWP_TABLE_USE_PAGE)
> - return kmem_cache_zalloc(swap_table_cachep, gfp);
> -
> - folio = folio_alloc(gfp | __GFP_ZERO, 0);
> - if (folio)
> - return folio_address(folio);
> - return NULL;
> -}
> -
> -static void swap_table_free_folio_rcu_cb(struct rcu_head *head)
> +static void swap_cluster_free_table_folio_rcu_cb(struct rcu_head *head)
> {
> struct folio *folio;
>
> @@ -432,15 +419,46 @@ static void swap_table_free_folio_rcu_cb(struct rcu_head *head)
> folio_put(folio);
> }
>
> -static void swap_table_free(struct swap_table *table)
> +static void swap_cluster_free_table(struct swap_cluster_info *ci)
> {
> + struct swap_table *table;
> +
> + table = (struct swap_table *)rcu_dereference_protected(ci->table, true);
> + if (!table)
> + return;
> +
> + rcu_assign_pointer(ci->table, NULL);
> if (!SWP_TABLE_USE_PAGE) {
> kmem_cache_free(swap_table_cachep, table);
> return;
> }
>
> call_rcu(&(folio_page(virt_to_folio(table), 0)->rcu_head),
> - swap_table_free_folio_rcu_cb);
> + swap_cluster_free_table_folio_rcu_cb);
> +}
> +
> +static int swap_cluster_alloc_table(struct swap_cluster_info *ci, gfp_t gfp)
> +{
> + struct swap_table *table = NULL;
> + struct folio *folio;
> +
> + /* The cluster must be empty and not on any list during allocation. */
> + VM_WARN_ON_ONCE(ci->flags || !cluster_is_empty(ci));
> + if (rcu_access_pointer(ci->table))
> + return 0;
> +
> + if (SWP_TABLE_USE_PAGE) {
> + folio = folio_alloc(gfp | __GFP_ZERO, 0);
> + if (folio)
> + table = folio_address(folio);
> + } else {
> + table = kmem_cache_zalloc(swap_table_cachep, gfp);
> + }
> + if (!table)
> + return -ENOMEM;
> +
> + rcu_assign_pointer(ci->table, table);
> + return 0;
> }
>
> /*
> @@ -471,27 +489,15 @@ static void swap_cluster_assert_empty(struct swap_cluster_info *ci,
> WARN_ON_ONCE(nr == SWAPFILE_CLUSTER && ci->extend_table);
> }
>
> -static void swap_cluster_free_table(struct swap_cluster_info *ci)
> -{
> - struct swap_table *table;
> -
> - /* Only empty cluster's table is allow to be freed */
> - lockdep_assert_held(&ci->lock);
> - table = (void *)rcu_dereference_protected(ci->table, true);
> - rcu_assign_pointer(ci->table, NULL);
> -
> - swap_table_free(table);
> -}
> -
> /*
> * Allocate swap table for one cluster. Attempt an atomic allocation first,
> * then fallback to sleeping allocation.
> */
> static struct swap_cluster_info *
> -swap_cluster_alloc_table(struct swap_info_struct *si,
> +swap_cluster_populate(struct swap_info_struct *si,
> struct swap_cluster_info *ci)
> {
> - struct swap_table *table;
> + int ret;
>
> /*
> * Only cluster isolation from the allocator does table allocation.
> @@ -502,14 +508,9 @@ swap_cluster_alloc_table(struct swap_info_struct *si,
> lockdep_assert_held(&si->global_cluster_lock);
> lockdep_assert_held(&ci->lock);
>
> - /* The cluster must be free and was just isolated from the free list. */
> - VM_WARN_ON_ONCE(ci->flags || !cluster_is_empty(ci));
> -
> - table = swap_table_alloc(__GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
> - if (table) {
> - rcu_assign_pointer(ci->table, table);
> + if (!swap_cluster_alloc_table(ci, __GFP_HIGH | __GFP_NOMEMALLOC |
> + __GFP_NOWARN))
> return ci;
> - }
>
> /*
> * Try a sleep allocation. Each isolated free cluster may cause
> @@ -521,7 +522,8 @@ swap_cluster_alloc_table(struct swap_info_struct *si,
> spin_unlock(&si->global_cluster_lock);
> local_unlock(&percpu_swap_cluster.lock);
>
> - table = swap_table_alloc(__GFP_HIGH | __GFP_NOMEMALLOC | GFP_KERNEL);
> + ret = swap_cluster_alloc_table(ci, __GFP_HIGH | __GFP_NOMEMALLOC |
> + GFP_KERNEL);
>
> /*
> * Back to atomic context. We might have migrated to a new CPU with a
> @@ -536,20 +538,11 @@ swap_cluster_alloc_table(struct swap_info_struct *si,
> spin_lock(&si->global_cluster_lock);
> spin_lock(&ci->lock);
>
> - /* Nothing except this helper should touch a dangling empty cluster. */
> - if (WARN_ON_ONCE(cluster_table_is_alloced(ci))) {
> - if (table)
> - swap_table_free(table);
> - return ci;
> - }
> -
> - if (!table) {
> + if (ret) {
> move_cluster(si, ci, &si->free_clusters, CLUSTER_FLAG_FREE);
> spin_unlock(&ci->lock);
> return NULL;
> }
> -
> - rcu_assign_pointer(ci->table, table);
> return ci;
> }
>
> @@ -621,12 +614,11 @@ static struct swap_cluster_info *isolate_lock_cluster(
> }
> spin_unlock(&si->lock);
>
> - if (found && !cluster_table_is_alloced(found)) {
> - /* Only an empty free cluster's swap table can be freed. */
> - VM_WARN_ON_ONCE(flags != CLUSTER_FLAG_FREE);
> + /* Cluster's table is freed when and only when it's on the free list. */
> + if (found && flags == CLUSTER_FLAG_FREE) {
> VM_WARN_ON_ONCE(list != &si->free_clusters);
> - VM_WARN_ON_ONCE(!cluster_is_empty(found));
> - return swap_cluster_alloc_table(si, found);
> + VM_WARN_ON_ONCE(cluster_table_is_alloced(found));
> + return swap_cluster_populate(si, found);
> }
>
> return found;
> @@ -769,7 +761,6 @@ static int swap_cluster_setup_bad_slot(struct swap_info_struct *si,
> unsigned int ci_off = offset % SWAPFILE_CLUSTER;
> unsigned long idx = offset / SWAPFILE_CLUSTER;
> struct swap_cluster_info *ci;
> - struct swap_table *table;
> int ret = 0;
>
> /* si->max may got shrunk by swap swap_activate() */
> @@ -790,12 +781,9 @@ static int swap_cluster_setup_bad_slot(struct swap_info_struct *si,
> }
>
> ci = cluster_info + idx;
> - if (!ci->table) {
> - table = swap_table_alloc(GFP_KERNEL);
> - if (!table)
> - return -ENOMEM;
> - rcu_assign_pointer(ci->table, table);
> - }
> + /* Need to allocate swap table first for initial bad slot marking. */
> + if (!ci->count && swap_cluster_alloc_table(ci, GFP_KERNEL))
> + return -ENOMEM;
> spin_lock(&ci->lock);
> /* Check for duplicated bad swap slots. */
> if (__swap_table_xchg(ci, ci_off, SWP_TB_BAD) != SWP_TB_NULL) {
> @@ -2992,7 +2980,7 @@ static void free_swap_cluster_info(struct swap_cluster_info *cluster_info,
> ci = cluster_info + i;
> /* Cluster with bad marks count will have a remaining table */
> spin_lock(&ci->lock);
> - if (rcu_dereference_protected(ci->table, true)) {
> + if (cluster_table_is_alloced(ci)) {
> swap_cluster_assert_empty(ci, 0, SWAPFILE_CLUSTER, true);
> swap_cluster_free_table(ci);
> }
>
> --
> 2.53.0
>
>