[PATCH RESEND v3 2/2] mm/shmem: use xas_try_split() in shmem_split_large_entry()

From: Zi Yan
Date: Fri Mar 14 2025 - 18:22:04 EST


During shmem_split_large_entry(), large swap entries are covering n slots
and an order-0 folio needs to be inserted.

Instead of splitting all n slots, only the 1 slot covered by the folio
need to be split and the remaining n-1 shadow entries can be retained with
orders ranging from 0 to n-1. This method only requires
(n/XA_CHUNK_SHIFT) new xa_nodes instead of (n % XA_CHUNK_SHIFT) *
(n/XA_CHUNK_SHIFT) new xa_nodes, compared to the original
xas_split_alloc() + xas_split() one.

For example, to split an order-9 large swap entry (assuming XA_CHUNK_SHIFT
is 6), 1 xa_node is needed instead of 8.

xas_try_split_min_order() is used to reduce the number of calls to
xas_try_split() during split.

Cc: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
Cc: Hugh Dickins <hughd@xxxxxxxxxx>
Cc: Kairui Song <kasong@xxxxxxxxxxx>
Cc: Mattew Wilcox <willy@xxxxxxxxxxxxx>
Cc: Miaohe Lin <linmiaohe@xxxxxxxxxx>
Cc: David Hildenbrand <david@xxxxxxxxxx>
Cc: John Hubbard <jhubbard@xxxxxxxxxx>
Cc: Kefeng Wang <wangkefeng.wang@xxxxxxxxxx>
Cc: Kirill A. Shuemov <kirill.shutemov@xxxxxxxxxxxxxxx>
Cc: Ryan Roberts <ryan.roberts@xxxxxxx>
Cc: Yang Shi <yang@xxxxxxxxxxxxxxxxxxxxxx>
Cc: Yu Zhao <yuzhao@xxxxxxxxxx>
Reviewed-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
Tested-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
Signed-off-by: Zi Yan <ziy@xxxxxxxxxx>
---
mm/shmem.c | 59 ++++++++++++++++++++++++++----------------------------
1 file changed, 28 insertions(+), 31 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 5586903950b3..7b738d8d6581 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2153,15 +2153,16 @@ static int shmem_split_large_entry(struct inode *inode, pgoff_t index,
{
struct address_space *mapping = inode->i_mapping;
XA_STATE_ORDER(xas, &mapping->i_pages, index, 0);
- void *alloced_shadow = NULL;
- int alloced_order = 0, i;
+ int split_order = 0, entry_order;
+ int i;

/* Convert user data gfp flags to xarray node gfp flags */
gfp &= GFP_RECLAIM_MASK;

for (;;) {
- int order = -1, split_order = 0;
void *old = NULL;
+ int cur_order;
+ pgoff_t swap_index;

xas_lock_irq(&xas);
old = xas_load(&xas);
@@ -2170,60 +2171,56 @@ static int shmem_split_large_entry(struct inode *inode, pgoff_t index,
goto unlock;
}

- order = xas_get_order(&xas);
+ entry_order = xas_get_order(&xas);

- /* Swap entry may have changed before we re-acquire the lock */
- if (alloced_order &&
- (old != alloced_shadow || order != alloced_order)) {
- xas_destroy(&xas);
- alloced_order = 0;
- }
+ if (!entry_order)
+ goto unlock;

/* Try to split large swap entry in pagecache */
- if (order > 0) {
- if (!alloced_order) {
- split_order = order;
+ cur_order = entry_order;
+ swap_index = round_down(index, 1 << entry_order);
+
+ split_order = xas_try_split_min_order(cur_order);
+
+ while (cur_order > 0) {
+ pgoff_t aligned_index =
+ round_down(index, 1 << cur_order);
+ pgoff_t swap_offset = aligned_index - swap_index;
+
+ xas_set_order(&xas, index, split_order);
+ xas_try_split(&xas, old, cur_order);
+ if (xas_error(&xas))
goto unlock;
- }
- xas_split(&xas, old, order);

/*
* Re-set the swap entry after splitting, and the swap
* offset of the original large entry must be continuous.
*/
- for (i = 0; i < 1 << order; i++) {
- pgoff_t aligned_index = round_down(index, 1 << order);
+ for (i = 0; i < 1 << cur_order;
+ i += (1 << split_order)) {
swp_entry_t tmp;

- tmp = swp_entry(swp_type(swap), swp_offset(swap) + i);
+ tmp = swp_entry(swp_type(swap),
+ swp_offset(swap) + swap_offset +
+ i);
__xa_store(&mapping->i_pages, aligned_index + i,
swp_to_radix_entry(tmp), 0);
}
+ cur_order = split_order;
+ split_order = xas_try_split_min_order(split_order);
}

unlock:
xas_unlock_irq(&xas);

- /* split needed, alloc here and retry. */
- if (split_order) {
- xas_split_alloc(&xas, old, split_order, gfp);
- if (xas_error(&xas))
- goto error;
- alloced_shadow = old;
- alloced_order = split_order;
- xas_reset(&xas);
- continue;
- }
-
if (!xas_nomem(&xas, gfp))
break;
}

-error:
if (xas_error(&xas))
return xas_error(&xas);

- return alloced_order;
+ return entry_order;
}

/*
--
2.47.2