[PATCH RESEND v3 3/9] mm/huge_memory: add buddy allocator like folio_split()

From: Zi Yan
Date: Wed Dec 04 2024 - 19:19:17 EST


folio_split() splits a large folio in the same way as the buddy allocator
splits a large free page for allocation. The purpose is to minimize the
number of folios after the split. For example, if the user wants to free the
3rd subpage in an order-9 folio, folio_split() will split the order-9 folio
as:
O-0, O-0, O-0, O-0, O-2, O-3, O-4, O-5, O-6, O-7, O-8 if it is anon
O-1, O-0, O-0, O-2, O-3, O-4, O-5, O-6, O-7, O-8 if it is pagecache
The anon case has no O-1 folio because anon folios do not support order-1 yet.

It generates fewer folios than the existing page split approach, which splits
the order-9 folio into 512 order-0 folios.

folio_split() and the existing split_huge_page_to_list_to_order() share
the folio unmapping and remapping code in __folio_split() and the common
backend split code in __split_unmapped_folio(). A uniform_split parameter
distinguishes their operations: split_huge_page_to_list_to_order() passes
true and folio_split() passes false.

Signed-off-by: Zi Yan <ziy@xxxxxxxxxx>
---
mm/huge_memory.c | 56 +++++++++++++++++++++++++++++++++++-------------
1 file changed, 41 insertions(+), 15 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e928082be3b2..4f4f3b50c537 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3731,11 +3731,10 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
}

static int __folio_split(struct folio *folio, unsigned int new_order,
- struct page *page, struct list_head *list)
+ struct page *page, struct list_head *list, bool uniform_split)
{
struct deferred_split *ds_queue = get_deferred_split_queue(folio);
- /* reset xarray order to new order after split */
- XA_STATE_ORDER(xas, &folio->mapping->i_pages, folio->index, new_order);
+ XA_STATE(xas, &folio->mapping->i_pages, folio->index);
bool is_anon = folio_test_anon(folio);
struct address_space *mapping = NULL;
struct anon_vma *anon_vma = NULL;
@@ -3756,9 +3755,10 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
VM_WARN_ONCE(1, "Cannot split to order-1 folio");
return -EINVAL;
}
- } else if (new_order) {
+ } else {
/* Split shmem folio to non-zero order not supported */
- if (shmem_mapping(folio->mapping)) {
+ if ((!uniform_split || new_order) &&
+ shmem_mapping(folio->mapping)) {
VM_WARN_ONCE(1,
"Cannot split shmem folio to non-0 order");
return -EINVAL;
@@ -3769,7 +3769,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
* CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
* does not actually support large folios properly.
*/
- if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+ if (new_order && IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
!mapping_large_folio_support(folio->mapping)) {
VM_WARN_ONCE(1,
"Cannot split file folio to non-0 order");
@@ -3778,7 +3778,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
}

/* Only swapping a whole PMD-mapped folio is supported */
- if (folio_test_swapcache(folio) && new_order)
+ if (folio_test_swapcache(folio) && (!uniform_split || new_order))
return -EINVAL;

is_hzp = is_huge_zero_folio(folio);
@@ -3835,10 +3835,13 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
goto out;
}

- xas_split_alloc(&xas, folio, folio_order(folio), gfp);
- if (xas_error(&xas)) {
- ret = xas_error(&xas);
- goto out;
+ if (uniform_split) {
+ xas_set_order(&xas, folio->index, new_order);
+ xas_split_alloc(&xas, folio, folio_order(folio), gfp);
+ if (xas_error(&xas)) {
+ ret = xas_error(&xas);
+ goto out;
+ }
}

anon_vma = NULL;
@@ -3903,7 +3906,6 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
if (mapping) {
int nr = folio_nr_pages(folio);

- xas_split(&xas, folio, folio_order(folio));
if (folio_test_pmd_mappable(folio) &&
new_order < HPAGE_PMD_ORDER) {
if (folio_test_swapbacked(folio)) {
@@ -3921,8 +3923,8 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1);
mod_mthp_stat(new_order, MTHP_STAT_NR_ANON, 1 << (order - new_order));
}
- __split_huge_page(page, list, end, new_order);
- ret = 0;
+ ret = __split_unmapped_folio(page_folio(page), new_order,
+ page, list, end, &xas, mapping, uniform_split);
} else {
spin_unlock(&ds_queue->split_queue_lock);
fail:
@@ -4000,7 +4002,31 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
{
struct folio *folio = page_folio(page);

- return __folio_split(folio, new_order, page, list);
+ return __folio_split(folio, new_order, page, list, true);
+}
+
+/*
+ * folio_split: split a folio so that @page ends up in a @new_order folio
+ * @folio: folio to split
+ * @new_order: the order of the new folio
+ * @page: a page within the new folio
+ * @list: forwarded unchanged to __folio_split()
+ *
+ * return: 0: successful, <0 failed (if -ENOMEM is returned, @folio might be
+ * split but not to @new_order, the caller needs to check)
+ *
+ * Non-uniform split (calls __folio_split() with uniform_split == false): the
+ * rest of the original folio is left in folios as large as possible. For
+ * example, splitting an order-9 folio at its third order-3 subpage (there are
+ * 2^6=64 of them) to an order-3 folio gives, with the new folio in braces:
+ * [order-4, {order-3}, order-3, order-5, order-6, order-7, order-8].
+ *
+ * After split, folio is left locked for caller.
+ */
+int folio_split(struct folio *folio, unsigned int new_order,
+ struct page *page, struct list_head *list)
+{
+ return __folio_split(folio, new_order, page, list, false);
}

int min_order_for_split(struct folio *folio)
--
2.45.2