[PATCH] mm: ALLOC_HIGHATOMIC flag allocation issue

From: Zhiguo Jiang
Date: Thu Nov 23 2023 - 09:51:17 EST


When alloc_flags contains ALLOC_HIGHATOMIC and the allocation order is
1, 2, 3 or 10 in rmqueue(), a successful allocation from the pcplist
still causes a free pageblock to be moved from the allocated
migratetype's freelist to the MIGRATE_HIGHATOMIC freelist, instead of
the request being served from the MIGRATE_HIGHATOMIC freelist first.
As a result, the number of pages on the buddy highatomic freelist keeps
growing, which increases the risk of allocation failure on the other
migratetype freelists in buddy.

Currently the sequence of ALLOC_HIGHATOMIC allocation is:
pcplist --> rmqueue_bulk() --> rmqueue_buddy() MIGRATE_HIGHATOMIC -->
rmqueue_buddy() allocated migratetype.

Because allocating pages from the pcplist is faster than from buddy,
the pcplist is still tried first, but the rmqueue_bulk() refill step is
skipped for ALLOC_HIGHATOMIC requests. The allocation sequence becomes:
pcplist --> rmqueue_buddy() MIGRATE_HIGHATOMIC --> rmqueue_buddy()
allocated migratetype.

This patch fixes allocation failures for other migratetypes that are
caused by an excessive number of pages being reserved on the
MIGRATE_HIGHATOMIC freelist.

In comparative testing, the HighAtomic freelist sizes reported by
cat /proc/pagetypeinfo are:
Test without this patch:
Node 0, zone Normal, type HighAtomic 2369 771 138 15 0 0 0 0 0 0 0
Test with this patch:
Node 0, zone Normal, type HighAtomic 206 82 4 2 1 0 0 0 0 0 0

Signed-off-by: Zhiguo Jiang <justinjiang@xxxxxxxx>
---
mm/page_alloc.c | 34 +++++++++++++++++++++++++++++++---
1 file changed, 31 insertions(+), 3 deletions(-)
mode change 100644 => 100755 mm/page_alloc.c

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7a8dac0c1c74..693e86fc9850
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2850,11 +2850,20 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
int batch = nr_pcp_alloc(pcp, zone, order);
int alloced;

+ /*
+ * If pcplist is empty and alloc_flags contains
+ * ALLOC_HIGHATOMIC, alloc from buddy highatomic
+ * freelist first.
+ */
+ if (alloc_flags & ALLOC_HIGHATOMIC)
+ goto out;
+
alloced = rmqueue_bulk(zone, order,
batch, list,
migratetype, alloc_flags);

pcp->count += alloced << order;
+out:
if (unlikely(list_empty(list)))
return NULL;
}
@@ -2918,7 +2927,7 @@ static inline
struct page *rmqueue(struct zone *preferred_zone,
struct zone *zone, unsigned int order,
gfp_t gfp_flags, unsigned int alloc_flags,
- int migratetype)
+ int migratetype, bool *highatomic)
{
struct page *page;

@@ -2938,6 +2947,24 @@ struct page *rmqueue(struct zone *preferred_zone,
page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags,
migratetype);

+ /*
+ * The high-order atomic allocation pageblock reserved:
+ *
+ * If the high-order atomic page is allocated from pcplist,
+ * the highatomic pageblock does not need to be reserved,
+ * which can avoid migrating an increasing number of pages
+ * into buddy highatomic freelist and leading to an increased
+ * risk of allocation failure on other migrate freelists in
+ * buddy.
+ *
+ * If the high-order atomic page is allocated from buddy
+ * highatomic freelist, regardless of whether the allocation
+ * is successful or not, the highatomic pageblock can try to
+ * be reserved.
+ */
+ if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
+ *highatomic = true;
+
out:
/* Separate test+clear to avoid unnecessary atomics */
if ((alloc_flags & ALLOC_KSWAPD) &&
@@ -3208,6 +3235,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
struct pglist_data *last_pgdat = NULL;
bool last_pgdat_dirty_ok = false;
bool no_fallback;
+ bool highatomic = false;

retry:
/*
@@ -3339,7 +3367,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,

try_this_zone:
page = rmqueue(ac->preferred_zoneref->zone, zone, order,
- gfp_mask, alloc_flags, ac->migratetype);
+ gfp_mask, alloc_flags, ac->migratetype, &highatomic);
if (page) {
prep_new_page(page, order, gfp_mask, alloc_flags);

@@ -3347,7 +3375,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
* If this is a high-order atomic allocation then check
* if the pageblock should be reserved for the future
*/
- if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
+ if (unlikely(highatomic))
reserve_highatomic_pageblock(page, zone);

return page;
--
2.39.0