Re: [PATCH 2/2] mm: page_alloc: tighten up find_suitable_fallback()
From: Shivank Garg
Date: Thu Apr 10 2025 - 06:51:02 EST
On 4/7/2025 11:31 PM, Johannes Weiner wrote:
> find_suitable_fallback() is not as efficient as it could be, and
> somewhat difficult to follow.
>
> 1. The result of should_try_claim_block() is a loop invariant. There is
> no point in checking fallback areas if the caller is interested in
> claimable blocks but the order and the migratetype don't allow for that.
>
> 2. __rmqueue_steal() doesn't care about claimability, so it shouldn't
> have to run those tests.
>
> Different callers want different things from this helper:
>
> 1. __compact_finished() scans orders up until it finds a claimable block
> 2. __rmqueue_claim() scans orders down as long as blocks are claimable
> 3. __rmqueue_steal() doesn't care about claimability at all
>
> Move should_try_claim_block() out of the loop. Only test it for the
> two callers who care in the first place. Distinguish "no blocks" from
> "order + mt are not claimable" in the return value; __rmqueue_claim()
> can stop once order becomes unclaimable, __compact_finished() can keep
> advancing until order becomes claimable.
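
To restate the resulting contract for anyone following along, here is a
condensed sketch of how the three callers consume the return value after
this patch. These are simplified fragments lifted from the hunks below,
not verbatim kernel code:

        /* 1. __compact_finished(): scan orders up until a claimable block */
        if (find_suitable_fallback(area, order, migratetype, true) >= 0)
                /* ... movable-pages check elided ... */
                return COMPACT_SUCCESS;

        /* 2. __rmqueue_claim(): inside the descending-order loop */
        fallback_mt = find_suitable_fallback(area, current_order,
                                             start_migratetype, true);
        if (fallback_mt == -1)          /* No block in that order */
                continue;
        if (fallback_mt == -2)          /* Order/mt no longer claimable */
                break;

        /* 3. __rmqueue_steal(): claimability ignored, -2 never returned */
        fallback_mt = find_suitable_fallback(area, current_order,
                                             start_migratetype, false);
        if (fallback_mt == -1)
                continue;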
>
> Before:
>
> Performance counter stats for './run case-lru-file-mmap-read' (5 runs):
>
> 85,294.85 msec task-clock # 5.644 CPUs utilized ( +- 0.32% )
> 15,968 context-switches # 187.209 /sec ( +- 3.81% )
> 153 cpu-migrations # 1.794 /sec ( +- 3.29% )
> 801,808 page-faults # 9.400 K/sec ( +- 0.10% )
> 733,358,331,786 instructions # 1.87 insn per cycle ( +- 0.20% ) (64.94%)
> 392,622,904,199 cycles # 4.603 GHz ( +- 0.31% ) (64.84%)
> 148,563,488,531 branches # 1.742 G/sec ( +- 0.18% ) (63.86%)
> 152,143,228 branch-misses # 0.10% of all branches ( +- 1.19% ) (62.82%)
>
> 15.1128 +- 0.0637 seconds time elapsed ( +- 0.42% )
>
> After:
>
> Performance counter stats for './run case-lru-file-mmap-read' (5 runs):
>
> 84,380.21 msec task-clock # 5.664 CPUs utilized ( +- 0.21% )
> 16,656 context-switches # 197.392 /sec ( +- 3.27% )
> 151 cpu-migrations # 1.790 /sec ( +- 3.28% )
> 801,703 page-faults # 9.501 K/sec ( +- 0.09% )
> 731,914,183,060 instructions # 1.88 insn per cycle ( +- 0.38% ) (64.90%)
> 388,673,535,116 cycles # 4.606 GHz ( +- 0.24% ) (65.06%)
> 148,251,482,143 branches # 1.757 G/sec ( +- 0.37% ) (63.92%)
> 149,766,550 branch-misses # 0.10% of all branches ( +- 1.22% ) (62.88%)
>
> 14.8968 +- 0.0486 seconds time elapsed ( +- 0.33% )
>
> Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>
> ---
> mm/compaction.c | 4 +---
> mm/internal.h | 2 +-
> mm/page_alloc.c | 31 +++++++++++++------------------
> 3 files changed, 15 insertions(+), 22 deletions(-)
>
> diff --git a/mm/compaction.c b/mm/compaction.c
> index 139f00c0308a..7462a02802a5 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -2348,7 +2348,6 @@ static enum compact_result __compact_finished(struct compact_control *cc)
> ret = COMPACT_NO_SUITABLE_PAGE;
> for (order = cc->order; order < NR_PAGE_ORDERS; order++) {
> struct free_area *area = &cc->zone->free_area[order];
> - bool claim_block;
>
> /* Job done if page is free of the right migratetype */
> if (!free_area_empty(area, migratetype))
> @@ -2364,8 +2363,7 @@ static enum compact_result __compact_finished(struct compact_control *cc)
> * Job done if allocation would steal freepages from
> * other migratetype buddy lists.
> */
> - if (find_suitable_fallback(area, order, migratetype,
> - true, &claim_block) != -1)
> + if (find_suitable_fallback(area, order, migratetype, true) >= 0)
> /*
> * Movable pages are OK in any pageblock. If we are
> * stealing for a non-movable allocation, make sure
> diff --git a/mm/internal.h b/mm/internal.h
> index 50c2f590b2d0..55384b9971c3 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -915,7 +915,7 @@ static inline void init_cma_pageblock(struct page *page)
>
>
> int find_suitable_fallback(struct free_area *area, unsigned int order,
> - int migratetype, bool claim_only, bool *claim_block);
> + int migratetype, bool claimable);
>
> static inline bool free_area_empty(struct free_area *area, int migratetype)
> {
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 03b0d45ed45a..1522e3a29b16 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2077,31 +2077,25 @@ static bool should_try_claim_block(unsigned int order, int start_mt)
>
> /*
> * Check whether there is a suitable fallback freepage with requested order.
> - * Sets *claim_block to instruct the caller whether it should convert a whole
> - * pageblock to the returned migratetype.
> - * If only_claim is true, this function returns fallback_mt only if
> + * If claimable is true, this function returns fallback_mt only if
> * we would do this whole-block claiming. This would help to reduce
> * fragmentation due to mixed migratetype pages in one pageblock.
> */
> int find_suitable_fallback(struct free_area *area, unsigned int order,
> - int migratetype, bool only_claim, bool *claim_block)
> + int migratetype, bool claimable)
> {
> int i;
> - int fallback_mt;
> +
> + if (claimable && !should_try_claim_block(order, migratetype))
> + return -2;
>
> if (area->nr_free == 0)
> return -1;
>
> - *claim_block = false;
> for (i = 0; i < MIGRATE_PCPTYPES - 1 ; i++) {
> - fallback_mt = fallbacks[migratetype][i];
> - if (free_area_empty(area, fallback_mt))
> - continue;
> + int fallback_mt = fallbacks[migratetype][i];
>
> - if (should_try_claim_block(order, migratetype))
> - *claim_block = true;
> -
> - if (*claim_block || !only_claim)
> + if (!free_area_empty(area, fallback_mt))
> return fallback_mt;
> }
>
> @@ -2206,7 +2200,6 @@ __rmqueue_claim(struct zone *zone, int order, int start_migratetype,
> int min_order = order;
> struct page *page;
> int fallback_mt;
> - bool claim_block;
>
> /*
> * Do not steal pages from freelists belonging to other pageblocks
> @@ -2225,11 +2218,14 @@ __rmqueue_claim(struct zone *zone, int order, int start_migratetype,
> --current_order) {
> area = &(zone->free_area[current_order]);
> fallback_mt = find_suitable_fallback(area, current_order,
> - start_migratetype, false, &claim_block);
> + start_migratetype, true);
> +
> + /* No block in that order */
> if (fallback_mt == -1)
> continue;
>
> - if (!claim_block)
> + /* Advanced into orders too low to claim, abort */
> + if (fallback_mt == -2)
> break;
>
> page = get_page_from_free_area(area, fallback_mt);
> @@ -2254,12 +2250,11 @@ __rmqueue_steal(struct zone *zone, int order, int start_migratetype)
> int current_order;
> struct page *page;
> int fallback_mt;
> - bool claim_block;
>
> for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) {
> area = &(zone->free_area[current_order]);
> fallback_mt = find_suitable_fallback(area, current_order,
> - start_migratetype, false, &claim_block);
> + start_migratetype, false);
> if (fallback_mt == -1)
> continue;
>
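For readers who prefer the end state over the hunks, the helper after this
patch boils down to the following (reconstructed from the diff above;
indentation approximated, and the trailing return -1 is the unchanged tail
of the existing function):

        int find_suitable_fallback(struct free_area *area, unsigned int order,
                                   int migratetype, bool claimable)
        {
                int i;

                /* Claimability depends only on order and migratetype */
                if (claimable && !should_try_claim_block(order, migratetype))
                        return -2;

                if (area->nr_free == 0)
                        return -1;

                for (i = 0; i < MIGRATE_PCPTYPES - 1; i++) {
                        int fallback_mt = fallbacks[migratetype][i];

                        if (!free_area_empty(area, fallback_mt))
                                return fallback_mt;
                }

                return -1;
        }
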
Tested-by: Shivank Garg <shivankg@xxxxxxx>
Thanks,
Shivank