Re: [PATCH 2/2] mm/compaction: speed up pageblock_pfn_to_page() when zone is contiguous

From: zhong jiang
Date: Tue Jan 19 2016 - 03:29:31 EST


On 2015/12/21 14:13, Joonsoo Kim wrote:
> There is a performance drop report due to hugepage allocation and in there
> half of cpu time are spent on pageblock_pfn_to_page() in compaction [1].
> In that workload, compaction is triggered to make hugepage but most of
> pageblocks are un-available for compaction due to pageblock type and
> skip bit so compaction usually fails. Most costly operations in this case
> is to find valid pageblock while scanning whole zone range. To check
> if pageblock is valid to compact, valid pfn within pageblock is required
> and we can obtain it by calling pageblock_pfn_to_page(). This function
> checks whether pageblock is in a single zone and return valid pfn
> if possible. Problem is that we need to check it every time before
> scanning pageblock even if we re-visit it and this turns out to
> be very expensive in this workload.
>
> Although we have no way to skip this pageblock check in the system
> where hole exists at arbitrary position, we can use cached value for
> zone continuity and just do pfn_to_page() in the system where hole doesn't
> exist. This optimization considerably speeds up in above workload.
>
> Before vs After
> Max: 1096 MB/s vs 1325 MB/s
> Min: 635 MB/s 1015 MB/s
> Avg: 899 MB/s 1194 MB/s
>
> Avg is improved by roughly 30% [2].
>
> [1]: http://www.spinics.net/lists/linux-mm/msg97378.html
> [2]: https://lkml.org/lkml/2015/12/9/23
>
> v2
> o checking zone continuity after initialization
> o handle memory-hotplug case
>
> Reported and Tested-by: Aaron Lu <aaron.lu@xxxxxxxxx>
> Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>
> ---
> include/linux/gfp.h | 6 ---
> include/linux/memory_hotplug.h | 3 ++
> include/linux/mmzone.h | 2 +
> mm/compaction.c | 43 ---------------------
> mm/internal.h | 12 ++++++
> mm/memory_hotplug.c | 10 +++++
> mm/page_alloc.c | 85 +++++++++++++++++++++++++++++++++++++++++-
> 7 files changed, 111 insertions(+), 50 deletions(-)
>
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 91f74e7..6eb3eca 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -515,13 +515,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
> void drain_all_pages(struct zone *zone);
> void drain_local_pages(struct zone *zone);
>
> -#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
> void page_alloc_init_late(void);
> -#else
> -static inline void page_alloc_init_late(void)
> -{
> -}
> -#endif
>
> /*
> * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index 2ea574f..18c2676 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -196,6 +196,9 @@ void put_online_mems(void);
> void mem_hotplug_begin(void);
> void mem_hotplug_done(void);
>
> +extern void set_zone_contiguous(struct zone *zone);
> +extern void clear_zone_contiguous(struct zone *zone);
> +
> #else /* ! CONFIG_MEMORY_HOTPLUG */
> /*
> * Stub functions for when hotplug is off
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 68cc063..eb5d88e 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -523,6 +523,8 @@ struct zone {
> bool compact_blockskip_flush;
> #endif
>
> + bool contiguous;
> +
> ZONE_PADDING(_pad3_)
> /* Zone statistics */
> atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
> diff --git a/mm/compaction.c b/mm/compaction.c
> index 56fa321..9c89d46 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -71,49 +71,6 @@ static inline bool migrate_async_suitable(int migratetype)
> return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
> }
>
> -/*
> - * Check that the whole (or subset of) a pageblock given by the interval of
> - * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
> - * with the migration of free compaction scanner. The scanners then need to
> - * use only pfn_valid_within() check for arches that allow holes within
> - * pageblocks.
> - *
> - * Return struct page pointer of start_pfn, or NULL if checks were not passed.
> - *
> - * It's possible on some configurations to have a setup like node0 node1 node0
> - * i.e. it's possible that all pages within a zones range of pages do not
> - * belong to a single zone. We assume that a border between node0 and node1
> - * can occur within a single pageblock, but not a node0 node1 node0
> - * interleaving within a single pageblock. It is therefore sufficient to check
> - * the first and last page of a pageblock and avoid checking each individual
> - * page in a pageblock.
> - */
> -static struct page *pageblock_pfn_to_page(unsigned long start_pfn,
> - unsigned long end_pfn, struct zone *zone)
> -{
> - struct page *start_page;
> - struct page *end_page;
> -
> - /* end_pfn is one past the range we are checking */
> - end_pfn--;
> -
> - if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn))
> - return NULL;
> -
> - start_page = pfn_to_page(start_pfn);
> -
> - if (page_zone(start_page) != zone)
> - return NULL;
> -
> - end_page = pfn_to_page(end_pfn);
> -
> - /* This gives a shorter code than deriving page_zone(end_page) */
> - if (page_zone_id(start_page) != page_zone_id(end_page))
> - return NULL;
> -
> - return start_page;
> -}
> -
> #ifdef CONFIG_COMPACTION
>
> /* Do not skip compaction more than 64 times */
> diff --git a/mm/internal.h b/mm/internal.h
> index d01a41c..bc9d337 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -137,6 +137,18 @@ __find_buddy_index(unsigned long page_idx, unsigned int order)
> return page_idx ^ (1 << order);
> }
>
> +extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
> + unsigned long end_pfn, struct zone *zone);
> +
> +static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
> + unsigned long end_pfn, struct zone *zone)
> +{
> + if (zone->contiguous)
> + return pfn_to_page(start_pfn);
> +
> + return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
> +}
> +
> extern int __isolate_free_page(struct page *page, unsigned int order);
> extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
> unsigned int order);
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index d8016a2..f7b6e6b 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -505,6 +505,9 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
> unsigned long i;
> int err = 0;
> int start_sec, end_sec;
> +
> + clear_zone_contiguous(zone);
> +
> /* during initialize mem_map, align hot-added range to section */
> start_sec = pfn_to_section_nr(phys_start_pfn);
> end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
> @@ -523,6 +526,8 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
> }
> vmemmap_populate_print_last();
>
> + set_zone_contiguous(zone);
> +
> return err;
> }
> EXPORT_SYMBOL_GPL(__add_pages);
> @@ -770,6 +775,8 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
> resource_size_t start, size;
> int ret = 0;
>
> + clear_zone_contiguous(zone);
> +
> /*
> * We can only remove entire sections
> */
> @@ -796,6 +803,9 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
> if (ret)
> break;
> }
> +
> + set_zone_contiguous(zone);
> +
> return ret;
> }
> EXPORT_SYMBOL_GPL(__remove_pages);
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index bac8842..4f5ad2b 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1271,9 +1271,13 @@ free_range:
> pgdat_init_report_one_done();
> return 0;
> }
> +#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
>
> void __init page_alloc_init_late(void)
> {
> + struct zone *zone;
> +
> +#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
> int nid;
>
> /* There will be num_node_state(N_MEMORY) threads */
> @@ -1287,8 +1291,87 @@ void __init page_alloc_init_late(void)
>
> /* Reinit limits that are based on free pages after the kernel is up */
> files_maxfiles_init();
> +#endif
> +
> + for_each_populated_zone(zone)
> + set_zone_contiguous(zone);
> +}
> +
> +/*
> + * Check that the whole (or subset of) a pageblock given by the interval of
> + * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
> + * with the migration of free compaction scanner. The scanners then need to
> + * use only pfn_valid_within() check for arches that allow holes within
> + * pageblocks.
> + *
> + * Return struct page pointer of start_pfn, or NULL if checks were not passed.
> + *
> + * It's possible on some configurations to have a setup like node0 node1 node0
> + * i.e. it's possible that all pages within a zones range of pages do not
> + * belong to a single zone. We assume that a border between node0 and node1
> + * can occur within a single pageblock, but not a node0 node1 node0
> + * interleaving within a single pageblock. It is therefore sufficient to check
> + * the first and last page of a pageblock and avoid checking each individual
> + * page in a pageblock.
> + */
> +struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
> + unsigned long end_pfn, struct zone *zone)
> +{
> + struct page *start_page;
> + struct page *end_page;
> +
> + /* end_pfn is one past the range we are checking */
> + end_pfn--;
> +
> + if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn))
> + return NULL;
> +
> + start_page = pfn_to_page(start_pfn);
> +
> + if (page_zone(start_page) != zone)
> + return NULL;
> +
> + end_page = pfn_to_page(end_pfn);
> +
> + /* This gives a shorter code than deriving page_zone(end_page) */
> + if (page_zone_id(start_page) != page_zone_id(end_page))
> + return NULL;
> +
> + return start_page;
> +}
> +
> +void set_zone_contiguous(struct zone *zone)
> +{
> + unsigned long block_start_pfn = zone->zone_start_pfn;
> + unsigned long block_end_pfn;
> + unsigned long pfn;
> +
> + block_end_pfn = ALIGN(block_start_pfn + 1, pageblock_nr_pages);
> + for (; block_start_pfn < zone_end_pfn(zone);
> + block_start_pfn = block_end_pfn,
> + block_end_pfn += pageblock_nr_pages) {
> +
> + block_end_pfn = min(block_end_pfn, zone_end_pfn(zone));
> +
> + if (!__pageblock_pfn_to_page(block_start_pfn,
> + block_end_pfn, zone))
> + return;
> +
> + /* Check validity of pfn within pageblock */
> + for (pfn = block_start_pfn; pfn < block_end_pfn; pfn++) {
> + if (!pfn_valid_within(pfn))
> + return;
> + }
> + }
> +
> + /* We confirm that there is no hole */
> + zone->contiguous = true;
> +}
pfn_valid_within just to check whether the page frame have a valid
section. buf if this section have a hole, it will not work.

Thanks
zhongjiang


> +void clear_zone_contiguous(struct zone *zone)
> +{
> + zone->contiguous = false;
> }
> -#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
>
> #ifdef CONFIG_CMA
> /* Free whole pageblock and set its migration type to MIGRATE_CMA. */