Re: [v5 4/4] mm: hugetlb: Skip initialization of gigantic tail struct pages if freed by HVO

From: Mike Kravetz
Date: Thu Sep 14 2023 - 11:04:24 EST


On 09/13/23 11:54, Usama Arif wrote:
> The new boot flow when it comes to initialization of gigantic pages
> is as follows:
> - At boot time, for a gigantic page during __alloc_bootmem_huge_page,
> the region after the first struct page is marked as noinit.
> - As a result, only the first struct page is initialized in
> reserve_bootmem_region. As the tail struct pages are
> not initialized at this point, there can be a significant saving
> in boot time if HVO succeeds later on.
> - Later on in the boot, the head page is prepped and the first
> HUGETLB_VMEMMAP_RESERVE_SIZE / sizeof(struct page) - 1 tail struct pages
> are initialized.
> - HVO is attempted. If it is not successful, then the rest of the
> tail struct pages are initialized. If it is successful, no more
> tail struct pages need to be initialized saving significant boot time.
>
> The WARN_ON for increased ref count in gather_bootmem_prealloc was changed
> to a VM_BUG_ON. This is OK as there should be no speculative references
> this early in boot process. The VM_BUG_ON's are there just in case such code
> is introduced.
>
> Signed-off-by: Usama Arif <usama.arif@xxxxxxxxxxxxx>
> ---
> mm/hugetlb.c | 63 +++++++++++++++++++++++++++++++++++++-------
> mm/hugetlb_vmemmap.c | 2 +-
> mm/hugetlb_vmemmap.h | 9 ++++---
> mm/internal.h | 3 +++
> mm/mm_init.c | 2 +-
> 5 files changed, 64 insertions(+), 15 deletions(-)

Thank you for the continued changes. Code looks good,

Reviewed-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
--
Mike Kravetz

>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index c32ca241df4b..ed37c6e4e952 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -3169,6 +3169,15 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
> }
>
> found:
> +
> + /*
> + * Only initialize the head struct page in memmap_init_reserved_pages,
> + * rest of the struct pages will be initialized by the HugeTLB subsystem itself.
> + * The head struct page is used to get folio information by the HugeTLB
> + * subsystem like zone id and node id.
> + */
> + memblock_reserved_mark_noinit(virt_to_phys((void *)m + PAGE_SIZE),
> + huge_page_size(h) - PAGE_SIZE);
> /* Put them into a private list first because mem_map is not up yet */
> INIT_LIST_HEAD(&m->list);
> list_add(&m->list, &huge_boot_pages);
> @@ -3176,6 +3185,42 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
> return 1;
> }
>
> +/* Initialize [start_page:end_page_number] tail struct pages of a hugepage */
> +static void __init hugetlb_folio_init_tail_vmemmap(struct folio *folio,
> + unsigned long start_page_number,
> + unsigned long end_page_number)
> +{
> + enum zone_type zone = zone_idx(folio_zone(folio));
> + int nid = folio_nid(folio);
> + unsigned long head_pfn = folio_pfn(folio);
> + unsigned long pfn, end_pfn = head_pfn + end_page_number;
> + int ret;
> +
> + for (pfn = head_pfn + start_page_number; pfn < end_pfn; pfn++) {
> + struct page *page = pfn_to_page(pfn);
> +
> + __init_single_page(page, pfn, zone, nid);
> + prep_compound_tail((struct page *)folio, pfn - head_pfn);
> + ret = page_ref_freeze(page, 1);
> + VM_BUG_ON(!ret);
> + }
> +}
> +
> +static void __init hugetlb_folio_init_vmemmap(struct folio *folio, struct hstate *h,
> + unsigned long nr_pages)
> +{
> + int ret;
> +
> + /* Prepare folio head */
> + __folio_clear_reserved(folio);
> + __folio_set_head(folio);
> + ret = page_ref_freeze(&folio->page, 1);
> + VM_BUG_ON(!ret);
> + /* Initialize the necessary tail struct pages */
> + hugetlb_folio_init_tail_vmemmap(folio, 1, nr_pages);
> + prep_compound_head((struct page *)folio, huge_page_order(h));
> +}
> +
> /*
> * Put bootmem huge pages into the standard lists after mem_map is up.
> * Note: This only applies to gigantic (order > MAX_ORDER) pages.
> @@ -3186,19 +3231,19 @@ static void __init gather_bootmem_prealloc(void)
>
> list_for_each_entry(m, &huge_boot_pages, list) {
> struct page *page = virt_to_page(m);
> - struct folio *folio = page_folio(page);
> + struct folio *folio = (void *)page;
> struct hstate *h = m->hstate;
>
> VM_BUG_ON(!hstate_is_gigantic(h));
> WARN_ON(folio_ref_count(folio) != 1);
> - if (prep_compound_gigantic_folio(folio, huge_page_order(h))) {
> - WARN_ON(folio_test_reserved(folio));
> - prep_new_hugetlb_folio(h, folio, folio_nid(folio));
> - free_huge_folio(folio); /* add to the hugepage allocator */
> - } else {
> - /* VERY unlikely inflated ref count on a tail page */
> - free_gigantic_folio(folio, huge_page_order(h));
> - }
> +
> + hugetlb_folio_init_vmemmap(folio, h, HUGETLB_VMEMMAP_RESERVE_PAGES);
> + prep_new_hugetlb_folio(h, folio, folio_nid(folio));
> + /* If HVO fails, initialize all tail struct pages */
> + if (!HPageVmemmapOptimized(&folio->page))
> + hugetlb_folio_init_tail_vmemmap(folio, HUGETLB_VMEMMAP_RESERVE_PAGES,
> + pages_per_huge_page(h));
> + free_huge_folio(folio); /* add to the hugepage allocator */
>
> /*
> * We need to restore the 'stolen' pages to totalram_pages
> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> index 3cdb38d87a95..772a877918d7 100644
> --- a/mm/hugetlb_vmemmap.c
> +++ b/mm/hugetlb_vmemmap.c
> @@ -589,7 +589,7 @@ static int __init hugetlb_vmemmap_init(void)
> const struct hstate *h;
>
> /* HUGETLB_VMEMMAP_RESERVE_SIZE should cover all used struct pages */
> - BUILD_BUG_ON(__NR_USED_SUBPAGE * sizeof(struct page) > HUGETLB_VMEMMAP_RESERVE_SIZE);
> + BUILD_BUG_ON(__NR_USED_SUBPAGE > HUGETLB_VMEMMAP_RESERVE_PAGES);
>
> for_each_hstate(h) {
> if (hugetlb_vmemmap_optimizable(h)) {
> diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
> index 25bd0e002431..4573899855d7 100644
> --- a/mm/hugetlb_vmemmap.h
> +++ b/mm/hugetlb_vmemmap.h
> @@ -10,15 +10,16 @@
> #define _LINUX_HUGETLB_VMEMMAP_H
> #include <linux/hugetlb.h>
>
> -#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
> -int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head);
> -void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head);
> -
> /*
> * Reserve one vmemmap page, all vmemmap addresses are mapped to it. See
> * Documentation/vm/vmemmap_dedup.rst.
> */
> #define HUGETLB_VMEMMAP_RESERVE_SIZE PAGE_SIZE
> +#define HUGETLB_VMEMMAP_RESERVE_PAGES (HUGETLB_VMEMMAP_RESERVE_SIZE / sizeof(struct page))
> +
> +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
> +int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head);
> +void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head);
>
> static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h)
> {
> diff --git a/mm/internal.h b/mm/internal.h
> index d1d4bf4e63c0..d74061aa6de7 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -1154,4 +1154,7 @@ struct vma_prepare {
> struct vm_area_struct *remove;
> struct vm_area_struct *remove2;
> };
> +
> +void __meminit __init_single_page(struct page *page, unsigned long pfn,
> + unsigned long zone, int nid);
> #endif /* __MM_INTERNAL_H */
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 50f2f34745af..fed4370b02e1 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -555,7 +555,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
> node_states[N_MEMORY] = saved_node_state;
> }
>
> -static void __meminit __init_single_page(struct page *page, unsigned long pfn,
> +void __meminit __init_single_page(struct page *page, unsigned long pfn,
> unsigned long zone, int nid)
> {
> mm_zero_struct_page(page);
> --
> 2.25.1
>