Re: [PATCH mm-unstable v1 2/3] mm/codetag: fix pgalloc_tag_split()

From: Suren Baghdasaryan
Date: Thu Sep 05 2024 - 23:00:59 EST


On Tue, Sep 3, 2024 at 2:36 PM Yu Zhao <yuzhao@xxxxxxxxxx> wrote:
>
> Only tag the new head pages when splitting one large folio to multiple
> ones of a lower order. Tagging tail pages can cause imbalanced "calls"
> counters, since only head pages are untagged by pgalloc_tag_sub() and
> reference counts on tail pages are leaked, e.g.,
> # echo 2048kB >/sys/kernel/mm/hugepages/hugepages-1048576kB/demote_size
> # echo 700 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
> # time echo 700 >/sys/kernel/mm/hugepages/hugepages-1048576kB/demote
> # grep alloc_gigantic_folio /proc/allocinfo
>
> Before this patch:
> 0 549427200 mm/hugetlb.c:1549 func:alloc_gigantic_folio
>
> real 0m2.057s
> user 0m0.000s
> sys 0m2.051s
>
> After this patch:
> 0 0 mm/hugetlb.c:1549 func:alloc_gigantic_folio
>
> real 0m1.711s
> user 0m0.000s
> sys 0m1.704s
>
> Not tagging tail pages also improves the splitting time, e.g., by
> about 15% when demoting 1GB hugeTLB folios to 2MB ones, as shown
> above.
>
> Fixes: be25d1d4e822 ("mm: create new codetag references during page splitting")
> Signed-off-by: Yu Zhao <yuzhao@xxxxxxxxxx>
> ---
> include/linux/mm.h | 30 ++++++++++++++++++++++++++++++
> include/linux/pgalloc_tag.h | 31 -------------------------------
> mm/huge_memory.c | 2 +-
> mm/hugetlb.c | 2 +-
> mm/page_alloc.c | 4 ++--
> 5 files changed, 34 insertions(+), 35 deletions(-)
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index b31d4bdd65ad..a07e93adb8ad 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -4137,4 +4137,34 @@ void vma_pgtable_walk_end(struct vm_area_struct *vma);
>
> int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size);
>
> +#ifdef CONFIG_MEM_ALLOC_PROFILING
> +static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order)
> +{
> + int i;
> + struct alloc_tag *tag;
> + unsigned int nr_pages = 1 << new_order;
> +
> + if (!mem_alloc_profiling_enabled())
> + return;
> +
> + tag = pgalloc_tag_get(&folio->page);
> + if (!tag)
> + return;
> +
> + for (i = nr_pages; i < (1 << old_order); i += nr_pages) {
> + union codetag_ref *ref = get_page_tag_ref(folio_page(folio, i));
> +
> + if (ref) {
> + /* Set new reference to point to the original tag */
> + alloc_tag_ref_set(ref, tag);
> + put_page_tag_ref(ref);
> + }
> + }
> +}
> +#else /* !CONFIG_MEM_ALLOC_PROFILING */
> +static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order)
> +{
> +}
> +#endif /* CONFIG_MEM_ALLOC_PROFILING */
> +
> #endif /* _LINUX_MM_H */
> diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
> index 207f0c83c8e9..59a3deb792a8 100644
> --- a/include/linux/pgalloc_tag.h
> +++ b/include/linux/pgalloc_tag.h
> @@ -80,36 +80,6 @@ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr)
> }
> }
>
> -static inline void pgalloc_tag_split(struct page *page, unsigned int nr)
> -{
> - int i;
> - struct page_ext *first_page_ext;
> - struct page_ext *page_ext;
> - union codetag_ref *ref;
> - struct alloc_tag *tag;
> -
> - if (!mem_alloc_profiling_enabled())
> - return;
> -
> - first_page_ext = page_ext = page_ext_get(page);
> - if (unlikely(!page_ext))
> - return;
> -
> - ref = codetag_ref_from_page_ext(page_ext);
> - if (!ref->ct)
> - goto out;
> -
> - tag = ct_to_alloc_tag(ref->ct);
> - page_ext = page_ext_next(page_ext);
> - for (i = 1; i < nr; i++) {
> - /* Set new reference to point to the original tag */
> - alloc_tag_ref_set(codetag_ref_from_page_ext(page_ext), tag);
> - page_ext = page_ext_next(page_ext);
> - }
> -out:
> - page_ext_put(first_page_ext);
> -}
> -
> static inline struct alloc_tag *pgalloc_tag_get(struct page *page)
> {
> struct alloc_tag *tag = NULL;
> @@ -142,7 +112,6 @@ static inline void clear_page_tag_ref(struct page *page) {}
> static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
> unsigned int nr) {}
> static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
> -static inline void pgalloc_tag_split(struct page *page, unsigned int nr) {}
> static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; }
> static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 0993dfe9ae94..aa8a4c938ba9 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -3244,7 +3244,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
> /* Caller disabled irqs, so they are still disabled here */
>
> split_page_owner(head, order, new_order);
> - pgalloc_tag_split(head, 1 << order);
> + pgalloc_tag_split(folio, order, new_order);

Looks like here and in the next diff you are fixing an additional bug.
I was assuming that we are always splitting into order-0 pages, which
is not the case anymore. It's worth mentioning that in the changelog.
With that added:

Acked-by: Suren Baghdasaryan <surenb@xxxxxxxxxx>

>
> /* See comment in __split_huge_page_tail() */
> if (folio_test_anon(folio)) {
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 3faf5aad142d..a8624c07d8bf 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -3778,7 +3778,7 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst,
> list_del(&folio->lru);
>
> split_page_owner(&folio->page, huge_page_order(src), huge_page_order(dst));
> - pgalloc_tag_split(&folio->page, 1 << huge_page_order(src));
> + pgalloc_tag_split(folio, huge_page_order(src), huge_page_order(dst));
>
> for (i = 0; i < pages_per_huge_page(src); i += pages_per_huge_page(dst)) {
> struct page *page = folio_page(folio, i);
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index c242d61fc4fd..13ce8e8899ed 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2822,7 +2822,7 @@ void split_page(struct page *page, unsigned int order)
> for (i = 1; i < (1 << order); i++)
> set_page_refcounted(page + i);
> split_page_owner(page, order, 0);
> - pgalloc_tag_split(page, 1 << order);
> + pgalloc_tag_split(page_folio(page), order, 0);
> split_page_memcg(page, order, 0);
> }
> EXPORT_SYMBOL_GPL(split_page);
> @@ -5020,7 +5020,7 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order,
> struct page *last = page + nr;
>
> split_page_owner(page, order, 0);
> - pgalloc_tag_split(page, 1 << order);
> + pgalloc_tag_split(page_folio(page), order, 0);
> split_page_memcg(page, order, 0);
> while (page < --last)
> set_page_refcounted(last);
> --
> 2.46.0.469.g59c65b2a67-goog
>