Re: [PATCH v2 1/5] hugetlb: use page.private for hugetlb specific page flags

From: Miaohe Lin
Date: Wed Jan 20 2021 - 03:13:17 EST


Hi:
On 2021/1/20 9:30, Mike Kravetz wrote:
> As hugetlbfs evolved, state information about hugetlb pages was added.
> One 'convenient' way of doing this was to use available fields in tail
> pages. Over time, it has become difficult to know the meaning or contents
> of fields simply by looking at a small bit of code. Sometimes, the
> naming is just confusing. For example: The PagePrivate flag indicates
> a huge page reservation was consumed and needs to be restored if an error
> is encountered and the page is freed before it is instantiated. The

This PagePrivate flag really confused me for a long time. :(

> page.private field contains the pointer to a subpool if the page is
> associated with one.
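
IIUC, the old scheme (just a sketch of my own reading of the above,
not code copied verbatim from anywhere) was:

	SetPagePrivate(page);				/* reservation consumed */
	set_page_private(page, (unsigned long)spool);	/* subpool pointer */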
>
> In an effort to make the code more readable, use page.private to contain
> hugetlb specific page flags. These flags will have test, set and clear
> functions similar to those used for 'normal' page flags. More importantly,
> an enum of flag values will be created with names that actually reflect
> their purpose.

Thanks for doing this. This would make life easier.
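
To confirm my understanding: the new helpers follow the usual page
flag naming style, so call sites end up reading like this (taken from
the __free_huge_page() hunk below):

	restore_reserve = HPageRestoreReserve(page);
	ClearHPageRestoreReserve(page);

Much clearer than the old PagePrivate().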

>
> In this patch,
> - Create infrastructure for hugetlb specific page flag functions
> - Move subpool pointer to page[1].private to make way for flags
>   Create routines with meaningful names to modify subpool field
> - Use new HPageRestoreReserve flag instead of PagePrivate
>
> Conversion of other state information will happen in subsequent patches.
>
> Signed-off-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
> ---
> fs/hugetlbfs/inode.c | 12 ++-----
> include/linux/hugetlb.h | 72 +++++++++++++++++++++++++++++++++++++++++
> mm/hugetlb.c | 45 +++++++++++++-------------
> 3 files changed, 97 insertions(+), 32 deletions(-)
>
> diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
> index 740693d7f255..b8a661780c4a 100644
> --- a/fs/hugetlbfs/inode.c
> +++ b/fs/hugetlbfs/inode.c
> @@ -955,15 +955,9 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
> if (rc != MIGRATEPAGE_SUCCESS)
> return rc;
>
> - /*
> - * page_private is subpool pointer in hugetlb pages. Transfer to
> - * new page. PagePrivate is not associated with page_private for
> - * hugetlb pages and can not be set here as only page_huge_active
> - * pages can be migrated.
> - */
> - if (page_private(page)) {
> - set_page_private(newpage, page_private(page));
> - set_page_private(page, 0);
> + if (hugetlb_page_subpool(page)) {
> + hugetlb_set_page_subpool(newpage, hugetlb_page_subpool(page));
> + hugetlb_set_page_subpool(page, NULL);
> }
>
> if (mode != MIGRATE_SYNC_NO_COPY)
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index ef5b144b8aac..be71a00ee2a0 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -472,6 +472,64 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
> unsigned long flags);
> #endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
>
> +/*
> + * hugetlb page specific state flags. These flags are located in page.private
> + * of the hugetlb head page. Functions created via the below macros should be
> + * used to manipulate these flags.
> + *
> + * HPG_restore_reserve - Set when a hugetlb page consumes a reservation at
> + * allocation time. Cleared when page is fully instantiated. Free
> + * routine checks flag to restore a reservation on error paths.
> + */
> +enum hugetlb_page_flags {
> + HPG_restore_reserve = 0,
> + __NR_HPAGEFLAGS,
> +};
> +
> +/*
> + * Macros to create test, set and clear function definitions for
> + * hugetlb specific page flags.
> + */
> +#ifdef CONFIG_HUGETLB_PAGE
> +#define TESTHPAGEFLAG(uname, flname) \
> +static inline int HPage##uname(struct page *page) \
> + { BUILD_BUG_ON(sizeof_field(struct page, private) * \
> + BITS_PER_BYTE < __NR_HPAGEFLAGS); \
> + return test_bit(HPG_##flname, &(page->private)); }
> +
> +#define SETHPAGEFLAG(uname, flname) \
> +static inline void SetHPage##uname(struct page *page) \
> + { set_bit(HPG_##flname, &(page->private)); }
> +
> +#define CLEARHPAGEFLAG(uname, flname) \
> +static inline void ClearHPage##uname(struct page *page) \
> + { clear_bit(HPG_##flname, &(page->private)); }
> +#else
> +#define TESTHPAGEFLAG(uname, flname) \
> +static inline int HPage##uname(struct page *page) \
> + { BUILD_BUG_ON(sizeof_field(struct page, private) * \
> + BITS_PER_BYTE < __NR_HPAGEFLAGS); \
> + return 0; }
> +
> +#define SETHPAGEFLAG(uname, flname) \
> +static inline void SetHPage##uname(struct page *page) \
> + { }
> +
> +#define CLEARHPAGEFLAG(uname, flname) \
> +static inline void ClearHPage##uname(struct page *page) \
> + { }
> +#endif
> +
> +#define HPAGEFLAG(uname, flname) \
> + TESTHPAGEFLAG(uname, flname) \
> + SETHPAGEFLAG(uname, flname) \
> + CLEARHPAGEFLAG(uname, flname) \
> +
> +/*
> + * Create functions associated with hugetlb page flags
> + */
> +HPAGEFLAG(RestoreReserve, restore_reserve)
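
FYI for anyone skimming: with CONFIG_HUGETLB_PAGE=y this single line
expands to three inline helpers, roughly (BUILD_BUG_ON elided):

	static inline int HPageRestoreReserve(struct page *page)
	{ return test_bit(HPG_restore_reserve, &(page->private)); }

	static inline void SetHPageRestoreReserve(struct page *page)
	{ set_bit(HPG_restore_reserve, &(page->private)); }

	static inline void ClearHPageRestoreReserve(struct page *page)
	{ clear_bit(HPG_restore_reserve, &(page->private)); }
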
> +
> #ifdef CONFIG_HUGETLB_PAGE
>
> #define HSTATE_NAME_LEN 32
> @@ -531,6 +589,20 @@ extern unsigned int default_hstate_idx;
>
> #define default_hstate (hstates[default_hstate_idx])
>
> +/*
> + * hugetlb page subpool pointer located in hpage[1].private
> + */
> +static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
> +{
> + return (struct hugepage_subpool *)(hpage+1)->private;
> +}
> +
> +static inline void hugetlb_set_page_subpool(struct page *hpage,
> + struct hugepage_subpool *subpool)
> +{
> + set_page_private(hpage+1, (unsigned long)subpool);
> +}
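
So, IIUC, the private fields of a hugetlb page end up laid out as:

	page[0].private : hugetlb specific page flags (HPG_*)
	page[1].private : struct hugepage_subpool * (or NULL)
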
> +
> static inline struct hstate *hstate_file(struct file *f)
> {
> return hstate_inode(file_inode(f));
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 737b2dce19e6..8bed6b5202d2 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1133,7 +1133,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
> nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
> page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
> if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
> - SetPagePrivate(page);
> + SetHPageRestoreReserve(page);
> h->resv_huge_pages--;
> }
>
> @@ -1407,20 +1407,19 @@ static void __free_huge_page(struct page *page)
> */
> struct hstate *h = page_hstate(page);
> int nid = page_to_nid(page);
> - struct hugepage_subpool *spool =
> - (struct hugepage_subpool *)page_private(page);
> + struct hugepage_subpool *spool = hugetlb_page_subpool(page);
> bool restore_reserve;
>
> VM_BUG_ON_PAGE(page_count(page), page);
> VM_BUG_ON_PAGE(page_mapcount(page), page);
>
> - set_page_private(page, 0);
> + hugetlb_set_page_subpool(page, NULL);
> page->mapping = NULL;
> - restore_reserve = PagePrivate(page);
> - ClearPagePrivate(page);
> + restore_reserve = HPageRestoreReserve(page);
> + ClearHPageRestoreReserve(page);
>
> /*
> - * If PagePrivate() was set on page, page allocation consumed a
> + * If HPageRestoreReserve was set on page, page allocation consumed a
> * reservation. If the page was associated with a subpool, there
> * would have been a page reserved in the subpool before allocation
> * via hugepage_subpool_get_pages(). Since we are 'restoring' the
> @@ -2254,24 +2253,24 @@ static long vma_add_reservation(struct hstate *h,
> * This routine is called to restore a reservation on error paths. In the
> * specific error paths, a huge page was allocated (via alloc_huge_page)
> * and is about to be freed. If a reservation for the page existed,
> - * alloc_huge_page would have consumed the reservation and set PagePrivate
> - * in the newly allocated page. When the page is freed via free_huge_page,
> - * the global reservation count will be incremented if PagePrivate is set.
> - * However, free_huge_page can not adjust the reserve map. Adjust the
> - * reserve map here to be consistent with global reserve count adjustments
> - * to be made by free_huge_page.
> + * alloc_huge_page would have consumed the reservation and set
> + * HPageRestoreReserve in the newly allocated page. When the page is freed
> + * via free_huge_page, the global reservation count will be incremented if
> + * HPageRestoreReserve is set. However, free_huge_page can not adjust the
> + * reserve map. Adjust the reserve map here to be consistent with global
> + * reserve count adjustments to be made by free_huge_page.
> */
> static void restore_reserve_on_error(struct hstate *h,
> struct vm_area_struct *vma, unsigned long address,
> struct page *page)
> {
> - if (unlikely(PagePrivate(page))) {
> + if (unlikely(HPageRestoreReserve(page))) {
> long rc = vma_needs_reservation(h, vma, address);
>
> if (unlikely(rc < 0)) {
> /*
> * Rare out of memory condition in reserve map
> - * manipulation. Clear PagePrivate so that
> + * manipulation. Clear HPageRestoreReserve so that
> * global reserve count will not be incremented
> * by free_huge_page. This will make it appear
> * as though the reservation for this page was
> @@ -2280,7 +2279,7 @@ static void restore_reserve_on_error(struct hstate *h,
> * is better than inconsistent global huge page
> * accounting of reserve counts.
> */
> - ClearPagePrivate(page);
> + ClearHPageRestoreReserve(page);
> } else if (rc) {
> rc = vma_add_reservation(h, vma, address);
> if (unlikely(rc < 0))
> @@ -2288,7 +2287,7 @@ static void restore_reserve_on_error(struct hstate *h,
> * See above comment about rare out of
> * memory condition.
> */
> - ClearPagePrivate(page);
> + ClearHPageRestoreReserve(page);
> } else
> vma_end_reservation(h, vma, address);
> }
> @@ -2369,7 +2368,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
> if (!page)
> goto out_uncharge_cgroup;
> if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
> - SetPagePrivate(page);
> + SetHPageRestoreReserve(page);
> h->resv_huge_pages--;
> }
> spin_lock(&hugetlb_lock);
> @@ -2387,7 +2386,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
>
> spin_unlock(&hugetlb_lock);
>
> - set_page_private(page, (unsigned long)spool);
> + hugetlb_set_page_subpool(page, spool);
>
> map_commit = vma_commit_reservation(h, vma, addr);
> if (unlikely(map_chg > map_commit)) {
> @@ -4212,7 +4211,7 @@ static vm_fault_t hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
> spin_lock(ptl);
> ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
> if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
> - ClearPagePrivate(new_page);
> + ClearHPageRestoreReserve(new_page);
>
> /* Break COW */
> huge_ptep_clear_flush(vma, haddr, ptep);
> @@ -4279,7 +4278,7 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
>
> if (err)
> return err;
> - ClearPagePrivate(page);
> + ClearHPageRestoreReserve(page);
>
> /*
> * set page dirty so that it will not be removed from cache/file
> @@ -4441,7 +4440,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
> goto backout;
>
> if (anon_rmap) {
> - ClearPagePrivate(page);
> + ClearHPageRestoreReserve(page);
> hugepage_add_new_anon_rmap(page, vma, haddr);
> } else
> page_dup_rmap(page, true);
> @@ -4755,7 +4754,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
> if (vm_shared) {
> page_dup_rmap(page, true);
> } else {
> - ClearPagePrivate(page);
> + ClearHPageRestoreReserve(page);
> hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
> }
>
>

Looks good to me. Thanks.

Reviewed-by: Miaohe Lin <linmiaohe@xxxxxxxxxx>