Re: [PATCH v2 05/46] rmap: hugetlb: switch from page_dup_file_rmap to page_add_file_rmap

From: Jiaqi Yan
Date: Wed Mar 01 2023 - 20:07:21 EST


On Fri, Feb 17, 2023 at 4:28 PM James Houghton <jthoughton@xxxxxxxxxx> wrote:
>
> This only applies to file-backed HugeTLB, and it should be a no-op until
> high-granularity mapping is possible. Also update page_remove_rmap to
> support the eventual case where !compound && folio_test_hugetlb().
>
> HugeTLB doesn't use LRU or mlock, so we avoid those bits. This also
> means we don't need to use subpage_mapcount; if we did, it would
> overflow with only a few mappings.
>
> There is still one caller of page_dup_file_rmap left: copy_present_pte,
> and it is always called with compound=false in this case.
>
> Signed-off-by: James Houghton <jthoughton@xxxxxxxxxx>
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 08004371cfed..6c008c9de80e 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -5077,7 +5077,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
> * sleep during the process.
> */
> if (!PageAnon(ptepage)) {
> - page_dup_file_rmap(ptepage, true);
> + page_add_file_rmap(ptepage, src_vma, true);
> } else if (page_try_dup_anon_rmap(ptepage, true,
> src_vma)) {
> pte_t src_pte_old = entry;
> @@ -5910,7 +5910,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
> if (anon_rmap)
> hugepage_add_new_anon_rmap(folio, vma, haddr);
> else
> - page_dup_file_rmap(&folio->page, true);
> + page_add_file_rmap(&folio->page, vma, true);
> new_pte = make_huge_pte(vma, &folio->page, ((vma->vm_flags & VM_WRITE)
> && (vma->vm_flags & VM_SHARED)));
> /*
> @@ -6301,7 +6301,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
> goto out_release_unlock;
>
> if (folio_in_pagecache)
> - page_dup_file_rmap(&folio->page, true);
> + page_add_file_rmap(&folio->page, dst_vma, true);
> else
> hugepage_add_new_anon_rmap(folio, dst_vma, dst_addr);
>
> diff --git a/mm/migrate.c b/mm/migrate.c
> index d3964c414010..b0f87f19b536 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -254,7 +254,7 @@ static bool remove_migration_pte(struct folio *folio,
> hugepage_add_anon_rmap(new, vma, pvmw.address,
> rmap_flags);
> else
> - page_dup_file_rmap(new, true);
> + page_add_file_rmap(new, vma, true);
> set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
> } else
> #endif
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 15ae24585fc4..c010d0af3a82 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c

Given you are making hugetlb's ref/mapcount mechanism consistent with
THP's, I think the special folio_test_hugetlb checks you added in this
commit will break page_mapped() and folio_mapped() when the page/folio
is HGMed: with these checks in place, folio->_nr_pages_mapped is not
properly incremented/decremented.
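
For reference, with the mapcount rework this series is based on,
folio_mapped() on a large folio boils down to roughly the following
(paraphrased from memory, not the exact source):

	if (!folio_test_large(folio))
		return atomic_read(&folio->_mapcount) >= 0;
	/* large folio: PTE mappings show up in _nr_pages_mapped */
	return atomic_read(&folio->_nr_pages_mapped) > 0 ||
	       atomic_read(&folio->_entire_mapcount) >= 0;

So a hugetlb folio that is only HGM/PTE-mapped, and with your checks
never touches _nr_pages_mapped or _entire_mapcount, would look
unmapped.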

> @@ -1318,21 +1318,21 @@ void page_add_file_rmap(struct page *page, struct vm_area_struct *vma,
> int nr = 0, nr_pmdmapped = 0;
> bool first;
>
> - VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
> + VM_BUG_ON_PAGE(compound && !PageTransHuge(page)
> + && !folio_test_hugetlb(folio), page);
>
> /* Is page being mapped by PTE? Is this its first map to be added? */
> if (likely(!compound)) {
> first = atomic_inc_and_test(&page->_mapcount);
> nr = first;
> - if (first && folio_test_large(folio)) {
> + if (first && folio_test_large(folio)
> + && !folio_test_hugetlb(folio)) {

So we should still increment _nr_pages_mapped for the hugetlb case
here, and decrement it in the corresponding place in page_remove_rmap.
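
i.e. just keep the generic large-folio path here (untested, on top of
this patch; essentially the code before your change):

	if (first && folio_test_large(folio)) {
		nr = atomic_inc_return_relaxed(mapped);
		nr = (nr < COMPOUND_MAPPED);
	}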

> nr = atomic_inc_return_relaxed(mapped);
> nr = (nr < COMPOUND_MAPPED);
> }
> - } else if (folio_test_pmd_mappable(folio)) {
> - /* That test is redundant: it's for safety or to optimize out */
> -
> + } else {
> first = atomic_inc_and_test(&folio->_entire_mapcount);
> - if (first) {
> + if (first && !folio_test_hugetlb(folio)) {

Same here: we should still add COMPOUND_MAPPED to _nr_pages_mapped,
and subtract it in the corresponding place in page_remove_rmap.
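
i.e. something like (untested sketch):

	first = atomic_inc_and_test(&folio->_entire_mapcount);
	if (first) {
		nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped);
		...
	}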

> nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped);
> if (likely(nr < COMPOUND_MAPPED + COMPOUND_MAPPED)) {
> nr_pmdmapped = folio_nr_pages(folio);
> @@ -1347,6 +1347,9 @@ void page_add_file_rmap(struct page *page, struct vm_area_struct *vma,
> }
> }
>
> + if (folio_test_hugetlb(folio))
> + return;
> +
> if (nr_pmdmapped)
> __lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ?
> NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
> @@ -1376,8 +1379,7 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
> VM_BUG_ON_PAGE(compound && !PageHead(page), page);
>
> /* Hugetlb pages are not counted in NR_*MAPPED */
> - if (unlikely(folio_test_hugetlb(folio))) {
> - /* hugetlb pages are always mapped with pmds */
> + if (unlikely(folio_test_hugetlb(folio)) && compound) {
> atomic_dec(&folio->_entire_mapcount);
> return;
> }

This entire if-block should be removed once you drop the
!folio_test_hugetlb checks in page_add_file_rmap (and the
corresponding ones here in page_remove_rmap).
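
The compound hugetlb unmap would then fall through to the common path
below, something like (untested sketch; the NR_*MAPPED stats are still
skipped by the early return you add later in the function):

	last = atomic_add_negative(-1, &folio->_entire_mapcount);
	if (last) {
		nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
		...
	}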

> @@ -1386,15 +1388,14 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
> if (likely(!compound)) {
> last = atomic_add_negative(-1, &page->_mapcount);
> nr = last;
> - if (last && folio_test_large(folio)) {
> + if (last && folio_test_large(folio)
> + && !folio_test_hugetlb(folio)) {

ditto.

> nr = atomic_dec_return_relaxed(mapped);
> nr = (nr < COMPOUND_MAPPED);
> }
> - } else if (folio_test_pmd_mappable(folio)) {
> - /* That test is redundant: it's for safety or to optimize out */
> -
> + } else {
> last = atomic_add_negative(-1, &folio->_entire_mapcount);
> - if (last) {
> + if (last && !folio_test_hugetlb(folio)) {

ditto.

> nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
> if (likely(nr < COMPOUND_MAPPED)) {
> nr_pmdmapped = folio_nr_pages(folio);
> @@ -1409,6 +1410,9 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
> }
> }
>
> + if (folio_test_hugetlb(folio))
> + return;
> +
> if (nr_pmdmapped) {
> if (folio_test_anon(folio))
> idx = NR_ANON_THPS;
> --
> 2.39.2.637.g21b0678d19-goog
>