Re: [PATCH v1 15/39] mm/huge_memory: batch rmap operations in __split_huge_pmd_locked()

From: Ryan Roberts
Date: Mon Dec 18 2023 - 11:22:59 EST


On 11/12/2023 15:56, David Hildenbrand wrote:
> Let's use folio_add_anon_rmap_ptes(), batching the rmap operations.
>
> While at it, use more folio operations (but only in the code branch we're
> touching), use VM_WARN_ON_FOLIO(), and pass RMAP_EXCLUSIVE instead of
> manually setting PageAnonExclusive.
>
> We should never see non-anon pages on that branch: otherwise, the
> existing page_add_anon_rmap() call would have been flawed already.
>
> Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
> ---
> mm/huge_memory.c | 23 +++++++++++++++--------
> 1 file changed, 15 insertions(+), 8 deletions(-)
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 1f5634b2f374..82ad68fe0d12 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -2398,6 +2398,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
> unsigned long haddr, bool freeze)
> {
> struct mm_struct *mm = vma->vm_mm;
> + struct folio *folio;
> struct page *page;
> pgtable_t pgtable;
> pmd_t old_pmd, _pmd;
> @@ -2493,16 +2494,18 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
> uffd_wp = pmd_swp_uffd_wp(old_pmd);
> } else {
> page = pmd_page(old_pmd);
> + folio = page_folio(page);
> if (pmd_dirty(old_pmd)) {
> dirty = true;
> - SetPageDirty(page);
> + folio_set_dirty(folio);
> }
> write = pmd_write(old_pmd);
> young = pmd_young(old_pmd);
> soft_dirty = pmd_soft_dirty(old_pmd);
> uffd_wp = pmd_uffd_wp(old_pmd);
>
> - VM_BUG_ON_PAGE(!page_count(page), page);
> + VM_WARN_ON_FOLIO(!folio_ref_count(folio), folio);
> + VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);

Is this warning really correct? File-backed memory can be PMD-mapped with
CONFIG_READ_ONLY_THP_FOR_FS, so presumably it may also need to be remapped
as ptes here? Although I guess if we did have a file-backed folio, it
definitely wouldn't be correct to call page_add_anon_rmap() /
folio_add_anon_rmap_ptes()...
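
Just to illustrate what I mean - a rough, untested sketch, assuming purely
for the sake of argument that a file-backed folio could reach this branch,
and that a batched file variant (folio_add_file_rmap_ptes(); I believe a
later patch in this series introduces something along those lines) were
available:

	if (!freeze) {
		rmap_t rmap_flags = RMAP_NONE;

		folio_ref_add(folio, HPAGE_PMD_NR - 1);
		if (folio_test_anon(folio)) {
			/* Only anon folios can be PageAnonExclusive. */
			if (anon_exclusive)
				rmap_flags |= RMAP_EXCLUSIVE;
			folio_add_anon_rmap_ptes(folio, page, HPAGE_PMD_NR,
						 vma, haddr, rmap_flags);
		} else {
			/* Hypothetical READ_ONLY_THP_FOR_FS case. */
			folio_add_file_rmap_ptes(folio, page, HPAGE_PMD_NR,
						 vma);
		}
	}

But if a file-backed folio genuinely can't show up here, then the warning
(and the anon-only rmap call) is of course fine as-is.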

>
> /*
> * Without "freeze", we'll simply split the PMD, propagating the
> @@ -2519,11 +2522,18 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
> *
> * See page_try_share_anon_rmap(): invalidate PMD first.
> */
> - anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
> + anon_exclusive = PageAnonExclusive(page);
> if (freeze && anon_exclusive && page_try_share_anon_rmap(page))
> freeze = false;
> - if (!freeze)
> - page_ref_add(page, HPAGE_PMD_NR - 1);
> + if (!freeze) {
> + rmap_t rmap_flags = RMAP_NONE;
> +
> + folio_ref_add(folio, HPAGE_PMD_NR - 1);
> + if (anon_exclusive)
> + rmap_flags |= RMAP_EXCLUSIVE;
> + folio_add_anon_rmap_ptes(folio, page, HPAGE_PMD_NR,
> + vma, haddr, rmap_flags);
> + }
> }
>
> /*
> @@ -2566,8 +2576,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
> entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
> if (write)
> entry = pte_mkwrite(entry, vma);
> - if (anon_exclusive)
> - SetPageAnonExclusive(page + i);
> if (!young)
> entry = pte_mkold(entry);
> /* NOTE: this may set soft-dirty too on some archs */
> @@ -2577,7 +2585,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
> entry = pte_mksoft_dirty(entry);
> if (uffd_wp)
> entry = pte_mkuffd_wp(entry);
> - page_add_anon_rmap(page + i, vma, addr, RMAP_NONE);
> }
> VM_BUG_ON(!pte_none(ptep_get(pte)));
> set_pte_at(mm, addr, pte, entry);