Re: [PATCH RFC 07/39] mm/rmap: convert folio_add_file_rmap_range() into folio_add_file_rmap_[pte|ptes|pmd]()

From: Ryan Roberts
Date: Tue Dec 05 2023 - 07:05:02 EST


On 04/12/2023 14:21, David Hildenbrand wrote:
> Let's get rid of the compound parameter and instead define implicitly
> which mappings we're adding. That is more future proof, easier to read
> and harder to mess up.
>
> Use an enum to express the granularity internally. Make the compiler
> always special-case on the granularity by using __always_inline.
>
> Add plenty of sanity checks with CONFIG_DEBUG_VM. Replace the
> folio_test_pmd_mappable() check by a config check in the caller and
> sanity checks. Convert the single user of folio_add_file_rmap_range().
>
> This function design can later easily be extended to PUDs and to batch
> PMDs. Note that for now we don't support anything bigger than
> PMD-sized folios (as we cleanly separated hugetlb handling). Sanity checks

Is that definitely true? Don't we support PUD-mapping file-backed DAX memory?


> will catch if that ever changes.
>
> Next up is removing page_remove_rmap() along with its "compound"
> parameter and similarly converting all other rmap functions.
>
> Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
> ---
> include/linux/rmap.h | 47 +++++++++++++++++++++++++++--
> mm/memory.c | 2 +-
> mm/rmap.c | 72 ++++++++++++++++++++++++++++----------------
> 3 files changed, 92 insertions(+), 29 deletions(-)
>
> diff --git a/include/linux/rmap.h b/include/linux/rmap.h
> index 77e336f86c72d..a4a30c361ac50 100644
> --- a/include/linux/rmap.h
> +++ b/include/linux/rmap.h
> @@ -186,6 +186,45 @@ typedef int __bitwise rmap_t;
> */
> #define RMAP_COMPOUND ((__force rmap_t)BIT(1))
>
> +/*
> + * Internally, we're using an enum to specify the granularity. Usually,
> + * we make the compiler create specialized variants for the different
> + * granularities.
> + */
> +enum rmap_mode {
> + RMAP_MODE_PTE = 0,
> + RMAP_MODE_PMD,
> +};
> +
> +static inline void __folio_rmap_sanity_checks(struct folio *folio,
> + struct page *page, unsigned int nr_pages, enum rmap_mode mode)
> +{
> + /* hugetlb folios are handled separately. */
> + VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
> + VM_WARN_ON_FOLIO(folio_test_large(folio) &&
> + !folio_test_large_rmappable(folio), folio);
> +
> + VM_WARN_ON_ONCE(!nr_pages || nr_pages > folio_nr_pages(folio));

nit: I don't think you technically need the second half of this - it's covered by
the test below...

> + VM_WARN_ON_FOLIO(page_folio(page) != folio, folio);
> + VM_WARN_ON_FOLIO(page_folio(page + nr_pages - 1) != folio, folio);

...this one.

> +
> + switch (mode) {
> + case RMAP_MODE_PTE:
> + break;
> + case RMAP_MODE_PMD:
> + /*
> + * We don't support folios larger than a single PMD yet. So
> + * when RMAP_MODE_PMD is set, we assume that we are creating
> + * a single "entire" mapping of the folio.
> + */
> + VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio);
> + VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio);
> + break;
> + default:
> + VM_WARN_ON_ONCE(true);
> + }
> +}
> +
> /*
> * rmap interfaces called when adding or removing pte of page
> */
> @@ -198,8 +237,12 @@ void folio_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
> unsigned long address);
> void page_add_file_rmap(struct page *, struct vm_area_struct *,
> bool compound);
> -void folio_add_file_rmap_range(struct folio *, struct page *, unsigned int nr,
> - struct vm_area_struct *, bool compound);
> +void folio_add_file_rmap_ptes(struct folio *, struct page *, unsigned int nr,
> + struct vm_area_struct *);
> +#define folio_add_file_rmap_pte(folio, page, vma) \
> + folio_add_file_rmap_ptes(folio, page, 1, vma)
> +void folio_add_file_rmap_pmd(struct folio *, struct page *,
> + struct vm_area_struct *);
> void page_remove_rmap(struct page *, struct vm_area_struct *,
> bool compound);
>
> diff --git a/mm/memory.c b/mm/memory.c
> index 1f18ed4a54971..15325587cff01 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -4414,7 +4414,7 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio,
> folio_add_lru_vma(folio, vma);
> } else {
> add_mm_counter(vma->vm_mm, mm_counter_file(page), nr);
> - folio_add_file_rmap_range(folio, page, nr, vma, false);
> + folio_add_file_rmap_ptes(folio, page, nr, vma);
> }
> set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr);
>
> diff --git a/mm/rmap.c b/mm/rmap.c
> index a735ecca47a81..1614d98062948 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1334,31 +1334,19 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
> SetPageAnonExclusive(&folio->page);
> }
>
> -/**
> - * folio_add_file_rmap_range - add pte mapping to page range of a folio
> - * @folio: The folio to add the mapping to
> - * @page: The first page to add
> - * @nr_pages: The number of pages which will be mapped
> - * @vma: the vm area in which the mapping is added
> - * @compound: charge the page as compound or small page
> - *
> - * The page range of folio is defined by [first_page, first_page + nr_pages)
> - *
> - * The caller needs to hold the pte lock.
> - */
> -void folio_add_file_rmap_range(struct folio *folio, struct page *page,
> - unsigned int nr_pages, struct vm_area_struct *vma,
> - bool compound)
> +static __always_inline void __folio_add_file_rmap(struct folio *folio,
> + struct page *page, unsigned int nr_pages,
> + struct vm_area_struct *vma, enum rmap_mode mode)
> {
> atomic_t *mapped = &folio->_nr_pages_mapped;
> unsigned int nr_pmdmapped = 0, first;
> int nr = 0;
>
> - VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
> - VM_WARN_ON_FOLIO(compound && !folio_test_pmd_mappable(folio), folio);
> + VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
> + __folio_rmap_sanity_checks(folio, page, nr_pages, mode);
>
> /* Is page being mapped by PTE? Is this its first map to be added? */
> - if (likely(!compound)) {
> + if (likely(mode == RMAP_MODE_PTE)) {
> do {
> first = atomic_inc_and_test(&page->_mapcount);
> if (first && folio_test_large(folio)) {
> @@ -1369,9 +1357,7 @@ void folio_add_file_rmap_range(struct folio *folio, struct page *page,
> if (first)
> nr++;
> } while (page++, --nr_pages > 0);
> - } else if (folio_test_pmd_mappable(folio)) {
> - /* That test is redundant: it's for safety or to optimize out */
> -
> + } else if (mode == RMAP_MODE_PMD) {
> first = atomic_inc_and_test(&folio->_entire_mapcount);
> if (first) {
> nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped);
> @@ -1399,6 +1385,43 @@ void folio_add_file_rmap_range(struct folio *folio, struct page *page,
> mlock_vma_folio(folio, vma);
> }
>
> +/**
> + * folio_add_file_rmap_ptes - add PTE mappings to a page range of a folio
> + * @folio: The folio to add the mappings to
> + * @page: The first page to add
> + * @nr_pages: The number of pages that will be mapped using PTEs
> + * @vma: The vm area in which the mappings are added
> + *
> + * The page range of the folio is defined by [page, page + nr_pages)
> + *
> + * The caller needs to hold the page table lock.
> + */
> +void folio_add_file_rmap_ptes(struct folio *folio, struct page *page,
> + unsigned int nr_pages, struct vm_area_struct *vma)
> +{
> + __folio_add_file_rmap(folio, page, nr_pages, vma, RMAP_MODE_PTE);
> +}
> +
> +/**
> + * folio_add_file_rmap_pmd - add a PMD mapping to a page range of a folio
> + * @folio: The folio to add the mapping to
> + * @page: The first page to add
> + * @vma: The vm area in which the mapping is added
> + *
> + * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
> + *
> + * The caller needs to hold the page table lock.
> + */
> +void folio_add_file_rmap_pmd(struct folio *folio, struct page *page,
> + struct vm_area_struct *vma)
> +{
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> + __folio_add_file_rmap(folio, page, HPAGE_PMD_NR, vma, RMAP_MODE_PMD);
> +#else
> + WARN_ON_ONCE(true);
> +#endif
> +}
> +
> /**
> * page_add_file_rmap - add pte mapping to a file page
> * @page: the page to add the mapping to
> @@ -1411,16 +1434,13 @@ void page_add_file_rmap(struct page *page, struct vm_area_struct *vma,
> bool compound)
> {
> struct folio *folio = page_folio(page);
> - unsigned int nr_pages;
>
> VM_WARN_ON_ONCE_PAGE(compound && !PageTransHuge(page), page);
>
> if (likely(!compound))
> - nr_pages = 1;
> + folio_add_file_rmap_pte(folio, page, vma);
> else
> - nr_pages = folio_nr_pages(folio);
> -
> - folio_add_file_rmap_range(folio, page, nr_pages, vma, compound);
> + folio_add_file_rmap_pmd(folio, page, vma);
> }
>
> /**