Re: [PATCH RFC 23/39] mm/rmap: introduce folio_remove_rmap_[pte|ptes|pmd]()

From: Ryan Roberts
Date: Tue Dec 05 2023 - 07:52:47 EST


On 04/12/2023 14:21, David Hildenbrand wrote:
> Let's mimic what we did with folio_add_file_rmap_*() and
> folio_add_anon_rmap_*() so we can similarly replace page_remove_rmap()
> next.
>
> Make the compiler always special-case on the granularity by using
> __always_inline.
>
> We're adding folio_remove_rmap_ptes() handling right away, as we want to
> use that soon for batching rmap operations when unmapping PTE-mapped
> large folios.
>
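
IIUC the batched variant means a caller that has just cleared nr consecutive
PTEs mapping the same folio can drop all the rmaps with a single call, roughly
(sketch of a future call site, not part of this patch):

	/* [page, page + nr) must all have been mapped by consecutive PTEs */
	folio_remove_rmap_ptes(folio, page, nr, vma);

rather than calling page_remove_rmap() once per page.
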
> Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
> ---
>  include/linux/rmap.h |  6 ++++
>  mm/rmap.c            | 76 ++++++++++++++++++++++++++++++++++++--------
>  2 files changed, 68 insertions(+), 14 deletions(-)
>
> diff --git a/include/linux/rmap.h b/include/linux/rmap.h
> index 017b216915f19..dd4ffb1d8ae04 100644
> --- a/include/linux/rmap.h
> +++ b/include/linux/rmap.h
> @@ -241,6 +241,12 @@ void folio_add_file_rmap_pmd(struct folio *, struct page *,
>  		struct vm_area_struct *);
>  void page_remove_rmap(struct page *, struct vm_area_struct *,
>  		bool compound);
> +void folio_remove_rmap_ptes(struct folio *, struct page *, unsigned int nr,
> +		struct vm_area_struct *);
> +#define folio_remove_rmap_pte(folio, page, vma) \
> +	folio_remove_rmap_ptes(folio, page, 1, vma)
> +void folio_remove_rmap_pmd(struct folio *, struct page *,
> +		struct vm_area_struct *);
>
>  void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *,
>  		unsigned long address, rmap_t flags);
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 3587225055c5e..50b6909157ac1 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1463,25 +1463,36 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
>  		bool compound)
>  {
>  	struct folio *folio = page_folio(page);
> +
> +	if (likely(!compound))
> +		folio_remove_rmap_pte(folio, page, vma);
> +	else
> +		folio_remove_rmap_pmd(folio, page, vma);
> +}
> +
> +static __always_inline void __folio_remove_rmap(struct folio *folio,
> +		struct page *page, unsigned int nr_pages,
> +		struct vm_area_struct *vma, enum rmap_mode mode)
> +{
>  	atomic_t *mapped = &folio->_nr_pages_mapped;
> -	int nr = 0, nr_pmdmapped = 0;
> -	bool last;
> +	int last, nr = 0, nr_pmdmapped = 0;

nit: you're being inconsistent across these functions with signed vs unsigned
for the page counts (e.g. nr, nr_pmdmapped) - see __folio_add_rmap(),
__folio_add_file_rmap(), __folio_add_anon_rmap().

I suggest picking one and sticking to it. Personally I'd go with signed int
(since that's what all the struct folio counters we are manipulating are,
underneath the atomic_t), then check that nr_pages > 0 in
__folio_rmap_sanity_checks().
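
i.e. something like this (just a sketch; the helper's other checks stay as
they are):

	static inline void __folio_rmap_sanity_checks(struct folio *folio,
			struct page *page, int nr_pages, enum rmap_mode mode)
	{
		/* hugetlb folios are handled separately. */
		VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);

		/* A signed type makes a bogus batch size trivial to reject. */
		VM_WARN_ON_FOLIO(nr_pages <= 0, folio);

		/* ... rest of the existing checks ... */
	}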

>  	enum node_stat_item idx;
>
> -	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
> -	VM_BUG_ON_PAGE(compound && !PageHead(page), page);
> +	__folio_rmap_sanity_checks(folio, page, nr_pages, mode);
>
>  	/* Is page being unmapped by PTE? Is this its last map to be removed? */
> -	if (likely(!compound)) {
> -		last = atomic_add_negative(-1, &page->_mapcount);
> -		nr = last;
> -		if (last && folio_test_large(folio)) {
> -			nr = atomic_dec_return_relaxed(mapped);
> -			nr = (nr < COMPOUND_MAPPED);
> -		}
> -	} else if (folio_test_pmd_mappable(folio)) {
> -		/* That test is redundant: it's for safety or to optimize out */
> +	if (likely(mode == RMAP_MODE_PTE)) {
> +		do {
> +			last = atomic_add_negative(-1, &page->_mapcount);
> +			if (last && folio_test_large(folio)) {
> +				last = atomic_dec_return_relaxed(mapped);
> +				last = (last < COMPOUND_MAPPED);
> +			}
> +
> +			if (last)
> +				nr++;
> +		} while (page++, --nr_pages > 0);
> +	} else if (mode == RMAP_MODE_PMD) {
>  		last = atomic_add_negative(-1, &folio->_entire_mapcount);
>  		if (last) {
>  			nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
> @@ -1517,7 +1528,7 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
>  		 * is still mapped.
>  		 */
>  		if (folio_test_pmd_mappable(folio) && folio_test_anon(folio))

folio_test_pmd_mappable() -> folio_test_large()

Since you're converting this to support batched PTE removal, it might as well
support smaller-than-PMD folios too?

I currently have a patch making this same change in the multi-size THP series.
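
i.e. applying the rename to the hunk below (sketch):

	if (folio_test_large(folio) && folio_test_anon(folio))
		if (mode == RMAP_MODE_PTE || nr < nr_pmdmapped)
			deferred_split_folio(folio);

so that partially unmapping a smaller-than-PMD large folio also gets it queued
for deferred split.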


> -			if (!compound || nr < nr_pmdmapped)
> +			if (mode == RMAP_MODE_PTE || nr < nr_pmdmapped)
>  				deferred_split_folio(folio);
>  	}
>
> @@ -1532,6 +1543,43 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
>  	munlock_vma_folio(folio, vma);
>  }
>
> +/**
> + * folio_remove_rmap_ptes - remove PTE mappings from a page range of a folio
> + * @folio: The folio to remove the mappings from
> + * @page: The first page to remove
> + * @nr_pages: The number of pages that will be removed from the mapping
> + * @vma: The vm area from which the mappings are removed
> + *
> + * The page range of the folio is defined by [page, page + nr_pages)
> + *
> + * The caller needs to hold the page table lock.
> + */
> +void folio_remove_rmap_ptes(struct folio *folio, struct page *page,
> +		unsigned int nr_pages, struct vm_area_struct *vma)
> +{
> +	__folio_remove_rmap(folio, page, nr_pages, vma, RMAP_MODE_PTE);
> +}
> +
> +/**
> + * folio_remove_rmap_pmd - remove a PMD mapping from a page range of a folio
> + * @folio: The folio to remove the mapping from
> + * @page: The first page to remove
> + * @vma: The vm area from which the mapping is removed
> + *
> + * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
> + *
> + * The caller needs to hold the page table lock.
> + */
> +void folio_remove_rmap_pmd(struct folio *folio, struct page *page,
> +		struct vm_area_struct *vma)
> +{
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +	__folio_remove_rmap(folio, page, HPAGE_PMD_NR, vma, RMAP_MODE_PMD);
> +#else
> +	WARN_ON_ONCE(true);
> +#endif
> +}
> +
>  /*
>   * @arg: enum ttu_flags will be passed to this argument
>   */