Re: [PATCHv3 6/6] mm/gup: Cache dev_pagemap while pinning pages
From: Kirill A. Shutemov
Date: Tue Oct 02 2018 - 07:26:35 EST
On Fri, Sep 21, 2018 at 10:39:56PM +0000, Keith Busch wrote:
> Pinning pages from ZONE_DEVICE memory needs to check the backing device's
> live-ness, which is tracked in the device's dev_pagemap metadata. This
> metadata is stored in a radix tree and looking it up adds measurable
> software overhead.
>
> This patch avoids repeating this relatively costly operation when
> dev_pagemap is used by caching the last dev_pagemap when getting user
> pages. The gup_benchmark reports this reduces the time to get user pages
> to as low as 1/3 of the previous time.
>
> The cached value is combined with other output parameters into a context
> struct to keep the parameters fewer.
>
> Cc: Kirill Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
> Cc: Dave Hansen <dave.hansen@xxxxxxxxx>
> Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
> Signed-off-by: Keith Busch <keith.busch@xxxxxxxxx>
> ---
....
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index a61ebe8ad4ca..79c80496dd50 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2534,15 +2534,28 @@ static inline vm_fault_t vmf_error(int err)
> return VM_FAULT_SIGBUS;
> }
>
> +struct follow_page_context {
> + struct dev_pagemap *pgmap;
> + unsigned int page_mask;
> +};
> +
> struct page *follow_page_mask(struct vm_area_struct *vma,
> unsigned long address, unsigned int foll_flags,
> - unsigned int *page_mask);
> + struct follow_page_context *ctx);
>
> static inline struct page *follow_page(struct vm_area_struct *vma,
> unsigned long address, unsigned int foll_flags)
> {
> - unsigned int unused_page_mask;
> - return follow_page_mask(vma, address, foll_flags, &unused_page_mask);
> + struct page *page;
> + struct follow_page_context ctx = {
> + .pgmap = NULL,
> + .page_mask = 0,
> + };
> +
> + page = follow_page_mask(vma, address, foll_flags, &ctx);
> + if (ctx.pgmap)
> + put_dev_pagemap(ctx.pgmap);
> + return page;
> }
Do we still want to keep the function as inline? I don't think so.
Let's move it into mm/gup.c and make struct follow_page_context private to
the file.
>
> #define FOLL_WRITE 0x01 /* check pte is writable */
> diff --git a/mm/gup.c b/mm/gup.c
> index 1abc8b4afff6..124e7293e381 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -71,10 +71,10 @@ static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
> }
>
> static struct page *follow_page_pte(struct vm_area_struct *vma,
> - unsigned long address, pmd_t *pmd, unsigned int flags)
> + unsigned long address, pmd_t *pmd, unsigned int flags,
> + struct dev_pagemap **pgmap)
> {
> struct mm_struct *mm = vma->vm_mm;
> - struct dev_pagemap *pgmap = NULL;
> struct page *page;
> spinlock_t *ptl;
> pte_t *ptep, pte;
> @@ -116,8 +116,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
> * Only return device mapping pages in the FOLL_GET case since
> * they are only valid while holding the pgmap reference.
> */
> - pgmap = get_dev_pagemap(pte_pfn(pte), NULL);
> - if (pgmap)
> + *pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap);
> + if (*pgmap)
> page = pte_page(pte);
> else
> goto no_page;
Hm. Shouldn't the get_dev_pagemap() call be under if (!*pgmap)?
... ah, never mind. I got confused by the get_dev_pagemap() interface: it already reuses the pgmap passed in when that pgmap covers the pfn.
> static bool vma_permits_fault(struct vm_area_struct *vma,
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 533f9b00147d..9839bf91b057 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -851,13 +851,23 @@ static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
> update_mmu_cache_pmd(vma, addr, pmd);
> }
>
> +static struct page *pagemap_page(unsigned long pfn, struct dev_pagemap **pgmap)
The function name doesn't reflect the fact that it takes a pin on the page.
Maybe pagemap_get_page()?
> +{
> + struct page *page;
> +
> + *pgmap = get_dev_pagemap(pfn, *pgmap);
> + if (!*pgmap)
> + return ERR_PTR(-EFAULT);
> + page = pfn_to_page(pfn);
> + get_page(page);
> + return page;
> +}
> +
--
Kirill A. Shutemov