Re: [PATCH v3 1/3] fsdax: Factor helpers to simplify dax fault code

From: Darrick J. Wong
Date: Tue May 25 2021 - 18:27:04 EST


On Thu, Apr 22, 2021 at 09:44:59PM +0800, Shiyang Ruan wrote:
> The dax page fault code is long and a bit difficult to read, and it is
> hard to follow when we try to add new features. Some of the PTE and
> PMD paths share similar logic, so factor that logic out into helper
> functions to simplify the code.
>
> Signed-off-by: Shiyang Ruan <ruansy.fnst@xxxxxxxxxxx>
> Reviewed-by: Christoph Hellwig <hch@xxxxxx>
> Reviewed-by: Ritesh Harjani <riteshh@xxxxxxxxxxxxx>

Looks reasonably straightforward.
Reviewed-by: Darrick J. Wong <djwong@xxxxxxxxxx>

--D

> ---
> fs/dax.c | 153 ++++++++++++++++++++++++++++++-------------------------
> 1 file changed, 84 insertions(+), 69 deletions(-)
>
> diff --git a/fs/dax.c b/fs/dax.c
> index b3d27fdc6775..f843fb8fbbf1 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1244,6 +1244,53 @@ static bool dax_fault_is_synchronous(unsigned long flags,
> && (iomap->flags & IOMAP_F_DIRTY);
> }
>
> +/*
> + * If we are doing synchronous page fault and inode needs fsync, we can insert
> + * PTE/PMD into page tables only after that happens. Skip insertion for now and
> + * return the pfn so that caller can insert it after fsync is done.
> + */
> +static vm_fault_t dax_fault_synchronous_pfnp(pfn_t *pfnp, pfn_t pfn)
> +{
> + if (WARN_ON_ONCE(!pfnp))
> + return VM_FAULT_SIGBUS;
> +
> + *pfnp = pfn;
> + return VM_FAULT_NEEDDSYNC;
> +}
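(Side note for anyone reading along, not a request to change the patch:
the pfn saved here is consumed by the filesystem's fault handler. A
minimal sketch of that flow, modeled on the existing ext4/xfs callers
of dax_iomap_fault() -- the handler and ops names below are
placeholders:

	static vm_fault_t example_huge_fault(struct vm_fault *vmf,
			enum page_entry_size pe_size)
	{
		pfn_t pfn;
		vm_fault_t ret;

		/* returns VM_FAULT_NEEDDSYNC for a synchronous fault */
		ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL,
				&example_iomap_ops);
		if (ret & VM_FAULT_NEEDDSYNC) {
			/*
			 * dax_finish_sync_fault() fsyncs the faulted
			 * range and only then installs the PTE/PMD for
			 * the pfn stashed above.
			 */
			ret = dax_finish_sync_fault(vmf, pe_size, pfn);
		}
		return ret;
	}

so the helper only defers the page table insertion, it doesn't skip it.)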
> +
> +static vm_fault_t dax_fault_cow_page(struct vm_fault *vmf, struct iomap *iomap,
> + loff_t pos)
> +{
> + int error = 0;
> + vm_fault_t ret;
> + unsigned long vaddr = vmf->address;
> + sector_t sector = dax_iomap_sector(iomap, pos);
> +
> + switch (iomap->type) {
> + case IOMAP_HOLE:
> + case IOMAP_UNWRITTEN:
> + clear_user_highpage(vmf->cow_page, vaddr);
> + break;
> + case IOMAP_MAPPED:
> + error = copy_cow_page_dax(iomap->bdev, iomap->dax_dev,
> + sector, vmf->cow_page, vaddr);
> + break;
> + default:
> + WARN_ON_ONCE(1);
> + error = -EIO;
> + break;
> + }
> +
> + if (error)
> + return dax_fault_return(error);
> +
> + __SetPageUptodate(vmf->cow_page);
> + ret = finish_fault(vmf);
> + if (!ret)
> + ret = VM_FAULT_DONE_COW;
> + return ret;
> +}
> +
> static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
> int *iomap_errp, const struct iomap_ops *ops)
> {
> @@ -1312,30 +1359,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
> }
>
> if (vmf->cow_page) {
> - sector_t sector = dax_iomap_sector(&iomap, pos);
> -
> - switch (iomap.type) {
> - case IOMAP_HOLE:
> - case IOMAP_UNWRITTEN:
> - clear_user_highpage(vmf->cow_page, vaddr);
> - break;
> - case IOMAP_MAPPED:
> - error = copy_cow_page_dax(iomap.bdev, iomap.dax_dev,
> - sector, vmf->cow_page, vaddr);
> - break;
> - default:
> - WARN_ON_ONCE(1);
> - error = -EIO;
> - break;
> - }
> -
> - if (error)
> - goto error_finish_iomap;
> -
> - __SetPageUptodate(vmf->cow_page);
> - ret = finish_fault(vmf);
> - if (!ret)
> - ret = VM_FAULT_DONE_COW;
> + ret = dax_fault_cow_page(vmf, &iomap, pos);
> goto finish_iomap;
> }
>
> @@ -1355,19 +1379,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
> entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
> 0, write && !sync);
>
> - /*
> - * If we are doing synchronous page fault and inode needs fsync,
> - * we can insert PTE into page tables only after that happens.
> - * Skip insertion for now and return the pfn so that caller can
> - * insert it after fsync is done.
> - */
> if (sync) {
> - if (WARN_ON_ONCE(!pfnp)) {
> - error = -EIO;
> - goto error_finish_iomap;
> - }
> - *pfnp = pfn;
> - ret = VM_FAULT_NEEDDSYNC | major;
> + ret = dax_fault_synchronous_pfnp(pfnp, pfn);
> goto finish_iomap;
> }
> trace_dax_insert_mapping(inode, vmf, entry);
> @@ -1466,13 +1479,45 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
> return VM_FAULT_FALLBACK;
> }
>
> +static bool dax_fault_check_fallback(struct vm_fault *vmf, struct xa_state *xas,
> + pgoff_t max_pgoff)
> +{
> + unsigned long pmd_addr = vmf->address & PMD_MASK;
> + bool write = vmf->flags & FAULT_FLAG_WRITE;
> +
> + /*
> + * Make sure that the faulting address's PMD offset (color) matches
> + * the PMD offset from the start of the file. This is necessary so
> + * that a PMD range in the page table overlaps exactly with a PMD
> + * range in the page cache.
> + */
> + if ((vmf->pgoff & PG_PMD_COLOUR) !=
> + ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
> + return true;
> +
> + /* Fall back to PTEs if we're going to COW */
> + if (write && !(vmf->vma->vm_flags & VM_SHARED))
> + return true;
> +
> + /* If the PMD would extend outside the VMA */
> + if (pmd_addr < vmf->vma->vm_start)
> + return true;
> + if ((pmd_addr + PMD_SIZE) > vmf->vma->vm_end)
> + return true;
> +
> + /* If the PMD would extend beyond the file size */
> + if ((xas->xa_index | PG_PMD_COLOUR) >= max_pgoff)
> + return true;
> +
> + return false;
> +}
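(To make the colour check concrete: on x86_64 with 2M PMDs,
PG_PMD_COLOUR is 511, so a fault at file page offset 1024 (4M into the
file) mapped at a PMD-aligned virtual address has matching colours --
1024 & 511 == 0 on both sides -- and may take the PMD path, whereas
pgoff 1000 at the same address gives 1000 & 511 == 488 and falls back
to PTEs. Illustration only, not from the patch.)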
> +
> static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
> const struct iomap_ops *ops)
> {
> struct vm_area_struct *vma = vmf->vma;
> struct address_space *mapping = vma->vm_file->f_mapping;
> XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, PMD_ORDER);
> - unsigned long pmd_addr = vmf->address & PMD_MASK;
> bool write = vmf->flags & FAULT_FLAG_WRITE;
> bool sync;
> unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
> @@ -1495,33 +1540,12 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
>
> trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);
>
> - /*
> - * Make sure that the faulting address's PMD offset (color) matches
> - * the PMD offset from the start of the file. This is necessary so
> - * that a PMD range in the page table overlaps exactly with a PMD
> - * range in the page cache.
> - */
> - if ((vmf->pgoff & PG_PMD_COLOUR) !=
> - ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
> - goto fallback;
> -
> - /* Fall back to PTEs if we're going to COW */
> - if (write && !(vma->vm_flags & VM_SHARED))
> - goto fallback;
> -
> - /* If the PMD would extend outside the VMA */
> - if (pmd_addr < vma->vm_start)
> - goto fallback;
> - if ((pmd_addr + PMD_SIZE) > vma->vm_end)
> - goto fallback;
> -
> if (xas.xa_index >= max_pgoff) {
> result = VM_FAULT_SIGBUS;
> goto out;
> }
>
> - /* If the PMD would extend beyond the file size */
> - if ((xas.xa_index | PG_PMD_COLOUR) >= max_pgoff)
> + if (dax_fault_check_fallback(vmf, &xas, max_pgoff))
> goto fallback;
>
> /*
> @@ -1573,17 +1597,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
> entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
> DAX_PMD, write && !sync);
>
> - /*
> - * If we are doing synchronous page fault and inode needs fsync,
> - * we can insert PMD into page tables only after that happens.
> - * Skip insertion for now and return the pfn so that caller can
> - * insert it after fsync is done.
> - */
> if (sync) {
> - if (WARN_ON_ONCE(!pfnp))
> - goto finish_iomap;
> - *pfnp = pfn;
> - result = VM_FAULT_NEEDDSYNC;
> + result = dax_fault_synchronous_pfnp(pfnp, pfn);
> goto finish_iomap;
> }
>
> --
> 2.31.1
>