[RFC PATCH 27/37] mm: implement speculative handling in do_fault_around()

From: Michel Lespinasse
Date: Tue Apr 06 2021 - 21:52:52 EST


Call the vm_ops->map_pages method within an rcu read locked section.
In the speculative case, verify the mmap sequence lock at the start of
the section. A match guarantees that the original vma is still valid
at that time, and that the associated vma->vm_file stays valid while
the vm_ops->map_pages() method is running.

Do not test vmf->pmd in the speculative case - we only speculate when
a page table already exists, and this saves us from having to handle
synchronization around the vmf->pmd read.

Change xfs_filemap_map_pages() to account for the fact that it cannot
block anymore, as it is now running within an rcu read lock.

Signed-off-by: Michel Lespinasse <michel@xxxxxxxxxxxxxx>
---
fs/xfs/xfs_file.c | 3 +++
mm/memory.c | 22 ++++++++++++++++++++--
2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a007ca0711d9..b360732b20ae 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1387,6 +1387,9 @@ xfs_filemap_map_pages(
struct inode *inode = file_inode(vmf->vma->vm_file);
vm_fault_t ret;

+ if (!xfs_ilock_nowait(XFS_I(inode), XFS_MMAPLOCK_SHARED))
+ return (vmf->flags & FAULT_FLAG_SPECULATIVE) ?
+ VM_FAULT_RETRY : 0;
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
ret = filemap_map_pages(vmf, start_pgoff, end_pgoff);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
diff --git a/mm/memory.c b/mm/memory.c
index 13e2aaf900e5..a20e13d84145 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4012,6 +4012,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
pgoff_t start_pgoff = vmf->pgoff;
pgoff_t end_pgoff;
int off;
+ vm_fault_t ret;

nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT;
mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
@@ -4030,14 +4031,31 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1,
start_pgoff + nr_pages - 1);

- if (pmd_none(*vmf->pmd)) {
+ if (!(vmf->flags & FAULT_FLAG_SPECULATIVE) &&
+ pmd_none(*vmf->pmd)) {
vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
if (!vmf->prealloc_pte)
return VM_FAULT_OOM;
smp_wmb(); /* See comment in __pte_alloc() */
}

- return vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
+ rcu_read_lock();
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+ if (vmf->flags & FAULT_FLAG_SPECULATIVE) {
+ if (!mmap_seq_read_check(vmf->vma->vm_mm, vmf->seq)) {
+ rcu_read_unlock();
+ return VM_FAULT_RETRY;
+ }
+ /*
+ * the mmap sequence check verified that vmf->vma was still
+ * current at that point in time.
+ * The rcu read lock ensures vmf->vma->vm_file stays valid.
+ */
+ }
+#endif
+ ret = vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
+ rcu_read_unlock();
+ return ret;
}

static vm_fault_t do_read_fault(struct vm_fault *vmf)
--
2.20.1