[PATCH 1/2] mm/readahead: Improve page readaround miss detection

From: Jan Kara
Date: Thu Feb 01 2024 - 08:56:33 EST


filemap_map_pages() decreases ra->mmap_miss for every page it maps. This
however overestimates number of real cache hits because we have no idea
whether the application will use the pages we map or not. This is
problematic in particular in memory-constrained situations where we
think we have a great readahead success rate although in fact we are
just thrashing the page cache & disk. Change filemap_map_pages() to
count only the success of mapping the page we are faulting in. This
should actually be enough to keep mmap_miss close to 0 for workloads
doing sequential reads because filemap_map_pages() does not map pages
with the readahead flag, and thus faults on those pages are going to
contribute to decreasing the mmap_miss counter.

Reported-by: Liu Shixin <liushixin2@xxxxxxxxxx>
Fixes: f1820361f83d ("mm: implement ->map_pages for page cache")
Signed-off-by: Jan Kara <jack@xxxxxxx>
---
mm/filemap.c | 39 ++++++++++++++++++++++-----------------
1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 750e779c23db..0b843f99407c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3436,8 +3436,7 @@ static struct folio *next_uptodate_folio(struct xa_state *xas,
*/
static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
struct folio *folio, unsigned long start,
- unsigned long addr, unsigned int nr_pages,
- unsigned int *mmap_miss)
+ unsigned long addr, unsigned int nr_pages)
{
vm_fault_t ret = 0;
struct page *page = folio_page(folio, start);
@@ -3448,8 +3447,6 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
if (PageHWPoison(page + count))
goto skip;

- (*mmap_miss)++;
-
/*
* NOTE: If there're PTE markers, we'll leave them to be
* handled in the specific fault path, and it'll prohibit the
@@ -3488,8 +3485,7 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
}

static vm_fault_t filemap_map_order0_folio(struct vm_fault *vmf,
- struct folio *folio, unsigned long addr,
- unsigned int *mmap_miss)
+ struct folio *folio, unsigned long addr)
{
vm_fault_t ret = 0;
struct page *page = &folio->page;
@@ -3497,8 +3493,6 @@ static vm_fault_t filemap_map_order0_folio(struct vm_fault *vmf,
if (PageHWPoison(page))
return ret;

- (*mmap_miss)++;
-
/*
* NOTE: If there're PTE markers, we'll leave them to be
* handled in the specific fault path, and it'll prohibit
@@ -3527,7 +3521,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
XA_STATE(xas, &mapping->i_pages, start_pgoff);
struct folio *folio;
vm_fault_t ret = 0;
- unsigned int nr_pages = 0, mmap_miss = 0, mmap_miss_saved;
+ unsigned int nr_pages = 0;

rcu_read_lock();
folio = next_uptodate_folio(&xas, mapping, end_pgoff);
@@ -3556,12 +3550,11 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
nr_pages = min(end, end_pgoff) - xas.xa_index + 1;

if (!folio_test_large(folio))
- ret |= filemap_map_order0_folio(vmf,
- folio, addr, &mmap_miss);
+ ret |= filemap_map_order0_folio(vmf, folio, addr);
else
ret |= filemap_map_folio_range(vmf, folio,
xas.xa_index - folio->index, addr,
- nr_pages, &mmap_miss);
+ nr_pages);

folio_unlock(folio);
folio_put(folio);
@@ -3570,11 +3563,23 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
out:
rcu_read_unlock();

- mmap_miss_saved = READ_ONCE(file->f_ra.mmap_miss);
- if (mmap_miss >= mmap_miss_saved)
- WRITE_ONCE(file->f_ra.mmap_miss, 0);
- else
- WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss_saved - mmap_miss);
+ /* VM_FAULT_NOPAGE means we succeeded in mapping the desired page */
+ if (ret == VM_FAULT_NOPAGE) {
+ unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
+
+ /*
+ * We've found the page we needed in the page cache, decrease
+ * mmap_miss. Note that we don't decrease mmap_miss for every
+ * page we've mapped because we don't know whether the process
+ * will actually use them. We will thus underestimate the number
+ * of page cache hits, but at least the page marked with the
+ * readahead flag will not be mapped by filemap_map_pages() and
+ * this will contribute to decreasing mmap_miss to make up for
+ * an occasional fault miss.
+ */
+ if (mmap_miss)
+ WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss - 1);
+ }

return ret;
}
--
2.35.3


--3ct7qtyk6r2k3prh
Content-Type: text/x-patch; charset=us-ascii
Content-Disposition: attachment;
filename="0002-mm-readahead-Fix-readahead-miss-detection-with-FAULT.patch"