[PATCH] mm: Return the folio from swapin_readahead

From: Matthew Wilcox (Oracle)
Date: Fri Mar 22 2024 - 13:28:33 EST


The unuse_pte_range() caller only wants the folio, while do_swap_page()
wants both the page and the folio. Since do_swap_page() already has
logic for handling both the folio and the page, move the folio-to-page
conversion there. This also lets us allocate larger folios in the
SWP_SYNCHRONOUS_IO path in the future.

Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
---
mm/memory.c | 6 ++----
mm/swap.h | 6 +++---
mm/swap_state.c | 8 +++-----
mm/swapfile.c | 5 +----
4 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 40070ef01867..aedf0ee554d1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4020,7 +4020,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
/* skip swapcache */
folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0,
vma, vmf->address, false);
- page = &folio->page;
if (folio) {
__folio_set_locked(folio);
__folio_set_swapbacked(folio);
@@ -4045,10 +4044,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
folio->private = NULL;
}
} else {
- page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
+ folio = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
vmf);
- if (page)
- folio = page_folio(page);
swapcache = folio;
}

@@ -4069,6 +4066,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
ret = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
+ page = folio_file_page(folio, swp_offset(entry));
} else if (PageHWPoison(page)) {
/*
* hwpoisoned dirty swapcache pages are kept for killing
diff --git a/mm/swap.h b/mm/swap.h
index fc2f6ade7f80..6661b55b2c75 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -55,8 +55,8 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_flags,
bool skip_if_exists);
struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
struct mempolicy *mpol, pgoff_t ilx);
-struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
- struct vm_fault *vmf);
+struct folio *swapin_readahead(swp_entry_t entry, gfp_t flag,
+ struct vm_fault *vmf);

static inline unsigned int folio_swap_flags(struct folio *folio)
{
@@ -87,7 +87,7 @@ static inline struct folio *swap_cluster_readahead(swp_entry_t entry,
return NULL;
}

-static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
+static inline struct folio *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
struct vm_fault *vmf)
{
return NULL;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 2deac23633cd..f3c379e93bc6 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -885,13 +885,13 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
* @gfp_mask: memory allocation flags
* @vmf: fault information
*
- * Returns the struct page for entry and addr, after queueing swapin.
+ * Returns the struct folio for entry and addr, after queueing swapin.
*
* It's a main entry function for swap readahead. By the configuration,
* it will read ahead blocks by cluster-based(ie, physical disk based)
* or vma-based(ie, virtual address based on faulty address) readahead.
*/
-struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
+struct folio *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
struct vm_fault *vmf)
{
struct mempolicy *mpol;
@@ -904,9 +904,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
swap_cluster_readahead(entry, gfp_mask, mpol, ilx);
mpol_cond_put(mpol);

- if (!folio)
- return NULL;
- return folio_file_page(folio, swp_offset(entry));
+ return folio;
}

#ifdef CONFIG_SYSFS
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 5e6d2304a2a4..c9d041ad8df6 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1883,7 +1883,6 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,

folio = swap_cache_get_folio(entry, vma, addr);
if (!folio) {
- struct page *page;
struct vm_fault vmf = {
.vma = vma,
.address = addr,
@@ -1891,10 +1890,8 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
.pmd = pmd,
};

- page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
+ folio = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
&vmf);
- if (page)
- folio = page_folio(page);
}
if (!folio) {
swp_count = READ_ONCE(si->swap_map[offset]);
--
2.43.0


> mm/memory.c | 8 +++-----
> mm/swap.h | 4 ++--
> mm/swap_state.c | 6 ++----
> mm/swapfile.c | 5 +----
> 4 files changed, 8 insertions(+), 15 deletions(-)
>
> diff --git a/mm/memory.c b/mm/memory.c
> index e42fadc25268..dfdb620a9123 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -4005,12 +4005,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> if (PTR_ERR(folio) == -EBUSY)
> goto out;
> need_clear_cache = true;
> - page = &folio->page;
> } else {
> - page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
> - vmf);
> - if (page)
> - folio = page_folio(page);
> + folio = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vmf);
> swapcache = folio;
> }
>
> @@ -4027,6 +4023,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> goto unlock;
> }
>
> + page = folio_file_page(folio, swp_offset(entry));
> +
> /* Had to read the page from swap area: Major fault */
> ret = VM_FAULT_MAJOR;
> count_vm_event(PGMAJFAULT);
> diff --git a/mm/swap.h b/mm/swap.h
> index 40e902812cc5..aee134907a70 100644
> --- a/mm/swap.h
> +++ b/mm/swap.h
> @@ -57,7 +57,7 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
> struct mempolicy *mpol, pgoff_t ilx);
> struct folio *swapin_direct(swp_entry_t entry, gfp_t flag,
> struct vm_fault *vmf);
> -struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
> +struct folio *swapin_readahead(swp_entry_t entry, gfp_t flag,
> struct vm_fault *vmf);
>
> static inline unsigned int folio_swap_flags(struct folio *folio)
> @@ -95,7 +95,7 @@ static inline struct folio *swapin_direct(swp_entry_t entry, gfp_t flag,
> return NULL;
> }
>
> -static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
> +static inline struct folio *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
> struct vm_fault *vmf)
> {
> return NULL;
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index 0a3fa48b3893..2a9c6bdff5ea 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -951,7 +951,7 @@ struct folio *swapin_direct(swp_entry_t entry, gfp_t gfp_mask,
> * it will read ahead blocks by cluster-based(ie, physical disk based)
> * or vma-based(ie, virtual address based on faulty address) readahead.
> */
> -struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
> +struct folio *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
> struct vm_fault *vmf)
> {
> struct mempolicy *mpol;
> @@ -964,9 +964,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
> swap_cluster_readahead(entry, gfp_mask, mpol, ilx);
> mpol_cond_put(mpol);
>
> - if (!folio)
> - return NULL;
> - return folio_file_page(folio, swp_offset(entry));
> + return folio;
> }
>
> #ifdef CONFIG_SYSFS
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index 4919423cce76..4dd894395a0f 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -1883,7 +1883,6 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
>
> folio = swap_cache_get_folio(entry, vma, addr);
> if (!folio) {
> - struct page *page;
> struct vm_fault vmf = {
> .vma = vma,
> .address = addr,
> @@ -1891,10 +1890,8 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
> .pmd = pmd,
> };
>
> - page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
> + folio = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
> &vmf);
> - if (page)
> - folio = page_folio(page);
> }
> if (!folio) {
> swp_count = READ_ONCE(si->swap_map[offset]);
> --
> 2.43.0
>