Re: [PATCH v6 1/5] mm/filemap: add mempolicy support to the filemap layer
From: Vlastimil Babka
Date: Fri Feb 28 2025 - 09:17:20 EST
On 2/26/25 09:25, Shivank Garg wrote:
> From: Shivansh Dhiman <shivansh.dhiman@xxxxxxx>
>
> Add NUMA mempolicy support to the filemap allocation path by introducing
> new APIs that take a mempolicy argument:
> - filemap_grab_folio_mpol()
> - filemap_alloc_folio_mpol()
> - __filemap_get_folio_mpol()
>
> These APIs allow callers to specify a NUMA policy during page cache
> allocations, enabling fine-grained control over memory placement. This is
> particularly needed by KVM when using guest-memfd memory backends, where
> the guest memory needs to be allocated according to the NUMA policy
> specified by the VMM.
>
> The existing non-mempolicy APIs remain unchanged and continue to use the
> default allocation behavior.
>
> Signed-off-by: Shivansh Dhiman <shivansh.dhiman@xxxxxxx>
> Signed-off-by: Shivank Garg <shivankg@xxxxxxx>
<snip>
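(As context for readers: I'd expect a guest_memfd-style caller to use the new
API roughly as below; a hypothetical usage sketch, the FGP flags and gfp mask
are only illustrative.)

	struct folio *folio;

	/* Look up or allocate the folio, applying the caller's mempolicy. */
	folio = __filemap_get_folio_mpol(mapping, index,
					 FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
					 mapping_gfp_mask(mapping), mpol);
	if (IS_ERR(folio))
		return PTR_ERR(folio);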
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1001,11 +1001,17 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio,
> EXPORT_SYMBOL_GPL(filemap_add_folio);
>
> #ifdef CONFIG_NUMA
> -struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
> +struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp, unsigned int order,
> + struct mempolicy *mpol)
> {
> int n;
> struct folio *folio;
>
> + if (mpol)
> + return folio_alloc_mpol_noprof(gfp, order, mpol,
> + NO_INTERLEAVE_INDEX,
> + numa_node_id());
> +
> if (cpuset_do_page_mem_spread()) {
> unsigned int cpuset_mems_cookie;
> do {
> @@ -1018,6 +1024,12 @@ struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
> }
> return folio_alloc_noprof(gfp, order);
> }
> +EXPORT_SYMBOL(filemap_alloc_folio_mpol_noprof);
> +
> +struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
> +{
> + return filemap_alloc_folio_mpol_noprof(gfp, order, NULL);
> +}
> EXPORT_SYMBOL(filemap_alloc_folio_noprof);
> #endif
Here it seems to me:
- filemap_alloc_folio_noprof() could stay unchanged
- filemap_alloc_folio_mpol_noprof() would
  - call folio_alloc_mpol_noprof() if (mpol)
  - call filemap_alloc_folio_noprof() otherwise
The code would be a bit more clearly structured that way?
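IOW, something like this (untested sketch, just reshuffling the code from the
patch):

struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp, unsigned int order,
		struct mempolicy *mpol)
{
	/* An explicit policy bypasses the cpuset page spreading logic. */
	if (mpol)
		return folio_alloc_mpol_noprof(gfp, order, mpol,
					       NO_INTERLEAVE_INDEX,
					       numa_node_id());

	/* Otherwise fall back to the existing, unchanged default path. */
	return filemap_alloc_folio_noprof(gfp, order);
}
EXPORT_SYMBOL(filemap_alloc_folio_mpol_noprof);

That way the cpuset_do_page_mem_spread() handling stays only in
filemap_alloc_folio_noprof().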
> @@ -1881,11 +1893,12 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
> }
>
> /**
> - * __filemap_get_folio - Find and get a reference to a folio.
> + * __filemap_get_folio_mpol - Find and get a reference to a folio.
> * @mapping: The address_space to search.
> * @index: The page index.
> * @fgp_flags: %FGP flags modify how the folio is returned.
> * @gfp: Memory allocation flags to use if %FGP_CREAT is specified.
> + * @mpol: The mempolicy to apply when allocating a new folio.
> *
> * Looks up the page cache entry at @mapping & @index.
> *
> @@ -1896,8 +1909,8 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
> *
> * Return: The found folio or an ERR_PTR() otherwise.
> */
> -struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> - fgf_t fgp_flags, gfp_t gfp)
> +struct folio *__filemap_get_folio_mpol(struct address_space *mapping, pgoff_t index,
> + fgf_t fgp_flags, gfp_t gfp, struct mempolicy *mpol)
> {
> struct folio *folio;
>
> @@ -1967,7 +1980,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> err = -ENOMEM;
> if (order > min_order)
> alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
> - folio = filemap_alloc_folio(alloc_gfp, order);
> + folio = filemap_alloc_folio_mpol(alloc_gfp, order, mpol);
> if (!folio)
> continue;
>
> @@ -2003,6 +2016,13 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> folio_clear_dropbehind(folio);
> return folio;
> }
> +EXPORT_SYMBOL(__filemap_get_folio_mpol);
> +
> +struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> + fgf_t fgp_flags, gfp_t gfp)
> +{
> + return __filemap_get_folio_mpol(mapping, index, fgp_flags, gfp, NULL);
> +}
> EXPORT_SYMBOL(__filemap_get_folio);
>
> static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,