Re: [PATCH net-next v2 09/15] mm: page_frag: reuse MSB of 'size' field for pfmemalloc

From: Alexander H Duyck
Date: Tue Apr 16 2024 - 12:22:25 EST


On Mon, 2024-04-15 at 21:19 +0800, Yunsheng Lin wrote:
> The '(PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)' case is for the
> system with page size less than 32KB, which is 0x8000 bytes
> requiring 16 bits space, change 'size' to 'size_mask' to avoid
> using the MSB, and change 'pfmemalloc' field to reuse that
> MSB, so that we remove the original space needed by 'pfmemalloc'.
>
> For another case, the MSB of 'offset' is reused for 'pfmemalloc'.
>
> Signed-off-by: Yunsheng Lin <linyunsheng@xxxxxxxxxx>
> ---
> include/linux/page_frag_cache.h | 13 ++++++++-----
> mm/page_frag_cache.c | 5 +++--
> 2 files changed, 11 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
> index fe5faa80b6c3..40a7d6da9ef0 100644
> --- a/include/linux/page_frag_cache.h
> +++ b/include/linux/page_frag_cache.h
> @@ -12,15 +12,16 @@ struct page_frag_cache {
> void *va;
> #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> __u16 offset;
> - __u16 size;
> + __u16 size_mask:15;
> + __u16 pfmemalloc:1;
> #else
> - __u32 offset;
> + __u32 offset:31;
> + __u32 pfmemalloc:1;
> #endif

This seems like a really bad idea. Using a bit-field like this seems
like a waste as it means that all the accesses now have to add
additional operations to access either offset or size. It isn't as if
this is an oversized struct, or one that we are allocating a ton of. As
such I am not sure why we need to optimize for size like this.

> /* we maintain a pagecount bias, so that we dont dirty cache line
> * containing page->_refcount every time we allocate a fragment.
> */
> unsigned int pagecnt_bias;
> - bool pfmemalloc;
> };
>
> static inline void page_frag_cache_init(struct page_frag_cache *nc)
> @@ -43,7 +44,9 @@ static inline void *__page_frag_alloc_va_align(struct page_frag_cache *nc,
> gfp_t gfp_mask,
> unsigned int align)
> {
> - nc->offset = ALIGN(nc->offset, align);
> + unsigned int offset = nc->offset;
> +
> + nc->offset = ALIGN(offset, align);
>
> return page_frag_alloc_va(nc, fragsz, gfp_mask);
> }
> @@ -53,7 +56,7 @@ static inline void *page_frag_alloc_va_align(struct page_frag_cache *nc,
> gfp_t gfp_mask,
> unsigned int align)
> {
> - WARN_ON_ONCE(!is_power_of_2(align));
> + WARN_ON_ONCE(!is_power_of_2(align) || align >= PAGE_SIZE);

The "align >= PAGE_SIZE" fix should probably go with your change that
reversed the direction.

>
> return __page_frag_alloc_va_align(nc, fragsz, gfp_mask, align);
> }
> diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
> index 50511d8522d0..8d93029116e1 100644
> --- a/mm/page_frag_cache.c
> +++ b/mm/page_frag_cache.c
> @@ -32,7 +32,8 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
> __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
> page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
> PAGE_FRAG_CACHE_MAX_ORDER);
> - nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
> + nc->size_mask = page ? PAGE_FRAG_CACHE_MAX_SIZE - 1 : PAGE_SIZE - 1;
> + VM_BUG_ON(page && nc->size_mask != PAGE_FRAG_CACHE_MAX_SIZE - 1);
> #endif
> if (unlikely(!page))
> page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
> @@ -86,7 +87,7 @@ void *page_frag_alloc_va(struct page_frag_cache *nc, unsigned int fragsz,
>
> #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> /* if size can vary use size else just use PAGE_SIZE */
> - size = nc->size;
> + size = nc->size_mask + 1;
> #else
> size = PAGE_SIZE;
> #endif

So now we are having to add arithmetic operations to the size in
addition to having to mask in order to read the values. That just seems
like that much more overhead.