[PATCH net-next v5 10/13] mm: page_frag: introduce prepare/probe/commit API

From: Yunsheng Lin
Date: Tue May 28 2024 - 09:02:34 EST


There are many use cases that need a minimum amount of memory
in order to make forward progress, but are more performant if
more memory is available, or that need to probe the cache info
in order to use any available memory for frag coalescing.

Currently the skb_page_frag_refill() API is used to handle the
above use cases, but the caller needs to know about the internal
details and access the data fields of 'struct page_frag' to
meet their requirements, and its implementation is similar to
the one in the mm subsystem.

To unify those two page_frag implementations, introduce a
prepare API that ensures the minimum memory requirement is
satisfied and returns how much memory is actually available to
the caller, and a probe API that reports the currently available
memory to the caller without refilling the cache. The caller
then either calls the commit API to report how much memory it
actually used, or does not call it if it decides not to use any
memory.

As the next patch is about to replace 'struct page_frag' with
'struct page_frag_cache' in linux/sched.h, which is included
by asm-offsets.s, using virt_to_page() in an inline helper of
page_frag_cache.h causes a "'vmemmap' undeclared" compile
error for asm-offsets.s. Use a macro for the probe API to
avoid that compile error.

CC: Alexander Duyck <alexander.duyck@xxxxxxxxx>
Signed-off-by: Yunsheng Lin <linyunsheng@xxxxxxxxxx>
---
include/linux/page_frag_cache.h | 81 ++++++++++++++++++
mm/page_frag_cache.c | 145 ++++++++++++++++++++++++++++++++
2 files changed, 226 insertions(+)

diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
index 5228e3025ba1..365302787012 100644
--- a/include/linux/page_frag_cache.h
+++ b/include/linux/page_frag_cache.h
@@ -93,6 +93,9 @@ static inline unsigned int __page_frag_cache_page_offset(struct encoded_va *enco

void page_frag_cache_drain(struct page_frag_cache *nc);
void __page_frag_cache_drain(struct page *page, unsigned int count);
+struct page *page_frag_alloc_pg(struct page_frag_cache *nc,
+ unsigned int *offset, unsigned int fragsz,
+ gfp_t gfp);
void *__page_frag_alloc_va_align(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask,
unsigned int align_mask);
@@ -105,12 +108,90 @@ static inline void *page_frag_alloc_va_align(struct page_frag_cache *nc,
return __page_frag_alloc_va_align(nc, fragsz, gfp_mask, -align);
}

+static inline unsigned int page_frag_cache_page_offset(const struct page_frag_cache *nc)
+{
+ return __page_frag_cache_page_offset(nc->encoded_va, nc->remaining);
+}
+
static inline void *page_frag_alloc_va(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask)
{
return __page_frag_alloc_va_align(nc, fragsz, gfp_mask, ~0u);
}

+void *page_frag_alloc_va_prepare(struct page_frag_cache *nc, unsigned int *fragsz,
+ gfp_t gfp);
+
+static inline void *page_frag_alloc_va_prepare_align(struct page_frag_cache *nc,
+ unsigned int *fragsz,
+ gfp_t gfp,
+ unsigned int align)
+{
+ WARN_ON_ONCE(!is_power_of_2(align) || align > PAGE_SIZE);
+ nc->remaining = nc->remaining & -align;
+ return page_frag_alloc_va_prepare(nc, fragsz, gfp);
+}
+
+struct page *page_frag_alloc_pg_prepare(struct page_frag_cache *nc,
+ unsigned int *offset,
+ unsigned int *fragsz, gfp_t gfp);
+
+struct page *page_frag_alloc_prepare(struct page_frag_cache *nc,
+ unsigned int *offset,
+ unsigned int *fragsz,
+ void **va, gfp_t gfp);
+
+static inline struct encoded_va *__page_frag_alloc_probe(struct page_frag_cache *nc,
+ unsigned int *offset,
+ unsigned int *fragsz,
+ void **va)
+{
+ struct encoded_va *encoded_va;
+
+ *fragsz = nc->remaining;
+ encoded_va = nc->encoded_va;
+ *offset = __page_frag_cache_page_offset(encoded_va, *fragsz);
+ *va = encoded_page_address(encoded_va) + *offset;
+
+ return encoded_va;
+}
+
+#define page_frag_alloc_probe(nc, offset, fragsz, va) \
+({ \
+ struct page *__page = NULL; \
+ \
+ VM_BUG_ON(!*(fragsz)); \
+ if (likely((nc)->remaining >= *(fragsz))) \
+ __page = virt_to_page(__page_frag_alloc_probe(nc, \
+ offset, \
+ fragsz, \
+ va)); \
+ \
+ __page; \
+})
+
+static inline void page_frag_alloc_commit(struct page_frag_cache *nc,
+ unsigned int fragsz)
+{
+ VM_BUG_ON(fragsz > nc->remaining || !nc->pagecnt_bias);
+ nc->pagecnt_bias--;
+ nc->remaining -= fragsz;
+}
+
+static inline void page_frag_alloc_commit_noref(struct page_frag_cache *nc,
+ unsigned int fragsz)
+{
+ VM_BUG_ON(fragsz > nc->remaining);
+ nc->remaining -= fragsz;
+}
+
+static inline void page_frag_alloc_abort(struct page_frag_cache *nc,
+ unsigned int fragsz)
+{
+ nc->pagecnt_bias++;
+ nc->remaining += fragsz;
+}
+
void page_frag_free_va(void *addr);

#endif
diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
index 7842f0f2f672..cde7f16e589f 100644
--- a/mm/page_frag_cache.c
+++ b/mm/page_frag_cache.c
@@ -60,6 +60,151 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
return page;
}

+static struct page *page_frag_cache_refill(struct page_frag_cache *nc,
+ gfp_t gfp_mask)
+{
+ struct encoded_va *encoded_va = nc->encoded_va;
+ struct page *page;
+
+ if (unlikely(!encoded_va))
+ return __page_frag_cache_refill(nc, gfp_mask);
+
+ page = virt_to_page(encoded_va);
+
+ if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
+ return __page_frag_cache_refill(nc, gfp_mask);
+
+ if (unlikely(encoded_page_pfmemalloc(encoded_va))) {
+ VM_BUG_ON(compound_order(page) !=
+ encoded_page_order(encoded_va));
+ free_unref_page(page, encoded_page_order(encoded_va));
+ return __page_frag_cache_refill(nc, gfp_mask);
+ }
+
+ /* OK, page count is 0, we can safely set it */
+ set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
+
+ /* reset page count bias and remaining of new frag */
+ nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
+ nc->remaining = page_frag_cache_page_size(encoded_va);
+
+ return page;
+}
+
+void *page_frag_alloc_va_prepare(struct page_frag_cache *nc,
+ unsigned int *fragsz, gfp_t gfp)
+{
+ struct encoded_va *encoded_va;
+ unsigned int remaining;
+
+ remaining = nc->remaining;
+ if (unlikely(*fragsz > remaining)) {
+ if (WARN_ON_ONCE(*fragsz > PAGE_SIZE) ||
+ unlikely(!page_frag_cache_refill(nc, gfp)))
+ return NULL;
+
+ remaining = nc->remaining;
+ }
+
+ encoded_va = nc->encoded_va;
+ *fragsz = remaining;
+ return encoded_page_address(encoded_va) +
+ __page_frag_cache_page_offset(encoded_va, remaining);
+}
+EXPORT_SYMBOL(page_frag_alloc_va_prepare);
+
+struct page *page_frag_alloc_pg_prepare(struct page_frag_cache *nc,
+ unsigned int *offset,
+ unsigned int *fragsz, gfp_t gfp)
+{
+ struct encoded_va *encoded_va;
+ unsigned int remaining;
+ struct page *page;
+
+ remaining = nc->remaining;
+ if (unlikely(*fragsz > remaining)) {
+ if (WARN_ON_ONCE(*fragsz > PAGE_SIZE)) {
+ *fragsz = 0;
+ return NULL;
+ }
+
+ page = page_frag_cache_refill(nc, gfp);
+ remaining = nc->remaining;
+ encoded_va = nc->encoded_va;
+ } else {
+ encoded_va = nc->encoded_va;
+ page = virt_to_page(encoded_va);
+ }
+
+ *offset = __page_frag_cache_page_offset(encoded_va, remaining);
+ *fragsz = remaining;
+
+ return page;
+}
+EXPORT_SYMBOL(page_frag_alloc_pg_prepare);
+
+struct page *page_frag_alloc_prepare(struct page_frag_cache *nc,
+ unsigned int *offset,
+ unsigned int *fragsz,
+ void **va, gfp_t gfp)
+{
+ struct encoded_va *encoded_va;
+ unsigned int remaining;
+ struct page *page;
+
+ remaining = nc->remaining;
+ if (unlikely(*fragsz > remaining)) {
+ if (WARN_ON_ONCE(*fragsz > PAGE_SIZE)) {
+ *fragsz = 0;
+ return NULL;
+ }
+
+ page = page_frag_cache_refill(nc, gfp);
+ remaining = nc->remaining;
+ encoded_va = nc->encoded_va;
+ } else {
+ encoded_va = nc->encoded_va;
+ page = virt_to_page(encoded_va);
+ }
+
+ *offset = __page_frag_cache_page_offset(encoded_va, remaining);
+ *fragsz = remaining;
+ *va = encoded_page_address(encoded_va) + *offset;
+
+ return page;
+}
+EXPORT_SYMBOL(page_frag_alloc_prepare);
+
+struct page *page_frag_alloc_pg(struct page_frag_cache *nc,
+ unsigned int *offset, unsigned int fragsz,
+ gfp_t gfp)
+{
+ struct page *page;
+
+ if (unlikely(fragsz > nc->remaining)) {
+ if (WARN_ON_ONCE(fragsz > PAGE_SIZE))
+ return NULL;
+
+ page = page_frag_cache_refill(nc, gfp);
+ if (unlikely(!page))
+ return NULL;
+
+ *offset = 0;
+ } else {
+ struct encoded_va *encoded_va = nc->encoded_va;
+
+ page = virt_to_page(encoded_va);
+ *offset = __page_frag_cache_page_offset(encoded_va,
+ nc->remaining);
+ }
+
+ nc->remaining -= fragsz;
+ nc->pagecnt_bias--;
+
+ return page;
+}
+EXPORT_SYMBOL(page_frag_alloc_pg);
+
void page_frag_cache_drain(struct page_frag_cache *nc)
{
if (!nc->encoded_va)
--
2.30.0