[PATCH v3 01/20] mm/zsmalloc: add zpdesc memory descriptor for zswap.zpool

From: alexs
Date: Mon Jul 08 2024 - 02:29:00 EST


From: Alex Shi (Tencent) <alexs@xxxxxxxxxx>

The 1st patch introduces the new memory descriptor zpdesc and renames
zspage.first_page to zspage.first_zpdesc; no functional change.

The PG_owner_priv_1 entry is removed from the flag-usage comment since it
was moved to zspage by commit a41ec880aa7b ("zsmalloc: move huge compressed
obj from page to zspage").

The memcg_data member is kept because, as Yosry pointed out:
"When the pages are freed, put_page() -> folio_put() -> __folio_put() will call
mem_cgroup_uncharge(). The latter will call folio_memcg() (which reads
folio->memcg_data) to figure out if uncharging needs to be done.

There are also other similar code paths that will check
folio->memcg_data. It is currently expected to be present for all
folios. So until we have custom code paths per-folio type for
allocation/freeing/etc, we need to keep folio->memcg_data present and
properly initialized."

Originally-by: Hyeonggon Yoo <42.hyeyoo@xxxxxxxxx>
Signed-off-by: Alex Shi (Tencent) <alexs@xxxxxxxxxx>
---
mm/zpdesc.h | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++
mm/zsmalloc.c | 21 ++++++++--------
2 files changed, 76 insertions(+), 11 deletions(-)
create mode 100644 mm/zpdesc.h

diff --git a/mm/zpdesc.h b/mm/zpdesc.h
new file mode 100644
index 000000000000..2dbef231f616
--- /dev/null
+++ b/mm/zpdesc.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* zpdesc.h: zswap.zpool memory descriptor
+ *
+ * Written by Alex Shi (Tencent) <alexs@xxxxxxxxxx>
+ * Hyeonggon Yoo <42.hyeyoo@xxxxxxxxx>
+ */
+#ifndef __MM_ZPDESC_H__
+#define __MM_ZPDESC_H__
+
+/*
+ * struct zpdesc - Memory descriptor for zpool memory, currently used by zsmalloc
+ * @flags: Page flags, PG_private: identifies the first component page
+ * @lru: Indirectly used by page migration
+ * @mops: Used by page migration
+ * @next: Next zpdesc in a zspage in zsmalloc zpool
+ * @handle: For huge zspage in zsmalloc zpool
+ * @zspage: Pointer to zspage in zsmalloc
+ * @memcg_data: Memory Control Group data.
+ *
+ * This struct overlays struct page for now. Do not modify without a good
+ * understanding of the issues.
+ */
+struct zpdesc {
+ unsigned long flags;
+ struct list_head lru;
+ struct movable_operations *mops;
+ union {
+ /* Next zpdesc in a zspage in zsmalloc zpool */
+ struct zpdesc *next;
+ /* For huge zspage in zsmalloc zpool */
+ unsigned long handle;
+ };
+ struct zspage *zspage;
+ unsigned long _zp_pad_1;
+#ifdef CONFIG_MEMCG
+ unsigned long memcg_data;
+#endif
+};
+#define ZPDESC_MATCH(pg, zp) \
+ static_assert(offsetof(struct page, pg) == offsetof(struct zpdesc, zp))
+
+ZPDESC_MATCH(flags, flags);
+ZPDESC_MATCH(lru, lru);
+ZPDESC_MATCH(mapping, mops);
+ZPDESC_MATCH(index, next);
+ZPDESC_MATCH(index, handle);
+ZPDESC_MATCH(private, zspage);
+#ifdef CONFIG_MEMCG
+ZPDESC_MATCH(memcg_data, memcg_data);
+#endif
+#undef ZPDESC_MATCH
+static_assert(sizeof(struct zpdesc) <= sizeof(struct page));
+
+#define zpdesc_page(zp) (_Generic((zp), \
+ const struct zpdesc *: (const struct page *)(zp), \
+ struct zpdesc *: (struct page *)(zp)))
+
+#define zpdesc_folio(zp) (_Generic((zp), \
+ const struct zpdesc *: (const struct folio *)(zp), \
+ struct zpdesc *: (struct folio *)(zp)))
+
+#define page_zpdesc(p) (_Generic((p), \
+ const struct page *: (const struct zpdesc *)(p), \
+ struct page *: (struct zpdesc *)(p)))
+
+#endif
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 5d6581ab7c07..a532851025f9 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -13,20 +13,18 @@

/*
* Following is how we use various fields and flags of underlying
- * struct page(s) to form a zspage.
+ * struct zpdesc(page) to form a zspage.
*
- * Usage of struct page fields:
- * page->private: points to zspage
- * page->index: links together all component pages of a zspage
+ * Usage of struct zpdesc fields:
+ * zpdesc->zspage: points to zspage
+ * zpdesc->next: links together all component pages of a zspage
* For the huge page, this is always 0, so we use this field
* to store handle.
* page->page_type: PG_zsmalloc, lower 16 bit locate the first object
* offset in a subpage of a zspage
*
- * Usage of struct page flags:
+ * Usage of struct zpdesc(page) flags:
* PG_private: identifies the first component page
- * PG_owner_priv_1: identifies the huge component page
- *
*/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -64,6 +62,7 @@
#include <linux/pagemap.h>
#include <linux/fs.h>
#include <linux/local_lock.h>
+#include "zpdesc.h"

#define ZSPAGE_MAGIC 0x58

@@ -253,7 +252,7 @@ struct zspage {
};
unsigned int inuse;
unsigned int freeobj;
- struct page *first_page;
+ struct zpdesc *first_zpdesc;
struct list_head list; /* fullness list */
struct zs_pool *pool;
rwlock_t lock;
@@ -448,7 +447,7 @@ static inline void mod_zspage_inuse(struct zspage *zspage, int val)

static inline struct page *get_first_page(struct zspage *zspage)
{
- struct page *first_page = zspage->first_page;
+ struct page *first_page = zpdesc_page(zspage->first_zpdesc);

VM_BUG_ON_PAGE(!is_first_page(first_page), first_page);
return first_page;
@@ -948,7 +947,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
set_page_private(page, (unsigned long)zspage);
page->index = 0;
if (i == 0) {
- zspage->first_page = page;
+ zspage->first_zpdesc = page_zpdesc(page);
SetPagePrivate(page);
if (unlikely(class->objs_per_zspage == 1 &&
class->pages_per_zspage == 1))
@@ -1324,7 +1323,7 @@ static unsigned long obj_malloc(struct zs_pool *pool,
link->handle = handle | OBJ_ALLOCATED_TAG;
else
/* record handle to page->index */
- zspage->first_page->index = handle | OBJ_ALLOCATED_TAG;
+ zspage->first_zpdesc->handle = handle | OBJ_ALLOCATED_TAG;

kunmap_atomic(vaddr);
mod_zspage_inuse(zspage, 1);
--
2.43.0