[PATCH v4 1/4] mm/zsmalloc: encode class index in obj value for lockless class lookup
From: Wenchao Hao
Date: Tue Jun 09 2026 - 07:40:03 EST
From: Wenchao Hao <haowenchao@xxxxxxxxxx>
Encode the size_class index (class_idx) into the obj value so that
zs_free() can determine the correct size_class without dereferencing
the handle->obj->PFN->zpdesc->zspage->class chain under pool->lock.
class_idx is invariant across page migration (only PFN is rewritten),
so a lockless read of obj always yields a valid class_idx.
Where obj has more bits below the PFN field than obj_idx alone
needs, split that space into class_idx and obj_idx subfields:
  |<-- _PFN_BITS -->|<-- ZS_OBJ_CLASS_BITS -->|<-- ZS_OBJ_IDX_BITS -->|
  +-----------------+-------------------------+-----------------------+
  |       PFN       |        class_idx        |        obj_idx        |
  +-----------------+-------------------------+-----------------------+
  MSB               ^                                               LSB
                    |
                    +-- ZS_OBJ_PFN_SHIFT
The macro layout changes as follows:
  Before            After               Meaning
  ----------------  ------------------  ----------------------------
  OBJ_INDEX_BITS    ZS_OBJ_IDX_BITS     width of obj_idx subfield
  OBJ_INDEX_MASK    ZS_OBJ_IDX_MASK     mask of obj_idx subfield
  (n/a)             ZS_OBJ_CLASS_BITS   width of class_idx subfield
  (n/a)             ZS_OBJ_CLASS_MASK   mask of class_idx subfield
  (n/a)             ZS_OBJ_PFN_SHIFT    bit offset of PFN in obj
ZS_OBJ_CLASS_BITS folds to 0 (and the layout collapses to
[PFN | obj_idx]) when obj has no spare bits, i.e. on 32-bit
or on 64-bit fallback paths where MAX_POSSIBLE_PHYSMEM_BITS ==
BITS_PER_LONG (e.g. UML); zs_free() then falls back to
pool->lock.
Signed-off-by: Wenchao Hao <haowenchao@xxxxxxxxxx>
---
mm/zsmalloc.c | 99 +++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 85 insertions(+), 14 deletions(-)
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 63128ddb7959..f84258d63917 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -67,8 +67,8 @@
#define MAX_POSSIBLE_PHYSMEM_BITS MAX_PHYSMEM_BITS
#else
/*
- * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
- * be PAGE_SHIFT
+ * If this definition of MAX_POSSIBLE_PHYSMEM_BITS is used, ZS_OBJ_PFN_SHIFT
+ * will just be PAGE_SHIFT
*/
#define MAX_POSSIBLE_PHYSMEM_BITS BITS_PER_LONG
#endif
@@ -88,8 +88,23 @@
#define OBJ_TAG_BITS 1
#define OBJ_TAG_MASK OBJ_ALLOCATED_TAG
-#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS)
-#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
+/*
+ * obj is encoded as [PFN | class_idx | obj_idx] within an unsigned long:
+ *
+ *   |<-- _PFN_BITS -->|<-- ZS_OBJ_CLASS_BITS -->|<-- ZS_OBJ_IDX_BITS -->|
+ *   +-----------------+-------------------------+-----------------------+
+ *   |       PFN       |        class_idx        |        obj_idx        |
+ *   +-----------------+-------------------------+-----------------------+
+ *   MSB               ^                                               LSB
+ *                     |
+ *                     +-- ZS_OBJ_PFN_SHIFT
+ *
+ * Encoding class_idx into obj lets zs_free() locate the size_class
+ * without holding pool->lock; class_idx is invariant across page
+ * migration (only PFN changes), so a lockless read of the obj value
+ * always yields a valid class_idx.
+ */
+#define ZS_OBJ_PFN_SHIFT (BITS_PER_LONG - _PFN_BITS)
#define HUGE_BITS 1
#define FULLNESS_BITS 4
@@ -98,9 +113,55 @@
#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(CONFIG_ZSMALLOC_CHAIN_SIZE, UL))
+/*
+ * ceil(log2(ZS_MAX_PAGES_PER_ZSPAGE)) at preprocessor time, for use
+ * in #if below. Kconfig restricts ZSMALLOC_CHAIN_SIZE to [4, 16].
+ */
+#if ZS_MAX_PAGES_PER_ZSPAGE <= 4
+#define ZS_CHAIN_LOG2 2
+#elif ZS_MAX_PAGES_PER_ZSPAGE <= 8
+#define ZS_CHAIN_LOG2 3
+#elif ZS_MAX_PAGES_PER_ZSPAGE <= 16
+#define ZS_CHAIN_LOG2 4
+#else
+#error "ZSMALLOC_CHAIN_SIZE out of expected range [4,16]"
+#endif
+
+/* PAGE_SHIFT - 5 = log2(PAGE_SIZE / 32); 32 = ZS_MIN_ALLOC_SIZE floor. */
+#define ZS_MAX_OBJ_PER_PAGE_LOG2 (PAGE_SHIFT - 5)
+
+/*
+ * obj_idx width that keeps ZS_MIN_ALLOC_SIZE at its 32-byte floor.
+ * Below this, ZS_MIN_ALLOC_SIZE is auto-raised by the MAX(32, ...)
+ * formula -- still correct, but objects are coarser.
+ */
+#define ZS_OBJ_IDX_DENSE_BITS (ZS_CHAIN_LOG2 + ZS_MAX_OBJ_PER_PAGE_LOG2)
+
+/*
+ * Encode class_idx only when obj has spare bits; otherwise
+ * ZS_OBJ_CLASS_BITS folds to 0 (32-bit, or 64-bit UML/fallback).
+ */
+#if BITS_PER_LONG >= 64 && \
+ ZS_OBJ_PFN_SHIFT >= (CLASS_BITS + 1) + ZS_OBJ_IDX_DENSE_BITS
+#define ZS_OBJ_CLASS_BITS (CLASS_BITS + 1)
+#else
+#define ZS_OBJ_CLASS_BITS 0
+#endif
+#define ZS_OBJ_CLASS_MASK ((_AC(1, UL) << ZS_OBJ_CLASS_BITS) - 1)
+
+#define ZS_OBJ_IDX_BITS (ZS_OBJ_PFN_SHIFT - ZS_OBJ_CLASS_BITS)
+#define ZS_OBJ_IDX_MASK ((_AC(1, UL) << ZS_OBJ_IDX_BITS) - 1)
+
+/*
+ * Belt-and-suspenders: the #if above already guarantees this when
+ * class_idx is enabled. Catches future tweaks that bypass it.
+ */
+static_assert(ZS_OBJ_IDX_BITS >= ZS_CHAIN_LOG2,
+ "zsmalloc: ZS_MIN_ALLOC_SIZE would exceed ZS_MAX_ALLOC_SIZE");
+
/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
#define ZS_MIN_ALLOC_SIZE \
- MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
+ MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> ZS_OBJ_IDX_BITS))
/* each chunk includes extra space to keep handle */
#define ZS_MAX_ALLOC_SIZE PAGE_SIZE
@@ -721,26 +782,35 @@ static struct zpdesc *get_next_zpdesc(struct zpdesc *zpdesc)
static void obj_to_location(unsigned long obj, struct zpdesc **zpdesc,
unsigned int *obj_idx)
{
- *zpdesc = pfn_zpdesc(obj >> OBJ_INDEX_BITS);
- *obj_idx = (obj & OBJ_INDEX_MASK);
+ *zpdesc = pfn_zpdesc(obj >> ZS_OBJ_PFN_SHIFT);
+ *obj_idx = (obj & ZS_OBJ_IDX_MASK);
}
static void obj_to_zpdesc(unsigned long obj, struct zpdesc **zpdesc)
{
- *zpdesc = pfn_zpdesc(obj >> OBJ_INDEX_BITS);
+ *zpdesc = pfn_zpdesc(obj >> ZS_OBJ_PFN_SHIFT);
+}
+
+/* Folds to 0 when ZS_OBJ_CLASS_BITS == 0; no ifdef needed at callers. */
+static unsigned int obj_to_class_idx(unsigned long obj)
+{
+ return (obj >> ZS_OBJ_IDX_BITS) & ZS_OBJ_CLASS_MASK;
}
/**
- * location_to_obj - get obj value encoded from (<zpdesc>, <obj_idx>)
+ * location_to_obj - encode (<zpdesc>, <obj_idx>, <class_idx>) into obj value
* @zpdesc: zpdesc object resides in zspage
* @obj_idx: object index
+ * @class_idx: size class index; ignored when ZS_OBJ_CLASS_BITS == 0
*/
-static unsigned long location_to_obj(struct zpdesc *zpdesc, unsigned int obj_idx)
+static unsigned long location_to_obj(struct zpdesc *zpdesc, unsigned int obj_idx,
+ unsigned int class_idx)
{
unsigned long obj;
- obj = zpdesc_pfn(zpdesc) << OBJ_INDEX_BITS;
- obj |= obj_idx & OBJ_INDEX_MASK;
+ obj = zpdesc_pfn(zpdesc) << ZS_OBJ_PFN_SHIFT;
+ obj |= (unsigned long)(class_idx & ZS_OBJ_CLASS_MASK) << ZS_OBJ_IDX_BITS;
+ obj |= obj_idx & ZS_OBJ_IDX_MASK;
return obj;
}
@@ -1276,7 +1346,7 @@ static unsigned long obj_malloc(struct zs_pool *pool,
kunmap_local(vaddr);
mod_zspage_inuse(zspage, 1);
- obj = location_to_obj(m_zpdesc, obj);
+ obj = location_to_obj(m_zpdesc, obj, zspage->class);
record_obj(handle, obj);
return obj;
@@ -1762,7 +1832,8 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
old_obj = handle_to_obj(handle);
obj_to_location(old_obj, &dummy, &obj_idx);
- new_obj = (unsigned long)location_to_obj(newzpdesc, obj_idx);
+ new_obj = location_to_obj(newzpdesc, obj_idx,
+ obj_to_class_idx(old_obj));
record_obj(handle, new_obj);
}
}
--
2.34.1