[RFC PATCH v2 1/3] mm/zsmalloc: encode class index in obj value for lockless class lookup
From: Wenchao Hao
Date: Wed May 27 2026 - 08:05:11 EST
Encode the size_class index (class_idx) into the obj value so that
zs_free() can determine the correct size_class without dereferencing
the handle->obj->PFN->zpdesc->zspage->class chain under pool->lock.
class_idx is invariant across page migration (only PFN is rewritten),
so a lockless read of obj always yields a valid class_idx.
The space below the PFN field in obj is over-provisioned on 64-bit
systems, with more bits than obj_idx needs. Split that space into
class_idx and obj_idx subfields:
|<-- _PFN_BITS -->|<-- ZS_OBJ_CLASS_BITS -->|<-- ZS_OBJ_IDX_BITS -->|
+-----------------+-------------------------+-----------------------+
|       PFN       |        class_idx        |        obj_idx        |
+-----------------+-------------------------+-----------------------+
MSB               ^                                               LSB
                  |
                  +-- ZS_OBJ_PFN_SHIFT
The macro layout changes as follows:
Before            After               Meaning
----------------  ------------------  ----------------------------
OBJ_INDEX_BITS    ZS_OBJ_IDX_BITS     width of obj_idx subfield
OBJ_INDEX_MASK    ZS_OBJ_IDX_MASK     mask of obj_idx subfield
(n/a)             ZS_OBJ_CLASS_BITS   width of class_idx subfield
(n/a)             ZS_OBJ_CLASS_MASK   mask of class_idx subfield
(n/a)             ZS_OBJ_PFN_SHIFT    bit offset of PFN in obj
Note that OBJ_INDEX_BITS previously doubled as the PFN shift; that role
now belongs to ZS_OBJ_PFN_SHIFT (= ZS_OBJ_CLASS_BITS + ZS_OBJ_IDX_BITS).
On 32-bit systems there is no spare room for class_idx, so the
encoding is disabled (ZS_OBJ_CLASS_BITS = 0) and the obj layout
remains [PFN | obj_idx].
Signed-off-by: Wenchao Hao <haowenchao@xxxxxxxxxx>
---
mm/zsmalloc.c | 80 ++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 66 insertions(+), 14 deletions(-)
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 63128ddb7959..6b0014b43408 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -67,8 +67,8 @@
#define MAX_POSSIBLE_PHYSMEM_BITS MAX_PHYSMEM_BITS
#else
/*
- * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
- * be PAGE_SHIFT
+ * If this definition of MAX_POSSIBLE_PHYSMEM_BITS is used, ZS_OBJ_PFN_SHIFT
+ * will just be PAGE_SHIFT
*/
#define MAX_POSSIBLE_PHYSMEM_BITS BITS_PER_LONG
#endif
@@ -88,8 +88,27 @@
#define OBJ_TAG_BITS 1
#define OBJ_TAG_MASK OBJ_ALLOCATED_TAG
-#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS)
-#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
+/*
+ * obj is encoded as [PFN | class_idx | obj_idx] within an unsigned long:
+ *
+ * |<-- _PFN_BITS -->|<-- ZS_OBJ_CLASS_BITS -->|<-- ZS_OBJ_IDX_BITS -->|
+ * +-----------------+-------------------------+-----------------------+
+ * |       PFN       |        class_idx        |        obj_idx        |
+ * +-----------------+-------------------------+-----------------------+
+ * MSB               ^                                               LSB
+ *                   |
+ *                   +-- ZS_OBJ_PFN_SHIFT
+ *
+ * Encoding class_idx into obj lets zs_free() locate the size_class
+ * without holding pool->lock; class_idx is invariant across page
+ * migration (only PFN changes), so a lockless read of the obj value
+ * always yields a valid class_idx.
+ *
+ * On 32-bit systems there is no spare room for class_idx, so
+ * ZS_OBJ_CLASS_BITS is 0 and the layout collapses to the original
+ * [PFN | obj_idx] without any ifdef in callers.
+ */
+#define ZS_OBJ_PFN_SHIFT (BITS_PER_LONG - _PFN_BITS)
#define HUGE_BITS 1
#define FULLNESS_BITS 4
@@ -98,9 +117,29 @@
#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(CONFIG_ZSMALLOC_CHAIN_SIZE, UL))
+/*
+ * Reuse the width that struct zspage already reserves for its
+ * class field (zspage->class:CLASS_BITS + 1) for the class_idx
+ * field encoded in obj. On 32-bit there is no spare room, so set
+ * it to 0; the encoded class_idx then folds to a constant 0 and
+ * the layout collapses back to [PFN | obj_idx].
+ */
+#if BITS_PER_LONG >= 64
+#define ZS_OBJ_CLASS_BITS (CLASS_BITS + 1)
+#else
+#define ZS_OBJ_CLASS_BITS 0
+#endif
+#define ZS_OBJ_CLASS_MASK ((_AC(1, UL) << ZS_OBJ_CLASS_BITS) - 1)
+
+#define ZS_OBJ_IDX_BITS (ZS_OBJ_PFN_SHIFT - ZS_OBJ_CLASS_BITS)
+#define ZS_OBJ_IDX_MASK ((_AC(1, UL) << ZS_OBJ_IDX_BITS) - 1)
+
+static_assert(ZS_OBJ_IDX_BITS > 0,
+ "zsmalloc: PFN + class_idx leave no room for obj_idx");
+
/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
#define ZS_MIN_ALLOC_SIZE \
- MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
+ MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> ZS_OBJ_IDX_BITS))
/* each chunk includes extra space to keep handle */
#define ZS_MAX_ALLOC_SIZE PAGE_SIZE
@@ -721,26 +760,38 @@ static struct zpdesc *get_next_zpdesc(struct zpdesc *zpdesc)
static void obj_to_location(unsigned long obj, struct zpdesc **zpdesc,
unsigned int *obj_idx)
{
- *zpdesc = pfn_zpdesc(obj >> OBJ_INDEX_BITS);
- *obj_idx = (obj & OBJ_INDEX_MASK);
+ *zpdesc = pfn_zpdesc(obj >> ZS_OBJ_PFN_SHIFT);
+ *obj_idx = (obj & ZS_OBJ_IDX_MASK);
}
static void obj_to_zpdesc(unsigned long obj, struct zpdesc **zpdesc)
{
- *zpdesc = pfn_zpdesc(obj >> OBJ_INDEX_BITS);
+ *zpdesc = pfn_zpdesc(obj >> ZS_OBJ_PFN_SHIFT);
+}
+
+/*
+ * On 32-bit systems ZS_OBJ_CLASS_BITS is 0 and ZS_OBJ_CLASS_MASK is 0,
+ * so this collapses to a constant 0. No ifdef needed at the call site.
+ */
+static unsigned int obj_to_class_idx(unsigned long obj)
+{
+ return (obj >> ZS_OBJ_IDX_BITS) & ZS_OBJ_CLASS_MASK;
}
/**
- * location_to_obj - get obj value encoded from (<zpdesc>, <obj_idx>)
+ * location_to_obj - encode (<zpdesc>, <obj_idx>, <class_idx>) into obj value
* @zpdesc: zpdesc object resides in zspage
* @obj_idx: object index
+ * @class_idx: size class index; ignored on 32-bit (ZS_OBJ_CLASS_BITS == 0)
*/
-static unsigned long location_to_obj(struct zpdesc *zpdesc, unsigned int obj_idx)
+static unsigned long location_to_obj(struct zpdesc *zpdesc, unsigned int obj_idx,
+ unsigned int class_idx)
{
unsigned long obj;
- obj = zpdesc_pfn(zpdesc) << OBJ_INDEX_BITS;
- obj |= obj_idx & OBJ_INDEX_MASK;
+ obj = zpdesc_pfn(zpdesc) << ZS_OBJ_PFN_SHIFT;
+ obj |= (unsigned long)(class_idx & ZS_OBJ_CLASS_MASK) << ZS_OBJ_IDX_BITS;
+ obj |= obj_idx & ZS_OBJ_IDX_MASK;
return obj;
}
@@ -1276,7 +1327,7 @@ static unsigned long obj_malloc(struct zs_pool *pool,
kunmap_local(vaddr);
mod_zspage_inuse(zspage, 1);
- obj = location_to_obj(m_zpdesc, obj);
+ obj = location_to_obj(m_zpdesc, obj, zspage->class);
record_obj(handle, obj);
return obj;
@@ -1762,7 +1813,8 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
old_obj = handle_to_obj(handle);
obj_to_location(old_obj, &dummy, &obj_idx);
- new_obj = (unsigned long)location_to_obj(newzpdesc, obj_idx);
+ new_obj = location_to_obj(newzpdesc, obj_idx,
+ obj_to_class_idx(old_obj));
record_obj(handle, new_obj);
}
}
--
2.34.1