[PATCH v4 2/4] mm/zsmalloc: drop pool->lock from zs_free on 64-bit systems

From: Wenchao Hao

Date: Tue Jun 09 2026 - 07:40:27 EST


From: Wenchao Hao <haowenchao@xxxxxxxxxx>

With class_idx encoded in obj, zs_free() can locate the size_class
without holding pool->lock on 64-bit systems. Page migration also
takes class->lock and only rewrites the PFN field of obj, leaving
class_idx unchanged, so zs_free() can:

1. read obj locklessly,
2. lock the size_class derived from obj's class_idx,
3. re-read obj under class->lock to get a stable PFN.

This eliminates the rwlock read-side cacheline bouncing between
zs_free() and migration/compaction on multi-core systems.

Annotate handle_to_obj()/record_obj() with READ_ONCE()/WRITE_ONCE() to
prevent load/store tearing on the lockless read path and silence KCSAN
data race reports.

When ZS_OBJ_CLASS_BITS == 0 (32-bit, or 64-bit with obj too narrow to
hold class_idx), zs_free() keeps pool->lock.

Signed-off-by: Wenchao Hao <haowenchao@xxxxxxxxxx>
---
mm/zsmalloc.c | 75 ++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 60 insertions(+), 15 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index f84258d63917..fe20ab297542 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -21,6 +21,10 @@
* pool->lock
* class->lock
* zspage->lock
+ *
+ * When ZS_OBJ_CLASS_BITS > 0, zs_free() skips pool->lock; it picks
+ * the size_class from obj's encoded class_idx and serializes against
+ * page migration via class->lock.
*/

#include <linux/module.h>
@@ -457,10 +461,13 @@ static void cache_free_zspage(struct zspage *zspage)
kmem_cache_free(zspage_cachep, zspage);
}

-/* class->lock(which owns the handle) synchronizes races */
+/*
+ * Writers are serialized by class->lock; WRITE_ONCE() pairs with READ_ONCE()
+ * in handle_to_obj() because zs_free() may read the handle locklessly.
+ */
static void record_obj(unsigned long handle, unsigned long obj)
{
- *(unsigned long *)handle = obj;
+ WRITE_ONCE(*(unsigned long *)handle, obj);
}

static inline bool __maybe_unused is_first_zpdesc(struct zpdesc *zpdesc)
@@ -817,7 +824,7 @@ static unsigned long location_to_obj(struct zpdesc *zpdesc, unsigned int obj_idx

static unsigned long handle_to_obj(unsigned long handle)
{
- return *(unsigned long *)handle;
+ return READ_ONCE(*(unsigned long *)handle);
}

static inline bool obj_allocated(struct zpdesc *zpdesc, void *obj,
@@ -1451,10 +1458,58 @@ static void obj_free(int class_size, unsigned long obj)
mod_zspage_inuse(zspage, -1);
}

+/*
+ * Resolve @handle to its zspage / size_class and acquire class->lock.
+ *
+ * When class_idx is encoded in obj (ZS_OBJ_CLASS_BITS > 0), it is
+ * invariant under page migration, so the handle can be read locklessly
+ * to pick the size_class. Once class->lock is held migration is
+ * blocked and the handle is re-read to obtain a stable PFN.
+ *
+ * Otherwise (32-bit, or 64-bit configurations such as UML where the
+ * encoding is disabled), fall back to pool->lock for the lookup.
+ */
+#if ZS_OBJ_CLASS_BITS > 0
+static inline void obj_handle_class_lock(struct zs_pool *pool, unsigned long handle,
+ unsigned long *objp, struct zspage **zspagep,
+ struct size_class **classp)
+ __acquires(&(*classp)->lock)
+{
+ struct zpdesc *f_zpdesc;
+ unsigned long obj;
+
+ obj = handle_to_obj(handle);
+ *classp = pool->size_class[obj_to_class_idx(obj)];
+ spin_lock(&(*classp)->lock);
+ /* Re-read under class->lock: PFN is now stable vs migration. */
+ obj = handle_to_obj(handle);
+ obj_to_zpdesc(obj, &f_zpdesc);
+ *zspagep = get_zspage(f_zpdesc);
+ *objp = obj;
+}
+#else
+static inline void obj_handle_class_lock(struct zs_pool *pool, unsigned long handle,
+ unsigned long *objp, struct zspage **zspagep,
+ struct size_class **classp)
+ __acquires(&(*classp)->lock)
+{
+ struct zpdesc *f_zpdesc;
+ unsigned long obj;
+
+ read_lock(&pool->lock);
+ obj = handle_to_obj(handle);
+ obj_to_zpdesc(obj, &f_zpdesc);
+ *zspagep = get_zspage(f_zpdesc);
+ *classp = zspage_class(pool, *zspagep);
+ spin_lock(&(*classp)->lock);
+ read_unlock(&pool->lock);
+ *objp = obj;
+}
+#endif
+
void zs_free(struct zs_pool *pool, unsigned long handle)
{
struct zspage *zspage;
- struct zpdesc *f_zpdesc;
unsigned long obj;
struct size_class *class;
int fullness;
@@ -1462,17 +1517,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
if (IS_ERR_OR_NULL((void *)handle))
return;

- /*
- * The pool->lock protects the race with zpage's migration
- * so it's safe to get the page from handle.
- */
- read_lock(&pool->lock);
- obj = handle_to_obj(handle);
- obj_to_zpdesc(obj, &f_zpdesc);
- zspage = get_zspage(f_zpdesc);
- class = zspage_class(pool, zspage);
- spin_lock(&class->lock);
- read_unlock(&pool->lock);
+ obj_handle_class_lock(pool, handle, &obj, &zspage, &class);

class_stat_sub(class, ZS_OBJS_INUSE, 1);
obj_free(class->size, obj);
--
2.34.1