[PATCH v7 1/6] zswap: fix writeback lock ordering for zsmalloc

From: Nhat Pham
Date: Mon Nov 28 2022 - 14:16:26 EST


From: Johannes Weiner <hannes@xxxxxxxxxxx>

zswap's customary lock order is tree->lock before pool->lock, because
the tree->lock protects the entries' refcount, and the free callbacks in
the backends acquire their respective pool locks to dispatch the backing
object. zsmalloc's map callback takes the pool lock, so zswap must not
grab the tree->lock while a handle is mapped. This currently only
happens during writeback, which isn't implemented for zsmalloc. In
preparation for it, move the tree->lock section out of the mapped entry
section

Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>
Signed-off-by: Nhat Pham <nphamcs@xxxxxxxxx>
---
mm/zswap.c | 35 +++++++++++++++++++----------------
1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index 3019f0bde194..f6c89049cf70 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -968,6 +968,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
swpentry = zhdr->swpentry; /* here */
tree = zswap_trees[swp_type(swpentry)];
offset = swp_offset(swpentry);
+ zpool_unmap_handle(pool, handle);

/* find and ref zswap entry */
spin_lock(&tree->lock);
@@ -975,20 +976,12 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
if (!entry) {
/* entry was invalidated */
spin_unlock(&tree->lock);
- zpool_unmap_handle(pool, handle);
kfree(tmp);
return 0;
}
spin_unlock(&tree->lock);
BUG_ON(offset != entry->offset);

- src = (u8 *)zhdr + sizeof(struct zswap_header);
- if (!zpool_can_sleep_mapped(pool)) {
- memcpy(tmp, src, entry->length);
- src = tmp;
- zpool_unmap_handle(pool, handle);
- }
-
/* try to allocate swap cache page */
switch (zswap_get_swap_cache_page(swpentry, &page)) {
case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
@@ -1006,6 +999,14 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
dlen = PAGE_SIZE;

+ zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
+ src = (u8 *)zhdr + sizeof(struct zswap_header);
+ if (!zpool_can_sleep_mapped(pool)) {
+ memcpy(tmp, src, entry->length);
+ src = tmp;
+ zpool_unmap_handle(pool, handle);
+ }
+
mutex_lock(acomp_ctx->mutex);
sg_init_one(&input, src, entry->length);
sg_init_table(&output, 1);
@@ -1015,6 +1016,11 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
dlen = acomp_ctx->req->dlen;
mutex_unlock(acomp_ctx->mutex);

+ if (!zpool_can_sleep_mapped(pool))
+ kfree(tmp);
+ else
+ zpool_unmap_handle(pool, handle);
+
BUG_ON(ret);
BUG_ON(dlen != PAGE_SIZE);

@@ -1045,7 +1051,11 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
zswap_entry_put(tree, entry);
spin_unlock(&tree->lock);

- goto end;
+ return ret;
+
+fail:
+ if (!zpool_can_sleep_mapped(pool))
+ kfree(tmp);

/*
* if we get here due to ZSWAP_SWAPCACHE_EXIST
@@ -1054,17 +1064,10 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
* if we free the entry in the following put
* it is also okay to return !0
*/
-fail:
spin_lock(&tree->lock);
zswap_entry_put(tree, entry);
spin_unlock(&tree->lock);

-end:
- if (zpool_can_sleep_mapped(pool))
- zpool_unmap_handle(pool, handle);
- else
- kfree(tmp);
-
return ret;
}

--
2.30.2