[RFC PATCH v2 4/7] mm: zswap: remove page reclaim logic from zsmalloc

From: Domenico Cerasuolo
Date: Tue Jun 06 2023 - 10:57:35 EST


With the recent enhancement to zswap enabling direct page writeback, the
need for the shrink code in zsmalloc has become obsolete. As a result,
this commit removes the page reclaim logic from zsmalloc entirely.

Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@xxxxxxxxx>
---
mm/zsmalloc.c | 393 ++------------------------------------------------
1 file changed, 13 insertions(+), 380 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 02f7f414aade..75386283dba0 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -107,21 +107,8 @@
*/
#define OBJ_ALLOCATED_TAG 1

-#ifdef CONFIG_ZPOOL
-/*
- * The second least-significant bit in the object's header identifies if the
- * value stored at the header is a deferred handle from the last reclaim
- * attempt.
- *
- * As noted above, this is valid because we have room for two bits.
- */
-#define OBJ_DEFERRED_HANDLE_TAG 2
-#define OBJ_TAG_BITS 2
-#define OBJ_TAG_MASK (OBJ_ALLOCATED_TAG | OBJ_DEFERRED_HANDLE_TAG)
-#else
#define OBJ_TAG_BITS 1
#define OBJ_TAG_MASK OBJ_ALLOCATED_TAG
-#endif /* CONFIG_ZPOOL */

#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)
#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
@@ -227,12 +214,6 @@ struct link_free {
* Handle of allocated object.
*/
unsigned long handle;
-#ifdef CONFIG_ZPOOL
- /*
- * Deferred handle of a reclaimed object.
- */
- unsigned long deferred_handle;
-#endif
};
};

@@ -250,13 +231,6 @@ struct zs_pool {
/* Compact classes */
struct shrinker shrinker;

-#ifdef CONFIG_ZPOOL
- /* List tracking the zspages in LRU order by most recently added object */
- struct list_head lru;
- struct zpool *zpool;
- const struct zpool_ops *zpool_ops;
-#endif
-
#ifdef CONFIG_ZSMALLOC_STAT
struct dentry *stat_dentry;
#endif
@@ -279,13 +253,6 @@ struct zspage {
unsigned int freeobj;
struct page *first_page;
struct list_head list; /* fullness list */
-
-#ifdef CONFIG_ZPOOL
- /* links the zspage to the lru list in the pool */
- struct list_head lru;
- bool under_reclaim;
-#endif
-
struct zs_pool *pool;
rwlock_t lock;
};
@@ -393,14 +360,7 @@ static void *zs_zpool_create(const char *name, gfp_t gfp,
* different contexts and its caller must provide a valid
* gfp mask.
*/
- struct zs_pool *pool = zs_create_pool(name);
-
- if (pool) {
- pool->zpool = zpool;
- pool->zpool_ops = zpool_ops;
- }
-
- return pool;
+ return zs_create_pool(name);
}

static void zs_zpool_destroy(void *pool)
@@ -422,27 +382,6 @@ static void zs_zpool_free(void *pool, unsigned long handle)
zs_free(pool, handle);
}

-static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries);
-
-static int zs_zpool_shrink(void *pool, unsigned int pages,
- unsigned int *reclaimed)
-{
- unsigned int total = 0;
- int ret = -EINVAL;
-
- while (total < pages) {
- ret = zs_reclaim_page(pool, 8);
- if (ret < 0)
- break;
- total++;
- }
-
- if (reclaimed)
- *reclaimed = total;
-
- return ret;
-}
-
static void *zs_zpool_map(void *pool, unsigned long handle,
enum zpool_mapmode mm)
{
@@ -481,7 +420,7 @@ static struct zpool_driver zs_zpool_driver = {
.malloc_support_movable = true,
.malloc = zs_zpool_malloc,
.free = zs_zpool_free,
- .shrink = zs_zpool_shrink,
+ .shrink = NULL,
.map = zs_zpool_map,
.unmap = zs_zpool_unmap,
.total_size = zs_zpool_total_size,
@@ -884,14 +823,6 @@ static inline bool obj_allocated(struct page *page, void *obj, unsigned long *ph
return obj_tagged(page, obj, phandle, OBJ_ALLOCATED_TAG);
}

-#ifdef CONFIG_ZPOOL
-static bool obj_stores_deferred_handle(struct page *page, void *obj,
- unsigned long *phandle)
-{
- return obj_tagged(page, obj, phandle, OBJ_DEFERRED_HANDLE_TAG);
-}
-#endif
-
static void reset_page(struct page *page)
{
__ClearPageMovable(page);
@@ -922,39 +853,6 @@ static int trylock_zspage(struct zspage *zspage)
return 0;
}

-#ifdef CONFIG_ZPOOL
-static unsigned long find_deferred_handle_obj(struct size_class *class,
- struct page *page, int *obj_idx);
-
-/*
- * Free all the deferred handles whose objects are freed in zs_free.
- */
-static void free_handles(struct zs_pool *pool, struct size_class *class,
- struct zspage *zspage)
-{
- int obj_idx = 0;
- struct page *page = get_first_page(zspage);
- unsigned long handle;
-
- while (1) {
- handle = find_deferred_handle_obj(class, page, &obj_idx);
- if (!handle) {
- page = get_next_page(page);
- if (!page)
- break;
- obj_idx = 0;
- continue;
- }
-
- cache_free_handle(pool, handle);
- obj_idx++;
- }
-}
-#else
-static inline void free_handles(struct zs_pool *pool, struct size_class *class,
- struct zspage *zspage) {}
-#endif
-
static void __free_zspage(struct zs_pool *pool, struct size_class *class,
struct zspage *zspage)
{
@@ -969,9 +867,6 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class,
VM_BUG_ON(get_zspage_inuse(zspage));
VM_BUG_ON(fg != ZS_INUSE_RATIO_0);

- /* Free all deferred handles from zs_free */
- free_handles(pool, class, zspage);
-
next = page = get_first_page(zspage);
do {
VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -1006,9 +901,6 @@ static void free_zspage(struct zs_pool *pool, struct size_class *class,
}

remove_zspage(class, zspage, ZS_INUSE_RATIO_0);
-#ifdef CONFIG_ZPOOL
- list_del(&zspage->lru);
-#endif
__free_zspage(pool, class, zspage);
}

@@ -1054,11 +946,6 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)
off %= PAGE_SIZE;
}

-#ifdef CONFIG_ZPOOL
- INIT_LIST_HEAD(&zspage->lru);
- zspage->under_reclaim = false;
-#endif
-
set_freeobj(zspage, 0);
}

@@ -1525,20 +1412,13 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
/* We completely set up zspage so mark them as movable */
SetZsPageMovable(pool, zspage);
out:
-#ifdef CONFIG_ZPOOL
- /* Add/move zspage to beginning of LRU */
- if (!list_empty(&zspage->lru))
- list_del(&zspage->lru);
- list_add(&zspage->lru, &pool->lru);
-#endif
-
spin_unlock(&pool->lock);

return handle;
}
EXPORT_SYMBOL_GPL(zs_malloc);

-static void obj_free(int class_size, unsigned long obj, unsigned long *handle)
+static void obj_free(int class_size, unsigned long obj)
{
struct link_free *link;
struct zspage *zspage;
@@ -1554,25 +1434,12 @@ static void obj_free(int class_size, unsigned long obj, unsigned long *handle)
vaddr = kmap_atomic(f_page);
link = (struct link_free *)(vaddr + f_offset);

- if (handle) {
-#ifdef CONFIG_ZPOOL
- /* Stores the (deferred) handle in the object's header */
- *handle |= OBJ_DEFERRED_HANDLE_TAG;
- *handle &= ~OBJ_ALLOCATED_TAG;
-
- if (likely(!ZsHugePage(zspage)))
- link->deferred_handle = *handle;
- else
- f_page->index = *handle;
-#endif
- } else {
- /* Insert this object in containing zspage's freelist */
- if (likely(!ZsHugePage(zspage)))
- link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
- else
- f_page->index = 0;
- set_freeobj(zspage, f_objidx);
- }
+ /* Insert this object in containing zspage's freelist */
+ if (likely(!ZsHugePage(zspage)))
+ link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
+ else
+ f_page->index = 0;
+ set_freeobj(zspage, f_objidx);

kunmap_atomic(vaddr);
mod_zspage_inuse(zspage, -1);
@@ -1600,21 +1467,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
class = zspage_class(pool, zspage);

class_stat_dec(class, ZS_OBJS_INUSE, 1);
-
-#ifdef CONFIG_ZPOOL
- if (zspage->under_reclaim) {
- /*
- * Reclaim needs the handles during writeback. It'll free
- * them along with the zspage when it's done with them.
- *
- * Record current deferred handle in the object's header.
- */
- obj_free(class->size, obj, &handle);
- spin_unlock(&pool->lock);
- return;
- }
-#endif
- obj_free(class->size, obj, NULL);
+ obj_free(class->size, obj);

fullness = fix_fullness_group(class, zspage);
if (fullness == ZS_INUSE_RATIO_0)
@@ -1735,18 +1588,6 @@ static unsigned long find_alloced_obj(struct size_class *class,
return find_tagged_obj(class, page, obj_idx, OBJ_ALLOCATED_TAG);
}

-#ifdef CONFIG_ZPOOL
-/*
- * Find object storing a deferred handle in header in zspage from index object
- * and return handle.
- */
-static unsigned long find_deferred_handle_obj(struct size_class *class,
- struct page *page, int *obj_idx)
-{
- return find_tagged_obj(class, page, obj_idx, OBJ_DEFERRED_HANDLE_TAG);
-}
-#endif
-
struct zs_compact_control {
/* Source spage for migration which could be a subpage of zspage */
struct page *s_page;
@@ -1786,7 +1627,7 @@ static void migrate_zspage(struct zs_pool *pool, struct size_class *class,
zs_object_copy(class, free_obj, used_obj);
obj_idx++;
record_obj(handle, free_obj);
- obj_free(class->size, used_obj, NULL);
+ obj_free(class->size, used_obj);
}

/* Remember last position in this iteration */
@@ -1846,7 +1687,7 @@ static int putback_zspage(struct size_class *class, struct zspage *zspage)
return fullness;
}

-#if defined(CONFIG_ZPOOL) || defined(CONFIG_COMPACTION)
+#ifdef CONFIG_COMPACTION
/*
* To prevent zspage destroy during migration, zspage freeing should
* hold locks of all pages in the zspage.
@@ -1888,24 +1729,7 @@ static void lock_zspage(struct zspage *zspage)
}
migrate_read_unlock(zspage);
}
-#endif /* defined(CONFIG_ZPOOL) || defined(CONFIG_COMPACTION) */
-
-#ifdef CONFIG_ZPOOL
-/*
- * Unlocks all the pages of the zspage.
- *
- * pool->lock must be held before this function is called
- * to prevent the underlying pages from migrating.
- */
-static void unlock_zspage(struct zspage *zspage)
-{
- struct page *page = get_first_page(zspage);
-
- do {
- unlock_page(page);
- } while ((page = get_next_page(page)) != NULL);
-}
-#endif /* CONFIG_ZPOOL */
+#endif /* CONFIG_COMPACTION */

static void migrate_lock_init(struct zspage *zspage)
{
@@ -2126,9 +1950,6 @@ static void async_free_zspage(struct work_struct *work)
VM_BUG_ON(fullness != ZS_INUSE_RATIO_0);
class = pool->size_class[class_idx];
spin_lock(&pool->lock);
-#ifdef CONFIG_ZPOOL
- list_del(&zspage->lru);
-#endif
__free_zspage(pool, class, zspage);
spin_unlock(&pool->lock);
}
@@ -2474,10 +2295,6 @@ struct zs_pool *zs_create_pool(const char *name)
*/
zs_register_shrinker(pool);

-#ifdef CONFIG_ZPOOL
- INIT_LIST_HEAD(&pool->lru);
-#endif
-
return pool;

err:
@@ -2520,190 +2337,6 @@ void zs_destroy_pool(struct zs_pool *pool)
}
EXPORT_SYMBOL_GPL(zs_destroy_pool);

-#ifdef CONFIG_ZPOOL
-static void restore_freelist(struct zs_pool *pool, struct size_class *class,
- struct zspage *zspage)
-{
- unsigned int obj_idx = 0;
- unsigned long handle, off = 0; /* off is within-page offset */
- struct page *page = get_first_page(zspage);
- struct link_free *prev_free = NULL;
- void *prev_page_vaddr = NULL;
-
- /* in case no free object found */
- set_freeobj(zspage, (unsigned int)(-1UL));
-
- while (page) {
- void *vaddr = kmap_atomic(page);
- struct page *next_page;
-
- while (off < PAGE_SIZE) {
- void *obj_addr = vaddr + off;
-
- /* skip allocated object */
- if (obj_allocated(page, obj_addr, &handle)) {
- obj_idx++;
- off += class->size;
- continue;
- }
-
- /* free deferred handle from reclaim attempt */
- if (obj_stores_deferred_handle(page, obj_addr, &handle))
- cache_free_handle(pool, handle);
-
- if (prev_free)
- prev_free->next = obj_idx << OBJ_TAG_BITS;
- else /* first free object found */
- set_freeobj(zspage, obj_idx);
-
- prev_free = (struct link_free *)vaddr + off / sizeof(*prev_free);
- /* if last free object in a previous page, need to unmap */
- if (prev_page_vaddr) {
- kunmap_atomic(prev_page_vaddr);
- prev_page_vaddr = NULL;
- }
-
- obj_idx++;
- off += class->size;
- }
-
- /*
- * Handle the last (full or partial) object on this page.
- */
- next_page = get_next_page(page);
- if (next_page) {
- if (!prev_free || prev_page_vaddr) {
- /*
- * There is no free object in this page, so we can safely
- * unmap it.
- */
- kunmap_atomic(vaddr);
- } else {
- /* update prev_page_vaddr since prev_free is on this page */
- prev_page_vaddr = vaddr;
- }
- } else { /* this is the last page */
- if (prev_free) {
- /*
- * Reset OBJ_TAG_BITS bit to last link to tell
- * whether it's allocated object or not.
- */
- prev_free->next = -1UL << OBJ_TAG_BITS;
- }
-
- /* unmap previous page (if not done yet) */
- if (prev_page_vaddr) {
- kunmap_atomic(prev_page_vaddr);
- prev_page_vaddr = NULL;
- }
-
- kunmap_atomic(vaddr);
- }
-
- page = next_page;
- off %= PAGE_SIZE;
- }
-}
-
-static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries)
-{
- int i, obj_idx, ret = 0;
- unsigned long handle;
- struct zspage *zspage;
- struct page *page;
- int fullness;
-
- /* Lock LRU and fullness list */
- spin_lock(&pool->lock);
- if (list_empty(&pool->lru)) {
- spin_unlock(&pool->lock);
- return -EINVAL;
- }
-
- for (i = 0; i < retries; i++) {
- struct size_class *class;
-
- zspage = list_last_entry(&pool->lru, struct zspage, lru);
- list_del(&zspage->lru);
-
- /* zs_free may free objects, but not the zspage and handles */
- zspage->under_reclaim = true;
-
- class = zspage_class(pool, zspage);
- fullness = get_fullness_group(class, zspage);
-
- /* Lock out object allocations and object compaction */
- remove_zspage(class, zspage, fullness);
-
- spin_unlock(&pool->lock);
- cond_resched();
-
- /* Lock backing pages into place */
- lock_zspage(zspage);
-
- obj_idx = 0;
- page = get_first_page(zspage);
- while (1) {
- handle = find_alloced_obj(class, page, &obj_idx);
- if (!handle) {
- page = get_next_page(page);
- if (!page)
- break;
- obj_idx = 0;
- continue;
- }
-
- /*
- * This will write the object and call zs_free.
- *
- * zs_free will free the object, but the
- * under_reclaim flag prevents it from freeing
- * the zspage altogether. This is necessary so
- * that we can continue working with the
- * zspage potentially after the last object
- * has been freed.
- */
- ret = pool->zpool_ops->evict(pool->zpool, handle);
- if (ret)
- goto next;
-
- obj_idx++;
- }
-
-next:
- /* For freeing the zspage, or putting it back in the pool and LRU list. */
- spin_lock(&pool->lock);
- zspage->under_reclaim = false;
-
- if (!get_zspage_inuse(zspage)) {
- /*
- * Fullness went stale as zs_free() won't touch it
- * while the page is removed from the pool. Fix it
- * up for the check in __free_zspage().
- */
- zspage->fullness = ZS_INUSE_RATIO_0;
-
- __free_zspage(pool, class, zspage);
- spin_unlock(&pool->lock);
- return 0;
- }
-
- /*
- * Eviction fails on one of the handles, so we need to restore zspage.
- * We need to rebuild its freelist (and free stored deferred handles),
- * put it back to the correct size class, and add it to the LRU list.
- */
- restore_freelist(pool, class, zspage);
- putback_zspage(class, zspage);
- list_add(&zspage->lru, &pool->lru);
- unlock_zspage(zspage);
- }
-
- spin_unlock(&pool->lock);
- return -EAGAIN;
-}
-#endif /* CONFIG_ZPOOL */
-
static int __init zs_init(void)
{
int ret;
--
2.34.1