[RFC PATCH] zswap: add writeback_time_threshold interface to shrink zswap pool

From: Zhongkun He
Date: Wed Oct 11 2023 - 01:11:33 EST


zswap does not have a suitable method to select objects that have not
been accessed for a long time; it just shrinks the pool when the limit
is hit. If the limit is set too high, there is a high probability of
wasting memory in zswap.

This patch adds a new interface, writeback_time_threshold, to shrink the
zswap pool proactively based on a time threshold in seconds, e.g.::

echo 600 > /sys/module/zswap/parameters/writeback_time_threshold

If zswap entries have not been accessed for more than 600 seconds, they
will be written back to swap. If set to 0, all of them will be written back.
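
For illustration, one possible way to exercise and observe the new parameter
(the written_back_pages counter is the existing zswap debugfs statistic and
requires CONFIG_DEBUG_FS; the values below are examples only):

  # write back entries idle for more than 10 minutes
  echo 600 > /sys/module/zswap/parameters/writeback_time_threshold
  # read the current threshold back
  cat /sys/module/zswap/parameters/writeback_time_threshold
  # check how many pages zswap has written back so far
  cat /sys/kernel/debug/zswap/written_back_pages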

Signed-off-by: Zhongkun He <hezhongkun.hzk@xxxxxxxxxxxxx>
---
Documentation/admin-guide/mm/zswap.rst | 9 +++
mm/zswap.c | 76 ++++++++++++++++++++++++++
2 files changed, 85 insertions(+)

diff --git a/Documentation/admin-guide/mm/zswap.rst b/Documentation/admin-guide/mm/zswap.rst
index 45b98390e938..9ffaed26c3c0 100644
--- a/Documentation/admin-guide/mm/zswap.rst
+++ b/Documentation/admin-guide/mm/zswap.rst
@@ -153,6 +153,15 @@ attribute, e. g.::

Setting this parameter to 100 will disable the hysteresis.

+When there is a lot of cold memory in zswap, as judged by the store time of
+its entries, it can be written back proactively to save memory. Users can
+write a writeback time threshold in seconds to enable this, e.g.::
+
+ echo 600 > /sys/module/zswap/parameters/writeback_time_threshold
+
+If zswap entries have not been accessed for more than 600 seconds, they will
+be written back. If set to 0, all of them will be written back.
+
A debugfs interface is provided for various statistic about pool size, number
of pages stored, same-value filled pages and various counters for the reasons
pages are rejected.
diff --git a/mm/zswap.c b/mm/zswap.c
index 083c693602b8..c3a19b56a29b 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -141,6 +141,16 @@ static bool zswap_exclusive_loads_enabled = IS_ENABLED(
CONFIG_ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON);
module_param_named(exclusive_loads, zswap_exclusive_loads_enabled, bool, 0644);

+/* zswap writeback time threshold in seconds */
+static unsigned int zswap_writeback_time_thr;
+static int zswap_writeback_time_thr_param_set(const char *, const struct kernel_param *);
+static const struct kernel_param_ops zswap_writeback_param_ops = {
+ .set = zswap_writeback_time_thr_param_set,
+ .get = param_get_uint,
+};
+module_param_cb(writeback_time_threshold, &zswap_writeback_param_ops,
+ &zswap_writeback_time_thr, 0644);
+
/* Number of zpools in zswap_pool (empirically determined for scalability) */
#define ZSWAP_NR_ZPOOLS 32

@@ -197,6 +207,7 @@ struct zswap_pool {
* value - value of the same-value filled pages which have same content
* objcg - the obj_cgroup that the compressed memory is charged to
* lru - handle to the pool's lru used to evict pages.
+ * sto_time - the time at which the entry was stored in the pool.
*/
struct zswap_entry {
struct rb_node rbnode;
@@ -210,6 +221,7 @@ struct zswap_entry {
};
struct obj_cgroup *objcg;
struct list_head lru;
+ ktime_t sto_time;
};

/*
@@ -288,6 +300,31 @@ static void zswap_update_total_size(void)
zswap_pool_total_size = total;
}

+static void zswap_reclaim_entry_by_timethr(void);
+
+static bool zswap_reach_timethr(struct zswap_pool *pool)
+{
+ struct zswap_entry *entry;
+ ktime_t expire_time = 0;
+ bool ret = false;
+
+ spin_lock(&pool->lru_lock);
+
+ if (list_empty(&pool->lru))
+ goto out;
+
+ entry = list_last_entry(&pool->lru, struct zswap_entry, lru);
+ expire_time = ktime_add(entry->sto_time,
+ ns_to_ktime(zswap_writeback_time_thr * NSEC_PER_SEC));
+
+ if (ktime_after(ktime_get_boottime(), expire_time))
+ ret = true;
+out:
+ spin_unlock(&pool->lru_lock);
+ return ret;
+}
+
+
/*********************************
* zswap entry functions
**********************************/
@@ -395,6 +432,7 @@ static void zswap_free_entry(struct zswap_entry *entry)
else {
spin_lock(&entry->pool->lru_lock);
list_del(&entry->lru);
+ entry->sto_time = 0;
spin_unlock(&entry->pool->lru_lock);
zpool_free(zswap_find_zpool(entry), entry->handle);
zswap_pool_put(entry->pool);
@@ -709,6 +747,28 @@ static void shrink_worker(struct work_struct *w)
zswap_pool_put(pool);
}

+static void zswap_reclaim_entry_by_timethr(void)
+{
+ struct zswap_pool *pool = zswap_pool_current_get();
+ int ret, failures = 0;
+
+ if (!pool)
+ return;
+
+ while (zswap_reach_timethr(pool)) {
+ ret = zswap_reclaim_entry(pool);
+ if (ret) {
+ zswap_reject_reclaim_fail++;
+ if (ret != -EAGAIN)
+ break;
+ if (++failures == MAX_RECLAIM_RETRIES)
+ break;
+ }
+ cond_resched();
+ }
+ zswap_pool_put(pool);
+}
+
static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
{
int i;
@@ -1037,6 +1097,21 @@ static int zswap_enabled_param_set(const char *val,
return ret;
}

+static int zswap_writeback_time_thr_param_set(const char *val,
+ const struct kernel_param *kp)
+{
+ int ret = -ENODEV;
+
+ /* if this is load-time (pre-init) param setting, just return. */
+ if (system_state != SYSTEM_RUNNING)
+ return ret;
+
+ ret = param_set_uint(val, kp);
+ if (!ret)
+ zswap_reclaim_entry_by_timethr();
+ return ret;
+}
+
/*********************************
* writeback code
**********************************/
@@ -1360,6 +1435,7 @@ bool zswap_store(struct folio *folio)
if (entry->length) {
spin_lock(&entry->pool->lru_lock);
list_add(&entry->lru, &entry->pool->lru);
+ entry->sto_time = ktime_get_boottime();
spin_unlock(&entry->pool->lru_lock);
}
spin_unlock(&tree->lock);
--
2.25.1