[RFC PATCH 3/3] zram: rework writeback target selection logic
From: Sergey Senozhatsky
Date: Wed Sep 04 2024 - 09:39:37 EST
Writeback suffers from the same problem as recompression
did before - target slot selection for writeback is just
a simple iteration over zram->table entries (stored pages)
which selects suboptimal targets for writeback. This is
especially problematic for writeback, because we uncompress
objects before writeback so each of them takes 4K out of
limited writeback storage. For example, when we take a
48-byte slot and store it as a 4K object on the writeback device
we only save 48 bytes of memory (released from the zsmalloc pool).
We naturally want to pick the largest objects for writeback,
because then each writeback will release the largest amount
of memory.
This patch applies the same solution and strategy as for
recompression target selection: pp control (post-process)
with 16 groups of candidate pp slots. Slots are assigned to
pp groups based on sizes - the larger the slot the higher the
group index. This gives us lists of candidate slots sorted by
size (in linear time), so that among candidate slots we always
select the largest ones first.
TEST
====
A very simple demonstration: zram is configured with a writeback
device. A limited writeback (wb_limit 2500 pages) is then performed,
and the sizes of the slots that were written back are logged.
You can see that patched zram selects slots for writeback in
a significantly different manner, which leads to higher memory
savings (see column #2 of mm_stat output).
BASE
----
*** initial state of zram device
/sys/block/zram0/mm_stat
1750327296 619765836 631902208 0 631902208 1 0 34278 34278
*** writeback idle wb_limit 2500
/sys/block/zram0/mm_stat
1750327296 617622333 631578624 0 631902208 1 0 34278 34278
Sizes of selected objects for writeback:
... 193 349 46 46 46 46 852 1002 543 162 107 49 34 34 34 ...
PATCHED
-------
*** initial state of zram device
/sys/block/zram0/mm_stat
1750319104 619760957 631992320 0 631992320 1 0 34278 34278
*** writeback idle wb_limit 2500
/sys/block/zram0/mm_stat
1750319104 612672056 626135040 0 631992320 1 0 34278 34278
Sizes of selected objects for writeback:
... 3680 3614 3694 3667 3553 3537 3342 3362 ...
Signed-off-by: Sergey Senozhatsky <senozhatsky@xxxxxxxxxxxx>
---
drivers/block/zram/zram_drv.c | 232 ++++++++++++++++++++++------------
1 file changed, 151 insertions(+), 81 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 998efe3979f8..a384939b2501 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -183,6 +183,69 @@ static void zram_accessed(struct zram *zram, u32 index)
#endif
}
+#if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP
+struct zram_pp_slot { /* one queued post-processing (pp) candidate */
+ unsigned long index; /* zram slot index the candidate refers to */
+ struct list_head entry; /* link in one of zram_pp_ctl's slots[] lists */
+};
+
+#define NUM_PP_GROUPS 17
+
+struct zram_pp_ctl { /* candidate slots, grouped by object size */
+ struct list_head slots[NUM_PP_GROUPS]; /* one list per size group; higher index = larger objects */
+};
+
+static void init_pp_ctl(struct zram_pp_ctl *ctl)
+{
+ u32 idx;
+
+ for (idx = 0; idx < NUM_PP_GROUPS; idx++) /* every group starts out as an empty list */
+ INIT_LIST_HEAD(&ctl->slots[idx]);
+}
+
+static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps)
+{
+ zram_slot_lock(zram, pps->index); /* flag is only touched under the slot lock */
+ if (zram_test_flag(zram, pps->index, ZRAM_PP_SLOT)) /* NOTE(review): flag may already be gone, presumably if the slot was freed meanwhile - confirm */
+ zram_clear_flag(zram, pps->index, ZRAM_PP_SLOT);
+ zram_slot_unlock(zram, pps->index);
+ kfree(pps); /* pps is not unlinked here; callers list_del it first */
+}
+
+static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl)
+{
+ u32 idx;
+
+ for (idx = 0; idx < NUM_PP_GROUPS; idx++) { /* drain every size group */
+ while (!list_empty(&ctl->slots[idx])) {
+ struct zram_pp_slot *pps;
+
+ pps = list_first_entry(&ctl->slots[idx],
+ struct zram_pp_slot,
+ entry);
+ list_del_init(&pps->entry); /* unlink before the node is freed */
+ release_pp_slot(zram, pps); /* clears ZRAM_PP_SLOT and frees pps */
+ }
+ }
+}
+
+static void place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl,
+ struct zram_pp_slot *pps)
+{
+ s32 diff, idx;
+
+ /*
+ * On 4K system this keeps PP slot groups 256 bytes apart. The
+ * higher the group IDX the larger the slot size.
+ */
+ diff = PAGE_SIZE / (NUM_PP_GROUPS - 1);
+ idx = zram_get_obj_size(zram, pps->index) / diff; /* a PAGE_SIZE object maps to the last group */
+ list_add(&pps->entry, &ctl->slots[idx]);
+
+ zram_set_flag(zram, pps->index, ZRAM_PP_SLOT); /* marks the slot as queued; cleared again by release_pp_slot() */
+}
+#endif
+
static inline void update_used_max(struct zram *zram,
const unsigned long pages)
{
@@ -587,11 +650,82 @@ static void read_from_bdev_async(struct zram *zram, struct page *page,
#define IDLE_WRITEBACK (1<<1)
#define INCOMPRESSIBLE_WRITEBACK (1<<2)
+static int scan_slots_for_writeback(struct zram *zram, u32 mode,
+ unsigned long nr_pages,
+ unsigned long index,
+ struct zram_pp_ctl *ctl)
+{
+ struct zram_pp_slot *pps = NULL; /* spare node, reused until a slot is queued */
+
+ for (; nr_pages != 0; index++, nr_pages--) { /* walk nr_pages slots starting at index */
+ if (!pps)
+ pps = kmalloc(sizeof(*pps), GFP_KERNEL);
+ if (!pps)
+ return -ENOMEM; /* slots queued so far stay on ctl */
+
+ INIT_LIST_HEAD(&pps->entry);
+
+ zram_slot_lock(zram, index);
+ if (!zram_allocated(zram, index))
+ goto next;
+
+ if (zram_test_flag(zram, index, ZRAM_WB) ||
+ zram_test_flag(zram, index, ZRAM_SAME) ||
+ zram_test_flag(zram, index, ZRAM_UNDER_WB) ||
+ zram_test_flag(zram, index, ZRAM_PP_SLOT)) /* skip slots carrying any of these flags */
+ goto next;
+
+ if (mode & IDLE_WRITEBACK &&
+ !zram_test_flag(zram, index, ZRAM_IDLE)) /* each requested mode bit must match the slot */
+ goto next;
+ if (mode & HUGE_WRITEBACK &&
+ !zram_test_flag(zram, index, ZRAM_HUGE))
+ goto next;
+ if (mode & INCOMPRESSIBLE_WRITEBACK &&
+ !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
+ goto next;
+
+ pps->index = index;
+ place_pp_slot(zram, ctl, pps); /* called with the slot lock held */
+ pps = NULL; /* node now lives on ctl; allocate a fresh one next time */
+next:
+ zram_slot_unlock(zram, index);
+ }
+
+ kfree(pps); /* drop the unused spare, if any */
+ return 0;
+}
+
+static struct zram_pp_slot *select_slot_for_writeback(struct zram_pp_ctl *ctl)
+{
+ struct zram_pp_slot *pps = NULL;
+ s32 idx = NUM_PP_GROUPS - 1;
+
+ /*
+ * Walk the groups from the largest objects down to group 0, so the
+ * smallest candidates are written back last instead of never.
+ */
+ while (idx >= 0) {
+ pps = list_first_entry_or_null(&ctl->slots[idx],
+ struct zram_pp_slot,
+ entry);
+ if (pps) {
+ list_del_init(&pps->entry); /* unlinked: the caller must release pps */
+ break;
+ }
+
+ idx--;
+ }
+ return pps; /* NULL once every group is empty */
+}
+
static ssize_t writeback_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
struct zram *zram = dev_to_zram(dev);
unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
+ struct zram_pp_slot *pps;
+ struct zram_pp_ctl ctl;
unsigned long index = 0;
struct bio bio;
struct bio_vec bio_vec;
@@ -600,6 +734,8 @@ static ssize_t writeback_store(struct device *dev,
int mode, err;
unsigned long blk_idx = 0;
+ init_pp_ctl(&ctl);
+
if (sysfs_streq(buf, "idle"))
mode = IDLE_WRITEBACK;
else if (sysfs_streq(buf, "huge"))
@@ -637,11 +773,14 @@ static ssize_t writeback_store(struct device *dev,
goto release_init_lock;
}
- for (; nr_pages != 0; index++, nr_pages--) {
+ scan_slots_for_writeback(zram, mode, nr_pages, index, &ctl);
+
+ while ((pps = select_slot_for_writeback(&ctl))) {
spin_lock(&zram->wb_limit_lock);
if (zram->wb_limit_enable && !zram->bd_wb_limit) {
spin_unlock(&zram->wb_limit_lock);
ret = -EIO;
+ release_pp_slot(zram, pps);
break;
}
spin_unlock(&zram->wb_limit_lock);
@@ -650,30 +789,15 @@ static ssize_t writeback_store(struct device *dev,
blk_idx = alloc_block_bdev(zram);
if (!blk_idx) {
ret = -ENOSPC;
+ release_pp_slot(zram, pps);
break;
}
}
+ index = pps->index;
zram_slot_lock(zram, index);
- if (!zram_allocated(zram, index))
+ if (!zram_test_flag(zram, index, ZRAM_PP_SLOT))
goto next;
-
- if (zram_test_flag(zram, index, ZRAM_WB) ||
- zram_test_flag(zram, index, ZRAM_SAME) ||
- zram_test_flag(zram, index, ZRAM_UNDER_WB) ||
- zram_test_flag(zram, index, ZRAM_PP_SLOT))
- goto next;
-
- if (mode & IDLE_WRITEBACK &&
- !zram_test_flag(zram, index, ZRAM_IDLE))
- goto next;
- if (mode & HUGE_WRITEBACK &&
- !zram_test_flag(zram, index, ZRAM_HUGE))
- goto next;
- if (mode & INCOMPRESSIBLE_WRITEBACK &&
- !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
- goto next;
-
/*
* Clearing ZRAM_UNDER_WB is duty of caller.
* IOW, zram_free_page never clear it.
@@ -682,11 +806,14 @@ static ssize_t writeback_store(struct device *dev,
/* Need for hugepage writeback racing */
zram_set_flag(zram, index, ZRAM_IDLE);
zram_slot_unlock(zram, index);
+
if (zram_read_page(zram, page, index, NULL)) {
zram_slot_lock(zram, index);
zram_clear_flag(zram, index, ZRAM_UNDER_WB);
zram_clear_flag(zram, index, ZRAM_IDLE);
zram_slot_unlock(zram, index);
+
+ release_pp_slot(zram, pps);
continue;
}
@@ -705,6 +832,8 @@ static ssize_t writeback_store(struct device *dev,
zram_clear_flag(zram, index, ZRAM_UNDER_WB);
zram_clear_flag(zram, index, ZRAM_IDLE);
zram_slot_unlock(zram, index);
+
+ release_pp_slot(zram, pps);
/*
* BIO errors are not fatal, we continue and simply
* attempt to writeback the remaining objects (pages).
@@ -729,7 +858,7 @@ static ssize_t writeback_store(struct device *dev,
*/
zram_slot_lock(zram, index);
if (!zram_allocated(zram, index) ||
- !zram_test_flag(zram, index, ZRAM_IDLE)) {
+ !zram_test_flag(zram, index, ZRAM_IDLE)) {
zram_clear_flag(zram, index, ZRAM_UNDER_WB);
zram_clear_flag(zram, index, ZRAM_IDLE);
goto next;
@@ -747,12 +876,14 @@ static ssize_t writeback_store(struct device *dev,
spin_unlock(&zram->wb_limit_lock);
next:
zram_slot_unlock(zram, index);
+ release_pp_slot(zram, pps);
}
if (blk_idx)
free_block_bdev(zram, blk_idx);
__free_page(page);
release_init_lock:
+ release_pp_ctl(zram, &ctl);
up_read(&zram->init_lock);
return ret;
@@ -1649,67 +1780,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
}
#ifdef CONFIG_ZRAM_MULTI_COMP
-struct zram_pp_slot {
- unsigned long index;
- struct list_head entry;
-};
-
-#define NUM_PP_GROUPS 17
-
-struct zram_pp_ctl {
- struct list_head slots[NUM_PP_GROUPS];
-};
-
-static void init_pp_ctl(struct zram_pp_ctl *ctl)
-{
- u32 idx;
-
- for (idx = 0; idx < NUM_PP_GROUPS; idx++)
- INIT_LIST_HEAD(&ctl->slots[idx]);
-}
-
-static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps)
-{
- zram_slot_lock(zram, pps->index);
- if (zram_test_flag(zram, pps->index, ZRAM_PP_SLOT))
- zram_clear_flag(zram, pps->index, ZRAM_PP_SLOT);
- zram_slot_unlock(zram, pps->index);
- kfree(pps);
-}
-
-static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl)
-{
- u32 idx;
-
- for (idx = 0; idx < NUM_PP_GROUPS; idx++) {
- while (!list_empty(&ctl->slots[idx])) {
- struct zram_pp_slot *pps;
-
- pps = list_first_entry(&ctl->slots[idx],
- struct zram_pp_slot,
- entry);
- list_del_init(&pps->entry);
- release_pp_slot(zram, pps);
- }
- }
-}
-
-static void place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl,
- struct zram_pp_slot *pps)
-{
- s32 diff, idx;
-
- /*
- * On 4K system this keeps PP slot groups 256 bytes apart. The
- * higher the group IDX the larger the slot size.
- */
- diff = PAGE_SIZE / (NUM_PP_GROUPS - 1);
- idx = zram_get_obj_size(zram, pps->index) / diff;
- list_add(&pps->entry, &ctl->slots[idx]);
-
- zram_set_flag(zram, pps->index, ZRAM_PP_SLOT);
-}
-
#define RECOMPRESS_IDLE (1 << 0)
#define RECOMPRESS_HUGE (1 << 1)
--
2.46.0.469.g59c65b2a67-goog