Re: [RFC PATCH] zram: introduce writeback_ext interface
From: Sergey Senozhatsky
Date: Mon Mar 24 2025 - 03:09:50 EST
On (25/03/24 15:45), Sergey Senozhatsky wrote:
>
> Alternatively I can just extend writeback and say something like
> "starting from 6.16 it also accepts indices ranges" in zram's
> documentation.
See below.
Maybe this is actually fine and we don't need a new sysfs file.
Subject: [RFC PATCH] zram: modernize writeback interface
Extend page writeback with page indices ranges. The previous interface
could write only one page at a time:
echo page_index=100 > zram0/writeback
...
echo page_index=200 > zram0/writeback
echo page_index=500 > zram0/writeback
...
echo page_index=700 > zram0/writeback
One obvious downside is that it increases the number of syscalls.
Less obvious but significantly more important downside is that when
given only one page to post-process zram cannot perform an optimal
target selection. This becomes critical when writeback_limit is
enabled, because under writeback_limit we want to guarantee the
highest memory savings. The new interface supports multiple page
indices ranges in a single call:
echo page_index_range=100-200 \
page_index_range=500-700 > zram0/writeback
This gives zram a chance to apply an optimal target selection
strategy on each iteration of the writeback loop.
Apart from that the new interface unifies parameters passing and
resembles other "modern" zram device attributes (e.g. recompression),
while the old interface used a mixed scheme: values-less parameters
for mode and a key=value format for page_index. We still support
the "old" value-less format for compatibility reasons.
Signed-off-by: Sergey Senozhatsky <senozhatsky@xxxxxxxxxxxx>
---
Documentation/admin-guide/blockdev/zram.rst | 8 +
drivers/block/zram/zram_drv.c | 321 +++++++++++++-------
2 files changed, 224 insertions(+), 105 deletions(-)
diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index 9bdb30901a93..753543751c28 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -369,6 +369,14 @@ they could write a page index into the interface::
echo "page_index=1251" > /sys/block/zramX/writeback
+Starting from 6.16, this interface supports `page_index_range` parameters
+(multiple ranges can be provided at once), which specify `LOW-HIGH` ranges
+of pages to be written-back. This reduces the number of syscalls, but more
+importantly this enables optimal post-processing target selection strategy.
+Usage example::
+
+ echo "page_index_range=1-100" > /sys/block/zramX/writeback
+
If there are lots of write IO with flash device, potentially, it has
flash wearout problem so that admin needs to design write limitation
to guarantee storage health for entire product life.
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index fda7d8624889..f267d562518c 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -734,114 +734,19 @@ static void read_from_bdev_async(struct zram *zram, struct page *page,
submit_bio(bio);
}
-#define PAGE_WB_SIG "page_index="
-
-#define PAGE_WRITEBACK 0
-#define HUGE_WRITEBACK (1<<0)
-#define IDLE_WRITEBACK (1<<1)
-#define INCOMPRESSIBLE_WRITEBACK (1<<2)
-
-static int scan_slots_for_writeback(struct zram *zram, u32 mode,
- unsigned long nr_pages,
- unsigned long index,
- struct zram_pp_ctl *ctl)
+static int zram_writeback_slots(struct zram *zram, struct zram_pp_ctl *ctl)
{
- for (; nr_pages != 0; index++, nr_pages--) {
- bool ok = true;
-
- zram_slot_lock(zram, index);
- if (!zram_allocated(zram, index))
- goto next;
-
- if (zram_test_flag(zram, index, ZRAM_WB) ||
- zram_test_flag(zram, index, ZRAM_SAME))
- goto next;
-
- if (mode & IDLE_WRITEBACK &&
- !zram_test_flag(zram, index, ZRAM_IDLE))
- goto next;
- if (mode & HUGE_WRITEBACK &&
- !zram_test_flag(zram, index, ZRAM_HUGE))
- goto next;
- if (mode & INCOMPRESSIBLE_WRITEBACK &&
- !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
- goto next;
-
- ok = place_pp_slot(zram, ctl, index);
-next:
- zram_slot_unlock(zram, index);
- if (!ok)
- break;
- }
-
- return 0;
-}
-
-static ssize_t writeback_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
-{
- struct zram *zram = dev_to_zram(dev);
- unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
- struct zram_pp_ctl *ctl = NULL;
+ unsigned long blk_idx = 0;
+ struct page *page = NULL;
struct zram_pp_slot *pps;
- unsigned long index = 0;
- struct bio bio;
struct bio_vec bio_vec;
- struct page *page = NULL;
- ssize_t ret = len;
- int mode, err;
- unsigned long blk_idx = 0;
-
- if (sysfs_streq(buf, "idle"))
- mode = IDLE_WRITEBACK;
- else if (sysfs_streq(buf, "huge"))
- mode = HUGE_WRITEBACK;
- else if (sysfs_streq(buf, "huge_idle"))
- mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
- else if (sysfs_streq(buf, "incompressible"))
- mode = INCOMPRESSIBLE_WRITEBACK;
- else {
- if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
- return -EINVAL;
-
- if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) ||
- index >= nr_pages)
- return -EINVAL;
-
- nr_pages = 1;
- mode = PAGE_WRITEBACK;
- }
-
- down_read(&zram->init_lock);
- if (!init_done(zram)) {
- ret = -EINVAL;
- goto release_init_lock;
- }
-
- /* Do not permit concurrent post-processing actions. */
- if (atomic_xchg(&zram->pp_in_progress, 1)) {
- up_read(&zram->init_lock);
- return -EAGAIN;
- }
-
- if (!zram->backing_dev) {
- ret = -ENODEV;
- goto release_init_lock;
- }
+ struct bio bio;
+ int ret, err;
+ u32 index;
page = alloc_page(GFP_KERNEL);
- if (!page) {
- ret = -ENOMEM;
- goto release_init_lock;
- }
-
- ctl = init_pp_ctl();
- if (!ctl) {
- ret = -ENOMEM;
- goto release_init_lock;
- }
-
- scan_slots_for_writeback(zram, mode, nr_pages, index, ctl);
+ if (!page)
+ return -ENOMEM;
while ((pps = select_pp_slot(ctl))) {
spin_lock(&zram->wb_limit_lock);
@@ -929,10 +834,216 @@ static ssize_t writeback_store(struct device *dev,
if (blk_idx)
free_block_bdev(zram, blk_idx);
-
-release_init_lock:
if (page)
__free_page(page);
+
+ return ret;
+}
+
+#define PAGE_WRITEBACK 0
+#define HUGE_WRITEBACK (1 << 0)
+#define IDLE_WRITEBACK (1 << 1)
+#define INCOMPRESSIBLE_WRITEBACK (1 << 2)
+
+static int parse_page_index(char *val, unsigned long nr_pages,
+ unsigned long *lo, unsigned long *hi)
+{
+ int ret;
+
+ ret = kstrtoul(val, 10, lo);
+ if (ret)
+ return ret;
+ *hi = *lo + 1;
+ if (*lo >= nr_pages || *hi > nr_pages)
+ return -ERANGE;
+ return 0;
+}
+
+static int parse_page_index_range(char *val, unsigned long nr_pages,
+ unsigned long *lo, unsigned long *hi)
+{
+ char *delim;
+ int ret;
+
+ delim = strchr(val, '-');
+ if (!delim)
+ return -EINVAL;
+
+ *delim = 0x00;
+ ret = kstrtoul(val, 10, lo);
+ if (ret)
+ return ret;
+ if (*lo >= nr_pages)
+ return -ERANGE;
+
+ ret = kstrtoul(delim + 1, 10, hi);
+ if (ret)
+ return ret;
+ if (*hi >= nr_pages || *lo > *hi)
+ return -ERANGE;
+ *hi += 1;
+ return 0;
+}
+
+static int parse_mode(char *val, u32 *mode)
+{
+ *mode = 0;
+
+ if (!strcmp(val, "idle"))
+ *mode = IDLE_WRITEBACK;
+ if (!strcmp(val, "huge"))
+ *mode = HUGE_WRITEBACK;
+ if (!strcmp(val, "huge_idle"))
+ *mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
+ if (!strcmp(val, "incompressible"))
+ *mode = INCOMPRESSIBLE_WRITEBACK;
+
+ if (*mode == 0)
+ return -EINVAL;
+ return 0;
+}
+
+static int scan_slots_for_writeback(struct zram *zram, u32 mode,
+ unsigned long lo, unsigned long hi,
+ struct zram_pp_ctl *ctl)
+{
+ u32 index = lo;
+
+ while (index < hi) {
+ bool ok = true;
+
+ zram_slot_lock(zram, index);
+ if (!zram_allocated(zram, index))
+ goto next;
+
+ if (zram_test_flag(zram, index, ZRAM_WB) ||
+ zram_test_flag(zram, index, ZRAM_SAME))
+ goto next;
+
+ if (mode & IDLE_WRITEBACK &&
+ !zram_test_flag(zram, index, ZRAM_IDLE))
+ goto next;
+ if (mode & HUGE_WRITEBACK &&
+ !zram_test_flag(zram, index, ZRAM_HUGE))
+ goto next;
+ if (mode & INCOMPRESSIBLE_WRITEBACK &&
+ !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
+ goto next;
+
+ ok = place_pp_slot(zram, ctl, index);
+next:
+ zram_slot_unlock(zram, index);
+ if (!ok)
+ break;
+ index++;
+ }
+
+ return 0;
+}
+
+static ssize_t writeback_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct zram *zram = dev_to_zram(dev);
+ u64 nr_pages = zram->disksize >> PAGE_SHIFT;
+ unsigned long lo = 0, hi = nr_pages;
+ struct zram_pp_ctl *ctl = NULL;
+ char *args, *param, *val;
+ ssize_t ret = len;
+ int err, mode = 0;
+
+ down_read(&zram->init_lock);
+ if (!init_done(zram)) {
+ up_read(&zram->init_lock);
+ return -EINVAL;
+ }
+
+ /* Do not permit concurrent post-processing actions. */
+ if (atomic_xchg(&zram->pp_in_progress, 1)) {
+ up_read(&zram->init_lock);
+ return -EAGAIN;
+ }
+
+ if (!zram->backing_dev) {
+ ret = -ENODEV;
+ goto release_init_lock;
+ }
+
+ ctl = init_pp_ctl();
+ if (!ctl) {
+ ret = -ENOMEM;
+ goto release_init_lock;
+ }
+
+ args = skip_spaces(buf);
+ while (*args) {
+ args = next_arg(args, ¶m, &val);
+
+ /*
+ * Workaround to support the old writeback interface.
+ *
+ * The old writeback interface has a minor inconsistency and
+ * requires key=value only for page_index parameter, while
+ * writeback mode is a valueless parameter.
+ *
+ * This is not the case anymore and now all parameters are
+ * required to have values, however, we need to support the
+ * legacy writeback interface that's why we check if we can
+ * recognize a valueless parameter as a (legacy) writeback
+ * mode.
+ */
+ if (!val || !*val) {
+ err = parse_mode(param, &mode);
+ if (err) {
+ ret = err;
+ goto release_init_lock;
+ }
+
+ scan_slots_for_writeback(zram, mode, lo, hi, ctl);
+ break;
+ }
+
+ if (!strcmp(param, "type")) {
+ err = parse_mode(val, &mode);
+ if (err) {
+ ret = err;
+ goto release_init_lock;
+ }
+
+ scan_slots_for_writeback(zram, mode, lo, hi, ctl);
+ break;
+ }
+
+ if (!strcmp(param, "page_index")) {
+ err = parse_page_index(val, nr_pages, &lo, &hi);
+ if (err) {
+ ret = err;
+ goto release_init_lock;
+ }
+
+ scan_slots_for_writeback(zram, mode, lo, hi, ctl);
+ break;
+ }
+
+ /* There can be several page index ranges */
+ if (!strcmp(param, "page_index_range")) {
+ err = parse_page_index_range(val, nr_pages, &lo, &hi);
+ if (err) {
+ ret = err;
+ goto release_init_lock;
+ }
+
+ scan_slots_for_writeback(zram, mode, lo, hi, ctl);
+ continue;
+ }
+ }
+
+ err = zram_writeback_slots(zram, ctl);
+ if (err)
+ ret = err;
+
+release_init_lock:
release_pp_ctl(zram, ctl);
atomic_set(&zram->pp_in_progress, 0);
up_read(&zram->init_lock);
--
2.49.0.395.g12beb8f557-goog