[PATCH v2 04/79] ssdfs: implement raw device operations

From: Viacheslav Dubeyko

Date: Sun Mar 15 2026 - 22:21:31 EST

Complete patchset is available here:
https://github.com/dubeyko/ssdfs-driver/tree/master/patchset/linux-kernel-6.18.0

Implement raw device operations:
(1) device_name: get device name
(2) device_size: get device size in bytes
(3) open_zone: open zone
(4) reopen_zone: reopen closed zone
(5) close_zone: close zone
(6) read: read from device
(7) read_block: read logical block
(8) read_blocks: read sequence of logical blocks
(9) can_write_block: can we write into logical block?
(10) write_block: write logical block to device
(11) write_blocks: write sequence of logical blocks to device
(12) erase: erase the whole erase block
(13) trim: support of background erase operation
(14) sync: synchronize page cache with device

Signed-off-by: Viacheslav Dubeyko <slava@xxxxxxxxxxx>
---
fs/ssdfs/dev_bdev.c | 1065 ++++++++++++++++++++++++++++++++++
fs/ssdfs/dev_mtd.c | 650 +++++++++++++++++++++
fs/ssdfs/dev_zns.c | 1344 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 3059 insertions(+)
create mode 100644 fs/ssdfs/dev_bdev.c
create mode 100644 fs/ssdfs/dev_mtd.c
create mode 100644 fs/ssdfs/dev_zns.c

diff --git a/fs/ssdfs/dev_bdev.c b/fs/ssdfs/dev_bdev.c
new file mode 100644
index 000000000000..13da78eadd12
--- /dev/null
+++ b/fs/ssdfs/dev_bdev.c
@@ -0,0 +1,1065 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause-Clear
+ *
+ * SSDFS -- SSD-oriented File System.
+ *
+ * fs/ssdfs/dev_bdev.c - Block device access code.
+ *
+ * Copyright (c) 2014-2019 HGST, a Western Digital Company.
+ * http://www.hgst.com/
+ * Copyright (c) 2014-2026 Viacheslav Dubeyko <slava@xxxxxxxxxxx>
+ * http://www.ssdfs.org/
+ *
+ * (C) Copyright 2014-2019, HGST, Inc., All rights reserved.
+ *
+ * Created by HGST, San Jose Research Center, Storage Architecture Group
+ *
+ * Authors: Viacheslav Dubeyko <slava@xxxxxxxxxxx>
+ *
+ * Acknowledgement: Cyril Guyot
+ * Zvonimir Bandic
+ */
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/pagevec.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
+
+#include "peb_mapping_queue.h"
+#include "peb_mapping_table_cache.h"
+#include "folio_vector.h"
+#include "ssdfs.h"
+
+#include <trace/events/ssdfs.h>
+
+#ifdef CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING
+atomic64_t ssdfs_dev_bdev_folio_leaks;
+atomic64_t ssdfs_dev_bdev_memory_leaks;
+atomic64_t ssdfs_dev_bdev_cache_leaks;
+#endif /* CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING */
+
+/*
+ * void ssdfs_dev_bdev_cache_leaks_increment(void *kaddr)
+ * void ssdfs_dev_bdev_cache_leaks_decrement(void *kaddr)
+ * void *ssdfs_dev_bdev_kmalloc(size_t size, gfp_t flags)
+ * void *ssdfs_dev_bdev_kzalloc(size_t size, gfp_t flags)
+ * void *ssdfs_dev_bdev_kcalloc(size_t n, size_t size, gfp_t flags)
+ * void ssdfs_dev_bdev_kfree(void *kaddr)
+ * struct folio *ssdfs_dev_bdev_alloc_folio(gfp_t gfp_mask,
+ * unsigned int order)
+ * struct folio *ssdfs_dev_bdev_add_batch_folio(struct folio_batch *batch,
+ * unsigned int order)
+ * void ssdfs_dev_bdev_free_folio(struct folio *folio)
+ * void ssdfs_dev_bdev_folio_batch_release(struct folio_batch *batch)
+ */
+#ifdef CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING
+ SSDFS_MEMORY_LEAKS_CHECKER_FNS(dev_bdev)
+#else
+ SSDFS_MEMORY_ALLOCATOR_FNS(dev_bdev)
+#endif /* CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING */
+
+/*
+ * ssdfs_dev_bdev_memory_leaks_init() - reset block device leak counters
+ *
+ * Sets the folio, memory and cache leak counters of this file to zero.
+ * The body is empty when CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING is not
+ * defined.
+ */
+void ssdfs_dev_bdev_memory_leaks_init(void)
+{
+#ifdef CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING
+ atomic64_set(&ssdfs_dev_bdev_folio_leaks, 0);
+ atomic64_set(&ssdfs_dev_bdev_memory_leaks, 0);
+ atomic64_set(&ssdfs_dev_bdev_cache_leaks, 0);
+#endif /* CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING */
+}
+
+/*
+ * ssdfs_dev_bdev_check_memory_leaks() - report non-zero leak counters
+ *
+ * Emits one error message for every leak counter of this file (folios,
+ * memory allocations, caches) whose value is not zero. The body is
+ * empty when CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING is not defined.
+ */
+void ssdfs_dev_bdev_check_memory_leaks(void)
+{
+#ifdef CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING
+	s64 leaks;
+
+	leaks = atomic64_read(&ssdfs_dev_bdev_folio_leaks);
+	if (leaks != 0) {
+		SSDFS_ERR("BLOCK DEV: "
+			  "memory leaks include %lld folios\n",
+			  leaks);
+	}
+
+	leaks = atomic64_read(&ssdfs_dev_bdev_memory_leaks);
+	if (leaks != 0) {
+		SSDFS_ERR("BLOCK DEV: "
+			  "memory allocator suffers from %lld leaks\n",
+			  leaks);
+	}
+
+	leaks = atomic64_read(&ssdfs_dev_bdev_cache_leaks);
+	if (leaks != 0) {
+		SSDFS_ERR("BLOCK DEV: "
+			  "caches suffers from %lld leaks\n",
+			  leaks);
+	}
+#endif /* CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING */
+}
+
+/*
+ * Wait queue used by ssdfs_bdev_sync() to sleep until the pending_bios
+ * counter drops to zero; the write paths in this file wake it up.
+ * It is file-scope, so it is not tied to a particular superblock.
+ */
+static DECLARE_WAIT_QUEUE_HEAD(wq);
+
+/*
+ * ssdfs_bdev_device_name() - get device name
+ * @sb: superblock object
+ *
+ * RETURN: pointer to the s_id field of @sb (no copy is made).
+ */
+static const char *ssdfs_bdev_device_name(struct super_block *sb)
+{
+ return sb->s_id;
+}
+
+/*
+ * ssdfs_bdev_device_size() - get partition size in bytes
+ * @sb: superblock object
+ *
+ * RETURN: the value of i_size_read() for the inode that hosts the
+ * mapping of the block device attached to @sb.
+ */
+static __u64 ssdfs_bdev_device_size(struct super_block *sb)
+{
+ return i_size_read(sb->s_bdev->bd_mapping->host);
+}
+
+/*
+ * ssdfs_bdev_open_zone() - open zone (not implemented for this backend)
+ * @sb: superblock object (unused)
+ * @offset: unused
+ *
+ * RETURN: always %-EOPNOTSUPP.
+ */
+static int ssdfs_bdev_open_zone(struct super_block *sb, loff_t offset)
+{
+ return -EOPNOTSUPP;
+}
+
+/*
+ * ssdfs_bdev_reopen_zone() - reopen zone (not implemented for this backend)
+ * @sb: superblock object (unused)
+ * @offset: unused
+ *
+ * RETURN: always %-EOPNOTSUPP.
+ */
+static int ssdfs_bdev_reopen_zone(struct super_block *sb, loff_t offset)
+{
+ return -EOPNOTSUPP;
+}
+
+/*
+ * ssdfs_bdev_close_zone() - close zone (not implemented for this backend)
+ * @sb: superblock object (unused)
+ * @offset: unused
+ *
+ * RETURN: always %-EOPNOTSUPP.
+ */
+static int ssdfs_bdev_close_zone(struct super_block *sb, loff_t offset)
+{
+ return -EOPNOTSUPP;
+}
+
+/*
+ * ssdfs_bdev_bio_alloc() - allocate bio object
+ * @bdev: block device
+ * @nr_iovecs: number of items in biovec
+ * @op: direction of I/O
+ * @gfp_mask: mask of creation flags
+ *
+ * Thin wrapper around bio_alloc() that never hands NULL to the caller.
+ *
+ * RETURN:
+ * [success] - pointer to the allocated bio.
+ * [failure] - ERR_PTR(-ENOMEM).
+ */
+struct bio *ssdfs_bdev_bio_alloc(struct block_device *bdev,
+				 unsigned int nr_iovecs,
+				 unsigned int op,
+				 gfp_t gfp_mask)
+{
+	struct bio *new_bio = bio_alloc(bdev, nr_iovecs, op, gfp_mask);
+
+	if (new_bio)
+		return new_bio;
+
+	SSDFS_ERR("fail to allocate bio\n");
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * ssdfs_bdev_bio_put() - free bio object
+ * @bio: pointer to bio object (may be NULL)
+ *
+ * Calls bio_put() on @bio; a NULL pointer is silently ignored.
+ */
+void ssdfs_bdev_bio_put(struct bio *bio)
+{
+	if (bio)
+		bio_put(bio);
+}
+
+/*
+ * ssdfs_bdev_bio_add_folio() - add folio into bio
+ * @bio: pointer to bio object
+ * @folio: memory folio
+ * @offset: vec entry offset
+ *
+ * The whole folio (folio_size() bytes) is added to @bio.
+ *
+ * RETURN:
+ * [success] - 0.
+ * [failure] - %-ERANGE if bio_add_folio() refuses the folio.
+ */
+int ssdfs_bdev_bio_add_folio(struct bio *bio, struct folio *folio,
+			     unsigned int offset)
+{
+	size_t bytes;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	BUG_ON(!bio || !folio);
+
+	SSDFS_DBG("folio %p, count %d\n",
+		  folio, folio_ref_count(folio));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	bytes = folio_size(folio);
+
+	if (bio_add_folio(bio, folio, bytes, offset))
+		return 0;
+
+	SSDFS_ERR("fail to add folio: "
+		  "offset %u, size %zu\n",
+		  offset, bytes);
+	return -ERANGE;
+}
+
+/*
+ * ssdfs_bdev_sync_folio_request() - submit folio request and wait for it
+ * @sb: superblock object
+ * @folio: memory folio
+ * @offset: offset in bytes from partition's begin
+ * @op: direction of I/O
+ * @op_flags: request op flags
+ *
+ * The request starts at @offset rounded down to a multiple of the
+ * folio size and always covers the whole @folio.
+ *
+ * RETURN:
+ * [success] - 0.
+ * [failure] - error code of bio allocation, %-ERANGE if the folio
+ *             cannot be added to the bio, or the error returned by
+ *             submit_bio_wait().
+ */
+static int ssdfs_bdev_sync_folio_request(struct super_block *sb,
+					 struct folio *folio,
+					 loff_t offset,
+					 unsigned int op, int op_flags)
+{
+	struct bio *bio;
+	loff_t folio_index;
+	sector_t sector;
+	int err = 0;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	BUG_ON(!folio);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	folio_index = div_u64(offset, folio_size(folio));
+	/*
+	 * Convert through sector_t: pgoff_t is unsigned long and would
+	 * truncate the sector number on 32-bit kernels.
+	 */
+	sector = (sector_t)(((u64)folio_index * folio_size(folio)) >>
+							SECTOR_SHIFT);
+
+	bio = ssdfs_bdev_bio_alloc(sb->s_bdev, 1, op, GFP_NOIO);
+	if (IS_ERR_OR_NULL(bio)) {
+		err = !bio ? -ERANGE : PTR_ERR(bio);
+		SSDFS_ERR("fail to allocate bio: err %d\n",
+			  err);
+		return err;
+	}
+
+	bio->bi_iter.bi_sector = sector;
+	bio_set_dev(bio, sb->s_bdev);
+	bio->bi_opf = op | op_flags;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("folio %p, count %d\n",
+		  folio, folio_ref_count(folio));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	err = ssdfs_bdev_bio_add_folio(bio, folio, 0);
+	if (unlikely(err)) {
+		SSDFS_ERR("fail to add folio into bio: "
+			  "err %d\n",
+			  err);
+		goto finish_sync_folio_request;
+	}
+
+	err = submit_bio_wait(bio);
+	if (unlikely(err)) {
+		SSDFS_ERR("fail to process request: "
+			  "err %d\n",
+			  err);
+	}
+
+finish_sync_folio_request:
+	/* the bio is released on both the success and the error path */
+	ssdfs_bdev_bio_put(bio);
+
+	return err;
+}
+
+/*
+ * ssdfs_bdev_sync_batch_request() - submit folio batch request and wait
+ * @sb: superblock object
+ * @batch: folio batch
+ * @offset: offset in bytes from partition's begin
+ * @op: direction of I/O
+ * @op_flags: request op flags
+ *
+ * All folios of @batch are placed into a single bio. The size of the
+ * first folio is used as the block size, and the request starts at
+ * @offset rounded down to a multiple of that size.
+ *
+ * RETURN:
+ * [success] - 0.
+ * [failure] - %-ERANGE for an empty batch or when a folio cannot be
+ *             added to the bio, error code of bio allocation, or the
+ *             error returned by submit_bio_wait().
+ */
+static int ssdfs_bdev_sync_batch_request(struct super_block *sb,
+					 struct folio_batch *batch,
+					 loff_t offset,
+					 unsigned int op, int op_flags)
+{
+	struct bio *bio;
+	loff_t folio_index;
+	sector_t sector;
+	u32 block_size;
+	int i;
+	int err = 0;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	BUG_ON(!batch);
+
+	SSDFS_DBG("offset %llu, op %#x, op_flags %#x\n",
+		  offset, op, op_flags);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	if (folio_batch_count(batch) == 0) {
+		SSDFS_WARN("empty folio batch\n");
+		return -ERANGE;
+	}
+
+#ifdef CONFIG_SSDFS_DEBUG
+	BUG_ON(!batch->folios[0]);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	block_size = folio_size(batch->folios[0]);
+
+	folio_index = div_u64(offset, block_size);
+	/*
+	 * Convert through sector_t: pgoff_t is unsigned long and would
+	 * truncate the sector number on 32-bit kernels.
+	 */
+	sector = (sector_t)(((u64)folio_index * block_size) >> SECTOR_SHIFT);
+
+	bio = ssdfs_bdev_bio_alloc(sb->s_bdev, folio_batch_count(batch),
+				   op, GFP_NOIO);
+	if (IS_ERR_OR_NULL(bio)) {
+		err = !bio ? -ERANGE : PTR_ERR(bio);
+		SSDFS_ERR("fail to allocate bio: err %d\n",
+			  err);
+		return err;
+	}
+
+	bio->bi_iter.bi_sector = sector;
+	bio_set_dev(bio, sb->s_bdev);
+	bio->bi_opf = op | op_flags;
+
+	for (i = 0; i < folio_batch_count(batch); i++) {
+		struct folio *folio = batch->folios[i];
+
+#ifdef CONFIG_SSDFS_DEBUG
+		BUG_ON(!folio);
+
+		SSDFS_DBG("folio %p, count %d\n",
+			  folio, folio_ref_count(folio));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+		err = ssdfs_bdev_bio_add_folio(bio, folio, 0);
+		if (unlikely(err)) {
+			SSDFS_ERR("fail to add folio %d into bio: "
+				  "err %d\n",
+				  i, err);
+			goto finish_sync_batch_request;
+		}
+	}
+
+	err = submit_bio_wait(bio);
+	if (unlikely(err)) {
+		SSDFS_ERR("fail to process request: "
+			  "err %d\n",
+			  err);
+	}
+
+finish_sync_batch_request:
+	/* the bio is released on both the success and the error path */
+	ssdfs_bdev_bio_put(bio);
+
+	return err;
+}
+
+/*
+ * ssdfs_bdev_read_block() - read logical block from the volume
+ * @sb: superblock object
+ * @folio: memory folio
+ * @offset: offset in bytes from partition's begin
+ *
+ * Issues a synchronous read (REQ_OP_READ with REQ_SYNC) into @folio.
+ * The uptodate flag of @folio is set on success and cleared on
+ * failure; the folio is unlocked in both cases.
+ *
+ * RETURN:
+ * [success] - 0.
+ * [failure] - error code of the underlying request.
+ */
+int ssdfs_bdev_read_block(struct super_block *sb, struct folio *folio,
+			  loff_t offset)
+{
+	int res = ssdfs_bdev_sync_folio_request(sb, folio, offset,
+						REQ_OP_READ, REQ_SYNC);
+
+	if (!res) {
+		folio_mark_uptodate(folio);
+		flush_dcache_folio(folio);
+	} else {
+		folio_clear_uptodate(folio);
+	}
+
+	ssdfs_folio_unlock(folio);
+
+	return res;
+}
+
+/*
+ * ssdfs_bdev_read_blocks() - read logical blocks from the volume
+ * @sb: superblock object
+ * @batch: folio batch
+ * @offset: offset in bytes from partition's begin
+ *
+ * Issues one read request (REQ_OP_READ with REQ_RAHEAD) for the whole
+ * @batch and waits for it. Afterwards every folio of the batch gets its
+ * uptodate flag set (success) or cleared (failure) and is unlocked.
+ *
+ * RETURN:
+ * [success] - 0.
+ * [failure] - error code of the underlying request.
+ */
+int ssdfs_bdev_read_blocks(struct super_block *sb, struct folio_batch *batch,
+			   loff_t offset)
+{
+	int res;
+	int idx;
+
+	res = ssdfs_bdev_sync_batch_request(sb, batch, offset,
+					    REQ_OP_READ, REQ_RAHEAD);
+
+	for (idx = 0; idx < folio_batch_count(batch); idx++) {
+		struct folio *cur = batch->folios[idx];
+
+#ifdef CONFIG_SSDFS_DEBUG
+		BUG_ON(!cur);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+		if (!res) {
+			folio_mark_uptodate(cur);
+			flush_dcache_folio(cur);
+		} else {
+			folio_clear_uptodate(cur);
+		}
+
+		ssdfs_folio_unlock(cur);
+	}
+
+	return res;
+}
+
+/*
+ * ssdfs_bdev_read_batch() - read from volume into buffer
+ * @sb: superblock object
+ * @block_size: block size in bytes
+ * @offset: offset in bytes from partition's begin
+ * @len: size of buffer in bytes
+ * @buf: buffer
+ * @read_bytes: pointer on read bytes [out]
+ *
+ * This function allocates temporary folios of @block_size bytes that
+ * cover [@offset, @offset + @len), reads them with one synchronous
+ * request and copies the requested bytes into @buf. The temporary
+ * folios are always released before returning.
+ *
+ * The range must not span more than SSDFS_EXTENT_LEN_MAX blocks.
+ * NOTE(review): folio_batch_add() is used without checking its result,
+ * so this assumes SSDFS_EXTENT_LEN_MAX does not exceed the capacity of
+ * struct folio_batch - confirm.
+ *
+ * RETURN:
+ * [success] - 0, *@read_bytes == @len.
+ * [failure] - error code:
+ *
+ * %-ERANGE - range is too big for one batch or internal error.
+ * %-ENOMEM - fail to allocate memory folio.
+ * %-EIO    - fewer than @len bytes have been read.
+ * Other error codes of the read request or of the copy operation are
+ * returned unchanged.
+ */
+static int ssdfs_bdev_read_batch(struct super_block *sb,
+				 u32 block_size,
+				 loff_t offset, size_t len,
+				 void *buf, size_t *read_bytes)
+{
+	struct folio_batch batch;
+	struct folio *folio;
+	loff_t folio_start, folio_end;
+	u32 folios_count;
+	u32 read_len;
+	loff_t cur_offset = offset;
+	u32 offset_inside_folio;
+	int i;
+	int err = 0;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("sb %p, block_size %u, offset %llu, len %zu, buf %p\n",
+		  sb, block_size, (unsigned long long)offset, len, buf);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	*read_bytes = 0;
+
+	/* index of the first block and of the block right after the range */
+	folio_start = div_u64(offset, block_size);
+	folio_end = div_u64(offset + len + block_size - 1, block_size);
+	folios_count = (u32)(folio_end - folio_start);
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("offset %llu, len %zu, block_size %u, "
+		  "folio_start %llu, folio_end %llu, folios_count %u\n",
+		  (unsigned long long)offset, len, block_size,
+		  (unsigned long long)folio_start,
+		  (unsigned long long)folio_end,
+		  folios_count);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	if (folios_count > SSDFS_EXTENT_LEN_MAX) {
+		SSDFS_WARN("folios_count %u > batch_capacity %u, "
+			   "offset %llu, len %zu, block_size %u, "
+			   "folio_start %llu, folio_end %llu\n",
+			   folios_count, SSDFS_EXTENT_LEN_MAX,
+			   (unsigned long long)offset, len,
+			   block_size,
+			   (unsigned long long)folio_start,
+			   (unsigned long long)folio_end);
+		return -ERANGE;
+	}
+
+	folio_batch_init(&batch);
+
+	for (i = 0; i < folios_count; i++) {
+		folio = ssdfs_dev_bdev_alloc_folio(GFP_KERNEL | __GFP_ZERO,
+						   get_order(block_size));
+		if (IS_ERR_OR_NULL(folio)) {
+			err = (folio == NULL ? -ENOMEM : PTR_ERR(folio));
+			SSDFS_ERR("unable to allocate memory folio\n");
+			goto finish_bdev_read_batch;
+		}
+
+		ssdfs_folio_get(folio);
+		ssdfs_folio_lock(folio);
+		folio_batch_add(&batch, folio);
+
+#ifdef CONFIG_SSDFS_DEBUG
+		SSDFS_DBG("folio %p, count %d\n",
+			  folio, folio_ref_count(folio));
+#endif /* CONFIG_SSDFS_DEBUG */
+	}
+
+	err = ssdfs_bdev_sync_batch_request(sb, &batch, offset,
+					    REQ_OP_READ, REQ_SYNC);
+	if (unlikely(err)) {
+		SSDFS_ERR("fail to read folio batch: err %d\n",
+			  err);
+		goto finish_bdev_read_batch;
+	}
+
+	for (i = 0; i < folio_batch_count(&batch); i++) {
+		folio = batch.folios[i];
+
+#ifdef CONFIG_SSDFS_DEBUG
+		BUG_ON(!folio);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+		if (*read_bytes >= len) {
+			err = -ERANGE;
+			SSDFS_ERR("read_bytes %zu >= len %zu\n",
+				  *read_bytes, len);
+			goto finish_bdev_read_batch;
+		}
+
+		/* only the first folio can start at a non-zero offset */
+		div_u64_rem(cur_offset, block_size, &offset_inside_folio);
+		read_len = min_t(size_t, (size_t)(block_size -
+							offset_inside_folio),
+				 (size_t)(len - *read_bytes));
+
+		err = __ssdfs_memcpy_from_folio(buf, *read_bytes,
+						len,
+						folio, offset_inside_folio,
+						block_size,
+						read_len);
+		if (unlikely(err)) {
+			SSDFS_ERR("fail to copy: err %d\n", err);
+			goto finish_bdev_read_batch;
+		}
+
+		*read_bytes += read_len;
+		cur_offset += read_len;
+	}
+
+finish_bdev_read_batch:
+	for (i = folio_batch_count(&batch) - 1; i >= 0; i--) {
+		folio = batch.folios[i];
+
+		if (folio) {
+			ssdfs_folio_unlock(folio);
+			ssdfs_folio_put(folio);
+
+#ifdef CONFIG_SSDFS_DEBUG
+			SSDFS_DBG("folio %p, count %d\n",
+				  folio, folio_ref_count(folio));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+			ssdfs_dev_bdev_free_folio(folio);
+			batch.folios[i] = NULL;
+		}
+	}
+
+	folio_batch_reinit(&batch);
+
+	/*
+	 * Report a short read only when nothing else went wrong, so that
+	 * an earlier error code (allocation, request, copy) is not
+	 * replaced by -EIO.
+	 */
+	if (!err && *read_bytes != len) {
+		err = -EIO;
+		SSDFS_ERR("read_bytes (%zu) != len (%zu)\n",
+			  *read_bytes, len);
+	}
+
+	return err;
+}
+
+/*
+ * ssdfs_bdev_read() - read from volume into buffer
+ * @sb: superblock object
+ * @block_size: block size in bytes
+ * @offset: offset in bytes from partition's begin
+ * @len: size of buffer in bytes
+ * @buf: buffer
+ *
+ * This function tries to read data on @offset
+ * from partition's begin with @len bytes in size
+ * from the volume into @buf. The request is split into
+ * portions that span at most SSDFS_EXTENT_LEN_MAX blocks each,
+ * because this is the limit enforced by ssdfs_bdev_read_batch().
+ *
+ * RETURN:
+ * [success] - 0 (also for @len == 0, after a warning).
+ * [failure] - error code returned by ssdfs_bdev_read_batch():
+ *
+ * %-EIO - I/O error.
+ */
+int ssdfs_bdev_read(struct super_block *sb, u32 block_size,
+		    loff_t offset, size_t len, void *buf)
+{
+	size_t read_bytes = 0;
+	loff_t cur_offset = offset;
+	u8 *ptr = (u8 *)buf;
+	int err;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("sb %p, block_size %u, offset %llu, len %zu, buf %p\n",
+		  sb, block_size, (unsigned long long)offset, len, buf);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	if (len == 0) {
+		SSDFS_WARN("len is zero\n");
+		return 0;
+	}
+
+	while (read_bytes < len) {
+		size_t iter_read;
+		size_t iter_len;
+		u64 max_iter_len;
+		u32 offset_inside_block;
+
+		/*
+		 * A batch can hold SSDFS_EXTENT_LEN_MAX blocks counted
+		 * from the block that contains cur_offset, so the bytes
+		 * preceding cur_offset in that block are not available.
+		 */
+		div_u64_rem(cur_offset, block_size, &offset_inside_block);
+		max_iter_len = (u64)SSDFS_EXTENT_LEN_MAX * block_size -
+							offset_inside_block;
+		iter_len = min_t(u64, len - read_bytes, max_iter_len);
+
+		err = ssdfs_bdev_read_batch(sb, block_size,
+					    cur_offset,
+					    iter_len,
+					    ptr,
+					    &iter_read);
+		if (unlikely(err)) {
+			SSDFS_ERR("fail to read batch: "
+				  "block_size %u, cur_offset %llu, "
+				  "read_bytes %zu, err %d\n",
+				  block_size,
+				  (unsigned long long)cur_offset,
+				  read_bytes, err);
+			return err;
+		}
+
+		cur_offset += iter_read;
+		ptr += iter_read;
+		read_bytes += iter_read;
+	}
+
+	return 0;
+}
+
+/*
+ * ssdfs_bdev_can_write_block() - check that logical block can be written
+ * @sb: superblock object
+ * @block_size: size of block in bytes
+ * @offset: offset in bytes from partition's begin
+ * @need_check: make check or not?
+ *
+ * This function checks that logical block can be written.
+ * When @need_check is false nothing is read and the block is
+ * reported as writable. Otherwise the block is read and it is
+ * treated as writable only if it is filled entirely with 0xff
+ * or entirely with 0x00.
+ *
+ * RETURN:
+ * [success] - 0, the block can be written.
+ * [failure] - error code:
+ *
+ * %-ENOMEM - fail to allocate memory.
+ * %-EIO - the block contains data.
+ * Error codes of ssdfs_bdev_read() are returned unchanged.
+ */
+int ssdfs_bdev_can_write_block(struct super_block *sb, u32 block_size,
+				loff_t offset, bool need_check)
+{
+	struct ssdfs_fs_info *fsi = SSDFS_FS_I(sb);
+	struct ssdfs_signature *magic;
+	void *buf;
+	bool is_ssdfs_log_found;
+	int err;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("sb %p, offset %llu, block_size %u, need_check %d\n",
+		  sb, (unsigned long long)offset,
+		  block_size, (int)need_check);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	if (!need_check)
+		return 0;
+
+	buf = ssdfs_dev_bdev_kzalloc(block_size, GFP_KERNEL);
+	if (!buf) {
+		/* block_size is unsigned, so print it with %u */
+		SSDFS_ERR("unable to allocate %u bytes\n", block_size);
+		return -ENOMEM;
+	}
+
+	err = ssdfs_bdev_read(sb, block_size, offset, block_size, buf);
+	if (err)
+		goto free_buf;
+
+	/* neither all 0xff nor all 0x00 means the block holds some data */
+	if (memchr_inv(buf, 0xff, block_size)) {
+		if (memchr_inv(buf, 0x00, block_size)) {
+			magic = (struct ssdfs_signature *)buf;
+
+			is_ssdfs_log_found =
+				__is_ssdfs_segment_header_magic_valid(magic) ||
+				is_ssdfs_partial_log_header_magic_valid(magic) ||
+				__is_ssdfs_log_footer_magic_valid(magic);
+
+			/*
+			 * Both branches reject the write; they differ only
+			 * in the amount of debug output.
+			 */
+			if (is_ssdfs_log_found &&
+			    is_ssdfs_uuid_and_fs_ctime_actual(fsi, buf)) {
+#ifdef CONFIG_SSDFS_DEBUG
+				SSDFS_DBG("area with offset %llu contains data\n",
+					  (unsigned long long)offset);
+
+				SSDFS_DBG("PAGE DUMP:\n");
+				print_hex_dump_bytes("", DUMP_PREFIX_OFFSET,
+						     buf,
+						     block_size);
+				SSDFS_DBG("\n");
+#endif /* CONFIG_SSDFS_DEBUG */
+				err = -EIO;
+			} else {
+#ifdef CONFIG_SSDFS_DEBUG
+				SSDFS_DBG("area with offset %llu contains data\n",
+					  (unsigned long long)offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+				err = -EIO;
+			}
+		}
+	}
+
+free_buf:
+	ssdfs_dev_bdev_kfree(buf);
+	return err;
+}
+
+/*
+ * ssdfs_bdev_write_block() - write logical block to volume
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's begin
+ * @folio: memory folio
+ *
+ * This function tries to write from @folio data
+ * on @offset from partition's begin. The write is synchronous
+ * (REQ_OP_WRITE with REQ_SYNC). The folio is locked for the duration
+ * of the request; afterwards it is unlocked and ssdfs_folio_put() is
+ * called on it, whether or not the write succeeded. On the %-EROFS
+ * path the folio is not touched at all.
+ *
+ * RETURN:
+ * [success] - 0, the folio is marked clean and uptodate.
+ * [failure] - error code:
+ *
+ * %-EROFS - file system in RO mode.
+ * Error codes of the underlying request are returned unchanged.
+ */
+int ssdfs_bdev_write_block(struct super_block *sb, loff_t offset,
+ struct folio *folio)
+{
+ struct ssdfs_fs_info *fsi = SSDFS_FS_I(sb);
+#ifdef CONFIG_SSDFS_DEBUG
+ u32 remainder;
+#endif /* CONFIG_SSDFS_DEBUG */
+ int err = 0;
+
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("sb %p, offset %llu, folio %p\n",
+ sb, offset, folio);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ if (sb->s_flags & SB_RDONLY) {
+ SSDFS_WARN("unable to write on RO file system\n");
+ return -EROFS;
+ }
+
+#ifdef CONFIG_SSDFS_DEBUG
+ /*
+ * Debug-only contract checks: the folio must fit into the device,
+ * @offset must be aligned to the folio size, and the folio must be
+ * dirty and not locked yet.
+ */
+ BUG_ON(!folio);
+ BUG_ON((offset >= ssdfs_bdev_device_size(sb)) ||
+ (folio_size(folio) > (ssdfs_bdev_device_size(sb) - offset)));
+ div_u64_rem((u64)offset, (u64)folio_size(folio), &remainder);
+ BUG_ON(remainder);
+ BUG_ON(!folio_test_dirty(folio));
+ BUG_ON(folio_test_locked(folio));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ ssdfs_folio_lock(folio);
+ /* accounted so that ssdfs_bdev_sync() can wait for this write */
+ atomic_inc(&fsi->pending_bios);
+
+ err = ssdfs_bdev_sync_folio_request(sb, folio, offset,
+ REQ_OP_WRITE, REQ_SYNC);
+ if (err) {
+ SSDFS_ERR("failed to write (err %d): offset %llu\n",
+ err, (unsigned long long)offset);
+ } else {
+ ssdfs_clear_dirty_folio(folio);
+ folio_mark_uptodate(folio);
+ }
+
+ ssdfs_folio_unlock(folio);
+ ssdfs_folio_put(folio);
+
+#ifdef CONFIG_SSDFS_DEBUG
+ /*
+ * NOTE(review): the folio is dereferenced after ssdfs_folio_put();
+ * confirm that the caller still holds a reference here.
+ */
+ SSDFS_DBG("folio %p, count %d\n",
+ folio, folio_ref_count(folio));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ /* the last pending write wakes up the waiters of ssdfs_bdev_sync() */
+ if (atomic_dec_and_test(&fsi->pending_bios))
+ wake_up_all(&wq);
+
+ return err;
+}
+
+/*
+ * ssdfs_bdev_write_blocks() - write batch on volume
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's begin
+ * @batch: memory folios batch
+ *
+ * This function tries to write from @batch data
+ * on @offset from partition's beginning. All folios are locked,
+ * written with one synchronous request (REQ_OP_WRITE with REQ_SYNC),
+ * then unlocked; ssdfs_folio_put() is called on every folio whether or
+ * not the write succeeded. On the %-EROFS and empty batch paths the
+ * folios are not touched at all.
+ *
+ * RETURN:
+ * [success] - 0 (also for an empty batch, after a warning); every
+ *             folio is marked clean and uptodate.
+ * [failure] - error code:
+ *
+ * %-EROFS - file system in RO mode.
+ * Error codes of the underlying request are returned unchanged.
+ */
+int ssdfs_bdev_write_blocks(struct super_block *sb, loff_t offset,
+ struct folio_batch *batch)
+{
+ struct ssdfs_fs_info *fsi = SSDFS_FS_I(sb);
+ struct folio *folio;
+ int i;
+#ifdef CONFIG_SSDFS_DEBUG
+ u32 remainder;
+#endif /* CONFIG_SSDFS_DEBUG */
+ int err = 0;
+
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("sb %p, offset %llu, batch %p\n",
+ sb, offset, batch);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ if (sb->s_flags & SB_RDONLY) {
+ SSDFS_WARN("unable to write on RO file system\n");
+ return -EROFS;
+ }
+
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!batch);
+ BUG_ON(offset >= ssdfs_bdev_device_size(sb));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ if (folio_batch_count(batch) == 0) {
+ SSDFS_WARN("empty batch\n");
+ return 0;
+ }
+
+#ifdef CONFIG_SSDFS_DEBUG
+ /* @offset must be aligned to the size of the first folio */
+ div_u64_rem((u64)offset, (u64)folio_size(batch->folios[0]), &remainder);
+ BUG_ON(remainder);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ /* lock every folio before the request is submitted */
+ for (i = 0; i < folio_batch_count(batch); i++) {
+ folio = batch->folios[i];
+
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!folio);
+
+ SSDFS_DBG("folio_index %lu, folio_size %zu, "
+ "folio_dirty %#x\n",
+ folio->index, folio_size(folio),
+ folio_test_dirty(folio));
+
+ BUG_ON(!folio_test_dirty(folio));
+ BUG_ON(folio_test_locked(folio));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ ssdfs_folio_lock(folio);
+ }
+
+ /* accounted so that ssdfs_bdev_sync() can wait for this write */
+ atomic_inc(&fsi->pending_bios);
+
+ err = ssdfs_bdev_sync_batch_request(sb, batch, offset,
+ REQ_OP_WRITE, REQ_SYNC);
+
+ /* the request result applies to all folios of the batch */
+ for (i = 0; i < folio_batch_count(batch); i++) {
+ folio = batch->folios[i];
+
+ if (err) {
+ SSDFS_ERR("failed to write (err %d): "
+ "folio_index %llu\n",
+ err,
+ (unsigned long long)folio->index);
+ } else {
+ ssdfs_clear_dirty_folio(folio);
+ folio_mark_uptodate(folio);
+ }
+
+ ssdfs_folio_unlock(folio);
+ ssdfs_folio_put(folio);
+
+#ifdef CONFIG_SSDFS_DEBUG
+ /*
+ * NOTE(review): the folio is dereferenced after ssdfs_folio_put();
+ * confirm that the caller still holds a reference here.
+ */
+ SSDFS_DBG("folio %p, count %d\n",
+ folio, folio_ref_count(folio));
+#endif /* CONFIG_SSDFS_DEBUG */
+ }
+
+ /* the last pending write wakes up the waiters of ssdfs_bdev_sync() */
+ if (atomic_dec_and_test(&fsi->pending_bios))
+ wake_up_all(&wq);
+
+ return err;
+}
+
+/*
+ * ssdfs_bdev_support_discard() - check that block device supports discard
+ * @bdev: block device
+ *
+ * RETURN: true if the device reports a non-zero discard limit or
+ * if it is a zoned device, false otherwise.
+ */
+static inline bool ssdfs_bdev_support_discard(struct block_device *bdev)
+{
+	if (bdev_max_discard_sectors(bdev))
+		return true;
+
+	return bdev_is_zoned(bdev);
+}
+
+/*
+ * ssdfs_bdev_trim() - initiate background erase operation
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's begin
+ * @len: size in bytes
+ *
+ * This function tries to erase the range. If the device passes
+ * ssdfs_bdev_support_discard(), a secure erase is issued first;
+ * when that fails, or when the device does not pass the check,
+ * the range is zeroed out instead.
+ *
+ * RETURN:
+ * [success] - 0.
+ * [failure] - error code:
+ *
+ * %-EROFS - file system in RO mode.
+ * %-ERANGE - @len is not a multiple of the erase block size or
+ *            the range is empty.
+ * Error codes of blkdev_issue_zeroout() are returned unchanged.
+ */
+static int ssdfs_bdev_trim(struct super_block *sb, loff_t offset, size_t len)
+{
+	struct ssdfs_fs_info *fsi = SSDFS_FS_I(sb);
+	u32 erase_size = fsi->erasesize;
+	loff_t page_start, page_end;
+	u64 pages_count;
+	u32 remainder;
+	sector_t start_sector;
+	sector_t sectors_count;
+	int err;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("sb %p, offset %llu, len %zu\n",
+		  sb, (unsigned long long)offset, len);
+
+	div_u64_rem((u64)len, (u64)erase_size, &remainder);
+	BUG_ON(remainder);
+	div_u64_rem((u64)offset, (u64)erase_size, &remainder);
+	BUG_ON(remainder);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	if (sb->s_flags & SB_RDONLY)
+		return -EROFS;
+
+	div_u64_rem((u64)len, (u64)erase_size, &remainder);
+	if (remainder) {
+		SSDFS_WARN("len %llu, erase_size %u, remainder %u\n",
+			   (unsigned long long)len,
+			   erase_size, remainder);
+		return -ERANGE;
+	}
+
+	page_start = offset >> PAGE_SHIFT;
+	page_end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	pages_count = (u64)(page_end - page_start);
+
+	if (pages_count == 0) {
+		SSDFS_WARN("pages_count equals to zero\n");
+		return -ERANGE;
+	}
+
+	/*
+	 * Widen to sector_t before shifting: a 32-bit page count
+	 * shifted in 32-bit arithmetic overflows for large ranges.
+	 */
+	start_sector = (sector_t)page_start <<
+					(PAGE_SHIFT - SSDFS_SECTOR_SHIFT);
+	sectors_count = (sector_t)pages_count <<
+					(PAGE_SHIFT - SSDFS_SECTOR_SHIFT);
+
+	/* placeholder that routes to zeroout when no erase is attempted */
+	err = -EOPNOTSUPP;
+
+	if (ssdfs_bdev_support_discard(sb->s_bdev)) {
+		err = blkdev_issue_secure_erase(sb->s_bdev,
+						start_sector, sectors_count,
+						GFP_NOFS);
+	}
+
+	/* fall back to zeroing if the erase was skipped or has failed */
+	if (err) {
+		err = blkdev_issue_zeroout(sb->s_bdev,
+					   start_sector, sectors_count,
+					   GFP_NOFS, 0);
+	}
+
+	if (unlikely(err)) {
+		SSDFS_ERR("fail to discard: "
+			  "start_sector %llu, sectors_count %llu, "
+			  "err %d\n",
+			  (unsigned long long)start_sector,
+			  (unsigned long long)sectors_count, err);
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * ssdfs_bdev_erase() - make erase operation
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's begin
+ * @len: size in bytes
+ *
+ * This function is a direct call of ssdfs_bdev_trim() with the same
+ * arguments.
+ *
+ * RETURN:
+ * [success] - 0.
+ * [failure] - error code returned by ssdfs_bdev_trim().
+ */
+static int ssdfs_bdev_erase(struct super_block *sb, loff_t offset, size_t len)
+{
+ return ssdfs_bdev_trim(sb, offset, len);
+}
+
+/*
+ * ssdfs_bdev_peb_isbad() - check that PEB is bad
+ * @sb: superblock object (unused)
+ * @offset: offset in bytes from partition's begin (unused)
+ *
+ * No check is performed for this backend.
+ *
+ * RETURN: always 0.
+ */
+static int ssdfs_bdev_peb_isbad(struct super_block *sb, loff_t offset)
+{
+ /* do nothing */
+ return 0;
+}
+
+/*
+ * ssdfs_bdev_mark_peb_bad() - mark PEB as bad
+ * @sb: superblock object (unused)
+ * @offset: offset in bytes from partition's begin (unused)
+ *
+ * Nothing is recorded for this backend.
+ *
+ * RETURN: always 0.
+ */
+static int ssdfs_bdev_mark_peb_bad(struct super_block *sb, loff_t offset)
+{
+ /* do nothing */
+ return 0;
+}
+
+/*
+ * ssdfs_bdev_sync() - make sync operation
+ * @sb: superblock object
+ *
+ * Sleeps until the pending_bios counter of the file system info
+ * object is zero. No flush request is issued by this function itself.
+ */
+static void ssdfs_bdev_sync(struct super_block *sb)
+{
+ struct ssdfs_fs_info *fsi = SSDFS_FS_I(sb);
+
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("device %s\n", sb->s_id);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ /* woken up by the write paths once the counter drops to zero */
+ wait_event(wq, atomic_read(&fsi->pending_bios) == 0);
+}
+
+/*
+ * Device operations of the block device backend.
+ * The zone operations always return -EOPNOTSUPP, the bad PEB
+ * operations always return 0, and erase is implemented through trim.
+ */
+const struct ssdfs_device_ops ssdfs_bdev_devops = {
+ .device_name = ssdfs_bdev_device_name,
+ .device_size = ssdfs_bdev_device_size,
+ .open_zone = ssdfs_bdev_open_zone,
+ .reopen_zone = ssdfs_bdev_reopen_zone,
+ .close_zone = ssdfs_bdev_close_zone,
+ .read = ssdfs_bdev_read,
+ .read_block = ssdfs_bdev_read_block,
+ .read_blocks = ssdfs_bdev_read_blocks,
+ .can_write_block = ssdfs_bdev_can_write_block,
+ .write_block = ssdfs_bdev_write_block,
+ .write_blocks = ssdfs_bdev_write_blocks,
+ .erase = ssdfs_bdev_erase,
+ .trim = ssdfs_bdev_trim,
+ .peb_isbad = ssdfs_bdev_peb_isbad,
+ .mark_peb_bad = ssdfs_bdev_mark_peb_bad,
+ .sync = ssdfs_bdev_sync,
+};
diff --git a/fs/ssdfs/dev_mtd.c b/fs/ssdfs/dev_mtd.c
new file mode 100644
index 000000000000..ccb79c7f81bf
--- /dev/null
+++ b/fs/ssdfs/dev_mtd.c
@@ -0,0 +1,650 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause-Clear
+ *
+ * SSDFS -- SSD-oriented File System.
+ *
+ * fs/ssdfs/dev_mtd.c - MTD device access code.
+ *
+ * Copyright (c) 2014-2019 HGST, a Western Digital Company.
+ * http://www.hgst.com/
+ * Copyright (c) 2014-2026 Viacheslav Dubeyko <slava@xxxxxxxxxxx>
+ * http://www.ssdfs.org/
+ *
+ * (C) Copyright 2014-2019, HGST, Inc., All rights reserved.
+ *
+ * Created by HGST, San Jose Research Center, Storage Architecture Group
+ *
+ * Authors: Viacheslav Dubeyko <slava@xxxxxxxxxxx>
+ *
+ * Acknowledgement: Cyril Guyot
+ * Zvonimir Bandic
+ */
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/super.h>
+#include <linux/pagevec.h>
+
+#include "peb_mapping_queue.h"
+#include "peb_mapping_table_cache.h"
+#include "folio_vector.h"
+#include "ssdfs.h"
+
+#include <trace/events/ssdfs.h>
+
+/*
+ * Leak counters of this file. They are zeroed by
+ * ssdfs_dev_mtd_memory_leaks_init() and reported by
+ * ssdfs_dev_mtd_check_memory_leaks().
+ */
+#ifdef CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING
+atomic64_t ssdfs_dev_mtd_folio_leaks;
+atomic64_t ssdfs_dev_mtd_memory_leaks;
+atomic64_t ssdfs_dev_mtd_cache_leaks;
+#endif /* CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING */
+
+/*
+ * Helpers with the dev_mtd prefix (presumably the ones that
+ * the macro invocation below provides for this file):
+ *
+ * void ssdfs_dev_mtd_cache_leaks_increment(void *kaddr)
+ * void ssdfs_dev_mtd_cache_leaks_decrement(void *kaddr)
+ * void *ssdfs_dev_mtd_kmalloc(size_t size, gfp_t flags)
+ * void *ssdfs_dev_mtd_kzalloc(size_t size, gfp_t flags)
+ * void *ssdfs_dev_mtd_kcalloc(size_t n, size_t size, gfp_t flags)
+ * void ssdfs_dev_mtd_kfree(void *kaddr)
+ * struct folio *ssdfs_dev_mtd_alloc_folio(gfp_t gfp_mask,
+ * unsigned int order)
+ * struct folio *ssdfs_dev_mtd_add_batch_folio(struct folio_batch *batch,
+ * unsigned int order)
+ * void ssdfs_dev_mtd_free_folio(struct folio *folio)
+ * void ssdfs_dev_mtd_folio_batch_release(struct folio_batch *batch)
+ */
+#ifdef CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING
+ SSDFS_MEMORY_LEAKS_CHECKER_FNS(dev_mtd)
+#else
+ SSDFS_MEMORY_ALLOCATOR_FNS(dev_mtd)
+#endif /* CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING */
+
+/*
+ * ssdfs_dev_mtd_memory_leaks_init() - reset leak counters of this file
+ *
+ * Sets the folio, memory and cache leak counters to zero.
+ * The body is empty without CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING.
+ */
+void ssdfs_dev_mtd_memory_leaks_init(void)
+{
+#ifdef CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING
+ atomic64_set(&ssdfs_dev_mtd_folio_leaks, 0);
+ atomic64_set(&ssdfs_dev_mtd_memory_leaks, 0);
+ atomic64_set(&ssdfs_dev_mtd_cache_leaks, 0);
+#endif /* CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING */
+}
+
+/*
+ * ssdfs_dev_mtd_check_memory_leaks() - report leaks of this file
+ *
+ * Every leak counter that is not zero is reported through SSDFS_ERR().
+ * The body is empty without CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING.
+ */
+void ssdfs_dev_mtd_check_memory_leaks(void)
+{
+#ifdef CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING
+	s64 leaks;
+
+	leaks = atomic64_read(&ssdfs_dev_mtd_folio_leaks);
+	if (leaks != 0) {
+		SSDFS_ERR("MTD DEV: "
+			  "memory leaks include %lld folios\n",
+			  leaks);
+	}
+
+	leaks = atomic64_read(&ssdfs_dev_mtd_memory_leaks);
+	if (leaks != 0) {
+		SSDFS_ERR("MTD DEV: "
+			  "memory allocator suffers from %lld leaks\n",
+			  leaks);
+	}
+
+	leaks = atomic64_read(&ssdfs_dev_mtd_cache_leaks);
+	if (leaks != 0) {
+		SSDFS_ERR("MTD DEV: "
+			  "caches suffers from %lld leaks\n",
+			  leaks);
+	}
+#endif /* CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING */
+}
+
+/*
+ * ssdfs_mtd_device_name() - get device name
+ * @sb: superblock object
+ *
+ * Returns the name field of the MTD device referenced by sb->s_mtd.
+ */
+static const char *ssdfs_mtd_device_name(struct super_block *sb)
+{
+ return sb->s_mtd->name;
+}
+
+/*
+ * ssdfs_mtd_device_size() - get partition size in bytes
+ * @sb: superblock object
+ *
+ * Returns the size field of the MTD device stored in
+ * the file system info (fsi->mtd).
+ */
+static __u64 ssdfs_mtd_device_size(struct super_block *sb)
+{
+ return SSDFS_FS_I(sb)->mtd->size;
+}
+
+/*
+ * ssdfs_mtd_open_zone() - open zone (stub)
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's begin
+ *
+ * The arguments are ignored; %-EOPNOTSUPP is always returned.
+ */
+static int ssdfs_mtd_open_zone(struct super_block *sb, loff_t offset)
+{
+ return -EOPNOTSUPP;
+}
+
+/*
+ * ssdfs_mtd_reopen_zone() - reopen closed zone (stub)
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's begin
+ *
+ * The arguments are ignored; %-EOPNOTSUPP is always returned.
+ */
+static int ssdfs_mtd_reopen_zone(struct super_block *sb, loff_t offset)
+{
+ return -EOPNOTSUPP;
+}
+
+/*
+ * ssdfs_mtd_close_zone() - close zone (stub)
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's begin
+ *
+ * The arguments are ignored; %-EOPNOTSUPP is always returned.
+ */
+static int ssdfs_mtd_close_zone(struct super_block *sb, loff_t offset)
+{
+ return -EOPNOTSUPP;
+}
+
+/*
+ * ssdfs_mtd_read() - read from volume into buffer
+ * @sb: superblock object
+ * @block_size: block size in bytes
+ * @offset: offset in bytes from partition's begin
+ * @len: size of buffer in bytes
+ * @buf: buffer
+ *
+ * @offset is rounded down to a multiple of @block_size and
+ * then @len bytes are read from that position of the volume
+ * into @buf.
+ *
+ * RETURN:
+ * [success] - 0.
+ * [failure] - error code:
+ *
+ * %-EIO - the number of read bytes differs from @len.
+ * A non-zero result of mtd_read() is returned as is.
+ */
+static int ssdfs_mtd_read(struct super_block *sb, u32 block_size,
+			  loff_t offset, size_t len, void *buf)
+{
+	struct mtd_info *mtd = SSDFS_FS_I(sb)->mtd;
+	loff_t block_index;
+	size_t bytes_read;
+	int err;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("sb %p, block_size %u, offset %llu, len %zu, buf %p\n",
+		  sb, block_size, (unsigned long long)offset, len, buf);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	/* align the start position down to the block boundary */
+	block_index = div_u64(offset, block_size);
+	offset = block_index * block_size;
+
+	err = mtd_read(mtd, offset, len, &bytes_read, buf);
+	if (err) {
+		SSDFS_ERR("failed to read (err %d): offset %llu, len %zu\n",
+			  err, (unsigned long long)offset, len);
+		return err;
+	}
+
+	if (bytes_read == len)
+		return 0;
+
+	SSDFS_ERR("retlen (%zu) != len (%zu)\n", bytes_read, len);
+	return -EIO;
+}
+
+/*
+ * ssdfs_mtd_read_block() - read block from the volume
+ * @sb: superblock object
+ * @folio: memory folio
+ * @offset: offset in bytes from partition's begin
+ *
+ * This function tries to read data on @offset
+ * from partition's begin in memory folio. The folio is
+ * filled in PAGE_SIZE portions through ssdfs_mtd_read().
+ * The folio is marked uptodate on success, the uptodate
+ * flag is cleared on failure, and the folio is unlocked
+ * in both cases before returning.
+ *
+ * RETURN:
+ * [success] - 0.
+ * [failure] - error code of ssdfs_mtd_read():
+ *
+ * %-EIO - I/O error.
+ */
+static int ssdfs_mtd_read_block(struct super_block *sb, struct folio *folio,
+				loff_t offset)
+{
+	void *kaddr;
+	u32 processed_bytes = 0;
+	/* initialized because it is checked after the loop */
+	int err = 0;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("sb %p, offset %llu, folio %p, folio_index %llu\n",
+		  sb, (unsigned long long)offset, folio,
+		  (unsigned long long)folio->index);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	while (processed_bytes < folio_size(folio)) {
+		/* the local mapping is valid only for this portion */
+		kaddr = kmap_local_folio(folio, processed_bytes);
+		err = ssdfs_mtd_read(sb, PAGE_SIZE,
+				     offset + processed_bytes,
+				     PAGE_SIZE, kaddr);
+		kunmap_local(kaddr);
+
+		if (err) {
+			folio_clear_uptodate(folio);
+			break;
+		}
+
+		processed_bytes += PAGE_SIZE;
+	}
+
+	if (!err) {
+		folio_mark_uptodate(folio);
+		flush_dcache_folio(folio);
+	}
+
+	ssdfs_folio_unlock(folio);
+
+	return err;
+}
+
+/*
+ * ssdfs_mtd_read_blocks() - read logical blocks from the volume
+ * @sb: superblock object
+ * @batch: memory folios batch
+ * @offset: offset in bytes from partition's begin
+ *
+ * The folios of @batch are read one by one with
+ * ssdfs_mtd_read_block(). The first folio is read from @offset
+ * and every next one from the position that follows
+ * the previous folio. Reading stops on the first error.
+ *
+ * RETURN:
+ * [success] - 0 (an empty batch is reported as success too).
+ * [failure] - error code of ssdfs_mtd_read_block():
+ *
+ * %-EIO - I/O error.
+ */
+static int ssdfs_mtd_read_blocks(struct super_block *sb,
+				 struct folio_batch *batch,
+				 loff_t offset)
+{
+	loff_t pos = offset;
+	unsigned int nr_folios;
+	unsigned int idx;
+	int err;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("sb %p, offset %llu, batch %p\n",
+		  sb, (unsigned long long)offset, batch);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	nr_folios = folio_batch_count(batch);
+	if (!nr_folios) {
+		SSDFS_WARN("empty folio batch\n");
+		return 0;
+	}
+
+	for (idx = 0; idx < nr_folios; idx++) {
+		struct folio *cur_folio = batch->folios[idx];
+
+#ifdef CONFIG_SSDFS_DEBUG
+		BUG_ON(!cur_folio);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+		err = ssdfs_mtd_read_block(sb, cur_folio, pos);
+		if (unlikely(err)) {
+			SSDFS_ERR("fail to read block: "
+				  "cur_offset %llu, err %d\n",
+				  pos, err);
+			return err;
+		}
+
+		pos += folio_size(cur_folio);
+	}
+
+	return 0;
+}
+
+/*
+ * ssdfs_mtd_can_write_block() - check that logical block can be written
+ * @sb: superblock object
+ * @block_size: block size in bytes
+ * @offset: offset in bytes from partition's begin
+ * @need_check: make check or not?
+ *
+ * This function checks that logical block can be written.
+ * If @need_check is false then nothing is checked and 0 is returned.
+ * Otherwise, @block_size bytes are read through ssdfs_mtd_read() and
+ * the block is accepted only if the buffer contains nothing
+ * but 0xff bytes or nothing but 0x00 bytes.
+ *
+ * RETURN:
+ * [success] - 0 (block can be written).
+ * [failure] - error code:
+ *
+ * %-ENOMEM - fail to allocate memory.
+ * %-EIO - block contains data.
+ * An error of ssdfs_mtd_read() is returned as is.
+ */
+static int ssdfs_mtd_can_write_block(struct super_block *sb, u32 block_size,
+ loff_t offset, bool need_check)
+{
+ struct ssdfs_fs_info *fsi = SSDFS_FS_I(sb);
+ struct ssdfs_signature *magic;
+ void *buf;
+ bool is_ssdfs_log_found;
+ int err;
+
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("sb %p, offset %llu, block_size %u, need_check %d\n",
+ sb, (unsigned long long)offset,
+ block_size, (int)need_check);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ if (!need_check)
+ return 0;
+
+ /* temporary buffer for one block; released at free_buf */
+ buf = ssdfs_dev_mtd_kzalloc(block_size, GFP_KERNEL);
+ if (!buf) {
+ SSDFS_ERR("unable to allocate %d bytes\n", block_size);
+ return -ENOMEM;
+ }
+
+ err = ssdfs_mtd_read(sb, block_size, offset, block_size, buf);
+ if (err)
+ goto free_buf;
+
+ /* err is 0 here; it stays 0 unless the block holds data */
+ if (memchr_inv(buf, 0xff, block_size)) {
+ if (memchr_inv(buf, 0x00, block_size)) {
+ /* neither all 0xff nor all 0x00: the block holds data */
+ magic = (struct ssdfs_signature *)buf;
+
+ is_ssdfs_log_found =
+ __is_ssdfs_segment_header_magic_valid(magic) ||
+ is_ssdfs_partial_log_header_magic_valid(magic) ||
+ __is_ssdfs_log_footer_magic_valid(magic);
+
+ /*
+ * The signature check selects only the amount of
+ * debug output: both branches return -EIO.
+ */
+ if (is_ssdfs_log_found &&
+ is_ssdfs_uuid_and_fs_ctime_actual(fsi, buf)) {
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("area with offset %llu contains data\n",
+ (unsigned long long)offset);
+
+ SSDFS_DBG("PAGE DUMP:\n");
+ print_hex_dump_bytes("", DUMP_PREFIX_OFFSET,
+ buf,
+ block_size);
+ SSDFS_DBG("\n");
+#endif /* CONFIG_SSDFS_DEBUG */
+ err = -EIO;
+ } else {
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("area with offset %llu contains data\n",
+ (unsigned long long)offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+ err = -EIO;
+ }
+ }
+ }
+
+free_buf:
+ ssdfs_dev_mtd_kfree(buf);
+ return err;
+}
+
+/*
+ * ssdfs_mtd_write_folio() - write logical block to volume
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's beginning
+ * @folio: memory folio
+ *
+ * This function tries to write from @folio data
+ * on @offset from partition's beginning. The folio is
+ * written in PAGE_SIZE portions. After the write attempt
+ * the folio is unlocked and ssdfs_folio_put() is called for it,
+ * both on success and on write failure (but not when
+ * the function returns %-EROFS). On success the dirty state
+ * is cleared and the folio is marked uptodate.
+ *
+ * RETURN:
+ * [success] - 0.
+ * [failure] - error code:
+ *
+ * %-EROFS - file system in RO mode.
+ * %-EIO - I/O error.
+ */
+static int ssdfs_mtd_write_folio(struct super_block *sb, loff_t offset,
+				 struct folio *folio)
+{
+	struct ssdfs_fs_info *fsi = SSDFS_FS_I(sb);
+	struct mtd_info *mtd = fsi->mtd;
+	size_t retlen;
+	unsigned char *kaddr;
+	int ret;
+#ifdef CONFIG_SSDFS_DEBUG
+	u32 remainder;
+#endif /* CONFIG_SSDFS_DEBUG */
+	u32 written_bytes = 0;
+	int err = 0;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("sb %p, offset %llu, folio %p\n",
+		  sb, offset, folio);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	if (sb->s_flags & SB_RDONLY) {
+		SSDFS_WARN("unable to write on RO file system\n");
+		return -EROFS;
+	}
+
+#ifdef CONFIG_SSDFS_DEBUG
+	BUG_ON(!folio);
+	BUG_ON((offset >= mtd->size) ||
+		(folio_size(folio) > (mtd->size - offset)));
+	div_u64_rem((u64)offset, (u64)folio_size(folio), &remainder);
+	BUG_ON(remainder);
+	BUG_ON(!folio_test_dirty(folio));
+	BUG_ON(folio_test_locked(folio));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	ssdfs_folio_lock(folio);
+
+	while (written_bytes < folio_size(folio)) {
+		kaddr = kmap_local_folio(folio, written_bytes);
+		ret = mtd_write(mtd, offset + written_bytes, PAGE_SIZE,
+				&retlen, kaddr);
+		kunmap_local(kaddr);
+
+		if (ret || (retlen != PAGE_SIZE)) {
+			/*
+			 * Report the position of the failed portion.
+			 * PAGE_SIZE is unsigned long, so it is cast
+			 * to match the %zu specifier.
+			 */
+			SSDFS_ERR("failed to write (err %d): offset %llu, "
+				  "len %zu, retlen %zu\n",
+				  ret,
+				  (unsigned long long)(offset + written_bytes),
+				  (size_t)PAGE_SIZE, retlen);
+			err = -EIO;
+			break;
+		}
+
+		written_bytes += PAGE_SIZE;
+	}
+
+	if (!err) {
+		ssdfs_clear_dirty_folio(folio);
+		folio_mark_uptodate(folio);
+	}
+
+	ssdfs_folio_unlock(folio);
+	ssdfs_folio_put(folio);
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("folio %p, count %d\n",
+		  folio, folio_ref_count(folio));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	return err;
+}
+
+/*
+ * ssdfs_mtd_write_blocks() - write logical blocks to volume
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's beginning
+ * @batch: memory folios batch
+ *
+ * The folios of @batch are written one by one with
+ * ssdfs_mtd_write_folio(). The first folio goes to @offset
+ * and every next one to the position that follows
+ * the previous folio. Writing stops on the first error.
+ *
+ * RETURN:
+ * [success] - 0 (an empty batch is reported as success too).
+ * [failure] - error code:
+ *
+ * %-EROFS - file system in RO mode.
+ * %-EIO - I/O error.
+ */
+static int ssdfs_mtd_write_blocks(struct super_block *sb, loff_t offset,
+				  struct folio_batch *batch)
+{
+	loff_t pos = offset;
+	unsigned int nr_folios;
+	unsigned int idx;
+	int err;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("sb %p, offset %llu, batch %p\n",
+		  sb, offset, batch);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	if (sb->s_flags & SB_RDONLY) {
+		SSDFS_WARN("unable to write on RO file system\n");
+		return -EROFS;
+	}
+
+	nr_folios = folio_batch_count(batch);
+	if (!nr_folios) {
+		SSDFS_WARN("empty folio batch\n");
+		return 0;
+	}
+
+	for (idx = 0; idx < nr_folios; idx++) {
+		struct folio *cur_folio = batch->folios[idx];
+
+#ifdef CONFIG_SSDFS_DEBUG
+		BUG_ON(!cur_folio);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+		err = ssdfs_mtd_write_folio(sb, pos, cur_folio);
+		if (unlikely(err)) {
+			SSDFS_ERR("fail to write block: "
+				  "cur_offset %llu, err %d\n",
+				  pos, err);
+			return err;
+		}
+
+		pos += folio_size(cur_folio);
+	}
+
+	return 0;
+}
+
+/*
+ * ssdfs_mtd_erase() - make erase operation
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's begin
+ * @len: size in bytes
+ *
+ * This function tries to erase @len bytes starting from @offset
+ * by means of mtd_erase(). @len has to be a multiple of the MTD
+ * erase size. The alignment of @offset is verified only
+ * under CONFIG_SSDFS_DEBUG.
+ *
+ * RETURN:
+ * [success] - 0.
+ * [failure] - error code:
+ *
+ * %-EROFS - file system in RO mode.
+ * %-ERANGE - @len is not aligned on erase size.
+ * A non-zero result of mtd_erase() is returned as is.
+ */
+static int ssdfs_mtd_erase(struct super_block *sb, loff_t offset, size_t len)
+{
+	struct mtd_info *mtd = SSDFS_FS_I(sb)->mtd;
+	struct erase_info request;
+	u32 remainder;
+	int err;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("sb %p, offset %llu, len %zu\n",
+		  sb, (unsigned long long)offset, len);
+
+	div_u64_rem((u64)len, (u64)mtd->erasesize, &remainder);
+	BUG_ON(remainder);
+	div_u64_rem((u64)offset, (u64)mtd->erasesize, &remainder);
+	BUG_ON(remainder);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	if (sb->s_flags & SB_RDONLY)
+		return -EROFS;
+
+	div_u64_rem((u64)len, (u64)mtd->erasesize, &remainder);
+	if (remainder != 0) {
+		SSDFS_WARN("len %llu, erase_size %u, remainder %u\n",
+			   (unsigned long long)len,
+			   mtd->erasesize, remainder);
+		return -ERANGE;
+	}
+
+	/* all fields except the erased range stay zeroed */
+	memset(&request, 0, sizeof(request));
+	request.addr = offset;
+	request.len = len;
+
+	err = mtd_erase(mtd, &request);
+	if (!err)
+		return 0;
+
+	SSDFS_ERR("failed to erase (err %d): offset %llu, len %zu\n",
+		  err, (unsigned long long)offset, len);
+	return err;
+}
+
+/*
+ * ssdfs_mtd_trim() - initiate background erase operation
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's begin
+ * @len: size in bytes
+ *
+ * This function tries to initiate background erase operation.
+ * Currently, it is the same operation as foreground erase:
+ * the call is forwarded to ssdfs_mtd_erase().
+ *
+ * RETURN:
+ * [success] - 0.
+ * [failure] - error code of ssdfs_mtd_erase().
+ */
+static int ssdfs_mtd_trim(struct super_block *sb, loff_t offset, size_t len)
+{
+ return ssdfs_mtd_erase(sb, offset, len);
+}
+
+/*
+ * ssdfs_mtd_peb_isbad() - check that PEB is bad
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's begin
+ *
+ * This function tries to detect that PEB is bad or not.
+ * The result of mtd_block_isbad() for the file system's
+ * MTD device is returned without any change.
+ */
+static int ssdfs_mtd_peb_isbad(struct super_block *sb, loff_t offset)
+{
+ return mtd_block_isbad(SSDFS_FS_I(sb)->mtd, offset);
+}
+
+/*
+ * ssdfs_mtd_mark_peb_bad() - mark PEB as bad
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's begin
+ *
+ * This function tries to mark PEB as bad.
+ * The result of mtd_block_markbad() for the file system's
+ * MTD device is returned without any change.
+ */
+static int ssdfs_mtd_mark_peb_bad(struct super_block *sb, loff_t offset)
+{
+ return mtd_block_markbad(SSDFS_FS_I(sb)->mtd, offset);
+}
+
+/*
+ * ssdfs_mtd_sync() - make sync operation
+ * @sb: superblock object
+ *
+ * Calls mtd_sync() for the file system's MTD device.
+ * Nothing is returned to the caller.
+ */
+static void ssdfs_mtd_sync(struct super_block *sb)
+{
+ struct ssdfs_fs_info *fsi = SSDFS_FS_I(sb);
+
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("device %d (\"%s\")\n",
+ fsi->mtd->index, fsi->mtd->name);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ mtd_sync(fsi->mtd);
+}
+
+/*
+ * Device operations table of the MTD backend.
+ *
+ * The zone operations are stubs that return -EOPNOTSUPP.
+ * .write_block is served by ssdfs_mtd_write_folio().
+ * .trim is served by ssdfs_mtd_trim(), which forwards
+ * to ssdfs_mtd_erase().
+ */
+const struct ssdfs_device_ops ssdfs_mtd_devops = {
+ .device_name = ssdfs_mtd_device_name,
+ .device_size = ssdfs_mtd_device_size,
+ .open_zone = ssdfs_mtd_open_zone,
+ .reopen_zone = ssdfs_mtd_reopen_zone,
+ .close_zone = ssdfs_mtd_close_zone,
+ .read = ssdfs_mtd_read,
+ .read_block = ssdfs_mtd_read_block,
+ .read_blocks = ssdfs_mtd_read_blocks,
+ .can_write_block = ssdfs_mtd_can_write_block,
+ .write_block = ssdfs_mtd_write_folio,
+ .write_blocks = ssdfs_mtd_write_blocks,
+ .erase = ssdfs_mtd_erase,
+ .trim = ssdfs_mtd_trim,
+ .peb_isbad = ssdfs_mtd_peb_isbad,
+ .mark_peb_bad = ssdfs_mtd_mark_peb_bad,
+ .sync = ssdfs_mtd_sync,
+};
diff --git a/fs/ssdfs/dev_zns.c b/fs/ssdfs/dev_zns.c
new file mode 100644
index 000000000000..f2afe0038f9b
--- /dev/null
+++ b/fs/ssdfs/dev_zns.c
@@ -0,0 +1,1344 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause-Clear
+ *
+ * SSDFS -- SSD-oriented File System.
+ *
+ * fs/ssdfs/dev_zns.c - ZNS SSD support.
+ *
+ * Copyright (c) 2022-2023 Bytedance Ltd. and/or its affiliates.
+ * https://www.bytedance.com/
+ * Copyright (c) 2022-2026 Viacheslav Dubeyko <slava@xxxxxxxxxxx>
+ * http://www.ssdfs.org/
+ * All rights reserved.
+ *
+ * Authors: Viacheslav Dubeyko <slava@xxxxxxxxxxx>
+ *
+ * Acknowledgement: Cong Wang
+ */
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/pagevec.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
+
+#include "peb_mapping_queue.h"
+#include "peb_mapping_table_cache.h"
+#include "folio_vector.h"
+#include "ssdfs.h"
+
+#include <trace/events/ssdfs.h>
+
+/*
+ * Leak counters of this file. They are zeroed by
+ * ssdfs_dev_zns_memory_leaks_init() and reported by
+ * ssdfs_dev_zns_check_memory_leaks().
+ */
+#ifdef CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING
+atomic64_t ssdfs_dev_zns_folio_leaks;
+atomic64_t ssdfs_dev_zns_memory_leaks;
+atomic64_t ssdfs_dev_zns_cache_leaks;
+#endif /* CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING */
+
+/*
+ * Helpers with the dev_zns prefix (presumably the ones that
+ * the macro invocation below provides for this file).
+ * NOTE(review): the folio based names follow the list given for
+ * the same macros in dev_mtd.c - confirm against the macro definition.
+ *
+ * void ssdfs_dev_zns_cache_leaks_increment(void *kaddr)
+ * void ssdfs_dev_zns_cache_leaks_decrement(void *kaddr)
+ * void *ssdfs_dev_zns_kmalloc(size_t size, gfp_t flags)
+ * void *ssdfs_dev_zns_kzalloc(size_t size, gfp_t flags)
+ * void *ssdfs_dev_zns_kcalloc(size_t n, size_t size, gfp_t flags)
+ * void ssdfs_dev_zns_kfree(void *kaddr)
+ * struct folio *ssdfs_dev_zns_alloc_folio(gfp_t gfp_mask,
+ * unsigned int order)
+ * struct folio *ssdfs_dev_zns_add_batch_folio(struct folio_batch *batch,
+ * unsigned int order)
+ * void ssdfs_dev_zns_free_folio(struct folio *folio)
+ * void ssdfs_dev_zns_folio_batch_release(struct folio_batch *batch)
+ */
+#ifdef CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING
+ SSDFS_MEMORY_LEAKS_CHECKER_FNS(dev_zns)
+#else
+ SSDFS_MEMORY_ALLOCATOR_FNS(dev_zns)
+#endif /* CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING */
+
+/*
+ * ssdfs_dev_zns_memory_leaks_init() - reset leak counters of this file
+ *
+ * Sets the folio, memory and cache leak counters to zero.
+ * The body is empty without CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING.
+ */
+void ssdfs_dev_zns_memory_leaks_init(void)
+{
+#ifdef CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING
+ atomic64_set(&ssdfs_dev_zns_folio_leaks, 0);
+ atomic64_set(&ssdfs_dev_zns_memory_leaks, 0);
+ atomic64_set(&ssdfs_dev_zns_cache_leaks, 0);
+#endif /* CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING */
+}
+
+/*
+ * ssdfs_dev_zns_check_memory_leaks() - report leaks of this file
+ *
+ * Every leak counter that is not zero is reported through SSDFS_ERR().
+ * The body is empty without CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING.
+ */
+void ssdfs_dev_zns_check_memory_leaks(void)
+{
+#ifdef CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING
+	s64 leaks;
+
+	leaks = atomic64_read(&ssdfs_dev_zns_folio_leaks);
+	if (leaks != 0) {
+		SSDFS_ERR("ZNS DEV: "
+			  "memory leaks include %lld folios\n",
+			  leaks);
+	}
+
+	leaks = atomic64_read(&ssdfs_dev_zns_memory_leaks);
+	if (leaks != 0) {
+		SSDFS_ERR("ZNS DEV: "
+			  "memory allocator suffers from %lld leaks\n",
+			  leaks);
+	}
+
+	leaks = atomic64_read(&ssdfs_dev_zns_cache_leaks);
+	if (leaks != 0) {
+		SSDFS_ERR("ZNS DEV: "
+			  "caches suffers from %lld leaks\n",
+			  leaks);
+	}
+#endif /* CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING */
+}
+
+/*
+ * Wait queue head private to this file.
+ * NOTE(review): no waiter or waker is visible in the nearby code -
+ * confirm where it is used.
+ */
+static DECLARE_WAIT_QUEUE_HEAD(zns_wq);
+
+/*
+ * ssdfs_zns_device_name() - get device name
+ * @sb: superblock object
+ *
+ * Returns the s_id string of the superblock.
+ */
+static const char *ssdfs_zns_device_name(struct super_block *sb)
+{
+ return sb->s_id;
+}
+
+/*
+ * ssdfs_zns_device_size() - get partition size in bytes
+ * @sb: superblock object
+ *
+ * Returns the size of the inode that hosts the mapping
+ * of the superblock's block device, as read by i_size_read().
+ */
+static __u64 ssdfs_zns_device_size(struct super_block *sb)
+{
+ return i_size_read(sb->s_bdev->bd_mapping->host);
+}
+
+/*
+ * ssdfs_report_zone() - copy reported zone descriptor
+ * @zone: zone descriptor
+ * @index: index of the zone (not used)
+ * @data: destination that receives a copy of @zone
+ *
+ * This function is passed to blkdev_report_zones() in this file.
+ * It copies sizeof(struct blk_zone) bytes from @zone into @data
+ * and always returns 0.
+ */
+static int ssdfs_report_zone(struct blk_zone *zone,
+			     unsigned int index, void *data)
+{
+	const size_t desc_size = sizeof(struct blk_zone);
+
+	ssdfs_memcpy(data, 0, desc_size,
+		     zone, 0, desc_size,
+		     desc_size);
+
+	return 0;
+}
+
+/*
+ * ssdfs_zns_open_zone() - open zone
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's begin
+ *
+ * This function issues REQ_OP_ZONE_OPEN for the zone that starts
+ * at @offset and has fsi->erasesize bytes in size. After that
+ * the fsi->open_zones counter is incremented. If the new value is
+ * bigger than fsi->max_open_zones then the increment is rolled back.
+ *
+ * NOTE(review): the zone management command has been issued already
+ * when %-EBUSY is returned and no close command follows on this
+ * path - confirm that this is intended.
+ *
+ * RETURN:
+ * [success] - 0.
+ * [failure] - error code:
+ *
+ * %-EBUSY - open zones limit is exceeded.
+ * An error of blkdev_zone_mgmt() is returned as is.
+ */
+static int ssdfs_zns_open_zone(struct super_block *sb, loff_t offset)
+{
+	struct ssdfs_fs_info *fsi = SSDFS_FS_I(sb);
+	sector_t zone_sector = offset >> SECTOR_SHIFT;
+	sector_t zone_size = fsi->erasesize >> SECTOR_SHIFT;
+	u32 open_zones;
+	unsigned int nofs_flags;
+	int err;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("sb %p, offset %llu\n",
+		  sb, (unsigned long long)offset);
+	SSDFS_DBG("BEFORE: open_zones %d\n",
+		  atomic_read(&fsi->open_zones));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	nofs_flags = memalloc_nofs_save();
+	err = blkdev_zone_mgmt(sb->s_bdev, REQ_OP_ZONE_OPEN,
+				zone_sector, zone_size);
+	memalloc_nofs_restore(nofs_flags);
+	if (unlikely(err)) {
+		/* the counter has not been touched yet: read it for the log */
+		open_zones = atomic_read(&fsi->open_zones);
+
+		SSDFS_ERR("fail to open zone: "
+			  "zone_sector %llu, zone_size %llu, "
+			  "open_zones %u, max_open_zones %u, "
+			  "err %d\n",
+			  zone_sector, zone_size,
+			  open_zones, fsi->max_open_zones,
+			  err);
+		return err;
+	}
+
+	open_zones = atomic_inc_return(&fsi->open_zones);
+	if (open_zones > fsi->max_open_zones) {
+		/* roll back the accounting of this zone */
+		atomic_dec(&fsi->open_zones);
+
+		SSDFS_WARN("open zones limit achieved: "
+			   "open_zones %u\n", open_zones);
+		return -EBUSY;
+	}
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("AFTER: open_zones %d\n",
+		  atomic_read(&fsi->open_zones));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	return 0;
+}
+
+/*
+ * ssdfs_zns_reopen_zone() - reopen closed zone
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's begin
+ *
+ * This function requests the report of the zone that starts
+ * at @offset, rejects a zone in READ-ONLY, FULL or OFFLINE
+ * condition, issues REQ_OP_ZONE_OPEN and then requests the report
+ * again. After the open command a zone that is still CLOSED is
+ * rejected too. The fsi->open_zones counter is not changed here.
+ *
+ * RETURN:
+ * [success] - 0.
+ * [failure] - error code:
+ *
+ * %-EIO - zone's condition doesn't allow to reopen it or
+ *         no zone has been reported.
+ * A negative result of blkdev_report_zones() or an error of
+ * blkdev_zone_mgmt() is returned as is.
+ */
+static int ssdfs_zns_reopen_zone(struct super_block *sb, loff_t offset)
+{
+	struct ssdfs_fs_info *fsi = SSDFS_FS_I(sb);
+	struct blk_zone zone;
+	sector_t zone_sector = offset >> SECTOR_SHIFT;
+	sector_t zone_size = fsi->erasesize >> SECTOR_SHIFT;
+	unsigned int nofs_flags;
+	int err;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("sb %p, offset %llu\n",
+		  sb, (unsigned long long)offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	err = blkdev_report_zones(sb->s_bdev, zone_sector, 1,
+				  ssdfs_report_zone, &zone);
+	if (err != 1) {
+		SSDFS_ERR("fail to take report zone: "
+			  "zone_sector %llu, err %d\n",
+			  zone_sector, err);
+		/* zero reported zones must not look like success */
+		return err < 0 ? err : -EIO;
+	}
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("zone before: start %llu, len %llu, wp %llu, "
+		  "type %#x, cond %#x, non_seq %#x, "
+		  "reset %#x, capacity %llu\n",
+		  zone.start, zone.len, zone.wp,
+		  zone.type, zone.cond, zone.non_seq,
+		  zone.reset, zone.capacity);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	switch (zone.cond) {
+	case BLK_ZONE_COND_CLOSED:
+#ifdef CONFIG_SSDFS_DEBUG
+		SSDFS_DBG("zone is closed: offset %llu\n",
+			  offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+		/* continue logic */
+		break;
+
+	case BLK_ZONE_COND_READONLY:
+#ifdef CONFIG_SSDFS_DEBUG
+		SSDFS_DBG("zone is READ-ONLY: offset %llu\n",
+			  offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+		return -EIO;
+
+	case BLK_ZONE_COND_FULL:
+#ifdef CONFIG_SSDFS_DEBUG
+		SSDFS_DBG("zone is full: offset %llu\n",
+			  offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+		return -EIO;
+
+	case BLK_ZONE_COND_OFFLINE:
+#ifdef CONFIG_SSDFS_DEBUG
+		SSDFS_DBG("zone is offline: offset %llu\n",
+			  offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+		return -EIO;
+
+	default:
+		/* continue logic */
+		break;
+	}
+
+	nofs_flags = memalloc_nofs_save();
+	err = blkdev_zone_mgmt(sb->s_bdev, REQ_OP_ZONE_OPEN,
+				zone_sector, zone_size);
+	memalloc_nofs_restore(nofs_flags);
+	if (unlikely(err)) {
+		SSDFS_ERR("fail to open zone: "
+			  "zone_sector %llu, zone_size %llu, "
+			  "err %d\n",
+			  zone_sector, zone_size,
+			  err);
+		return err;
+	}
+
+	/* take the report again to see the zone's state after the command */
+	err = blkdev_report_zones(sb->s_bdev, zone_sector, 1,
+				  ssdfs_report_zone, &zone);
+	if (err != 1) {
+		SSDFS_ERR("fail to take report zone: "
+			  "zone_sector %llu, err %d\n",
+			  zone_sector, err);
+		/* zero reported zones must not look like success */
+		return err < 0 ? err : -EIO;
+	}
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("zone after: start %llu, len %llu, wp %llu, "
+		  "type %#x, cond %#x, non_seq %#x, "
+		  "reset %#x, capacity %llu\n",
+		  zone.start, zone.len, zone.wp,
+		  zone.type, zone.cond, zone.non_seq,
+		  zone.reset, zone.capacity);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	switch (zone.cond) {
+	case BLK_ZONE_COND_CLOSED:
+#ifdef CONFIG_SSDFS_DEBUG
+		SSDFS_DBG("zone is closed: offset %llu\n",
+			  offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+		return -EIO;
+
+	case BLK_ZONE_COND_READONLY:
+#ifdef CONFIG_SSDFS_DEBUG
+		SSDFS_DBG("zone is READ-ONLY: offset %llu\n",
+			  offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+		return -EIO;
+
+	case BLK_ZONE_COND_FULL:
+#ifdef CONFIG_SSDFS_DEBUG
+		SSDFS_DBG("zone is full: offset %llu\n",
+			  offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+		return -EIO;
+
+	case BLK_ZONE_COND_OFFLINE:
+#ifdef CONFIG_SSDFS_DEBUG
+		SSDFS_DBG("zone is offline: offset %llu\n",
+			  offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+		return -EIO;
+
+	default:
+		/* continue logic */
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * ssdfs_zns_close_zone() - close zone
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's begin
+ *
+ * This function issues REQ_OP_ZONE_FINISH for the zone that starts
+ * at @offset and has fsi->erasesize bytes in size. After that
+ * the fsi->open_zones counter is decremented.
+ *
+ * RETURN:
+ * [success] - 0.
+ * [failure] - error code of blkdev_zone_mgmt().
+ */
+static int ssdfs_zns_close_zone(struct super_block *sb, loff_t offset)
+{
+	struct ssdfs_fs_info *fsi = SSDFS_FS_I(sb);
+	sector_t zone_sector = offset >> SECTOR_SHIFT;
+	sector_t zone_size = fsi->erasesize >> SECTOR_SHIFT;
+	u32 open_zones;
+	unsigned int nofs_flags;
+	int err;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("sb %p, offset %llu\n",
+		  sb, (unsigned long long)offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	nofs_flags = memalloc_nofs_save();
+	err = blkdev_zone_mgmt(sb->s_bdev, REQ_OP_ZONE_FINISH,
+				zone_sector, zone_size);
+	memalloc_nofs_restore(nofs_flags);
+	if (unlikely(err)) {
+		SSDFS_ERR("fail to close zone: "
+			  "zone_sector %llu, zone_size %llu, err %d\n",
+			  zone_sector, zone_size, err);
+		return err;
+	}
+
+	/*
+	 * The counter is kept as u32 here, so a value that dropped
+	 * below zero is seen as a huge number by the check below.
+	 */
+	open_zones = atomic_dec_return(&fsi->open_zones);
+	if (open_zones > fsi->max_open_zones) {
+		SSDFS_WARN("open zones limit exhausted: "
+			   "open_zones %u\n", open_zones);
+	}
+
+	return 0;
+}
+
+/*
+ * ssdfs_zns_zone_size() - retrieve zone size
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's begin
+ *
+ * This function requests the report of the zone that starts
+ * at @offset and converts the reported length from sectors
+ * into bytes.
+ *
+ * RETURN: zone size in bytes or U64_MAX if the report
+ * of exactly one zone has not been received.
+ */
+u64 ssdfs_zns_zone_size(struct super_block *sb, loff_t offset)
+{
+	sector_t first_sector = offset >> SECTOR_SHIFT;
+	struct blk_zone zone;
+	u64 bytes;
+	int reported;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("sb %p, offset %llu\n",
+		  sb, (unsigned long long)offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	reported = blkdev_report_zones(sb->s_bdev, first_sector, 1,
+					ssdfs_report_zone, &zone);
+	if (reported != 1) {
+		SSDFS_ERR("fail to take report zone: "
+			  "zone_sector %llu, err %d\n",
+			  first_sector, reported);
+		return U64_MAX;
+	}
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("zone: start %llu, len %llu, wp %llu, "
+		  "type %#x, cond %#x, non_seq %#x, "
+		  "reset %#x, capacity %llu\n",
+		  zone.start, zone.len, zone.wp,
+		  zone.type, zone.cond, zone.non_seq,
+		  zone.reset, zone.capacity);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	bytes = (u64)zone.len << SECTOR_SHIFT;
+
+	return bytes;
+}
+
+/*
+ * ssdfs_zns_zone_capacity() - retrieve zone capacity
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's begin
+ *
+ * This function requests the report of the zone that starts
+ * at @offset and converts the reported capacity from sectors
+ * into bytes.
+ *
+ * RETURN: zone capacity in bytes or U64_MAX if the report
+ * of exactly one zone has not been received.
+ */
+u64 ssdfs_zns_zone_capacity(struct super_block *sb, loff_t offset)
+{
+	sector_t first_sector = offset >> SECTOR_SHIFT;
+	struct blk_zone zone;
+	u64 bytes;
+	int reported;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("sb %p, offset %llu\n",
+		  sb, (unsigned long long)offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	reported = blkdev_report_zones(sb->s_bdev, first_sector, 1,
+					ssdfs_report_zone, &zone);
+	if (reported != 1) {
+		SSDFS_ERR("fail to take report zone: "
+			  "zone_sector %llu, err %d\n",
+			  first_sector, reported);
+		return U64_MAX;
+	}
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("zone: start %llu, len %llu, wp %llu, "
+		  "type %#x, cond %#x, non_seq %#x, "
+		  "reset %#x, capacity %llu\n",
+		  zone.start, zone.len, zone.wp,
+		  zone.type, zone.cond, zone.non_seq,
+		  zone.reset, zone.capacity);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	bytes = (u64)zone.capacity << SECTOR_SHIFT;
+
+	return bytes;
+}
+
+/*
+ * ssdfs_zns_zone_write_pointer() - retrieve zone's write pointer
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's begin
+ *
+ * This function requests the report of the zone that starts
+ * at @offset and converts the reported write pointer from
+ * sectors into bytes.
+ *
+ * RETURN: write pointer in bytes or U64_MAX if the report
+ * of exactly one zone has not been received or if the write
+ * pointer is not below zone's start plus zone's capacity.
+ */
+u64 ssdfs_zns_zone_write_pointer(struct super_block *sb, loff_t offset)
+{
+	sector_t first_sector = offset >> SECTOR_SHIFT;
+	struct blk_zone zone;
+	u64 writable_end;
+	int reported;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("sb %p, offset %llu\n",
+		  sb, (unsigned long long)offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	reported = blkdev_report_zones(sb->s_bdev, first_sector, 1,
+					ssdfs_report_zone, &zone);
+	if (reported != 1) {
+		SSDFS_ERR("fail to take report zone: "
+			  "zone_sector %llu, err %d\n",
+			  first_sector, reported);
+		return U64_MAX;
+	}
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("zone: start %llu, len %llu, wp %llu, "
+		  "type %#x, cond %#x, non_seq %#x, "
+		  "reset %#x, capacity %llu\n",
+		  zone.start, zone.len, zone.wp,
+		  zone.type, zone.cond, zone.non_seq,
+		  zone.reset, zone.capacity);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	/* first sector behind the zone's capacity */
+	writable_end = zone.start + zone.capacity;
+
+	if (zone.wp < writable_end)
+		return (u64)zone.wp << SECTOR_SHIFT;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("zone is closed: "
+		  "start %llu, len %llu, "
+		  "wp %llu, type %#x, cond %#x, non_seq %#x, "
+		  "reset %#x, capacity %llu\n",
+		  zone.start, zone.len, zone.wp,
+		  zone.type, zone.cond, zone.non_seq,
+		  zone.reset, zone.capacity);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	return U64_MAX;
+}
+
+/*
+ * ssdfs_zns_sync_folio_request() - submit folio request
+ * @sb: superblock object
+ * @folio: memory folio
+ * @zone_start: first sector of zone
+ * @offset: offset in bytes from partition's begin
+ * @op: direction of I/O
+ * @op_flags: request op flags
+ *
+ * This function builds a bio with one folio, sets @zone_start
+ * as the bio's start sector and waits for the completion of
+ * the request. REQ_OP_ZONE_APPEND and REQ_IDLE are always OR-ed
+ * into @op before the bio is built. @offset is used only
+ * under CONFIG_SSDFS_DEBUG.
+ *
+ * RETURN:
+ * [success] - 0.
+ * [failure] - error code:
+ *
+ * %-ERANGE - bio allocation has returned NULL.
+ * Other errors of the bio allocation, of adding the folio
+ * or of submit_bio_wait() are returned as is.
+ */
+static int ssdfs_zns_sync_folio_request(struct super_block *sb,
+					struct folio *folio,
+					sector_t zone_start,
+					loff_t offset,
+					unsigned int op, int op_flags)
+{
+	struct bio *bio;
+#ifdef CONFIG_SSDFS_DEBUG
+	sector_t zone_sector = offset >> SECTOR_SHIFT;
+	struct blk_zone zone;
+	int res;
+#endif /* CONFIG_SSDFS_DEBUG */
+	int err = 0;
+
+	op |= REQ_OP_ZONE_APPEND | REQ_IDLE;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	BUG_ON(!folio);
+
+	SSDFS_DBG("offset %llu, zone_start %llu, "
+		  "op %#x, op_flags %#x\n",
+		  offset, zone_start, op, op_flags);
+
+	res = blkdev_report_zones(sb->s_bdev, zone_sector, 1,
+				  ssdfs_report_zone, &zone);
+	if (res != 1) {
+		SSDFS_ERR("fail to take report zone: "
+			  "zone_sector %llu, err %d\n",
+			  zone_sector, res);
+	} else {
+		SSDFS_DBG("zone: start %llu, len %llu, wp %llu, "
+			  "type %#x, cond %#x, non_seq %#x, "
+			  "reset %#x, capacity %llu\n",
+			  zone.start, zone.len, zone.wp,
+			  zone.type, zone.cond, zone.non_seq,
+			  zone.reset, zone.capacity);
+
+		/* zone is filled only if the report has succeeded */
+		BUG_ON(zone_start != zone.start);
+	}
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	bio = ssdfs_bdev_bio_alloc(sb->s_bdev, 1, op, GFP_NOFS);
+	if (IS_ERR_OR_NULL(bio)) {
+		err = !bio ? -ERANGE : PTR_ERR(bio);
+		SSDFS_ERR("fail to allocate bio: err %d\n",
+			  err);
+		return err;
+	}
+
+	bio->bi_iter.bi_sector = zone_start;
+	bio_set_dev(bio, sb->s_bdev);
+	bio->bi_opf = op | op_flags;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("folio %p, count %d\n",
+		  folio, folio_ref_count(folio));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	err = ssdfs_bdev_bio_add_folio(bio, folio, 0);
+	if (unlikely(err)) {
+		SSDFS_ERR("fail to add folio into bio: "
+			  "err %d\n",
+			  err);
+		goto finish_sync_folio_request;
+	}
+
+	err = submit_bio_wait(bio);
+	if (unlikely(err)) {
+		SSDFS_ERR("fail to process request: "
+			  "err %d\n",
+			  err);
+		goto finish_sync_folio_request;
+	}
+
+finish_sync_folio_request:
+	/* the bio is released on success and on failure */
+	ssdfs_bdev_bio_put(bio);
+
+	return err;
+}
+
+/*
+ * ssdfs_zns_sync_batch_request() - submit folio batch request
+ * @sb: superblock object
+ * @batch: folio batch
+ * @zone_start: first sector of zone
+ * @offset: offset in bytes from partition's begin
+ * @op: direction of I/O
+ * @op_flags: request op flags
+ *
+ * This function builds one bio with all folios of @batch, sets
+ * @zone_start as the bio's start sector and waits for the completion
+ * of the request. REQ_OP_ZONE_APPEND and REQ_IDLE are always OR-ed
+ * into @op before the bio is built. @offset is used only
+ * under CONFIG_SSDFS_DEBUG.
+ *
+ * RETURN:
+ * [success] - 0 (an empty batch is reported as success too).
+ * [failure] - error code:
+ *
+ * %-ERANGE - bio allocation has returned NULL.
+ * Other errors of the bio allocation, of adding a folio
+ * or of submit_bio_wait() are returned as is.
+ */
+static int ssdfs_zns_sync_batch_request(struct super_block *sb,
+					struct folio_batch *batch,
+					sector_t zone_start,
+					loff_t offset,
+					unsigned int op, int op_flags)
+{
+	struct bio *bio;
+	int i;
+#ifdef CONFIG_SSDFS_DEBUG
+	sector_t zone_sector = offset >> SECTOR_SHIFT;
+	struct blk_zone zone;
+	int res;
+#endif /* CONFIG_SSDFS_DEBUG */
+	int err = 0;
+
+	op |= REQ_OP_ZONE_APPEND | REQ_IDLE;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	BUG_ON(!batch);
+
+	SSDFS_DBG("offset %llu, zone_start %llu, "
+		  "op %#x, op_flags %#x\n",
+		  offset, zone_start, op, op_flags);
+
+	res = blkdev_report_zones(sb->s_bdev, zone_sector, 1,
+				  ssdfs_report_zone, &zone);
+	if (res != 1) {
+		SSDFS_ERR("fail to take report zone: "
+			  "zone_sector %llu, err %d\n",
+			  zone_sector, res);
+	} else {
+		SSDFS_DBG("zone: start %llu, len %llu, wp %llu, "
+			  "type %#x, cond %#x, non_seq %#x, "
+			  "reset %#x, capacity %llu\n",
+			  zone.start, zone.len, zone.wp,
+			  zone.type, zone.cond, zone.non_seq,
+			  zone.reset, zone.capacity);
+
+		/* zone is filled only if the report has succeeded */
+		BUG_ON(zone_start != zone.start);
+	}
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	if (folio_batch_count(batch) == 0) {
+		SSDFS_WARN("empty folio batch\n");
+		return 0;
+	}
+
+	bio = ssdfs_bdev_bio_alloc(sb->s_bdev, folio_batch_count(batch),
+				   op, GFP_NOFS);
+	if (IS_ERR_OR_NULL(bio)) {
+		err = !bio ? -ERANGE : PTR_ERR(bio);
+		SSDFS_ERR("fail to allocate bio: err %d\n",
+			  err);
+		return err;
+	}
+
+	bio->bi_iter.bi_sector = zone_start;
+	bio_set_dev(bio, sb->s_bdev);
+	bio->bi_opf = op | op_flags;
+
+	for (i = 0; i < folio_batch_count(batch); i++) {
+		struct folio *folio = batch->folios[i];
+
+#ifdef CONFIG_SSDFS_DEBUG
+		BUG_ON(!folio);
+
+		SSDFS_DBG("folio %p, count %d\n",
+			  folio, folio_ref_count(folio));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+		err = ssdfs_bdev_bio_add_folio(bio, folio, 0);
+		if (unlikely(err)) {
+			SSDFS_ERR("fail to add folio %d into bio: "
+				  "err %d\n",
+				  i, err);
+			goto finish_sync_batch_request;
+		}
+	}
+
+	err = submit_bio_wait(bio);
+	if (unlikely(err)) {
+		SSDFS_ERR("fail to process request: "
+			  "err %d\n",
+			  err);
+		goto finish_sync_batch_request;
+	}
+
+finish_sync_batch_request:
+	/* the bio is released on success and on failure */
+	ssdfs_bdev_bio_put(bio);
+
+	return err;
+}
+
+/*
+ * ssdfs_zns_read_block() - read one logical block from the volume
+ * @sb: superblock object
+ * @folio: memory folio that receives the data
+ * @offset: offset in bytes from partition's begin
+ *
+ * The read itself is delegated to ssdfs_bdev_read_block().
+ * If CONFIG_SSDFS_DEBUG is enabled then the state of the zone
+ * that contains @offset is reported into the log after the read.
+ *
+ * RETURN:
+ * Result of ssdfs_bdev_read_block() without any modification.
+ */
+static int ssdfs_zns_read_block(struct super_block *sb, struct folio *folio,
+				loff_t offset)
+{
+	int err;
+#ifdef CONFIG_SSDFS_DEBUG
+	sector_t first_sector = offset >> SECTOR_SHIFT;
+	struct blk_zone zinfo;
+	int reported;
+
+	SSDFS_DBG("sb %p, offset %llu\n",
+		  sb, (unsigned long long)offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	err = ssdfs_bdev_read_block(sb, folio, offset);
+
+#ifdef CONFIG_SSDFS_DEBUG
+	reported = blkdev_report_zones(sb->s_bdev, first_sector, 1,
+					ssdfs_report_zone, &zinfo);
+	if (reported == 1) {
+		SSDFS_DBG("zone: start %llu, len %llu, wp %llu, "
+			  "type %#x, cond %#x, non_seq %#x, "
+			  "reset %#x, capacity %llu\n",
+			  zinfo.start, zinfo.len, zinfo.wp,
+			  zinfo.type, zinfo.cond, zinfo.non_seq,
+			  zinfo.reset, zinfo.capacity);
+	} else {
+		SSDFS_ERR("fail to take report zone: "
+			  "zone_sector %llu, err %d\n",
+			  first_sector, reported);
+	}
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	return err;
+}
+
+/*
+ * ssdfs_zns_read_blocks() - read logical blocks from the volume
+ * @sb: superblock object
+ * @batch: folio batch that receives the data
+ * @offset: offset in bytes from partition's beginning
+ *
+ * The read itself is delegated to ssdfs_bdev_read_blocks().
+ * If CONFIG_SSDFS_DEBUG is enabled then the state of the zone
+ * that contains @offset is reported into the log after the read.
+ *
+ * RETURN:
+ * Result of ssdfs_bdev_read_blocks() without any modification.
+ */
+static
+int ssdfs_zns_read_blocks(struct super_block *sb, struct folio_batch *batch,
+			  loff_t offset)
+{
+	int err;
+#ifdef CONFIG_SSDFS_DEBUG
+	sector_t first_sector = offset >> SECTOR_SHIFT;
+	struct blk_zone zinfo;
+	int reported;
+
+	SSDFS_DBG("sb %p, offset %llu\n",
+		  sb, (unsigned long long)offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	err = ssdfs_bdev_read_blocks(sb, batch, offset);
+
+#ifdef CONFIG_SSDFS_DEBUG
+	reported = blkdev_report_zones(sb->s_bdev, first_sector, 1,
+					ssdfs_report_zone, &zinfo);
+	if (reported == 1) {
+		SSDFS_DBG("zone: start %llu, len %llu, wp %llu, "
+			  "type %#x, cond %#x, non_seq %#x, "
+			  "reset %#x, capacity %llu\n",
+			  zinfo.start, zinfo.len, zinfo.wp,
+			  zinfo.type, zinfo.cond, zinfo.non_seq,
+			  zinfo.reset, zinfo.capacity);
+	} else {
+		SSDFS_ERR("fail to take report zone: "
+			  "zone_sector %llu, err %d\n",
+			  first_sector, reported);
+	}
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	return err;
+}
+
+/*
+ * ssdfs_zns_read() - read from volume into buffer
+ * @sb: superblock object
+ * @block_size: block size in bytes
+ * @offset: offset in bytes from partition's begin
+ * @len: size of buffer in bytes
+ * @buf: buffer
+ *
+ * The read of @len bytes at @offset into @buf is delegated
+ * to ssdfs_bdev_read(). If CONFIG_SSDFS_DEBUG is enabled then
+ * the state of the zone that contains @offset is reported
+ * into the log after the read.
+ *
+ * RETURN:
+ * Result of ssdfs_bdev_read() without any modification.
+ */
+static
+int ssdfs_zns_read(struct super_block *sb, u32 block_size,
+		   loff_t offset, size_t len, void *buf)
+{
+	int err;
+#ifdef CONFIG_SSDFS_DEBUG
+	sector_t first_sector = offset >> SECTOR_SHIFT;
+	struct blk_zone zinfo;
+	int reported;
+
+	SSDFS_DBG("sb %p, block_size %u, offset %llu, len %zu, buf %p\n",
+		  sb, block_size, (unsigned long long)offset, len, buf);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	err = ssdfs_bdev_read(sb, block_size, offset, len, buf);
+
+#ifdef CONFIG_SSDFS_DEBUG
+	reported = blkdev_report_zones(sb->s_bdev, first_sector, 1,
+					ssdfs_report_zone, &zinfo);
+	if (reported == 1) {
+		SSDFS_DBG("zone: start %llu, len %llu, wp %llu, "
+			  "type %#x, cond %#x, non_seq %#x, "
+			  "reset %#x, capacity %llu\n",
+			  zinfo.start, zinfo.len, zinfo.wp,
+			  zinfo.type, zinfo.cond, zinfo.non_seq,
+			  zinfo.reset, zinfo.capacity);
+	} else {
+		SSDFS_ERR("fail to take report zone: "
+			  "zone_sector %llu, err %d\n",
+			  first_sector, reported);
+	}
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	return err;
+}
+
+/*
+ * ssdfs_zns_can_write_block() - check that logical block can be written
+ * @sb: superblock object
+ * @block_size: block size in bytes
+ * @offset: offset in bytes from partition's beginning
+ * @need_check: make check or not?
+ *
+ * This function checks that logical block can be written.
+ * Nothing is checked if @need_check is false. Otherwise,
+ * the zone that contains @offset is reported and the decision
+ * depends on the zone's type and condition:
+ *
+ * conventional zone / zone without write pointer - the check is
+ *     delegated to ssdfs_bdev_can_write_block();
+ * empty zone - the block can be written;
+ * closed zone - the zone is reopened by ssdfs_zns_reopen_zone();
+ * read-only, full or offline zone - the block cannot be written;
+ * any other condition - the block cannot be written if its
+ *     sector is located before the zone's write pointer.
+ *
+ * RETURN:
+ * [success] - 0 (block can be written).
+ * [failure] - error code:
+ *
+ * %-EIO - zone report failed without an error code, or the block
+ *         cannot be written.
+ * other - error code of blkdev_report_zones(),
+ *         ssdfs_bdev_can_write_block() or ssdfs_zns_reopen_zone().
+ */
+static int ssdfs_zns_can_write_block(struct super_block *sb, u32 block_size,
+				     loff_t offset, bool need_check)
+{
+	struct ssdfs_fs_info *fsi = SSDFS_FS_I(sb);
+	struct blk_zone zone;
+	sector_t zone_sector = offset >> SECTOR_SHIFT;
+	sector_t zone_size = fsi->erasesize >> SECTOR_SHIFT;
+	u64 peb_id;
+	loff_t zone_offset;
+	int res;
+	int err = 0;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("sb %p, offset %llu, block_size %u, need_check %d\n",
+		  sb, (unsigned long long)offset,
+		  block_size, (int)need_check);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	if (!need_check)
+		return 0;
+
+	res = blkdev_report_zones(sb->s_bdev, zone_sector, 1,
+				  ssdfs_report_zone, &zone);
+	if (res != 1) {
+		SSDFS_ERR("fail to take report zone: "
+			  "zone_sector %llu, err %d\n",
+			  zone_sector, res);
+		/*
+		 * A non-negative count must not be returned as is:
+		 * zero reported zones would be interpreted by
+		 * the caller as "block can be written".
+		 */
+		return res < 0 ? res : -EIO;
+	}
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("zone before: start %llu, len %llu, wp %llu, "
+		  "type %#x, cond %#x, non_seq %#x, "
+		  "reset %#x, capacity %llu\n",
+		  zone.start, zone.len, zone.wp,
+		  zone.type, zone.cond, zone.non_seq,
+		  zone.reset, zone.capacity);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	switch (zone.type) {
+	case BLK_ZONE_TYPE_CONVENTIONAL:
+		return ssdfs_bdev_can_write_block(sb, block_size,
+						  offset, need_check);
+
+	default:
+		/*
+		 * BLK_ZONE_TYPE_SEQWRITE_REQ
+		 * BLK_ZONE_TYPE_SEQWRITE_PREF
+		 *
+		 * continue logic
+		 */
+		break;
+	}
+
+	switch (zone.cond) {
+	case BLK_ZONE_COND_NOT_WP:
+		return ssdfs_bdev_can_write_block(sb, block_size,
+						  offset, need_check);
+
+	case BLK_ZONE_COND_EMPTY:
+		/* can write */
+#ifdef CONFIG_SSDFS_DEBUG
+		SSDFS_DBG("zone is empty: offset %llu\n",
+			  offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+		return 0;
+
+	case BLK_ZONE_COND_CLOSED:
+#ifdef CONFIG_SSDFS_DEBUG
+		SSDFS_DBG("zone is closed: offset %llu\n",
+			  offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+		/* round @offset down to the beginning of its erase block */
+		peb_id = offset / fsi->erasesize;
+		zone_offset = peb_id * fsi->erasesize;
+
+		err = ssdfs_zns_reopen_zone(sb, zone_offset);
+		if (unlikely(err)) {
+			SSDFS_ERR("fail to reopen zone: "
+				  "zone_offset %llu, zone_size %llu, "
+				  "err %d\n",
+				  zone_offset, zone_size, err);
+			return err;
+		}
+
+		/*
+		 * NOTE(review): the write pointer is not compared
+		 * with @offset for a reopened zone - confirm that
+		 * this is intended.
+		 */
+		return 0;
+
+	case BLK_ZONE_COND_READONLY:
+#ifdef CONFIG_SSDFS_DEBUG
+		SSDFS_DBG("zone is READ-ONLY: offset %llu\n",
+			  offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+		return -EIO;
+
+	case BLK_ZONE_COND_FULL:
+#ifdef CONFIG_SSDFS_DEBUG
+		SSDFS_DBG("zone is full: offset %llu\n",
+			  offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+		return -EIO;
+
+	case BLK_ZONE_COND_OFFLINE:
+#ifdef CONFIG_SSDFS_DEBUG
+		SSDFS_DBG("zone is offline: offset %llu\n",
+			  offset);
+#endif /* CONFIG_SSDFS_DEBUG */
+		return -EIO;
+
+	default:
+		/* continue logic */
+		break;
+	}
+
+	/* sectors before the write pointer cannot be written again */
+	if (zone_sector < zone.wp) {
+		err = -EIO;
+#ifdef CONFIG_SSDFS_DEBUG
+		SSDFS_DBG("cannot be written: "
+			  "zone_sector %llu, zone.wp %llu\n",
+			  zone_sector, zone.wp);
+#endif /* CONFIG_SSDFS_DEBUG */
+	}
+
+#ifdef CONFIG_SSDFS_DEBUG
+	res = blkdev_report_zones(sb->s_bdev, zone_sector, 1,
+				  ssdfs_report_zone, &zone);
+	if (res != 1) {
+		SSDFS_ERR("fail to take report zone: "
+			  "zone_sector %llu, err %d\n",
+			  zone_sector, res);
+	} else {
+		SSDFS_DBG("zone after: start %llu, len %llu, wp %llu, "
+			  "type %#x, cond %#x, non_seq %#x, "
+			  "reset %#x, capacity %llu\n",
+			  zone.start, zone.len, zone.wp,
+			  zone.type, zone.cond, zone.non_seq,
+			  zone.reset, zone.capacity);
+	}
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	return err;
+}
+
+/*
+ * ssdfs_zns_write_block() - write logical block to volume
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's begin
+ * @folio: memory folio
+ *
+ * This function locks @folio, accounts the request in
+ * fsi->pending_bios and submits the folio synchronously by means
+ * of ssdfs_zns_sync_folio_request() for the zone that contains
+ * @offset. On success the folio is made clean and up-to-date.
+ * In any case the folio is unlocked and put, and waiters of
+ * zns_wq are woken up when the pending requests counter
+ * drops to zero.
+ *
+ * RETURN:
+ * [success] - 0.
+ * [failure] - error code:
+ *
+ * %-EROFS - file system in RO mode.
+ * other   - error code of ssdfs_zns_sync_folio_request().
+ */
+static
+int ssdfs_zns_write_block(struct super_block *sb, loff_t offset,
+			  struct folio *folio)
+{
+	struct ssdfs_fs_info *fsi = SSDFS_FS_I(sb);
+	loff_t zone_start;
+	int err;
+#ifdef CONFIG_SSDFS_DEBUG
+	sector_t first_sector = offset >> SECTOR_SHIFT;
+	struct blk_zone zinfo;
+	u32 remainder;
+	int reported;
+
+	SSDFS_DBG("sb %p, offset %llu, folio %p\n",
+		  sb, offset, folio);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	if (sb->s_flags & SB_RDONLY) {
+		SSDFS_WARN("unable to write on RO file system\n");
+		return -EROFS;
+	}
+
+#ifdef CONFIG_SSDFS_DEBUG
+	BUG_ON(!folio);
+	BUG_ON((offset >= ssdfs_zns_device_size(sb)) ||
+		(folio_size(folio) > (ssdfs_zns_device_size(sb) - offset)));
+	div_u64_rem((u64)offset, (u64)folio_size(folio), &remainder);
+	BUG_ON(remainder);
+	BUG_ON(!folio_test_dirty(folio));
+	BUG_ON(folio_test_locked(folio));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	ssdfs_folio_lock(folio);
+	atomic_inc(&fsi->pending_bios);
+
+	/* first sector of the erase block that contains @offset */
+	zone_start = ((offset / fsi->erasesize) * fsi->erasesize) >>
+							SECTOR_SHIFT;
+
+	err = ssdfs_zns_sync_folio_request(sb, folio, zone_start, offset,
+					   REQ_OP_WRITE, REQ_SYNC);
+	if (!err) {
+		ssdfs_clear_dirty_folio(folio);
+		folio_mark_uptodate(folio);
+	} else {
+		SSDFS_ERR("failed to write (err %d): offset %llu\n",
+			  err, (unsigned long long)offset);
+	}
+
+	ssdfs_folio_unlock(folio);
+	ssdfs_folio_put(folio);
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("folio %p, count %d\n",
+		  folio, folio_ref_count(folio));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	/* the last finished request wakes up ssdfs_zns_sync() waiters */
+	if (atomic_dec_and_test(&fsi->pending_bios))
+		wake_up_all(&zns_wq);
+
+#ifdef CONFIG_SSDFS_DEBUG
+	reported = blkdev_report_zones(sb->s_bdev, first_sector, 1,
+					ssdfs_report_zone, &zinfo);
+	if (reported == 1) {
+		SSDFS_DBG("zone: start %llu, len %llu, wp %llu, "
+			  "type %#x, cond %#x, non_seq %#x, "
+			  "reset %#x, capacity %llu\n",
+			  zinfo.start, zinfo.len, zinfo.wp,
+			  zinfo.type, zinfo.cond, zinfo.non_seq,
+			  zinfo.reset, zinfo.capacity);
+	} else {
+		SSDFS_ERR("fail to take report zone: "
+			  "zone_sector %llu, err %d\n",
+			  first_sector, reported);
+	}
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	return err;
+}
+
+/*
+ * ssdfs_zns_write_blocks() - write folio batch to volume
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's beginning
+ * @batch: folio batch
+ *
+ * This function locks every folio of @batch, accounts
+ * the request in fsi->pending_bios and submits the whole batch
+ * synchronously by means of ssdfs_zns_sync_batch_request() for
+ * the zone that contains @offset. On success every folio is made
+ * clean and up-to-date. In any case every folio is unlocked and
+ * put, and waiters of zns_wq are woken up when the pending
+ * requests counter drops to zero.
+ *
+ * RETURN:
+ * [success] - 0 (an empty @batch is reported and treated as success).
+ * [failure] - error code:
+ *
+ * %-EROFS - file system in RO mode.
+ * other   - error code of ssdfs_zns_sync_batch_request().
+ */
+static
+int ssdfs_zns_write_blocks(struct super_block *sb, loff_t offset,
+			   struct folio_batch *batch)
+{
+	struct ssdfs_fs_info *fsi = SSDFS_FS_I(sb);
+	struct folio *cur;
+	loff_t zone_start;
+	unsigned int nr_folios;
+	unsigned int idx;
+	int err;
+#ifdef CONFIG_SSDFS_DEBUG
+	sector_t first_sector = offset >> SECTOR_SHIFT;
+	struct blk_zone zinfo;
+	u32 remainder;
+	int reported;
+
+	SSDFS_DBG("sb %p, offset %llu, batch %p\n",
+		  sb, offset, batch);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	if (sb->s_flags & SB_RDONLY) {
+		SSDFS_WARN("unable to write on RO file system\n");
+		return -EROFS;
+	}
+
+#ifdef CONFIG_SSDFS_DEBUG
+	BUG_ON(!batch);
+	BUG_ON(offset >= ssdfs_zns_device_size(sb));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	nr_folios = folio_batch_count(batch);
+	if (nr_folios == 0) {
+		SSDFS_WARN("empty folio batch\n");
+		return 0;
+	}
+
+#ifdef CONFIG_SSDFS_DEBUG
+	div_u64_rem((u64)offset, (u64)folio_size(batch->folios[0]), &remainder);
+	BUG_ON(remainder);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	/* take the lock of every folio before the submission */
+	for (idx = 0; idx < nr_folios; idx++) {
+		cur = batch->folios[idx];
+
+#ifdef CONFIG_SSDFS_DEBUG
+		BUG_ON(!cur);
+		BUG_ON(!folio_test_dirty(cur));
+		BUG_ON(folio_test_locked(cur));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+		ssdfs_folio_lock(cur);
+	}
+
+	atomic_inc(&fsi->pending_bios);
+
+	/* first sector of the erase block that contains @offset */
+	zone_start = ((offset / fsi->erasesize) * fsi->erasesize) >>
+							SECTOR_SHIFT;
+
+	err = ssdfs_zns_sync_batch_request(sb, batch, zone_start, offset,
+					   REQ_OP_WRITE, REQ_SYNC);
+
+	/* finish every folio according to the result of the request */
+	for (idx = 0; idx < nr_folios; idx++) {
+		cur = batch->folios[idx];
+
+		if (!err) {
+			ssdfs_clear_dirty_folio(cur);
+			folio_mark_uptodate(cur);
+		} else {
+			SSDFS_ERR("failed to write (err %d): "
+				  "folio_index %llu\n",
+				  err,
+				  (unsigned long long)cur->index);
+		}
+
+		ssdfs_folio_unlock(cur);
+		ssdfs_folio_put(cur);
+
+#ifdef CONFIG_SSDFS_DEBUG
+		SSDFS_DBG("folio %p, count %d\n",
+			  cur, folio_ref_count(cur));
+#endif /* CONFIG_SSDFS_DEBUG */
+	}
+
+	/* the last finished request wakes up ssdfs_zns_sync() waiters */
+	if (atomic_dec_and_test(&fsi->pending_bios))
+		wake_up_all(&zns_wq);
+
+#ifdef CONFIG_SSDFS_DEBUG
+	reported = blkdev_report_zones(sb->s_bdev, first_sector, 1,
+					ssdfs_report_zone, &zinfo);
+	if (reported == 1) {
+		SSDFS_DBG("zone: start %llu, len %llu, wp %llu, "
+			  "type %#x, cond %#x, non_seq %#x, "
+			  "reset %#x, capacity %llu\n",
+			  zinfo.start, zinfo.len, zinfo.wp,
+			  zinfo.type, zinfo.cond, zinfo.non_seq,
+			  zinfo.reset, zinfo.capacity);
+	} else {
+		SSDFS_ERR("fail to take report zone: "
+			  "zone_sector %llu, err %d\n",
+			  first_sector, reported);
+	}
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	return err;
+}
+
+/*
+ * ssdfs_zns_trim() - initiate background erase operation
+ * @sb: superblock object
+ * @offset: offset in bytes from partition's begin
+ * @len: size in bytes
+ *
+ * This function resets the zone(s) that cover @len bytes starting
+ * from @offset by means of REQ_OP_ZONE_RESET. The @len has to be
+ * a multiple of the erase block size; under CONFIG_SSDFS_DEBUG
+ * an unaligned @offset or @len triggers BUG_ON().
+ *
+ * RETURN:
+ * [success] - 0.
+ * [failure] - error code:
+ *
+ * %-EROFS  - file system in RO mode.
+ * %-ERANGE - @len is not a multiple of the erase block size
+ *            or the requested range is empty.
+ * other    - error code of blkdev_zone_mgmt().
+ */
+static int ssdfs_zns_trim(struct super_block *sb, loff_t offset, size_t len)
+{
+	struct ssdfs_fs_info *fsi = SSDFS_FS_I(sb);
+	u32 erase_size = fsi->erasesize;
+	loff_t page_start, page_end;
+	u32 pages_count;
+	u32 remainder;
+	sector_t start_sector;
+	sector_t sectors_count;
+	unsigned int nofs_flags;
+	int err = 0;
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("sb %p, offset %llu, len %zu\n",
+		  sb, (unsigned long long)offset, len);
+
+	div_u64_rem((u64)len, (u64)erase_size, &remainder);
+	BUG_ON(remainder);
+	div_u64_rem((u64)offset, (u64)erase_size, &remainder);
+	BUG_ON(remainder);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	if (sb->s_flags & SB_RDONLY)
+		return -EROFS;
+
+	div_u64_rem((u64)len, (u64)erase_size, &remainder);
+	if (remainder) {
+		SSDFS_WARN("len %llu, erase_size %u, remainder %u\n",
+			   (unsigned long long)len,
+			   erase_size, remainder);
+		return -ERANGE;
+	}
+
+	page_start = offset >> PAGE_SHIFT;
+	page_end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	pages_count = (u32)(page_end - page_start);
+
+	if (pages_count == 0) {
+		SSDFS_WARN("pages_count equals to zero\n");
+		return -ERANGE;
+	}
+
+	start_sector = offset >> SECTOR_SHIFT;
+	/*
+	 * Reset the whole requested range: @len has been checked
+	 * to be a multiple of the erase block size, so it can cover
+	 * more than one zone.
+	 */
+	sectors_count = (sector_t)len >> SECTOR_SHIFT;
+
+	/* forbid recursion into the file system during the zone reset */
+	nofs_flags = memalloc_nofs_save();
+	err = blkdev_zone_mgmt(sb->s_bdev, REQ_OP_ZONE_RESET,
+				start_sector, sectors_count);
+	memalloc_nofs_restore(nofs_flags);
+	if (unlikely(err)) {
+		SSDFS_ERR("fail to reset zone: "
+			  "zone_sector %llu, zone_size %llu, err %d\n",
+			  start_sector, sectors_count, err);
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * ssdfs_zns_peb_isbad() - check that PEB is bad
+ * @sb: superblock object (unused)
+ * @offset: offset in bytes from partition's begin (unused)
+ *
+ * Stub implementation: nothing is checked.
+ *
+ * RETURN:
+ * Always 0.
+ */
+static int ssdfs_zns_peb_isbad(struct super_block *sb, loff_t offset)
+{
+	/* no detection logic is implemented here */
+	return 0;
+}
+
+/*
+ * ssdfs_zns_mark_peb_bad() - mark PEB as bad
+ * @sb: superblock object (unused)
+ * @offset: offset in bytes from partition's begin (unused)
+ *
+ * Stub implementation: nothing is marked.
+ *
+ * RETURN:
+ * Always 0.
+ */
+static int ssdfs_zns_mark_peb_bad(struct super_block *sb, loff_t offset)
+{
+	/* no marking logic is implemented here */
+	return 0;
+}
+
+/*
+ * ssdfs_zns_sync() - make sync operation
+ * @sb: superblock object
+ *
+ * This function sleeps on zns_wq until the counter of pending
+ * requests (fsi->pending_bios) becomes zero.
+ */
+static void ssdfs_zns_sync(struct super_block *sb)
+{
+	struct ssdfs_fs_info *fs_info = SSDFS_FS_I(sb);
+
+#ifdef CONFIG_SSDFS_DEBUG
+	SSDFS_DBG("device %s\n", sb->s_id);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+	/* write paths wake up zns_wq when the counter drops to zero */
+	wait_event(zns_wq, !atomic_read(&fs_info->pending_bios));
+}
+
+/*
+ * Table of device operations implemented by the ssdfs_zns_*()
+ * functions of this file.
+ */
+const struct ssdfs_device_ops ssdfs_zns_devops = {
+	.device_name		= ssdfs_zns_device_name,
+	.device_size		= ssdfs_zns_device_size,
+	.open_zone		= ssdfs_zns_open_zone,
+	.reopen_zone		= ssdfs_zns_reopen_zone,
+	.close_zone		= ssdfs_zns_close_zone,
+	.read			= ssdfs_zns_read,
+	.read_block		= ssdfs_zns_read_block,
+	.read_blocks		= ssdfs_zns_read_blocks,
+	.can_write_block	= ssdfs_zns_can_write_block,
+	.write_block		= ssdfs_zns_write_block,
+	.write_blocks		= ssdfs_zns_write_blocks,
+	/* erase and trim share the same implementation */
+	.erase			= ssdfs_zns_trim,
+	.trim			= ssdfs_zns_trim,
+	.peb_isbad		= ssdfs_zns_peb_isbad,
+	.mark_peb_bad		= ssdfs_zns_mark_peb_bad,
+	.sync			= ssdfs_zns_sync,
+};
--
2.34.1