Re: [PATCH v2 2/4] erofs: support unencoded inodes for fileio

From: Gao Xiang
Date: Fri Aug 30 2024 - 22:54:26 EST




On 2024/8/31 08:19, Sandeep Dhavale wrote:
On Thu, Aug 29, 2024 at 8:29 PM Gao Xiang <hsiangkao@xxxxxxxxxxxxxxxxx> wrote:

Since EROFS only needs to handle read requests in simple contexts,
Just directly use vfs_iocb_iter_read() for data I/Os.

Signed-off-by: Gao Xiang <hsiangkao@xxxxxxxxxxxxxxxxx>
---
v2:
- fix redundant refcount which cause hanging on chunked inodes.

fs/erofs/Makefile | 1 +
fs/erofs/data.c | 50 +++++++++++-
fs/erofs/fileio.c | 181 ++++++++++++++++++++++++++++++++++++++++++++
fs/erofs/inode.c | 17 +++--
fs/erofs/internal.h | 7 +-
fs/erofs/zdata.c | 46 ++---------
6 files changed, 251 insertions(+), 51 deletions(-)
create mode 100644 fs/erofs/fileio.c

diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
index 097d672e6b14..4331d53c7109 100644
--- a/fs/erofs/Makefile
+++ b/fs/erofs/Makefile
@@ -7,4 +7,5 @@ erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o zutil.o
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o
erofs-$(CONFIG_EROFS_FS_ZIP_ZSTD) += decompressor_zstd.o
+erofs-$(CONFIG_EROFS_FS_BACKED_BY_FILE) += fileio.o
erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 0fb31c588ae0..b4c07ce7a294 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -132,7 +132,7 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
if (map->m_la >= inode->i_size) {
/* leave out-of-bound access unmapped */
map->m_flags = 0;
- map->m_plen = 0;
+ map->m_plen = map->m_llen;
goto out;
}

@@ -197,8 +197,13 @@ static void erofs_fill_from_devinfo(struct erofs_map_dev *map,
struct erofs_device_info *dif)
{
map->m_bdev = NULL;
- if (dif->file && S_ISBLK(file_inode(dif->file)->i_mode))
- map->m_bdev = file_bdev(dif->file);
+ map->m_fp = NULL;
+ if (dif->file) {
+ if (S_ISBLK(file_inode(dif->file)->i_mode))
+ map->m_bdev = file_bdev(dif->file);
+ else
+ map->m_fp = dif->file;
+ }
map->m_daxdev = dif->dax_dev;
map->m_dax_part_off = dif->dax_part_off;
map->m_fscache = dif->fscache;
@@ -215,6 +220,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
map->m_daxdev = EROFS_SB(sb)->dax_dev;
map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
map->m_fscache = EROFS_SB(sb)->s_fscache;
+ map->m_fp = EROFS_SB(sb)->fdev;

if (map->m_deviceid) {
down_read(&devs->rwsem);
@@ -250,6 +256,42 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
return 0;
}

+/*
+ * bit 30: I/O error occurred on this folio
+ * bit 0 - 29: remaining parts to complete this folio
+ */
+#define EROFS_ONLINEFOLIO_EIO (1 << 30)
+
+void erofs_onlinefolio_init(struct folio *folio)
+{
+ union {
+ atomic_t o;
+ void *v;
+ } u = { .o = ATOMIC_INIT(1) };
+
+ folio->private = u.v; /* valid only if file-backed folio is locked */
+}
+
+void erofs_onlinefolio_split(struct folio *folio)
+{
+ atomic_inc((atomic_t *)&folio->private);
+}
+
+void erofs_onlinefolio_end(struct folio *folio, int err)
+{
+ int orig, v;
+
+ do {
+ orig = atomic_read((atomic_t *)&folio->private);
+ v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0);
+ } while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig);
+
+ if (v & ~EROFS_ONLINEFOLIO_EIO)
+ return;
+ folio->private = 0;
+ folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO));
+}
+
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
@@ -399,7 +441,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
}

/* for uncompressed (aligned) files and raw access for other files */
-const struct address_space_operations erofs_raw_access_aops = {
+const struct address_space_operations erofs_aops = {
.read_folio = erofs_read_folio,
.readahead = erofs_readahead,
.bmap = erofs_bmap,
diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
new file mode 100644
index 000000000000..eab52b8abd0b
--- /dev/null
+++ b/fs/erofs/fileio.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2024, Alibaba Cloud
+ */
+#include "internal.h"
+#include <trace/events/erofs.h>
+
+struct erofs_fileio_rq {
+ struct bio_vec bvecs[BIO_MAX_VECS];
+ struct bio bio;
+ struct kiocb iocb;
+};
+
+struct erofs_fileio {
+ struct erofs_map_blocks map;
+ struct erofs_map_dev dev;
+ struct erofs_fileio_rq *rq;
+};
+
+static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret)
+{
+ struct erofs_fileio_rq *rq =
+ container_of(iocb, struct erofs_fileio_rq, iocb);
+ struct folio_iter fi;
+
+ DBG_BUGON(rq->bio.bi_end_io);
+ if (ret > 0) {
+ if (ret != rq->bio.bi_iter.bi_size) {
+ bio_advance(&rq->bio, ret);
+ zero_fill_bio(&rq->bio);
+ }
+ ret = 0;
+ }
+ bio_for_each_folio_all(fi, &rq->bio) {
+ DBG_BUGON(folio_test_uptodate(fi.folio));
+ erofs_onlinefolio_end(fi.folio, ret);
+ }
+ kfree(rq);
+}
+
+static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
+{
+ struct iov_iter iter;
+ int ret;
+
+ if (!rq)
+ return;
+ rq->iocb.ki_pos = rq->bio.bi_iter.bi_sector << 9;
+ rq->iocb.ki_ioprio = get_current_ioprio();
+ rq->iocb.ki_complete = erofs_fileio_ki_complete;
+ rq->iocb.ki_flags = (rq->iocb.ki_filp->f_mode & FMODE_CAN_ODIRECT) ?
+ IOCB_DIRECT : 0;
Hi Gao,
Does this mean, direct IO by default if the backing file supports it
(technically filesystem where image/backing file reside)?

Yes.

Thanks,
Gao Xiang