[PATCH] f2fs: multidevice: support direct IO

From: Chao Yu
Date: Mon Jul 19 2021 - 05:04:41 EST


Commit 3c62be17d4f5 ("f2fs: support multiple devices") missed
to support direct IO for multiple device feature, this patch
adds to support the missing part of multidevice feature.

In addition, for multiple device image, we should be aware of
any issued direct write IO rather than just buffered write IO,
so that fsync and syncfs can issue a preflush command to the
device where direct write IO goes, to persist user data for
posix compliant.

Signed-off-by: Chao Yu <chao@xxxxxxxxxx>
---
fs/f2fs/data.c | 52 +++++++++++++++++++++++++++++++++++++++++++++--
fs/f2fs/f2fs.h | 24 +++++++++++++++++++---
fs/f2fs/segment.c | 29 +++++++++++++-------------
fs/f2fs/super.c | 7 +++++++
4 files changed, 93 insertions(+), 19 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 095350ccf80d..be65dca0de40 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1451,10 +1451,16 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
struct extent_info ei = {0,0,0};
block_t blkaddr;
unsigned int start_pgofs;
+ int bidx = 0;
+ bool multidevice_dio;

if (!maxblocks)
return 0;

+ multidevice_dio = f2fs_allow_multi_device_dio(sbi, flag);
+ if (multidevice_dio)
+ map->m_bdev = sbi->sb->s_bdev;
+
map->m_len = 0;
map->m_flags = 0;

@@ -1477,6 +1483,16 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
if (flag == F2FS_GET_BLOCK_DIO)
f2fs_wait_on_block_writeback_range(inode,
map->m_pblk, map->m_len);
+
+ if (multidevice_dio) {
+ bidx = f2fs_target_device_index(sbi, map->m_pblk);
+ if (bidx) {
+ map->m_bdev = FDEV(bidx).bdev;
+ map->m_pblk -= FDEV(bidx).start_blk;
+ map->m_len = min(map->m_len,
+ FDEV(bidx).end_blk + 1 - map->m_pblk);
+ }
+ }
goto out;
}

@@ -1574,6 +1590,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
if (flag == F2FS_GET_BLOCK_PRE_AIO)
goto skip;

+ if (multidevice_dio)
+ bidx = f2fs_target_device_index(sbi, blkaddr);
+
if (map->m_len == 0) {
/* preallocated unwritten block should be mapped for fiemap. */
if (blkaddr == NEW_ADDR)
@@ -1582,10 +1601,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,

map->m_pblk = blkaddr;
map->m_len = 1;
+
+ if (multidevice_dio && map->m_bdev != FDEV(bidx).bdev)
+ map->m_bdev = FDEV(bidx).bdev;
} else if ((map->m_pblk != NEW_ADDR &&
blkaddr == (map->m_pblk + ofs)) ||
(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
flag == F2FS_GET_BLOCK_PRE_DIO) {
+ if (multidevice_dio && map->m_bdev != FDEV(bidx).bdev)
+ goto sync_out;
ofs++;
map->m_len++;
} else {
@@ -1638,11 +1662,27 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,

sync_out:

- /* for hardware encryption, but to avoid potential issue in future */
- if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
+ if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
+ /*
+ * for hardware encryption, but to avoid potential issue
+ * in future
+ */
f2fs_wait_on_block_writeback_range(inode,
map->m_pblk, map->m_len);

+ if (multidevice_dio) {
+ bidx = f2fs_target_device_index(sbi, map->m_pblk);
+ if (bidx) {
+ map->m_bdev = FDEV(bidx).bdev;
+ map->m_pblk -= FDEV(bidx).start_blk;
+ f2fs_bug_on(sbi,
+ map->m_bdev != FDEV(bidx).bdev ||
+ map->m_pblk + map->m_len >
+ FDEV(bidx).end_blk + 1);
+ }
+ }
+ }
+
if (flag == F2FS_GET_BLOCK_PRECACHE) {
if (map->m_flags & F2FS_MAP_MAPPED) {
unsigned int ofs = start_pgofs - map->m_lblk;
@@ -1720,6 +1760,9 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
map_bh(bh, inode->i_sb, map.m_pblk);
bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
bh->b_size = blks_to_bytes(inode, map.m_len);
+
+ if (f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag))
+ bh->b_bdev = map.m_bdev;
}
return err;
}
@@ -3511,6 +3554,7 @@ static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
{
struct f2fs_private_dio *dio;
bool write = (bio_op(bio) == REQ_OP_WRITE);
+ block_t blkaddr = SECTOR_TO_BLOCK(bio->bi_iter.bi_sector);
unsigned int blkcnt = bio_sectors(bio) >> F2FS_LOG_SECTORS_PER_BLOCK;

dio = f2fs_kzalloc(F2FS_I_SB(inode),
@@ -3527,6 +3571,10 @@ static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
bio->bi_end_io = f2fs_dio_end_io;
bio->bi_private = dio;

+ if (write)
+ f2fs_update_device_state(F2FS_I_SB(inode), inode->i_ino,
+ blkaddr, blkcnt);
+
inc_page_counts(F2FS_I_SB(inode),
write ? F2FS_DIO_WRITE : F2FS_DIO_READ, dio->blkcnt);

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7369f8087f64..f3ac3cbad2e3 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -607,6 +607,7 @@ struct extent_tree {
F2FS_MAP_UNWRITTEN)

struct f2fs_map_blocks {
+ struct block_device *m_bdev; /* for multi-device */
block_t m_pblk;
block_t m_lblk;
unsigned int m_len;
@@ -1712,12 +1713,15 @@ struct f2fs_sb_info {

/* For shrinker support */
struct list_head s_list;
+ struct mutex umount_mutex;
+ unsigned int shrinker_run_no;
+
+ /* For multi devices */
int s_ndevs; /* number of devices */
struct f2fs_dev_info *devs; /* for device list */
unsigned int dirty_device; /* for checkpoint data flush */
spinlock_t dev_lock; /* protect dirty_device */
- struct mutex umount_mutex;
- unsigned int shrinker_run_no;
+ bool aligned_blksize; /* all devices has the same logical blksize */

/* For write statistics */
u64 sectors_written_start;
@@ -3527,6 +3531,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
struct f2fs_io_info *fio);
+void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
+ block_t blkaddr, unsigned int blkcnt);
void f2fs_wait_on_page_writeback(struct page *page,
enum page_type type, bool ordered, bool locked);
void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
@@ -4334,6 +4340,16 @@ static inline int allow_outplace_dio(struct inode *inode,
!block_unaligned_IO(inode, iocb, iter));
}

+static inline bool f2fs_allow_multi_device_dio(struct f2fs_sb_info *sbi,
+ int flag)
+{
+ if (!f2fs_is_multi_device(sbi))
+ return false;
+ if (flag != F2FS_GET_BLOCK_DIO)
+ return false;
+ return sbi->aligned_blksize;
+}
+
static inline bool f2fs_force_buffered_io(struct inode *inode,
struct kiocb *iocb, struct iov_iter *iter)
{
@@ -4342,7 +4358,9 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,

if (f2fs_post_read_required(inode))
return true;
- if (f2fs_is_multi_device(sbi))
+
+ /* disallow direct IO if any of devices has unaligned blksize */
+ if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
return true;
/*
* for blkzoned device, fallback direct IO to buffered IO, so
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 15cc89eef28d..6fcfd234108a 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3442,24 +3442,24 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
up_read(&SM_I(sbi)->curseg_lock);
}

-static void update_device_state(struct f2fs_io_info *fio)
+void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
+ block_t blkaddr, unsigned int blkcnt)
{
- struct f2fs_sb_info *sbi = fio->sbi;
- unsigned int devidx;
-
if (!f2fs_is_multi_device(sbi))
return;

- devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
+ for (; blkcnt > 0; blkcnt--, blkaddr++) {
+ unsigned int devidx = f2fs_target_device_index(sbi, blkaddr);

- /* update device state for fsync */
- f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
+ /* update device state for fsync */
+ f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO);

- /* update device state for checkpoint */
- if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
- spin_lock(&sbi->dev_lock);
- f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
- spin_unlock(&sbi->dev_lock);
+ /* update device state for checkpoint */
+ if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
+ spin_lock(&sbi->dev_lock);
+ f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
+ spin_unlock(&sbi->dev_lock);
+ }
}
}

@@ -3486,7 +3486,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
goto reallocate;
}

- update_device_state(fio);
+ f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);

if (keep_order)
up_read(&fio->sbi->io_order_lock);
@@ -3575,7 +3575,8 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
else
err = f2fs_submit_page_bio(fio);
if (!err) {
- update_device_state(fio);
+ f2fs_update_device_state(fio->sbi, fio->ino,
+ fio->new_blkaddr, 1);
f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
}

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 72eb9d70969f..dced7778d530 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3646,6 +3646,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
unsigned int max_devices = MAX_DEVICES;
+ unsigned int logical_blksize;
int i;

/* Initialize single device information */
@@ -3666,6 +3667,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
if (!sbi->devs)
return -ENOMEM;

+ logical_blksize = bdev_logical_block_size(sbi->sb->s_bdev);
+ sbi->aligned_blksize = true;
+
for (i = 0; i < max_devices; i++) {

if (i > 0 && !RDEV(i).path[0])
@@ -3702,6 +3706,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
/* to release errored devices */
sbi->s_ndevs = i + 1;

+ if (logical_blksize != bdev_logical_block_size(FDEV(i).bdev))
+ sbi->aligned_blksize = false;
+
#ifdef CONFIG_BLK_DEV_ZONED
if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
!f2fs_sb_has_blkzoned(sbi)) {
--
2.22.1