[PATCH 2/2] f2fs: fix to reserve space for IO align feature

From: Chao Yu
Date: Tue Aug 27 2019 - 06:24:45 EST


https://bugzilla.kernel.org/show_bug.cgi?id=204137

With below script, we will hit panic during new segment allocation:

DISK=bingo.img
MOUNT_DIR=/mnt/f2fs

dd if=/dev/zero of=$DISK bs=1M count=105
mkfs.f2fe -a 1 -o 19 -t 1 -z 1 -f -q $DISK

mount -t f2fs $DISK $MOUNT_DIR -o "noinline_dentry,flush_merge,noextent_cache,mode=lfs,io_bits=7,fsync_mode=strict"

for (( i = 0; i < 4096; i++ )); do
name=`head /dev/urandom | tr -dc A-Za-z0-9 | head -c 10`
mkdir $MOUNT_DIR/$name
done

umount $MOUNT_DIR
rm $DISK

--- Core dump ---
Call Trace:
allocate_segment_by_default+0x9d/0x100 [f2fs]
f2fs_allocate_data_block+0x3c0/0x5c0 [f2fs]
do_write_page+0x62/0x110 [f2fs]
f2fs_outplace_write_data+0x43/0xc0 [f2fs]
f2fs_do_write_data_page+0x386/0x560 [f2fs]
__write_data_page+0x706/0x850 [f2fs]
f2fs_write_cache_pages+0x267/0x6a0 [f2fs]
f2fs_write_data_pages+0x19c/0x2e0 [f2fs]
do_writepages+0x1c/0x70
__filemap_fdatawrite_range+0xaa/0xe0
filemap_fdatawrite+0x1f/0x30
f2fs_sync_dirty_inodes+0x74/0x1f0 [f2fs]
block_operations+0xdc/0x350 [f2fs]
f2fs_write_checkpoint+0x104/0x1150 [f2fs]
f2fs_sync_fs+0xa2/0x120 [f2fs]
f2fs_balance_fs_bg+0x33c/0x390 [f2fs]
f2fs_write_node_pages+0x4c/0x1f0 [f2fs]
do_writepages+0x1c/0x70
__writeback_single_inode+0x45/0x320
writeback_sb_inodes+0x273/0x5c0
wb_writeback+0xff/0x2e0
wb_workfn+0xa1/0x370
process_one_work+0x138/0x350
worker_thread+0x4d/0x3d0
kthread+0x109/0x140
ret_from_fork+0x25/0x30

The root cause here is, with IO alignment feature enables, in worst
case, we need F2FS_IO_SIZE() free blocks space for single one 4k write
due to filling dummy pages to make IO being aligned.

So we will easily run out of free segments during non-inline directory's
data writeback, even in process of foreground GC.

In order to fix this issue, I just propose to reserve additional free
space for IO alignment feature to handle worst case of free space usage
ratio during FGGC.

Fixes: 0a595ebaaa6b ("f2fs: support IO alignment for DATA and NODE writes")
Signed-off-by: Chao Yu <yuchao0@xxxxxxxxxx>
---
fs/f2fs/f2fs.h | 5 +++++
fs/f2fs/segment.h | 3 ++-
fs/f2fs/super.c | 43 +++++++++++++++++++++++++++++++++++++++++++
fs/f2fs/sysfs.c | 4 +++-
4 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c7b2b68c8c85..4b21ac42d44e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -929,6 +929,7 @@ struct f2fs_sm_info {
unsigned int segment_count; /* total # of segments */
unsigned int main_segments; /* # of segments in main area */
unsigned int reserved_segments; /* # of reserved segments */
+ unsigned int additional_reserved_segments;/* reserved segs for IO align feature */
unsigned int ovp_segments; /* # of overprovision segments */

/* a threshold to reclaim prefree segments */
@@ -1777,6 +1778,10 @@ static inline unsigned int get_available_block_count(struct inode *inode,
if (!__allow_reserved_blocks(sbi, inode, cap))
avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;

+ if (F2FS_IO_ALIGNED(sbi))
+ avail_user_block_count -= sbi->blocks_per_seg *
+ SM_I(sbi)->additional_reserved_segments;
+
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
if (avail_user_block_count > sbi->unusable_block_count)
avail_user_block_count -= sbi->unusable_block_count;
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 325781a1ae4d..78d0f7b4c47a 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -508,7 +508,8 @@ static inline unsigned int free_segments(struct f2fs_sb_info *sbi)

static inline int reserved_segments(struct f2fs_sb_info *sbi)
{
- return SM_I(sbi)->reserved_segments;
+ return SM_I(sbi)->reserved_segments +
+ SM_I(sbi)->additional_reserved_segments;
}

static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index bf642d1f25fc..c98f2db76cf8 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -277,6 +277,45 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
F2FS_OPTION(sbi).s_resgid));
}

+static inline int adjust_reserved_segment(struct f2fs_sb_info *sbi)
+{
+ unsigned int sec_blks = sbi->blocks_per_seg * sbi->segs_per_sec;
+ unsigned int avg_vblocks;
+ unsigned int wanted_reserved_segments;
+ block_t avail_user_block_count;
+
+ if (!F2FS_IO_ALIGNED(sbi))
+ return 0;
+
+ /* average valid block count in section in worst case */
+ avg_vblocks = sec_blks / F2FS_IO_SIZE(sbi);
+
+ /*
+ * we need enough free space when migrating one section in worst case
+ */
+ wanted_reserved_segments = (F2FS_IO_SIZE(sbi) / avg_vblocks) *
+ reserved_segments(sbi);
+ wanted_reserved_segments -= reserved_segments(sbi);
+
+ avail_user_block_count = sbi->user_block_count -
+ sbi->current_reserved_blocks -
+ F2FS_OPTION(sbi).root_reserved_blocks;
+
+ if (wanted_reserved_segments * sbi->blocks_per_seg >
+ avail_user_block_count) {
+ f2fs_err(sbi, "IO align feature can't grab additional reserved segment: %u",
+ wanted_reserved_segments);
+ return -ENOSPC;
+ }
+
+ SM_I(sbi)->additional_reserved_segments = wanted_reserved_segments;
+
+ f2fs_info(sbi, "IO align feature needs additional reserved segment: %u",
+ wanted_reserved_segments);
+
+ return 0;
+}
+
static void init_once(void *foo)
{
struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;
@@ -3424,6 +3463,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_nm;
}

+ err = adjust_reserved_segment(sbi);
+ if (err)
+ goto free_nm;
+
/* For write statistics */
if (sb->s_bdev->bd_part)
sbi->sectors_written_start =
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index f9fcca695db9..1824114d739c 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -259,7 +259,9 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
if (a->struct_type == RESERVED_BLOCKS) {
spin_lock(&sbi->stat_lock);
if (t > (unsigned long)(sbi->user_block_count -
- F2FS_OPTION(sbi).root_reserved_blocks)) {
+ F2FS_OPTION(sbi).root_reserved_blocks -
+ sbi->blocks_per_seg *
+ SM_I(sbi)->additional_reserved_segments)) {
spin_unlock(&sbi->stat_lock);
return -EINVAL;
}
--
2.18.0.rc1