[RFC PATCH 1/1] f2fs-dev: ioctl for removing a range from F2FS
From: sunqiuyang
Date: Thu Feb 21 2019 - 00:17:18 EST
From: Qiuyang Sun <sunqiuyang@xxxxxxxxxx>
This ioctl shrinks the filesystem by removing a given number of bytes
(rounded up to a whole number of sections) from the end of the main area.
Any current segments (cursegs) and valid blocks inside that range are
moved out before the range is invalidated.
This feature can be used to adjust partition sizes online.
Signed-off-by: Qiuyang Sun <sunqiuyang@xxxxxxxxxx>
---
fs/f2fs/f2fs.h | 9 ++++++
fs/f2fs/file.c | 28 +++++++++++++++++++
fs/f2fs/gc.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
fs/f2fs/segment.c | 47 +++++++++++++++++++++++--------
fs/f2fs/segment.h | 1 +
fs/f2fs/super.c | 1 +
6 files changed, 156 insertions(+), 13 deletions(-)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 8c69e12..fd7f3ba 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -406,6 +406,8 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
#define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, __u32)
#define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32)
#define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15)
+#define F2FS_IOC_RESIZE_FROM_END _IOWR(F2FS_IOCTL_MAGIC, 16, \
+ struct f2fs_resize_from_end)
#define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY
#define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
@@ -457,6 +459,10 @@ struct f2fs_flush_device {
u32 segments; /* # of segments to flush */
};
+/* argument passed by user space to F2FS_IOC_RESIZE_FROM_END */
+struct f2fs_resize_from_end {
+	/* number of bytes to remove from the end of the main area */
+	u64 len;
+};
+
/* for inline stuff */
#define DEF_INLINE_RESERVED_SIZE 1
static inline int get_extra_isize(struct inode *inode);
@@ -1226,6 +1232,7 @@ struct f2fs_sb_info {
unsigned int segs_per_sec; /* segments per section */
unsigned int secs_per_zone; /* sections per zone */
unsigned int total_sections; /* total section count */
+ unsigned int new_total_sections; /* for resize from end */
unsigned int total_node_count; /* total node block count */
unsigned int total_valid_node_count; /* valid node block count */
loff_t max_file_blocks; /* max block index of file */
@@ -3008,6 +3015,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
int f2fs_disable_cp_again(struct f2fs_sb_info *sbi);
void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
+void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type);
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
@@ -3146,6 +3154,7 @@ int f2fs_migrate_page(struct address_space *mapping, struct page *newpage,
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
unsigned int segno);
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
+int f2fs_resize_from_end(struct f2fs_sb_info *sbi, size_t resize_len);
/*
* recovery.c
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index b8f5d12..29e70fd 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2968,6 +2968,32 @@ static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg)
return f2fs_precache_extents(file_inode(filp));
}
+/*
+ * F2FS_IOC_RESIZE_FROM_END handler.
+ *
+ * Copies a struct f2fs_resize_from_end from user space and hands its byte
+ * count to f2fs_resize_from_end().  The caller needs CAP_SYS_ADMIN and the
+ * filesystem must not be read-only.
+ *
+ * Returns 0 on success, -EPERM, -EROFS, -EFAULT or -EINVAL for the checks
+ * below, otherwise whatever mnt_want_write_file() or
+ * f2fs_resize_from_end() returned.
+ */
+static int f2fs_ioc_resize_from_end(struct file *filp, unsigned long arg)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
+	struct f2fs_resize_from_end param;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (f2fs_readonly(sbi->sb))
+		return -EROFS;
+
+	if (copy_from_user(&param, (struct f2fs_resize_from_end __user *)arg,
+				sizeof(param)))
+		return -EFAULT;
+
+	/*
+	 * param.len is u64 while f2fs_resize_from_end() takes a size_t;
+	 * refuse values that would be silently truncated.
+	 */
+	if (param.len != (size_t)param.len)
+		return -EINVAL;
+
+	ret = mnt_want_write_file(filp);
+	if (ret)
+		return ret;
+
+	ret = f2fs_resize_from_end(sbi, param.len);
+	mnt_drop_write_file(filp);
+
+	return ret;
+}
+
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
@@ -3024,6 +3050,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_set_pin_file(filp, arg);
case F2FS_IOC_PRECACHE_EXTENTS:
return f2fs_ioc_precache_extents(filp, arg);
+ case F2FS_IOC_RESIZE_FROM_END:
+ return f2fs_ioc_resize_from_end(filp, arg);
default:
return -ENOTTY;
}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 195cf0f..3877e99 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -311,7 +311,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
struct sit_info *sm = SIT_I(sbi);
struct victim_sel_policy p;
unsigned int secno, last_victim;
- unsigned int last_segment = MAIN_SEGS(sbi);
+ unsigned int last_segment = NEW_MAIN_SECS(sbi) * sbi->segs_per_sec;
unsigned int nsearched = 0;
mutex_lock(&dirty_i->seglist_lock);
@@ -404,7 +404,8 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
sm->last_victim[p.gc_mode] = last_victim + 1;
else
sm->last_victim[p.gc_mode] = segno + 1;
- sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi);
+ sm->last_victim[p.gc_mode] %=
+ (NEW_MAIN_SECS(sbi) * sbi->segs_per_sec);
break;
}
}
@@ -1350,3 +1351,81 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
SIT_I(sbi)->last_victim[ALLOC_NEXT] =
GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
}
+
+/*
+ * Vacate the segments in [@start, @end] (both inclusive).
+ *
+ * Cursegs located in the range are moved with
+ * allocate_segment_for_resize(), every section in the range is passed to
+ * do_garbage_collect() in FG_GC mode, and the filesystem is synced.
+ *
+ * Returns 0 if no segment in the range is in use afterwards, the error
+ * from f2fs_sync_fs(), or -EAGAIN if a segment is still in use.
+ */
+static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start,
+							unsigned int end)
+{
+	int type;
+	int err;
+	unsigned int segno, next_inuse;
+	struct gc_inode_list gc_list = {
+		.ilist = LIST_HEAD_INIT(gc_list.ilist),
+		.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
+	};
+
+	/* Move out cursegs from the target range */
+	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++) {
+		segno = CURSEG_I(sbi, type)->segno;
+		if (segno >= start && segno <= end)
+			allocate_segment_for_resize(sbi, type);
+	}
+
+	/* do GC to move out valid blocks in the range */
+	mutex_lock(&sbi->gc_mutex);
+	for (segno = start; segno <= end; segno += sbi->segs_per_sec)
+		do_garbage_collect(sbi, segno, &gc_list, FG_GC);
+	mutex_unlock(&sbi->gc_mutex);
+	put_gc_inode(&gc_list);
+
+	err = f2fs_sync_fs(sbi->sb, 1);
+	if (err)
+		return err;
+
+	/* The caller must not drop the range while any segment is in use */
+	next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start);
+	if (next_inuse <= end) {
+		f2fs_msg(sbi->sb, KERN_ERR,
+			"segno %u should be free but still inuse!", next_inuse);
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+/*
+ * Shrink the main area by @resize_len bytes, taken from its end.
+ *
+ * @resize_len is rounded up to a whole number of sections.  The new
+ * section count is published through NEW_MAIN_SECS() first, then the
+ * tail range is vacated, and finally the in-memory counters, the
+ * checkpoint's user_block_count and the raw superblock are updated and
+ * the superblock is committed.
+ *
+ * Returns 0 on success, -ENOSPC if the request does not fit, the error
+ * from free_segment_range() (NEW_MAIN_SECS() is restored in that case),
+ * or the error from f2fs_commit_super().
+ */
+int f2fs_resize_from_end(struct f2fs_sb_info *sbi, size_t resize_len)
+{
+	struct f2fs_super_block *raw_sb = F2FS_RAW_SUPER(sbi);
+	size_t section_size = (size_t)F2FS_BLKSIZE * BLKS_PER_SEC(sbi);
+	size_t nsecs;
+	unsigned int secs;
+	int gc_mode;
+	int err;
+
+	/* round up without resize_len + section_size - 1, which can wrap */
+	nsecs = resize_len / section_size;
+	if (resize_len % section_size)
+		nsecs++;
+
+	/*
+	 * Bound the request before narrowing it to 32 bits; otherwise
+	 * NEW_MAIN_SECS() below could underflow.
+	 */
+	if (nsecs >= MAIN_SECS(sbi))
+		return -ENOSPC;
+	secs = nsecs;
+
+	/* sum in 64 bits so a large request cannot wrap past the limit */
+	if ((u64)secs * BLKS_PER_SEC(sbi) + valid_user_blocks(sbi) +
+		sbi->current_reserved_blocks + sbi->unusable_block_count +
+		F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count)
+		return -ENOSPC;
+
+	mutex_lock(&DIRTY_I(sbi)->seglist_lock);
+	NEW_MAIN_SECS(sbi) = MAIN_SECS(sbi) - secs;
+	for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++)
+		if (SIT_I(sbi)->last_victim[gc_mode] >=
+				NEW_MAIN_SECS(sbi) * sbi->segs_per_sec)
+			SIT_I(sbi)->last_victim[gc_mode] = 0;
+	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
+
+	err = free_segment_range(sbi, NEW_MAIN_SECS(sbi) * sbi->segs_per_sec,
+			MAIN_SEGS(sbi) - 1);
+	if (err) {
+		/* abort: make the whole main area selectable again */
+		mutex_lock(&DIRTY_I(sbi)->seglist_lock);
+		NEW_MAIN_SECS(sbi) = MAIN_SECS(sbi);
+		mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
+		return err;
+	}
+
+	/* Update FS metadata */
+	SM_I(sbi)->segment_count -= secs * sbi->segs_per_sec;
+	MAIN_SECS(sbi) = NEW_MAIN_SECS(sbi);
+	MAIN_SEGS(sbi) = MAIN_SECS(sbi) * sbi->segs_per_sec;
+	sbi->user_block_count -= secs * BLKS_PER_SEC(sbi);
+	sbi->ckpt->user_block_count = cpu_to_le64(sbi->user_block_count);
+	FREE_I(sbi)->free_sections -= secs;
+	FREE_I(sbi)->free_segments -= secs * sbi->segs_per_sec;
+
+	/* Update superblock */
+	raw_sb->section_count = cpu_to_le32(MAIN_SECS(sbi));
+	raw_sb->segment_count = cpu_to_le32(
+		le32_to_cpu(raw_sb->segment_count) - secs * sbi->segs_per_sec);
+	raw_sb->segment_count_main = cpu_to_le32(MAIN_SEGS(sbi));
+	/* block_count is a 64-bit little-endian field on disk */
+	raw_sb->block_count = cpu_to_le64(
+		le64_to_cpu(raw_sb->block_count) -
+		(u64)secs * BLKS_PER_SEC(sbi));
+
+	return f2fs_commit_super(sbi, false);
+}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4aef183..294074c 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2348,7 +2348,7 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int segno, secno, zoneno;
- unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
+ unsigned int total_zones = NEW_MAIN_SECS(sbi) / sbi->secs_per_zone;
unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
unsigned int left_start = hint;
@@ -2365,12 +2365,13 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
goto got_it;
}
find_other_zone:
- secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
- if (secno >= MAIN_SECS(sbi)) {
+ secno = find_next_zero_bit(free_i->free_secmap, NEW_MAIN_SECS(sbi),
+ hint);
+ if (secno >= NEW_MAIN_SECS(sbi)) {
if (dir == ALLOC_RIGHT) {
secno = find_next_zero_bit(free_i->free_secmap,
- MAIN_SECS(sbi), 0);
- f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
+ NEW_MAIN_SECS(sbi), 0);
+ f2fs_bug_on(sbi, secno >= NEW_MAIN_SECS(sbi));
} else {
go_left = 1;
left_start = hint - 1;
@@ -2385,8 +2386,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
continue;
}
left_start = find_next_zero_bit(free_i->free_secmap,
- MAIN_SECS(sbi), 0);
- f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
+ NEW_MAIN_SECS(sbi), 0);
+ f2fs_bug_on(sbi, left_start >= NEW_MAIN_SECS(sbi));
break;
}
secno = left_start;
@@ -2639,6 +2640,25 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
stat_inc_seg_type(sbi, curseg);
}
+/*
+ * Switch the current segment of @type to another segment so the old one
+ * can be vacated for a resize.
+ *
+ * An SSR segment is used when f2fs_need_SSR() and get_ssr_segment() both
+ * succeed, otherwise a new segment is taken with new_curseg().  If the
+ * old segment holds no valid blocks it is returned to the free map.
+ *
+ * The curseg and SIT state are changed under curseg_lock (read),
+ * curseg_mutex and sentry_lock (write), taken in that order.
+ * NOTE(review): this is meant to mirror the locking of the regular block
+ * allocation path -- confirm the order against f2fs_allocate_data_block().
+ */
+void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type)
+{
+	struct curseg_info *curseg = CURSEG_I(sbi, type);
+	unsigned int old_segno;
+
+	down_read(&SM_I(sbi)->curseg_lock);
+	mutex_lock(&curseg->curseg_mutex);
+	down_write(&SIT_I(sbi)->sentry_lock);
+
+	/* read under the lock so the segno cannot change underneath us */
+	old_segno = curseg->segno;
+
+	if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
+		change_curseg(sbi, type);
+	else
+		new_curseg(sbi, type, true);
+
+	stat_inc_seg_type(sbi, curseg);
+
+	if (get_valid_blocks(sbi, old_segno, false) == 0)
+		__set_test_and_free(sbi, old_segno);
+
+	up_write(&SIT_I(sbi)->sentry_lock);
+
+	f2fs_msg(sbi->sb, KERN_NOTICE,
+		"For resize: curseg of type %d: %u ==> %u",
+		type, old_segno, curseg->segno);
+
+	mutex_unlock(&curseg->curseg_mutex);
+	up_read(&SM_I(sbi)->curseg_lock);
+}
+
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
{
struct curseg_info *curseg;
@@ -3738,6 +3758,12 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
bool dirtied;
segno = le32_to_cpu(segno_in_journal(journal, i));
+ if (segno >= MAIN_SEGS(sbi)) {
+ f2fs_msg(sbi->sb, KERN_NOTICE,
+ "Skip segno %u / %u in jnl!\n",
+ segno, MAIN_SEGS(sbi));
+ continue;
+ }
dirtied = __mark_sit_entry_dirty(sbi, segno);
if (!dirtied)
@@ -4093,12 +4119,11 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
start = le32_to_cpu(segno_in_journal(journal, i));
if (start >= MAIN_SEGS(sbi)) {
- f2fs_msg(sbi->sb, KERN_ERR,
+ /* This may happen if the FS was once resized. */
+ f2fs_msg(sbi->sb, KERN_NOTICE,
"Wrong journal entry on segno %u",
start);
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- err = -EINVAL;
- break;
+ continue;
}
se = &sit_i->sentries[start];
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 5c7ed04..54caf99 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -59,6 +59,7 @@
#define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments)
#define MAIN_SECS(sbi) ((sbi)->total_sections)
+#define NEW_MAIN_SECS(sbi) ((sbi)->new_total_sections)
#define TOTAL_SEGS(sbi) \
(SM_I(sbi) ? SM_I(sbi)->segment_count : \
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 3dc7f56..5cd2ced 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2713,6 +2713,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
sbi->total_sections = le32_to_cpu(raw_super->section_count);
+ sbi->new_total_sections = sbi->total_sections;
sbi->total_node_count =
(le32_to_cpu(raw_super->segment_count_nat) / 2)
* sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK;
--
1.8.3.1