Re: [PATCH 1/3] f2fs: add ioctl to flush data from faster device to cold area

From: Chao Yu
Date: Fri Apr 21 2017 - 21:15:47 EST


On 2017/4/19 9:45, Jaegeuk Kim wrote:
> This patch adds an ioctl to flush data in faster device to cold area. User can
> give device number and number of segments to move. It doesn't move it if there
> is only one device.
>
> The parameter looks like:
>
> struct f2fs_flush_device {
> u32 dev_num; /* device number to flush */
> u32 segments; /* # of segments to flush */
> };
>
> Signed-off-by: Jaegeuk Kim <jaegeuk@xxxxxxxxxx>
> ---
> fs/f2fs/f2fs.h | 12 ++++++++--
> fs/f2fs/file.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
> fs/f2fs/gc.c | 19 +++++++++++-----
> fs/f2fs/segment.c | 14 ++++++++----
> fs/f2fs/segment.h | 4 +++-
> 5 files changed, 102 insertions(+), 14 deletions(-)
>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 562db8989a4e..c28e8e7d6a5f 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -280,6 +280,8 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
> #define F2FS_IOC_DEFRAGMENT _IO(F2FS_IOCTL_MAGIC, 8)
> #define F2FS_IOC_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \
> struct f2fs_move_range)
> +#define F2FS_IOC_FLUSH_DEVICE _IOW(F2FS_IOCTL_MAGIC, 10, \
> + struct f2fs_flush_device)
>
> #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY
> #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
> @@ -316,6 +318,11 @@ struct f2fs_move_range {
> u64 len; /* size to move */
> };
>
> +struct f2fs_flush_device {
> + u32 dev_num; /* device number to flush */
> + u32 segments; /* # of segments to flush */
> +};
> +
> /*
> * For INODE and NODE manager
> */
> @@ -941,7 +948,7 @@ struct f2fs_sb_info {
> int bg_gc; /* background gc calls */
> unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */
> #endif
> - unsigned int last_victim[2]; /* last victim segment # */
> + unsigned int last_victim[4]; /* last victim segment # */

unsigned int last_victim[MAX_GC_POLICY];

> spinlock_t stat_lock; /* lock for stat operations */
>
> /* For sysfs suppport */
> @@ -2323,7 +2330,8 @@ int f2fs_migrate_page(struct address_space *mapping, struct page *newpage,
> int start_gc_thread(struct f2fs_sb_info *sbi);
> void stop_gc_thread(struct f2fs_sb_info *sbi);
> block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
> -int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background);
> +int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
> + unsigned int segno);
> void build_gc_manager(struct f2fs_sb_info *sbi);
>
> /*
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 0ac833dd2634..561ecb46007b 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -1855,7 +1855,7 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
> mutex_lock(&sbi->gc_mutex);
> }
>
> - ret = f2fs_gc(sbi, sync, true);
> + ret = f2fs_gc(sbi, sync, true, NULL_SEGNO);
> out:
> mnt_drop_write_file(filp);
> return ret;
> @@ -2211,6 +2211,67 @@ static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
> return err;
> }
>
> +static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
> +{
> + struct inode *inode = file_inode(filp);
> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> + unsigned int start_segno = 0, end_segno = 0;
> + unsigned int dev_start_segno = 0, dev_end_segno = 0;
> + struct f2fs_flush_device range;
> + int ret;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + if (f2fs_readonly(sbi->sb))
> + return -EROFS;
> +
> + if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
> + sizeof(range)))
> + return -EFAULT;
> +
> + if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num) {
> + f2fs_msg(sbi->sb, KERN_WARNING, "Can't flush %u in %d\n",
> + range.dev_num, sbi->s_ndevs);
> + return -EINVAL;
> + }
> +
> + ret = mnt_want_write_file(filp);
> + if (ret)
> + return ret;
> +
> + if (range.dev_num != 0)
> + dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk);
> + dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk);
> +
> + start_segno = sbi->last_victim[FLUSH_DEVICE];
> + if (start_segno < dev_start_segno || start_segno >= dev_end_segno)
> + start_segno = dev_start_segno;
> + end_segno = min(start_segno + range.segments, dev_end_segno);
> +
> + while (start_segno < end_segno) {
> + if (!mutex_trylock(&sbi->gc_mutex)) {
> + ret = -EBUSY;
> + goto out;
> + }
> + sbi->last_victim[GC_CB] = end_segno + 1;
> + sbi->last_victim[GC_GREEDY] = end_segno + 1;
> + sbi->last_victim[ALLOC_NEXT] = end_segno + 1;
> + ret = f2fs_gc(sbi, true, true, start_segno);
> + sbi->last_victim[ALLOC_NEXT] = 0;

It would be better to update this while holding gc_mutex, to avoid contention.

> + if (ret == -EAGAIN)
> + ret = 0;
> + else if (ret < 0)
> + break;
> + start_segno++;
> + }
> + sbi->last_victim[FLUSH_DEVICE] = start_segno;

Ditto: last_victim[FLUSH_DEVICE] should also be updated while holding gc_mutex.

> +out:
> + mnt_drop_write_file(filp);
> + return ret;
> +}
> +
> +
> long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> {
> switch (cmd) {
> @@ -2248,6 +2309,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> return f2fs_ioc_defragment(filp, arg);
> case F2FS_IOC_MOVE_RANGE:
> return f2fs_ioc_move_range(filp, arg);
> + case F2FS_IOC_FLUSH_DEVICE:
> + return f2fs_ioc_flush_device(filp, arg);
> default:
> return -ENOTTY;
> }
> @@ -2315,8 +2378,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
> case F2FS_IOC_GARBAGE_COLLECT:
> case F2FS_IOC_WRITE_CHECKPOINT:
> case F2FS_IOC_DEFRAGMENT:
> - break;
> case F2FS_IOC_MOVE_RANGE:
> + case F2FS_IOC_FLUSH_DEVICE:
> break;
> default:
> return -ENOIOCTLCMD;
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index 9172112d6246..d988c1aaf132 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -84,7 +84,7 @@ static int gc_thread_func(void *data)
> stat_inc_bggc_count(sbi);
>
> /* if return value is not zero, no victim was selected */
> - if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true))
> + if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO))
> wait_ms = gc_th->no_gc_sleep_time;
>
> trace_f2fs_background_gc(sbi->sb, wait_ms,
> @@ -308,6 +308,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
> p.min_segno = NULL_SEGNO;
> p.min_cost = get_max_cost(sbi, &p);
>
> + if (*result != NULL_SEGNO) {
> + if (IS_DATASEG(get_seg_entry(sbi, *result)->type) &&
> + get_valid_blocks(sbi, *result, false) &&
> + !sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
> + p.min_segno = *result;
> + goto out;
> + }
> +
> if (p.max_search == 0)
> goto out;
>
> @@ -912,7 +920,6 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
> * - mutex_lock(sentry_lock) - change_curseg()
> * - lock_page(sum_page)
> */
> -
> if (type == SUM_TYPE_NODE)
> gc_node_segment(sbi, sum->entries, segno, gc_type);
> else
> @@ -939,9 +946,9 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
> return sec_freed;
> }
>
> -int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background)
> +int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
> + bool background, unsigned int segno)
> {
> - unsigned int segno;
> int gc_type = sync ? FG_GC : BG_GC;
> int sec_freed = 0;
> int ret = -EINVAL;
> @@ -990,8 +997,10 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background)
> sbi->cur_victim_sec = NULL_SEGNO;
>
> if (!sync) {
> - if (has_not_enough_free_secs(sbi, sec_freed, 0))
> + if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
> + segno = NULL_SEGNO;
> goto gc_more;
> + }
>
> if (gc_type == FG_GC)
> ret = write_checkpoint(sbi, &cpc);
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 58cfbe3d4dc7..88489d3156ab 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -401,7 +401,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
> */
> if (has_not_enough_free_secs(sbi, 0, 0)) {
> mutex_lock(&sbi->gc_mutex);
> - f2fs_gc(sbi, false, false);
> + f2fs_gc(sbi, false, false, NULL_SEGNO);
> }
> }
>
> @@ -1566,6 +1566,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
> if (type == CURSEG_HOT_DATA || IS_NODESEG(type))
> return 0;
>
> + if (sbi->last_victim[ALLOC_NEXT])
> + return sbi->last_victim[ALLOC_NEXT];
> return CURSEG_I(sbi, type)->segno;
> }
>
> @@ -1663,12 +1665,15 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
> {
> struct curseg_info *curseg = CURSEG_I(sbi, type);
> const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
> + unsigned segno = 0;
> int i, cnt;
> bool reversed = false;
>
> /* need_SSR() already forces to do this */
> - if (v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, SSR))
> + if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
> + curseg->next_segno = segno;
> return 1;
> + }
>
> /* For node segments, let's do SSR more intensively */
> if (IS_NODESEG(type)) {
> @@ -1692,9 +1697,10 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
> for (; cnt-- > 0; reversed ? i-- : i++) {
> if (i == type)
> continue;
> - if (v_ops->get_victim(sbi, &(curseg)->next_segno,
> - BG_GC, i, SSR))
> + if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
> + curseg->next_segno = segno;
> return 1;
> + }
> }
> return 0;
> }
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index 5f6ef163aa8f..1d35f8d298cd 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -138,7 +138,9 @@ enum {
> */
> enum {
> GC_CB = 0,
> - GC_GREEDY
> + GC_GREEDY,
> + ALLOC_NEXT,
> + FLUSH_DEVICE,

How about adding MAX_GC_POLICY here as the last entry, so that it can be used to size last_victim[]?

Thanks,

> };
>
> /*
>