Re: [RFC PATCH] ext4: auto batched discard support at kernel thread

From: Namjae Jeon
Date: Fri Dec 02 2011 - 03:12:46 EST


2011/12/2 Kyungmin Park <kmpark@xxxxxxxxxxxxx>:
> Hi,
>
> It's a proof of concept that runs a kernel thread for batched discard.
>
> Currently fitrim can be run from user level, but it's not clear which daemon should run it.
> On the Android platform, the launcher is a candidate,
> but the user can change the default launcher, and then fitrim can no longer be used.
>
> To address this issue without any dependency on the platform, run fitrim in the kernel.
> To avoid bothering the user, it runs at 2 o'clock. Please note that if it's in a clean state, it doesn't take much time.
>
> Please give opinions and comments.
I think that it is a good approach. If we can adjust the wake-up time and
cycle, it would be even better.
And we can avoid the performance degradation caused by copy merging inside the mmc/ssd.
On the other hand, is the lifetime of the ssd/mmc decreased by this patch?
> Thank you,
> Kyungmin Park
> ---
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 5b0e26a..2cad9b3 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -942,6 +942,7 @@ struct ext4_inode_info {
>
> Â#define EXT4_MOUNT2_EXPLICIT_DELALLOC Â0x00000001 /* User explicitly
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âspecified delalloc */
> +#define EXT4_MOUNT2_AUTO_DISCARD Â Â Â 0x00000002 /* Auto batched discard */
>
> Â#define clear_opt(sb, opt) Â Â Â Â Â Â EXT4_SB(sb)->s_mount_opt &= \
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â~EXT4_MOUNT_##opt
> @@ -1249,6 +1250,10 @@ struct ext4_sb_info {
>
> Â Â Â Â/* record the last minlen when FITRIM is called. */
> Â Â Â Âatomic_t s_last_trim_minblks;
> +
> + Â Â Â /* timer for periodic auto batched discard */
> + Â Â Â struct timer_list s_auto_discard;
> + Â Â Â struct task_struct *s_auto_discard_thread;
> Â};
>
> Âstatic inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 3858767..a2e9920 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -818,6 +818,10 @@ static void ext4_put_super(struct super_block *sb)
> Â Â Â Â Â Â Â Â Â Â Â Âext4_abort(sb, "Couldn't clean up the journal");
> Â Â Â Â}
>
> + Â Â Â if (test_opt2(sb, AUTO_DISCARD)) {
> + Â Â Â Â Â Â Â kthread_stop(sbi->s_auto_discard_thread);
> + Â Â Â Â Â Â Â del_timer(&sbi->s_auto_discard);
> + Â Â Â }
> Â Â Â Âdel_timer(&sbi->s_err_report);
> Â Â Â Âext4_release_system_zone(sb);
> Â Â Â Âext4_mb_release(sb);
> @@ -1144,6 +1148,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
> Â Â Â Âif (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD))
> Â Â Â Â Â Â Â Âseq_puts(seq, ",discard");
>
> + Â Â Â if (test_opt2(sb, AUTO_DISCARD))
> + Â Â Â Â Â Â Â seq_puts(seq, ",auto_batched_discard");
> +
> Â Â Â Âif (test_opt(sb, NOLOAD))
> Â Â Â Â Â Â Â Âseq_puts(seq, ",norecovery");
>
> @@ -1333,7 +1340,7 @@ enum {
> Â Â Â ÂOpt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
> Â Â Â ÂOpt_inode_readahead_blks, Opt_journal_ioprio,
> Â Â Â ÂOpt_dioread_nolock, Opt_dioread_lock,
> - Â Â Â Opt_discard, Opt_nodiscard,
> + Â Â Â Opt_discard, Opt_nodiscard, Opt_auto_discard,
> Â Â Â ÂOpt_init_inode_table, Opt_noinit_inode_table,
> Â};
>
> @@ -1407,6 +1414,7 @@ static const match_table_t tokens = {
> Â Â Â Â{Opt_dioread_lock, "dioread_lock"},
> Â Â Â Â{Opt_discard, "discard"},
> Â Â Â Â{Opt_nodiscard, "nodiscard"},
> + Â Â Â {Opt_auto_discard, "auto_batched_discard"},
> Â Â Â Â{Opt_init_inode_table, "init_itable=%u"},
> Â Â Â Â{Opt_init_inode_table, "init_itable"},
> Â Â Â Â{Opt_noinit_inode_table, "noinit_itable"},
> @@ -1886,6 +1894,9 @@ set_qf_format:
> Â Â Â Â Â Â Â Âcase Opt_nodiscard:
> Â Â Â Â Â Â Â Â Â Â Â Âclear_opt(sb, DISCARD);
> Â Â Â Â Â Â Â Â Â Â Â Âbreak;
> + Â Â Â Â Â Â Â case Opt_auto_discard:
> + Â Â Â Â Â Â Â Â Â Â Â set_opt2(sb, AUTO_DISCARD);
> + Â Â Â Â Â Â Â Â Â Â Â break;
> Â Â Â Â Â Â Â Âcase Opt_dioread_nolock:
> Â Â Â Â Â Â Â Â Â Â Â Âset_opt(sb, DIOREAD_NOLOCK);
> Â Â Â Â Â Â Â Â Â Â Â Âbreak;
> @@ -2763,6 +2774,71 @@ static void print_daily_error_info(unsigned long arg)
> Â Â Â Âmod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); Â/* Once a day */
> Â}
>
> +/*
> + * This function is called once a day to make a trim the device
> + */
> +static int ext4_auto_batched_discard_thread(void *data)
> +{
> + Â Â Â struct super_block *sb = data;
> + Â Â Â struct ext4_sb_info *sbi = EXT4_SB(sb);
> + Â Â Â struct fstrim_range range;
> + Â Â Â struct timeval now;
> + Â Â Â struct tm tm;
> + Â Â Â long next;
> + Â Â Â int ret;
> +
> + Â Â Â set_freezable();
> +
> + Â Â Â for (;;) {
> + Â Â Â Â Â Â Â if (kthread_should_stop())
> + Â Â Â Â Â Â Â Â Â Â Â break;
> +
> + Â Â Â Â Â Â Â if (try_to_freeze())
> + Â Â Â Â Â Â Â Â Â Â Â continue;
> +
> + Â Â Â Â Â Â Â range.start = 0;
> + Â Â Â Â Â Â Â range.len = ~(__u64)0;
> + Â Â Â Â Â Â Â range.minlen = SZ_1M;
> +
> + Â Â Â Â Â Â Â ret = ext4_trim_fs(sb, &range);
> + Â Â Â Â Â Â Â if (ret < 0)
> + Â Â Â Â Â Â Â Â Â Â Â ext4_msg(sb, KERN_NOTICE, "error count: %u", ret);
> + Â Â Â Â Â Â Â else
> + Â Â Â Â Â Â Â Â Â Â Â ext4_msg(sb, KERN_NOTICE, "trimmed size %llu",
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â range.len);
> +
> + Â Â Â Â Â Â Â do_gettimeofday(&now);
> + Â Â Â Â Â Â Â time_to_tm((time_t) now.tv_sec, 0, &tm);
> +
> + Â Â Â Â Â Â Â /* Run the every day at 2 clock */
> + Â Â Â Â Â Â Â /* XXX need to consider the timezone? */
> + Â Â Â Â Â Â Â next = 2 - tm.tm_hour;
> + Â Â Â Â Â Â Â if (next <= 0)
> + Â Â Â Â Â Â Â Â Â Â Â next += 24;
> + Â Â Â Â Â Â Â next *= 60*60*HZ;
> +
> + Â Â Â Â Â Â Â /* Re-arm the timer for next trim */
> + Â Â Â Â Â Â Â mod_timer(&sbi->s_auto_discard, jiffies + next);
> +
> + Â Â Â Â Â Â Â set_current_state(TASK_INTERRUPTIBLE);
> + Â Â Â Â Â Â Â schedule();
> + Â Â Â }
> +
> + Â Â Â return 0;
> +}
> +
> +static void ext4_auto_batched_discard(unsigned long arg)
> +{
> + Â Â Â struct super_block *sb = (struct super_block *) arg;
> + Â Â Â struct ext4_sb_info *sbi = EXT4_SB(sb);
> +
> + Â Â Â /*
> + Â Â Â Â* The ext4_trim_fs can't run at timer context
> + Â Â Â Â* So use the created kthread.
> + Â Â Â Â*/
> + Â Â Â wake_up_process(sbi->s_auto_discard_thread);
> +}
> +
> Â/* Find next suitable group and run ext4_init_inode_table */
> Âstatic int ext4_run_li_request(struct ext4_li_request *elr)
> Â{
> @@ -3576,6 +3652,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
> Â Â Â Âsbi->s_err_report.function = print_daily_error_info;
> Â Â Â Âsbi->s_err_report.data = (unsigned long) sb;
>
> + Â Â Â if (test_opt2(sb, AUTO_DISCARD)) {
> + Â Â Â Â Â Â Â init_timer(&sbi->s_auto_discard);
> + Â Â Â Â Â Â Â sbi->s_auto_discard.function = ext4_auto_batched_discard;
> + Â Â Â Â Â Â Â sbi->s_auto_discard.data = (unsigned long) sb;
> + Â Â Â Â Â Â Â sbi->s_auto_discard_thread =
> + Â Â Â Â Â Â Â Â Â Â Â kthread_create(ext4_auto_batched_discard_thread, sb,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â "ext4-batched-discard");
> + Â Â Â Â Â Â Â if (IS_ERR(sbi->s_auto_discard_thread)) {
> + Â Â Â Â Â Â Â Â Â Â Â err = PTR_ERR(sbi->s_auto_discard_thread);
> + Â Â Â Â Â Â Â Â Â Â Â goto failed_mount2;
> + Â Â Â Â Â Â Â }
> + Â Â Â Â Â Â Â /* One hour is enough to know the time */
> + Â Â Â Â Â Â Â mod_timer(&sbi->s_auto_discard, jiffies + 1*60*60*HZ);
> + Â Â Â }
> +
> Â Â Â Âerr = percpu_counter_init(&sbi->s_freeclusters_counter,
> Â Â Â Â Â Â Â Â Â Â Â Âext4_count_free_clusters(sb));
> Â Â Â Âif (!err) {
> @@ -3848,6 +3939,10 @@ failed_mount_wq:
> Â Â Â Â Â Â Â Âsbi->s_journal = NULL;
> Â Â Â Â}
> Âfailed_mount3:
> + Â Â Â if (test_opt2(sb, AUTO_DISCARD)) {
> + Â Â Â Â Â Â Â kthread_stop(sbi->s_auto_discard_thread);
> + Â Â Â Â Â Â Â del_timer(&sbi->s_auto_discard);
> + Â Â Â }
> Â Â Â Âdel_timer(&sbi->s_err_report);
> Â Â Â Âif (sbi->s_flex_groups)
> Â Â Â Â Â Â Â Âext4_kvfree(sbi->s_flex_groups);
> @@ -4546,6 +4641,22 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
> Â Â Â Âif (sbi->s_journal == NULL)
> Â Â Â Â Â Â Â Âext4_commit_super(sb, 1);
>
> + Â Â Â if (test_opt2(sb, AUTO_DISCARD) && !sbi->s_auto_discard_thread) {
> + Â Â Â Â Â Â Â init_timer(&sbi->s_auto_discard);
> + Â Â Â Â Â Â Â sbi->s_auto_discard.function = ext4_auto_batched_discard;
> + Â Â Â Â Â Â Â sbi->s_auto_discard.data = (unsigned long) sb;
> + Â Â Â Â Â Â Â sbi->s_auto_discard_thread =
> + Â Â Â Â Â Â Â Â Â Â Â kthread_create(ext4_auto_batched_discard_thread, sb,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â "ext4-batched-discard");
> + Â Â Â Â Â Â Â if (IS_ERR(sbi->s_auto_discard_thread)) {
> + Â Â Â Â Â Â Â Â Â Â Â err = PTR_ERR(sbi->s_auto_discard_thread);
> + Â Â Â Â Â Â Â Â Â Â Â goto restore_opts;
> + Â Â Â Â Â Â Â }
> +
> + Â Â Â Â Â Â Â /* One hour is enough to know the time */
> + Â Â Â Â Â Â Â mod_timer(&sbi->s_auto_discard, jiffies + 1*60*60*HZ);
> + Â Â Â }
> +
> Â#ifdef CONFIG_QUOTA
> Â Â Â Â/* Release old quota file names */
> Â Â Â Âfor (i = 0; i < MAXQUOTAS; i++)
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/