Re: [PATCH v3] f2fs: add iostat latency statistics support for discard

From: Jaegeuk Kim
Date: Mon Apr 03 2023 - 13:50:59 EST


Could you split this into two patches: one that does the cleanup, and a second one that adds the discard latency support on top of it?

On 03/30, Yangtao Li wrote:
> This patch adds accounting of discard latency to iostat.
>
> Reported-by: kernel test robot <lkp@xxxxxxxxx>
> Link: https://lore.kernel.org/oe-kbuild-all/202303211005.RGxljvli-lkp@xxxxxxxxx/
> Signed-off-by: Yangtao Li <frank.li@xxxxxxxx>
> ---
> v3:
> -force conversion to enum iostat_lat_type type
> v2:
> -rename get_bio_iostat_private to iostat_get_bio_private
> fs/f2fs/data.c | 2 +-
> fs/f2fs/f2fs.h | 1 +
> fs/f2fs/iostat.c | 83 +++++++++++++++++++++----------------
> fs/f2fs/iostat.h | 49 +++++++++++-----------
> fs/f2fs/segment.c | 8 +++-
> include/trace/events/f2fs.h | 68 ++++++++++++++++--------------
> 6 files changed, 118 insertions(+), 93 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 359de650772e..6bcd71a0dbac 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -2307,7 +2307,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> if (bio_add_page(bio, page, blocksize, 0) < blocksize)
> goto submit_and_realloc;
>
> - ctx = get_post_read_ctx(bio);
> + ctx = iostat_get_bio_private(bio);
> ctx->enabled_steps |= STEP_DECOMPRESS;
> refcount_inc(&dic->refcnt);
>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 4e2596dacbf1..e73fefe0d8fb 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1112,6 +1112,7 @@ enum page_type {
> META_FLUSH,
> IPU, /* the below types are used by tracepoints only. */
> OPU,
> + DISCARD, /* used by iostat */
> };
>
> enum temp_type {
> diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
> index 3d5bfb1ad585..f40b8915ae1b 100644
> --- a/fs/f2fs/iostat.c
> +++ b/fs/f2fs/iostat.c
> @@ -86,23 +86,21 @@ int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset)
>
> static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
> {
> - int io, idx;
> - struct f2fs_iostat_latency iostat_lat[MAX_IO_TYPE][NR_PAGE_TYPE];
> + struct f2fs_iostat_latency iostat_lat[MAX_LAT_TYPE];
> struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
> + unsigned int lat_type;
> unsigned long flags;
>
> spin_lock_irqsave(&sbi->iostat_lat_lock, flags);
> - for (idx = 0; idx < MAX_IO_TYPE; idx++) {
> - for (io = 0; io < NR_PAGE_TYPE; io++) {
> - iostat_lat[idx][io].peak_lat =
> - jiffies_to_msecs(io_lat->peak_lat[idx][io]);
> - iostat_lat[idx][io].cnt = io_lat->bio_cnt[idx][io];
> - iostat_lat[idx][io].avg_lat = iostat_lat[idx][io].cnt ?
> - jiffies_to_msecs(io_lat->sum_lat[idx][io]) / iostat_lat[idx][io].cnt : 0;
> - io_lat->sum_lat[idx][io] = 0;
> - io_lat->peak_lat[idx][io] = 0;
> - io_lat->bio_cnt[idx][io] = 0;
> - }
> + for (lat_type = 0; lat_type < MAX_LAT_TYPE; lat_type++) {
> + iostat_lat[lat_type].peak_lat =
> + jiffies_to_msecs(io_lat->peak_lat[lat_type]);
> + iostat_lat[lat_type].cnt = io_lat->bio_cnt[lat_type];
> + iostat_lat[lat_type].avg_lat = iostat_lat[lat_type].cnt ?
> + jiffies_to_msecs(io_lat->sum_lat[lat_type]) / iostat_lat[lat_type].cnt : 0;
> + io_lat->sum_lat[lat_type] = 0;
> + io_lat->peak_lat[lat_type] = 0;
> + io_lat->bio_cnt[lat_type] = 0;
> }
> spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
>
> @@ -208,62 +206,75 @@ void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
> f2fs_record_iostat(sbi);
> }
>
> -static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
> - enum iostat_lat_type lat_type)
> +static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx)
> {
> - unsigned long ts_diff;
> - unsigned int page_type = iostat_ctx->type;
> struct f2fs_sb_info *sbi = iostat_ctx->sbi;
> struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
> + unsigned int lat_type = iostat_ctx->lat_type;
> + unsigned long ts_diff;
> unsigned long flags;
>
> if (!sbi->iostat_enable)
> return;
>
> - ts_diff = jiffies - iostat_ctx->submit_ts;
> - if (page_type == META_FLUSH) {
> - page_type = META;
> - } else if (page_type >= NR_PAGE_TYPE) {
> - f2fs_warn(sbi, "%s: %d over NR_PAGE_TYPE", __func__, page_type);
> + if (lat_type >= MAX_LAT_TYPE) {
> + f2fs_warn(sbi, "%s: %d over MAX_LAT_TYPE", __func__, lat_type);
> return;
> }
> + ts_diff = jiffies - iostat_ctx->submit_ts;
>
> spin_lock_irqsave(&sbi->iostat_lat_lock, flags);
> - io_lat->sum_lat[lat_type][page_type] += ts_diff;
> - io_lat->bio_cnt[lat_type][page_type]++;
> - if (ts_diff > io_lat->peak_lat[lat_type][page_type])
> - io_lat->peak_lat[lat_type][page_type] = ts_diff;
> + io_lat->sum_lat[lat_type] += ts_diff;
> + io_lat->bio_cnt[lat_type]++;
> + if (ts_diff > io_lat->peak_lat[lat_type])
> + io_lat->peak_lat[lat_type] = ts_diff;
> spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
> }
>
> void iostat_update_and_unbind_ctx(struct bio *bio)
> +{
> + struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> +
> + if (op_is_write(bio_op(bio)) && !op_is_discard(bio_op(bio)))
> + bio->bi_private = iostat_ctx->sbi;
> + else
> + bio->bi_private = iostat_ctx->iostat_private;
> +
> + __update_iostat_latency(iostat_ctx);
> + mempool_free(iostat_ctx, bio_iostat_ctx_pool);
> +}
> +
> +void iostat_update_submit_ctx(struct bio *bio, enum page_type type)
> {
> struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> enum iostat_lat_type lat_type;
>
> - if (op_is_write(bio_op(bio))) {
> + iostat_ctx->submit_ts = jiffies;
> +
> + if (type == DISCARD) {
> + lat_type = DISCARD_LAT;
> + } else if (op_is_write(bio_op(bio))) {
> lat_type = bio->bi_opf & REQ_SYNC ?
> - WRITE_SYNC_IO : WRITE_ASYNC_IO;
> - bio->bi_private = iostat_ctx->sbi;
> + WRITE_SYNC_DATA_LAT : WRITE_ASYNC_DATA_LAT;
> + lat_type = (enum iostat_lat_type)(lat_type + type);
> } else {
> - lat_type = READ_IO;
> - bio->bi_private = iostat_ctx->post_read_ctx;
> + lat_type = READ_DATA_LAT;
> + lat_type = (enum iostat_lat_type)(lat_type + type);
> }
>
> - __update_iostat_latency(iostat_ctx, lat_type);
> - mempool_free(iostat_ctx, bio_iostat_ctx_pool);
> + iostat_ctx->lat_type = lat_type;
> }
>
> void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
> - struct bio *bio, struct bio_post_read_ctx *ctx)
> + struct bio *bio, void *private)
> {
> struct bio_iostat_ctx *iostat_ctx;
> /* Due to the mempool, this never fails. */
> iostat_ctx = mempool_alloc(bio_iostat_ctx_pool, GFP_NOFS);
> iostat_ctx->sbi = sbi;
> iostat_ctx->submit_ts = 0;
> - iostat_ctx->type = 0;
> - iostat_ctx->post_read_ctx = ctx;
> + iostat_ctx->lat_type = 0;
> + iostat_ctx->iostat_private = private;
> bio->bi_private = iostat_ctx;
> }
>
> diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
> index eb99d05cf272..67b468691498 100644
> --- a/fs/f2fs/iostat.h
> +++ b/fs/f2fs/iostat.h
> @@ -6,17 +6,24 @@
> #ifndef __F2FS_IOSTAT_H__
> #define __F2FS_IOSTAT_H__
>
> +#ifdef CONFIG_F2FS_IOSTAT
> +
> struct bio_post_read_ctx;
>
> enum iostat_lat_type {
> - READ_IO = 0,
> - WRITE_SYNC_IO,
> - WRITE_ASYNC_IO,
> - MAX_IO_TYPE,
> + READ_DATA_LAT = 0,
> + READ_NODE_LAT,
> + READ_META_LAT,
> + WRITE_SYNC_DATA_LAT,
> + WRITE_SYNC_NODE_LAT,
> + WRITE_SYNC_META_LAT,
> + WRITE_ASYNC_DATA_LAT,
> + WRITE_ASYNC_NODE_LAT,
> + WRITE_ASYNC_META_LAT,
> + DISCARD_LAT,
> + MAX_LAT_TYPE,
> };
>
> -#ifdef CONFIG_F2FS_IOSTAT
> -
> #define NUM_PREALLOC_IOSTAT_CTXS 128
> #define DEFAULT_IOSTAT_PERIOD_MS 3000
> #define MIN_IOSTAT_PERIOD_MS 100
> @@ -24,9 +31,9 @@ enum iostat_lat_type {
> #define MAX_IOSTAT_PERIOD_MS 8640000
>
> struct iostat_lat_info {
> - unsigned long sum_lat[MAX_IO_TYPE][NR_PAGE_TYPE]; /* sum of io latencies */
> - unsigned long peak_lat[MAX_IO_TYPE][NR_PAGE_TYPE]; /* peak io latency */
> - unsigned int bio_cnt[MAX_IO_TYPE][NR_PAGE_TYPE]; /* bio count */
> + unsigned long sum_lat[MAX_LAT_TYPE]; /* sum of io latencies */
> + unsigned long peak_lat[MAX_LAT_TYPE]; /* peak io latency */
> + unsigned int bio_cnt[MAX_LAT_TYPE]; /* bio count */
> };
>
> extern int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
> @@ -38,29 +45,21 @@ extern void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
> struct bio_iostat_ctx {
> struct f2fs_sb_info *sbi;
> unsigned long submit_ts;
> - enum page_type type;
> - struct bio_post_read_ctx *post_read_ctx;
> + enum iostat_lat_type lat_type;
> + void *iostat_private;
> };
>
> -static inline void iostat_update_submit_ctx(struct bio *bio,
> - enum page_type type)
> -{
> - struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> -
> - iostat_ctx->submit_ts = jiffies;
> - iostat_ctx->type = type;
> -}
> -
> -static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
> +static inline struct bio_post_read_ctx *iostat_get_bio_private(struct bio *bio)
> {
> struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>
> - return iostat_ctx->post_read_ctx;
> + return iostat_ctx->iostat_private;
> }
>
> +extern void iostat_update_submit_ctx(struct bio *bio, enum page_type type);
> extern void iostat_update_and_unbind_ctx(struct bio *bio);
> extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
> - struct bio *bio, struct bio_post_read_ctx *ctx);
> + struct bio *bio, void *private);
> extern int f2fs_init_iostat_processing(void);
> extern void f2fs_destroy_iostat_processing(void);
> extern int f2fs_init_iostat(struct f2fs_sb_info *sbi);
> @@ -70,10 +69,10 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *in
> enum iostat_type type, unsigned long long io_bytes) {}
> static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
> static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
> - struct bio *bio, struct bio_post_read_ctx *ctx) {}
> + struct bio *bio, void *private) {}
> static inline void iostat_update_submit_ctx(struct bio *bio,
> enum page_type type) {}
> -static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
> +static inline struct bio_post_read_ctx *iostat_get_bio_private(struct bio *bio)
> {
> return bio->bi_private;
> }
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 72bce3808394..5a2b162812c6 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -1100,9 +1100,12 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
>
> static void f2fs_submit_discard_endio(struct bio *bio)
> {
> - struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
> + struct discard_cmd *dc;
> unsigned long flags;
>
> + iostat_update_and_unbind_ctx(bio);
> + dc = bio->bi_private;
> +
> spin_lock_irqsave(&dc->lock, flags);
> if (!dc->error)
> dc->error = blk_status_to_errno(bio->bi_status);
> @@ -1276,6 +1279,9 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
> bio->bi_private = dc;
> bio->bi_end_io = f2fs_submit_discard_endio;
> bio->bi_opf |= flag;
> +
> + iostat_alloc_and_bind_ctx(sbi, bio, dc);
> + iostat_update_submit_ctx(bio, DISCARD);
> submit_bio(bio);
>
> atomic_inc(&dcc->issued_discard);
> diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
> index 99cbc5949e3c..385291ac9ad5 100644
> --- a/include/trace/events/f2fs.h
> +++ b/include/trace/events/f2fs.h
> @@ -2045,7 +2045,7 @@ struct f2fs_iostat_latency {
>
> TRACE_EVENT(f2fs_iostat_latency,
>
> - TP_PROTO(struct f2fs_sb_info *sbi, struct f2fs_iostat_latency (*iostat_lat)[NR_PAGE_TYPE]),
> + TP_PROTO(struct f2fs_sb_info *sbi, struct f2fs_iostat_latency *iostat_lat),
>
> TP_ARGS(sbi, iostat_lat),
>
> @@ -2078,37 +2078,43 @@ TRACE_EVENT(f2fs_iostat_latency,
> __field(unsigned int, m_wr_as_peak)
> __field(unsigned int, m_wr_as_avg)
> __field(unsigned int, m_wr_as_cnt)
> + __field(unsigned int, discard_peak)
> + __field(unsigned int, discard_avg)
> + __field(unsigned int, discard_cnt)
> ),
>
> TP_fast_assign(
> __entry->dev = sbi->sb->s_dev;
> - __entry->d_rd_peak = iostat_lat[READ_IO][DATA].peak_lat;
> - __entry->d_rd_avg = iostat_lat[READ_IO][DATA].avg_lat;
> - __entry->d_rd_cnt = iostat_lat[READ_IO][DATA].cnt;
> - __entry->n_rd_peak = iostat_lat[READ_IO][NODE].peak_lat;
> - __entry->n_rd_avg = iostat_lat[READ_IO][NODE].avg_lat;
> - __entry->n_rd_cnt = iostat_lat[READ_IO][NODE].cnt;
> - __entry->m_rd_peak = iostat_lat[READ_IO][META].peak_lat;
> - __entry->m_rd_avg = iostat_lat[READ_IO][META].avg_lat;
> - __entry->m_rd_cnt = iostat_lat[READ_IO][META].cnt;
> - __entry->d_wr_s_peak = iostat_lat[WRITE_SYNC_IO][DATA].peak_lat;
> - __entry->d_wr_s_avg = iostat_lat[WRITE_SYNC_IO][DATA].avg_lat;
> - __entry->d_wr_s_cnt = iostat_lat[WRITE_SYNC_IO][DATA].cnt;
> - __entry->n_wr_s_peak = iostat_lat[WRITE_SYNC_IO][NODE].peak_lat;
> - __entry->n_wr_s_avg = iostat_lat[WRITE_SYNC_IO][NODE].avg_lat;
> - __entry->n_wr_s_cnt = iostat_lat[WRITE_SYNC_IO][NODE].cnt;
> - __entry->m_wr_s_peak = iostat_lat[WRITE_SYNC_IO][META].peak_lat;
> - __entry->m_wr_s_avg = iostat_lat[WRITE_SYNC_IO][META].avg_lat;
> - __entry->m_wr_s_cnt = iostat_lat[WRITE_SYNC_IO][META].cnt;
> - __entry->d_wr_as_peak = iostat_lat[WRITE_ASYNC_IO][DATA].peak_lat;
> - __entry->d_wr_as_avg = iostat_lat[WRITE_ASYNC_IO][DATA].avg_lat;
> - __entry->d_wr_as_cnt = iostat_lat[WRITE_ASYNC_IO][DATA].cnt;
> - __entry->n_wr_as_peak = iostat_lat[WRITE_ASYNC_IO][NODE].peak_lat;
> - __entry->n_wr_as_avg = iostat_lat[WRITE_ASYNC_IO][NODE].avg_lat;
> - __entry->n_wr_as_cnt = iostat_lat[WRITE_ASYNC_IO][NODE].cnt;
> - __entry->m_wr_as_peak = iostat_lat[WRITE_ASYNC_IO][META].peak_lat;
> - __entry->m_wr_as_avg = iostat_lat[WRITE_ASYNC_IO][META].avg_lat;
> - __entry->m_wr_as_cnt = iostat_lat[WRITE_ASYNC_IO][META].cnt;
> + __entry->d_rd_peak = iostat_lat[READ_DATA_LAT].peak_lat;
> + __entry->d_rd_avg = iostat_lat[READ_DATA_LAT].avg_lat;
> + __entry->d_rd_cnt = iostat_lat[READ_DATA_LAT].cnt;
> + __entry->n_rd_peak = iostat_lat[READ_NODE_LAT].peak_lat;
> + __entry->n_rd_avg = iostat_lat[READ_NODE_LAT].avg_lat;
> + __entry->n_rd_cnt = iostat_lat[READ_NODE_LAT].cnt;
> + __entry->m_rd_peak = iostat_lat[READ_META_LAT].peak_lat;
> + __entry->m_rd_avg = iostat_lat[READ_META_LAT].avg_lat;
> + __entry->m_rd_cnt = iostat_lat[READ_META_LAT].cnt;
> + __entry->d_wr_s_peak = iostat_lat[WRITE_SYNC_DATA_LAT].peak_lat;
> + __entry->d_wr_s_avg = iostat_lat[WRITE_SYNC_DATA_LAT].avg_lat;
> + __entry->d_wr_s_cnt = iostat_lat[WRITE_SYNC_DATA_LAT].cnt;
> + __entry->n_wr_s_peak = iostat_lat[WRITE_SYNC_NODE_LAT].peak_lat;
> + __entry->n_wr_s_avg = iostat_lat[WRITE_SYNC_NODE_LAT].avg_lat;
> + __entry->n_wr_s_cnt = iostat_lat[WRITE_SYNC_NODE_LAT].cnt;
> + __entry->m_wr_s_peak = iostat_lat[WRITE_SYNC_META_LAT].peak_lat;
> + __entry->m_wr_s_avg = iostat_lat[WRITE_SYNC_META_LAT].avg_lat;
> + __entry->m_wr_s_cnt = iostat_lat[WRITE_SYNC_META_LAT].cnt;
> + __entry->d_wr_as_peak = iostat_lat[WRITE_ASYNC_DATA_LAT].peak_lat;
> + __entry->d_wr_as_avg = iostat_lat[WRITE_ASYNC_DATA_LAT].avg_lat;
> + __entry->d_wr_as_cnt = iostat_lat[WRITE_ASYNC_DATA_LAT].cnt;
> + __entry->n_wr_as_peak = iostat_lat[WRITE_ASYNC_NODE_LAT].peak_lat;
> + __entry->n_wr_as_avg = iostat_lat[WRITE_ASYNC_NODE_LAT].avg_lat;
> + __entry->n_wr_as_cnt = iostat_lat[WRITE_ASYNC_NODE_LAT].cnt;
> + __entry->m_wr_as_peak = iostat_lat[WRITE_ASYNC_META_LAT].peak_lat;
> + __entry->m_wr_as_avg = iostat_lat[WRITE_ASYNC_META_LAT].avg_lat;
> + __entry->m_wr_as_cnt = iostat_lat[WRITE_ASYNC_META_LAT].cnt;
> + __entry->discard_peak = iostat_lat[DISCARD_LAT].peak_lat;
> + __entry->discard_avg = iostat_lat[DISCARD_LAT].avg_lat;
> + __entry->discard_cnt = iostat_lat[DISCARD_LAT].cnt;
> ),
>
> TP_printk("dev = (%d,%d), "
> @@ -2116,7 +2122,8 @@ TRACE_EVENT(f2fs_iostat_latency,
> "rd_data [%u/%u/%u], rd_node [%u/%u/%u], rd_meta [%u/%u/%u], "
> "wr_sync_data [%u/%u/%u], wr_sync_node [%u/%u/%u], "
> "wr_sync_meta [%u/%u/%u], wr_async_data [%u/%u/%u], "
> - "wr_async_node [%u/%u/%u], wr_async_meta [%u/%u/%u]",
> + "wr_async_node [%u/%u/%u], wr_async_meta [%u/%u/%u], "
> + "discard [%u/%u/%u]",
> show_dev(__entry->dev),
> __entry->d_rd_peak, __entry->d_rd_avg, __entry->d_rd_cnt,
> __entry->n_rd_peak, __entry->n_rd_avg, __entry->n_rd_cnt,
> @@ -2126,7 +2133,8 @@ TRACE_EVENT(f2fs_iostat_latency,
> __entry->m_wr_s_peak, __entry->m_wr_s_avg, __entry->m_wr_s_cnt,
> __entry->d_wr_as_peak, __entry->d_wr_as_avg, __entry->d_wr_as_cnt,
> __entry->n_wr_as_peak, __entry->n_wr_as_avg, __entry->n_wr_as_cnt,
> - __entry->m_wr_as_peak, __entry->m_wr_as_avg, __entry->m_wr_as_cnt)
> + __entry->m_wr_as_peak, __entry->m_wr_as_avg, __entry->m_wr_as_cnt,
> + __entry->discard_peak, __entry->discard_avg, __entry->discard_cnt)
> );
> #endif
>
> --
> 2.35.1