[PATCH] f2fs: add tracepoint for f2fs iostat

From: Daeho Jeong
Date: Sun Mar 29 2020 - 23:30:59 EST


Add a tracepoint that reports f2fs iostat. The default reporting period
is 3 seconds. This tracepoint can be used to monitor I/O statistics
periodically.

Bug: 152162885
Change-Id: I6fbe010b9cf1a90caa0f4793a6dab77c4cba7da6
Signed-off-by: Daeho Jeong <daehojeong@xxxxxxxxxx>
---
Documentation/ABI/testing/sysfs-fs-f2fs | 6 +++
fs/f2fs/f2fs.h | 16 +++++++-
fs/f2fs/super.c | 1 +
fs/f2fs/sysfs.c | 39 +++++++++++++++++++
include/trace/events/f2fs.h | 52 +++++++++++++++++++++++++
5 files changed, 113 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index c8620ea7022a7..427f5b45c67f1 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -332,3 +332,9 @@ Description: Give a way to attach REQ_META|FUA to data writes
* REQ_META | REQ_FUA |
* 5 | 4 | 3 | 2 | 1 | 0 |
* Cold | Warm | Hot | Cold | Warm | Hot |
+
+What: /sys/fs/f2fs/<disk>/iostat_period_ms
+Date: April 2020
+Contact: "Daeho Jeong" <daehojeong@xxxxxxxxxx>
+Description: Give a way to change the iostat tracing period (in ms). 3 secs by default.
+ The iostat tracepoint reports the change in each stat over that period.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c2788738aa0d4..6cedbfb2067c5 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1505,7 +1505,10 @@ struct f2fs_sb_info {
/* For app/fs IO statistics */
spinlock_t iostat_lock;
unsigned long long write_iostat[NR_IO_TYPE];
+ unsigned long long prev_write_iostat[NR_IO_TYPE];
bool iostat_enable;
+ unsigned long iostat_next_period;
+ unsigned int iostat_period_ms;

/* to attach REQ_META|REQ_FUA flags */
unsigned int data_io_flag;
@@ -2999,16 +3002,25 @@ static inline int get_inline_xattr_addrs(struct inode *inode)
sizeof((f2fs_inode)->field)) \
<= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \

+#define DEFAULT_IOSTAT_PERIOD_MS 3000
+#define MIN_IOSTAT_PERIOD_MS 100
+/* maximum period of iostat tracing is 1 day (24 * 60 * 60 * 1000 ms) */
+#define MAX_IOSTAT_PERIOD_MS 86400000
+
static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
{
 int i;

 spin_lock(&sbi->iostat_lock);
- for (i = 0; i < NR_IO_TYPE; i++)
+ for (i = 0; i < NR_IO_TYPE; i++) {
 sbi->write_iostat[i] = 0;
+ sbi->prev_write_iostat[i] = 0; /* reset delta baseline too */
+ }
 spin_unlock(&sbi->iostat_lock);
}

+extern void f2fs_record_iostat(struct f2fs_sb_info *sbi);
+
static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
enum iostat_type type, unsigned long long io_bytes)
{
@@ -3022,6 +3034,8 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
sbi->write_iostat[APP_WRITE_IO] -
sbi->write_iostat[APP_DIRECT_IO];
spin_unlock(&sbi->iostat_lock);
+
+ f2fs_record_iostat(sbi);
}

#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f2dfc21c6abb0..438296e17183d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3424,6 +3424,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
/* init iostat info */
spin_lock_init(&sbi->iostat_lock);
sbi->iostat_enable = false;
+ sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS;

for (i = 0; i < NR_PAGE_TYPE; i++) {
int n = (i == META) ? 1: NR_TEMP_TYPE;
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index aeebfb5024a22..d05cb68c26374 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -15,6 +15,7 @@
#include "f2fs.h"
#include "segment.h"
#include "gc.h"
+#include <trace/events/f2fs.h>

static struct proc_dir_entry *f2fs_proc_root;

@@ -379,6 +380,15 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
return count;
}

+ if (!strcmp(a->attr.name, "iostat_period_ms")) {
+ if (t < MIN_IOSTAT_PERIOD_MS || t > MAX_IOSTAT_PERIOD_MS)
+ return -EINVAL;
+ spin_lock(&sbi->iostat_lock);
+ sbi->iostat_period_ms = (unsigned int)t;
+ spin_unlock(&sbi->iostat_lock);
+ return count;
+ }
+
*ui = (unsigned int)t;

return count;
@@ -535,6 +545,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info,
umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list);
@@ -615,6 +626,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(gc_idle_interval),
ATTR_LIST(umount_discard_timeout),
ATTR_LIST(iostat_enable),
+ ATTR_LIST(iostat_period_ms),
ATTR_LIST(readdir_ra),
ATTR_LIST(gc_pin_file_thresh),
ATTR_LIST(extension_list),
@@ -751,6 +763,33 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq,
return 0;
}

+void f2fs_record_iostat(struct f2fs_sb_info *sbi)
+{
+	unsigned long long delta[NR_IO_TYPE];
+	int type;
+
+	if (time_is_after_jiffies(sbi->iostat_next_period))
+		return;
+
+	spin_lock(&sbi->iostat_lock);
+	/* re-check under the lock: the unlocked test above can race */
+	if (time_is_after_jiffies(sbi->iostat_next_period)) {
+		spin_unlock(&sbi->iostat_lock);
+		return;
+	}
+	sbi->iostat_next_period = jiffies +
+			msecs_to_jiffies(sbi->iostat_period_ms);
+
+	for (type = 0; type < NR_IO_TYPE; type++) {
+		delta[type] = sbi->write_iostat[type] -
+				sbi->prev_write_iostat[type];
+		sbi->prev_write_iostat[type] = sbi->write_iostat[type];
+	}
+	spin_unlock(&sbi->iostat_lock);
+
+	trace_f2fs_iostat(sbi, delta);
+}
+
static int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
void *offset)
{
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index d97adfc327f03..e78c8696e2adc 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -1812,6 +1812,58 @@ DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end,
TP_ARGS(inode, cluster_idx, compressed_size, ret)
);

+TRACE_EVENT(f2fs_iostat, /* f2fs I/O statistics, one value per I/O type */
+
+ TP_PROTO(struct f2fs_sb_info *sbi, unsigned long long *iostat), /* iostat[] is indexed by I/O type (APP_xxx, FS_xxx) */
+
+ TP_ARGS(sbi, iostat),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned long long, app_dio)
+ __field(unsigned long long, app_bio)
+ __field(unsigned long long, app_wio)
+ __field(unsigned long long, app_mio)
+ __field(unsigned long long, fs_dio)
+ __field(unsigned long long, fs_nio)
+ __field(unsigned long long, fs_mio)
+ __field(unsigned long long, fs_gc_dio)
+ __field(unsigned long long, fs_gc_nio)
+ __field(unsigned long long, fs_cp_dio)
+ __field(unsigned long long, fs_cp_nio)
+ __field(unsigned long long, fs_cp_mio)
+ __field(unsigned long long, fs_discard)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sbi->sb->s_dev; /* device number taken from the superblock */
+ __entry->app_dio = iostat[APP_DIRECT_IO];
+ __entry->app_bio = iostat[APP_BUFFERED_IO];
+ __entry->app_wio = iostat[APP_WRITE_IO];
+ __entry->app_mio = iostat[APP_MAPPED_IO];
+ __entry->fs_dio = iostat[FS_DATA_IO];
+ __entry->fs_nio = iostat[FS_NODE_IO];
+ __entry->fs_mio = iostat[FS_META_IO];
+ __entry->fs_gc_dio = iostat[FS_GC_DATA_IO];
+ __entry->fs_gc_nio = iostat[FS_GC_NODE_IO];
+ __entry->fs_cp_dio = iostat[FS_CP_DATA_IO];
+ __entry->fs_cp_nio = iostat[FS_CP_NODE_IO];
+ __entry->fs_cp_mio = iostat[FS_CP_META_IO];
+ __entry->fs_discard = iostat[FS_DISCARD];
+ ),
+
+ TP_printk("dev = (%d,%d), " /* args below follow print order, not field order */
+ "app [write=%llu (direct=%llu, buffered=%llu), mapped=%llu], "
+ "fs [data=%llu, node=%llu, meta=%llu, discard=%llu], "
+ "gc [data=%llu, node=%llu], "
+ "cp [data=%llu, node=%llu, meta=%llu]",
+ show_dev(__entry->dev), __entry->app_wio, __entry->app_dio,
+ __entry->app_bio, __entry->app_mio, __entry->fs_dio,
+ __entry->fs_nio, __entry->fs_mio, __entry->fs_discard,
+ __entry->fs_gc_dio, __entry->fs_gc_nio, __entry->fs_cp_dio,
+ __entry->fs_cp_nio, __entry->fs_cp_mio)
+);
+
#endif /* _TRACE_F2FS_H */

/* This part must be outside protection */
--
2.26.0.110.g2183baf09c-goog