[PATCH v8 06/11] fs: add percpu counters for significant multigrain timestamp events

From: Jeff Layton
Date: Sat Sep 14 2024 - 13:10:25 EST


Add percpu counters that track significant multigrain timestamp (mgtime)
events, and a debugfs file that displays them when CONFIG_DEBUG_FS is enabled:

- number of attempted ctime updates
- number of successful i_ctime_nsec swaps
- number of fine-grained timestamp fetches
- number of coarse-grained floor swaps

Reviewed-by: Josef Bacik <josef@xxxxxxxxxxxxxx>
Reviewed-by: Darrick J. Wong <djwong@xxxxxxxxxx>
Reviewed-by: Jan Kara <jack@xxxxxxx>
Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
---
fs/inode.c | 76 ++++++++++++++++++++++++++++++++++++--
include/linux/timekeeping.h | 1 +
kernel/time/timekeeping.c | 3 +-
kernel/time/timekeeping_debug.c | 12 ++++++
kernel/time/timekeeping_internal.h | 3 ++
5 files changed, 90 insertions(+), 5 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index d7da9d06921f..1f0487104c71 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -21,6 +21,8 @@
#include <linux/list_lru.h>
#include <linux/iversion.h>
#include <linux/rw_hint.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
#include <trace/events/writeback.h>
#define CREATE_TRACE_POINTS
#include <trace/events/timestamp.h>
@@ -101,6 +103,70 @@ long get_nr_dirty_inodes(void)
return nr_dirty > 0 ? nr_dirty : 0;
}

+#ifdef CONFIG_DEBUG_FS
+static DEFINE_PER_CPU(long, mg_ctime_updates); /* attempted ctime updates */
+static DEFINE_PER_CPU(long, mg_fine_stamps); /* fine-grained timestamp fetches */
+static DEFINE_PER_CPU(long, mg_ctime_swaps); /* successful i_ctime_nsec swaps */
+
+/*
+ * Return the sum of the per-CPU mg_ctime_updates counters over every
+ * possible CPU. A negative sum is reported as 0.
+ */
+static long get_mg_ctime_updates(void)
+{
+	long total = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		total += per_cpu(mg_ctime_updates, cpu);
+
+	if (total < 0)
+		return 0;
+	return total;
+}
+
+/*
+ * Return the sum of the per-CPU mg_fine_stamps counters over every
+ * possible CPU. A negative sum is reported as 0.
+ */
+static long get_mg_fine_stamps(void)
+{
+	long total = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		total += per_cpu(mg_fine_stamps, cpu);
+
+	if (total < 0)
+		return 0;
+	return total;
+}
+
+/*
+ * Return the sum of the per-CPU mg_ctime_swaps counters over every
+ * possible CPU. A negative sum is reported as 0.
+ */
+static long get_mg_ctime_swaps(void)
+{
+	long total = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		total += per_cpu(mg_ctime_swaps, cpu);
+
+	if (total < 0)
+		return 0;
+	return total;
+}
+
+#define mgtime_counter_inc(__var) this_cpu_inc(__var) /* bump the given per-CPU mgtime counter */
+
+/*
+ * seq_file show handler for the multigrain timestamp statistics.
+ *
+ * Emits a single line holding four space-separated totals, in this order:
+ * ctime updates, ctime swaps, fine-grained stamps, floor swaps.
+ * Always returns 0.
+ */
+static int mgts_show(struct seq_file *s, void *p)
+{
+	long nr_updates, nr_swaps, nr_fine, nr_floor;
+
+	nr_updates = get_mg_ctime_updates();
+	nr_swaps = get_mg_ctime_swaps();
+	nr_fine = get_mg_fine_stamps();
+	nr_floor = get_mg_floor_swaps();
+
+	seq_printf(s, "%ld %ld %ld %ld\n", nr_updates, nr_swaps, nr_fine, nr_floor);
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mgts);
+
+/*
+ * Create the "multigrain_timestamps" debugfs file, served by mgts_fops.
+ * Registered as a late initcall; always returns 0.
+ */
+static int __init mg_debugfs_init(void)
+{
+	debugfs_create_file("multigrain_timestamps", S_IFREG | S_IRUGO,
+			    NULL, NULL, &mgts_fops);
+	return 0;
+}
+late_initcall(mg_debugfs_init);
+
+#else /* ! CONFIG_DEBUG_FS */
+
+#define mgtime_counter_inc(__var) do { } while (0) /* no-op stub: must accept the counter argument callers pass */
+
+#endif /* CONFIG_DEBUG_FS */
+
/*
* Handle nr_inode sysctl
*/
@@ -2655,10 +2721,9 @@ EXPORT_SYMBOL(timestamp_truncate);
*
* If it is multigrain, then we first see if the coarse-grained timestamp is
* distinct from what we have. If so, then we'll just use that. If we have to
- * get a fine-grained timestamp, then do so, and try to swap it into the floor.
- * We accept the new floor value regardless of the outcome of the cmpxchg.
- * After that, we try to swap the new value into i_ctime_nsec. Again, we take
- * the resulting ctime, regardless of the outcome of the swap.
+ * get a fine-grained timestamp, then do so. After that, we try to swap the new
+ * value into i_ctime_nsec. We take the resulting ctime, regardless of the
+ * outcome of the swap.
*/
struct timespec64 inode_set_ctime_current(struct inode *inode)
{
@@ -2687,8 +2752,10 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
if (timespec64_compare(&now, &ctime) <= 0) {
ktime_get_real_ts64_mg(&now);
now = timestamp_truncate(now, inode);
+ mgtime_counter_inc(mg_fine_stamps);
}
}
+ mgtime_counter_inc(mg_ctime_updates);

/* No need to cmpxchg if it's exactly the same */
if (cns == now.tv_nsec && inode->i_ctime_sec == now.tv_sec) {
@@ -2702,6 +2769,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
/* If swap occurred, then we're (mostly) done */
inode->i_ctime_sec = now.tv_sec;
trace_ctime_ns_xchg(inode, cns, now.tv_nsec, cur);
+ mgtime_counter_inc(mg_ctime_swaps);
} else {
/*
* Was the change due to someone marking the old ctime QUERIED?
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 7aa85246c183..b9c8c597a073 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -48,6 +48,7 @@ extern void ktime_get_coarse_real_ts64(struct timespec64 *ts);
/* Multigrain timestamp interfaces */
extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts);
extern void ktime_get_real_ts64_mg(struct timespec64 *ts);
+extern long get_mg_floor_swaps(void);

void getboottime64(struct timespec64 *ts);

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 16937242b904..94b0219955a2 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2440,7 +2440,7 @@ EXPORT_SYMBOL_GPL(ktime_get_coarse_real_ts64_mg);
* regardless of the outcome of the swap. Note that this is a filesystem
* specific interface and should be avoided outside of that context.
*/
-void ktime_get_real_ts64_mg(struct timespec64 *ts, u64 cookie)
+void ktime_get_real_ts64_mg(struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
ktime_t old = atomic64_read(&mg_floor);
@@ -2464,6 +2464,7 @@ void ktime_get_real_ts64_mg(struct timespec64 *ts, u64 cookie)
if (atomic64_try_cmpxchg(&mg_floor, &old, mono)) {
ts->tv_nsec = 0;
timespec64_add_ns(ts, nsecs);
+ mgtime_counter_inc(mg_floor_swaps);
} else {
/*
* Something has changed mg_floor since "old" was
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index b73e8850e58d..9a3792072762 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -17,6 +17,9 @@

#define NUM_BINS 32

+/* Per-CPU count of mg_floor updates; incremented each time the floor is swapped */
+DEFINE_PER_CPU(long, mg_floor_swaps);
+
static unsigned int sleep_time_bin[NUM_BINS] = {0};

static int tk_debug_sleep_time_show(struct seq_file *s, void *data)
@@ -53,3 +56,12 @@ void tk_debug_account_sleep_time(const struct timespec64 *t)
(s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC);
}

+/*
+ * Return the sum of the per-CPU mg_floor_swaps counters over every
+ * possible CPU. A negative sum is reported as 0.
+ */
+long get_mg_floor_swaps(void)
+{
+	long total = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		total += per_cpu(mg_floor_swaps, cpu);
+
+	if (total < 0)
+		return 0;
+	return total;
+}
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h
index 4ca2787d1642..2b49332b45a5 100644
--- a/kernel/time/timekeeping_internal.h
+++ b/kernel/time/timekeeping_internal.h
@@ -11,8 +11,11 @@
*/
#ifdef CONFIG_DEBUG_FS
extern void tk_debug_account_sleep_time(const struct timespec64 *t);
+DECLARE_PER_CPU(long, mg_floor_swaps);
+#define mgtime_counter_inc(__var) this_cpu_inc(__var)
#else
#define tk_debug_account_sleep_time(x)
+#define mgtime_counter_inc(__var) do { } while (0) /* no-op stub: must accept the counter argument callers pass */
#endif

#ifdef CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE

--
2.46.0