[PATCH] timekeeping: move multigrain ctime floor handling into timekeeper
From: Jeff Layton
Date: Wed Sep 11 2024 - 08:57:34 EST
The kernel test robot reported a performance regression in some
will-it-scale tests due to the multigrain timestamp patches. The data
showed that coarse_ctime() was slowing down current_time(), which is
called frequently in the I/O path.
Add ktime_get_coarse_real_ts64_with_floor(), which takes the (monotonic)
floor and returns the later of the coarse time and the floor, as a
realtime value. This avoids some of the conversion overhead of
coarse_ctime(), and recovers some of the performance in these tests.
The will-it-scale pipe1_threads microbenchmark shows these averages on
my test rig:
v6.11-rc7: 83830660 (baseline)
v6.11-rc7 + mgtime series: 77631748 (93% of baseline)
v6.11-rc7 + mgtime + this: 81620228 (97% of baseline)
Reported-by: kernel test robot <oliver.sang@xxxxxxxxx>
Closes: https://lore.kernel.org/oe-lkp/202409091303.31b2b713-oliver.sang@xxxxxxxxx
Suggested-by: Arnd Bergmann <arnd@xxxxxxxxxx>
Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
---
Arnd suggested moving this into the timekeeper when reviewing an earlier
version of this series, and that turns out to be better for performance.
I'm not sure how this should be merged (if it is acceptable). The multigrain
timestamp patches that this would affect are in Christian's tree, so
taking it through that tree may be best, provided the timekeeper
maintainers are OK with this approach.
---
fs/inode.c | 35 +++++++++--------------------------
include/linux/timekeeping.h | 2 ++
kernel/time/timekeeping.c | 29 +++++++++++++++++++++++++++++
3 files changed, 40 insertions(+), 26 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 01f7df1973bd..47679a054472 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2255,25 +2255,6 @@ int file_remove_privs(struct file *file)
}
EXPORT_SYMBOL(file_remove_privs);
-/**
- * coarse_ctime - return the current coarse-grained time
- * @floor: current (monotonic) ctime_floor value
- *
- * Get the coarse-grained time, and then determine whether to
- * return it or the current floor value. Returns the later of the
- * floor and coarse grained timestamps, converted to realtime
- * clock value.
- */
-static ktime_t coarse_ctime(ktime_t floor)
-{
- ktime_t coarse = ktime_get_coarse();
-
- /* If coarse time is already newer, return that */
- if (!ktime_after(floor, coarse))
- return ktime_get_coarse_real();
- return ktime_mono_to_real(floor);
-}
-
/**
* current_time - Return FS time (possibly fine-grained)
* @inode: inode.
@@ -2285,10 +2266,10 @@ static ktime_t coarse_ctime(ktime_t floor)
struct timespec64 current_time(struct inode *inode)
{
ktime_t floor = atomic64_read(&ctime_floor);
- ktime_t now = coarse_ctime(floor);
- struct timespec64 now_ts = ktime_to_timespec64(now);
+ struct timespec64 now_ts;
u32 cns;
+ ktime_get_coarse_real_ts64_with_floor(&now_ts, floor);
if (!is_mgtime(inode))
goto out;
@@ -2745,7 +2726,7 @@ EXPORT_SYMBOL(timestamp_truncate);
*
* Set the inode's ctime to the current value for the inode. Returns the
* current value that was assigned. If this is not a multigrain inode, then we
- * just set it to whatever the coarse_ctime is.
+ * set it to the later of the coarse time and floor value.
*
* If it is multigrain, then we first see if the coarse-grained timestamp is
* distinct from what we have. If so, then we'll just use that. If we have to
@@ -2756,15 +2737,15 @@ EXPORT_SYMBOL(timestamp_truncate);
*/
struct timespec64 inode_set_ctime_current(struct inode *inode)
{
- ktime_t now, floor = atomic64_read(&ctime_floor);
+ ktime_t floor = atomic64_read(&ctime_floor);
struct timespec64 now_ts;
u32 cns, cur;
- now = coarse_ctime(floor);
+ ktime_get_coarse_real_ts64_with_floor(&now_ts, floor);
/* Just return that if this is not a multigrain fs */
if (!is_mgtime(inode)) {
- now_ts = timestamp_truncate(ktime_to_timespec64(now), inode);
+ now_ts = timestamp_truncate(now_ts, inode);
inode_set_ctime_to_ts(inode, now_ts);
goto out;
}
@@ -2777,6 +2758,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
cns = smp_load_acquire(&inode->i_ctime_nsec);
if (cns & I_CTIME_QUERIED) {
ktime_t ctime = ktime_set(inode->i_ctime_sec, cns & ~I_CTIME_QUERIED);
+ ktime_t now = timespec64_to_ktime(now_ts);
if (!ktime_after(now, ctime)) {
ktime_t old, fine;
@@ -2797,10 +2779,11 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
else
fine = old;
now = ktime_mono_to_real(fine);
+ now_ts = ktime_to_timespec64(now);
}
}
mgtime_counter_inc(mg_ctime_updates);
- now_ts = timestamp_truncate(ktime_to_timespec64(now), inode);
+ now_ts = timestamp_truncate(now_ts, inode);
cur = cns;
/* No need to cmpxchg if it's exactly the same */
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index fc12a9ba2c88..9b3c957ab260 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -44,6 +44,7 @@ extern void ktime_get_ts64(struct timespec64 *ts);
extern void ktime_get_real_ts64(struct timespec64 *tv);
extern void ktime_get_coarse_ts64(struct timespec64 *ts);
extern void ktime_get_coarse_real_ts64(struct timespec64 *ts);
+extern void ktime_get_coarse_real_ts64_with_floor(struct timespec64 *ts, ktime_t floor);
void getboottime64(struct timespec64 *ts);
@@ -68,6 +69,7 @@ enum tk_offsets {
extern ktime_t ktime_get(void);
extern ktime_t ktime_get_with_offset(enum tk_offsets offs);
extern ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs);
+extern ktime_t ktime_get_coarse_with_floor_and_offset(enum tk_offsets offs, ktime_t floor);
extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs);
extern ktime_t ktime_get_raw(void);
extern u32 ktime_get_resolution_ns(void);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 5391e4167d60..56b979471c6a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2394,6 +2394,35 @@ void ktime_get_coarse_real_ts64(struct timespec64 *ts)
}
EXPORT_SYMBOL(ktime_get_coarse_real_ts64);
+/**
+ * ktime_get_coarse_real_ts64_with_floor - get later of coarse-grained time or floor
+ * @ts: timespec64 to be filled with the resulting realtime value
+ * @floor: monotonic floor value; the TK_OFFS_REAL offset is added to it here
+ *
+ * Read the coarse time and the realtime offset in one seqcount read section,
+ * add the offset to @floor, and fill @ts with the later of the two values.
+ */
+void ktime_get_coarse_real_ts64_with_floor(struct timespec64 *ts, ktime_t floor)
+{
+ struct timekeeper *tk = &tk_core.timekeeper;
+ unsigned int seq;
+ ktime_t f_real, offset, coarse;
+
+ WARN_ON(timekeeping_suspended);
+
+ do {
+ seq = read_seqcount_begin(&tk_core.seq);
+ *ts = tk_xtime(tk);
+ offset = *offsets[TK_OFFS_REAL];
+ } while (read_seqcount_retry(&tk_core.seq, seq));
+
+ coarse = timespec64_to_ktime(*ts);
+ f_real = ktime_add(floor, offset);
+ if (ktime_after(f_real, coarse))
+ *ts = ktime_to_timespec64(f_real);
+}
+EXPORT_SYMBOL_GPL(ktime_get_coarse_real_ts64_with_floor);
+
void ktime_get_coarse_ts64(struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
---
base-commit: 962e66693d6214b1d48f32f68ed002170a98f2c0
change-id: 20240910-mgtime-e244049f2aea
Best regards,
--
Jeff Layton <jlayton@xxxxxxxxxx>