[PATCH v2] x86/vdso: implement clock_gettime(CLOCK_MONOTONIC_RAW, ...)

From: Sverdlin, Alexander (Nokia - DE/Ulm)
Date: Tue Jun 04 2019 - 12:46:21 EST


From: Alexander Sverdlin <alexander.sverdlin@xxxxxxxxx>

Add CLOCK_MONOTONIC_RAW to the existing clock_gettime() vDSO
implementation, so that reading this clock no longer falls back to the
system call. This is based on the ideas of Jason Vas Dias and the comments
of Thomas Gleixner.

---- Test code ----
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

#define CLOCK_TYPE CLOCK_MONOTONIC_RAW
#define DURATION_SEC 10

/*
 * Benchmark driver: count how many clock_gettime(CLOCK_TYPE, ...) calls
 * complete within DURATION_SEC seconds and print that count to stdout.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if CLOCK_TYPE cannot be read.
 */
int main(int argc, char **argv)
{
	struct timespec t, end;
	unsigned long long cnt = 0;

	(void)argc;
	(void)argv;

	/*
	 * Check the clock once up front: if it is not supported, "end" (and
	 * later "t") would be left uninitialised and the loop below would
	 * compare garbage.
	 */
	if (clock_gettime(CLOCK_TYPE, &end)) {
		perror("clock_gettime");
		return EXIT_FAILURE;
	}
	end.tv_sec += DURATION_SEC;

	/*
	 * Run until t >= end. The nanosecond fields may only be compared while
	 * both timestamps are in the same second; otherwise a sample that is
	 * already past the deadline second but has a small tv_nsec would keep
	 * the loop running beyond DURATION_SEC.
	 */
	do {
		clock_gettime(CLOCK_TYPE, &t);
		++cnt;
	} while (t.tv_sec < end.tv_sec ||
		 (t.tv_sec == end.tv_sec && t.tv_nsec < end.tv_nsec));

	dprintf(STDOUT_FILENO, "%llu", cnt);

	return EXIT_SUCCESS;
}
-------------------

The results from the above test program (number of clock_gettime() calls
completed in 10 seconds):

Clock                 Before    After     Diff
-----                 ------    -----     ----
CLOCK_MONOTONIC       355.5M    355.5M
CLOCK_MONOTONIC_RAW    44.9M    371.2M    +726%
CLOCK_REALTIME        355.5M    355.5M

Link: https://lore.kernel.org/patchwork/patch/933583/
Link: https://bugzilla.kernel.org/show_bug.cgi?id=198961
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Jason Vas Dias <jason.vas.dias@xxxxxxxxx>
Signed-off-by: Alexander Sverdlin <alexander.sverdlin@xxxxxxxxx>
---
Changelog:
v2: copy do_hres() into do_monotonic_raw()

arch/x86/entry/vdso/vclock_gettime.c | 35 +++++++++++++++++++++++++++++++++
arch/x86/entry/vsyscall/vsyscall_gtod.c | 6 ++++++
arch/x86/include/asm/vgtod.h | 2 ++
3 files changed, 43 insertions(+)

diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 0f82a70..64736a4 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -168,6 +168,39 @@ notrace static int do_hres(clockid_t clk, struct timespec *ts)
return 0;
}

+/*
+ * CLOCK_MONOTONIC_RAW reader; a deliberate copy of do_hres(). Merging the two
+ * has so far slowed CLOCK_MONOTONIC/CLOCK_REALTIME by 5%; test any change.
+ */
+notrace static int do_monotonic_raw(struct timespec *ts)
+{
+ struct vgtod_ts *base = &gtod->basetime[CLOCK_MONOTONIC_RAW];
+ u64 cycles, last, sec, ns;
+ unsigned int seq;
+
+ do {
+ seq = gtod_read_begin(gtod);
+ cycles = vgetcyc(gtod->vclock_mode);
+ ns = base->nsec;
+ last = gtod->cycle_last;
+ if (unlikely((s64)cycles < 0))
+ return vdso_fallback_gettime(CLOCK_MONOTONIC_RAW, ts);
+ if (cycles > last)
+ ns += (cycles - last) * gtod->raw_mult;
+ ns >>= gtod->raw_shift;
+ sec = base->sec;
+ } while (unlikely(gtod_read_retry(gtod, seq)));
+
+ /*
+ * Do this outside the loop: a race inside the loop could result
+ * in __iter_div_u64_rem() being extremely slow.
+ */
+ ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+ ts->tv_nsec = ns;
+
+ return 0;
+}
+
notrace static void do_coarse(clockid_t clk, struct timespec *ts)
{
struct vgtod_ts *base = &gtod->basetime[clk];
@@ -199,6 +232,8 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
do_coarse(clock, ts);
return 0;
}
+ if (clock == CLOCK_MONOTONIC_RAW)
+ return do_monotonic_raw(ts);
return vdso_fallback_gettime(clock, ts);
}

diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
index cfcdba0..9f7744f3 100644
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
@@ -46,6 +46,8 @@ void update_vsyscall(struct timekeeper *tk)
vdata->mask = tk->tkr_mono.mask;
vdata->mult = tk->tkr_mono.mult;
vdata->shift = tk->tkr_mono.shift;
+ vdata->raw_mult = tk->tkr_raw.mult;
+ vdata->raw_shift = tk->tkr_raw.shift;

base = &vdata->basetime[CLOCK_REALTIME];
base->sec = tk->xtime_sec;
@@ -65,6 +67,10 @@ void update_vsyscall(struct timekeeper *tk)
}
base->nsec = nsec;

+ base = &vdata->basetime[CLOCK_MONOTONIC_RAW];
+ base->sec = tk->raw_sec;
+ base->nsec = tk->tkr_raw.xtime_nsec;
+
base = &vdata->basetime[CLOCK_REALTIME_COARSE];
base->sec = tk->xtime_sec;
base->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 913a133..65ac320 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -43,6 +43,8 @@ struct vsyscall_gtod_data {
u64 mask;
u32 mult;
u32 shift;
+ u32 raw_mult;
+ u32 raw_shift;

struct vgtod_ts basetime[VGTOD_BASES];

--
2.4.6