Re: [PATCH v3] x86/vdso: Handle clock_gettime(CLOCK_TAI) in vDSO
From: Thomas Gleixner
Date: Sun Sep 09 2018 - 16:05:34 EST
On Fri, 31 Aug 2018, Andy Lutomirski wrote:
> (Hi, Florian!)
>
> On Fri, Aug 31, 2018 at 6:59 PM, Matt Rickard <matt@xxxxxxxxxxxxxxx> wrote:
> > Process clock_gettime(CLOCK_TAI) in vDSO.
> > This makes the call about as fast as CLOCK_REALTIME and CLOCK_MONOTONIC:
> >
> >             nanoseconds
> >     before   after   clockname
> >     ------   -----   ---------
> >        233      87   CLOCK_TAI
> >         96      93   CLOCK_REALTIME
> >         88      87   CLOCK_MONOTONIC
>
> Are you sure you did this right? With the clocksource set to TSC
> (which is the only reasonable choice unless KVM has seriously cleaned
> up its act), with retpolines enabled, I get 24ns for CLOCK_MONOTONIC
> without your patch and 32ns with your patch. And there is indeed a
> retpoline in the disassembled output:
>
> e5: e8 07 00 00 00 callq f1 <__vdso_clock_gettime+0x31>
> ea: f3 90 pause
> ec: 0f ae e8 lfence
> ef: eb f9 jmp ea <__vdso_clock_gettime+0x2a>
> f1: 48 89 04 24 mov %rax,(%rsp)
> f5: c3 retq
>
> You're probably going to have to set -fno-jump-tables or do something
> clever like adding a whole array of (seconds, nsec) in gtod and
> indexing that array by the clock id.
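The retpoline quoted above is exactly that: the compiler lowered the switch ()
over the clock ids into a jump table, i.e. an indirect jump, which
CONFIG_RETPOLINE then routes through the pause/lfence thunk. A minimal
illustration of the pattern (hypothetical standalone code, not the kernel
source; whether a table is emitted depends on compiler version and flags):

    #include <stdio.h>

    /*
     * A dense switch like this is a jump-table candidate: the compiler
     * may emit an indirect "jmp *table(,%rdi,8)", which the retpoline
     * thunk then intercepts. Building with -fno-jump-tables forces a
     * compare/branch chain instead.
     */
    static long slot(int clock)
    {
            switch (clock) {
            case 0:  return 100;    /* CLOCK_REALTIME         */
            case 1:  return 101;    /* CLOCK_MONOTONIC        */
            case 5:  return 102;    /* CLOCK_REALTIME_COARSE  */
            case 6:  return 103;    /* CLOCK_MONOTONIC_COARSE */
            case 11: return 104;    /* CLOCK_TAI              */
            default: return -1;
            }
    }

    int main(void)
    {
            for (int clk = 0; clk <= 11; clk++)
                    printf("clock %2d -> slot %ld\n", clk, slot(clk));
            return 0;
    }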
See the patch below. It integrates TAI without slowing down everything else,
and it definitely does not result in indirect calls.

On a HSW it slows down clock_gettime() by ~0.5ns. On a SKL I get a speedup of
~0.5ns. On an AMD Epyc server it's a 1.2ns speedup. So it somehow depends on
the uarch, and I also observed compiler-version-dependent variations.
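For anyone puzzling over the VGTOD_HRES_MASK trick in the patch: the hres
clocks are dispatched by indexing basetime[] with 'clk & VGTOD_HRES_MASK',
and CLOCK_TAI (11) masked with 0x3 lands on slot 3, i.e. the
CLOCK_THREAD_CPUTIME_ID entry, which the VDSO never serves directly anyway.
A standalone sketch of the mapping (hypothetical userspace code; only the
clock id values come from uapi/linux/time.h):

    #include <stdio.h>

    #define VGTOD_HRES_MASK 0x3

    int main(void)
    {
            /* clock ids per uapi/linux/time.h: CLOCK_REALTIME = 0,
             * CLOCK_MONOTONIC = 1, CLOCK_TAI = 11 */
            const int hres[] = { 0, 1, 11 };

            /*
             * Each hres clock maps to a distinct basetime[] slot:
             * 0 -> 0, 1 -> 1, 11 -> 3. Slot 3 is CLOCK_THREAD_CPUTIME_ID,
             * which the VDSO never serves directly -- that is the "abuse"
             * the comment in the patch refers to. No switch, no jump
             * table, no indirect call.
             */
            for (unsigned int i = 0; i < sizeof(hres) / sizeof(hres[0]); i++)
                    printf("clock %2d -> basetime[%d]\n",
                           hres[i], hres[i] & VGTOD_HRES_MASK);
            return 0;
    }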
Thanks,
tglx
----
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -203,39 +203,18 @@ notrace static inline u64 vgetsns(int *m
return v * gtod->mult;
}
-/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
-notrace static int __always_inline do_realtime(struct timespec *ts)
+notrace static int __always_inline do_hres(struct timespec *ts, clockid_t clk)
{
- unsigned long seq;
- u64 ns;
+ struct vgtod_ts *base = &gtod->basetime[clk & VGTOD_HRES_MASK];
+ unsigned int seq;
int mode;
-
- do {
- seq = gtod_read_begin(gtod);
- mode = gtod->vclock_mode;
- ts->tv_sec = gtod->wall_time_sec;
- ns = gtod->wall_time_snsec;
- ns += vgetsns(&mode);
- ns >>= gtod->shift;
- } while (unlikely(gtod_read_retry(gtod, seq)));
-
- ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
- ts->tv_nsec = ns;
-
- return mode;
-}
-
-notrace static int __always_inline do_monotonic(struct timespec *ts)
-{
- unsigned long seq;
u64 ns;
- int mode;
do {
seq = gtod_read_begin(gtod);
mode = gtod->vclock_mode;
- ts->tv_sec = gtod->monotonic_time_sec;
- ns = gtod->monotonic_time_snsec;
+ ts->tv_sec = base->sec;
+ ns = base->nsec;
ns += vgetsns(&mode);
ns >>= gtod->shift;
} while (unlikely(gtod_read_retry(gtod, seq)));
@@ -246,58 +225,50 @@ notrace static int __always_inline do_mo
return mode;
}
-notrace static void do_realtime_coarse(struct timespec *ts)
+notrace static void do_coarse(struct timespec *ts, clockid_t clk)
{
+ struct vgtod_ts *base = &gtod->basetime[clk];
unsigned long seq;
- do {
- seq = gtod_read_begin(gtod);
- ts->tv_sec = gtod->wall_time_coarse_sec;
- ts->tv_nsec = gtod->wall_time_coarse_nsec;
- } while (unlikely(gtod_read_retry(gtod, seq)));
-}
-notrace static void do_monotonic_coarse(struct timespec *ts)
-{
- unsigned long seq;
do {
seq = gtod_read_begin(gtod);
- ts->tv_sec = gtod->monotonic_time_coarse_sec;
- ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
+ ts->tv_sec = base->sec;
+ ts->tv_nsec = base->nsec;
} while (unlikely(gtod_read_retry(gtod, seq)));
}
notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
- switch (clock) {
- case CLOCK_REALTIME:
- if (do_realtime(ts) == VCLOCK_NONE)
- goto fallback;
- break;
- case CLOCK_MONOTONIC:
- if (do_monotonic(ts) == VCLOCK_NONE)
- goto fallback;
- break;
- case CLOCK_REALTIME_COARSE:
- do_realtime_coarse(ts);
- break;
- case CLOCK_MONOTONIC_COARSE:
- do_monotonic_coarse(ts);
- break;
- default:
- goto fallback;
- }
+ unsigned int msk;
- return 0;
-fallback:
+ /* Sort out negative (CPU/FD) and invalid clocks */
+ if (unlikely((unsigned int) clock >= MAX_CLOCKS))
+ return vdso_fallback_gettime(clock, ts);
+
+ /*
+ * Convert the clockid to a bitmask and use it to check which
+ * clocks are handled in the VDSO directly.
+ */
+ msk = 1U << clock;
+ if (likely(msk & VGTOD_HRES)) {
+ if (do_hres(ts, clock) != VCLOCK_NONE)
+ return 0;
+ } else if (msk & VGTOD_COARSE) {
+ do_coarse(ts, clock);
+ return 0;
+ }
return vdso_fallback_gettime(clock, ts);
}
+
int clock_gettime(clockid_t, struct timespec *)
__attribute__((weak, alias("__vdso_clock_gettime")));
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
if (likely(tv != NULL)) {
- if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
+ struct timespec *ts = (struct timespec *) tv;
+
+ if (unlikely(do_hres(ts, CLOCK_REALTIME) == VCLOCK_NONE))
return vdso_fallback_gtod(tv, tz);
tv->tv_usec /= 1000;
}
@@ -318,7 +289,7 @@ int gettimeofday(struct timeval *, struc
notrace time_t __vdso_time(time_t *t)
{
/* This is atomic on x86 so we don't need any locks. */
- time_t result = READ_ONCE(gtod->wall_time_sec);
+ time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec);
if (t)
*t = result;
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
@@ -31,6 +31,8 @@ void update_vsyscall(struct timekeeper *
{
int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
+ struct vgtod_ts *base;
+ u64 nsec;
/* Mark the new vclock used. */
BUILD_BUG_ON(VCLOCK_MAX >= 32);
@@ -45,34 +47,37 @@ void update_vsyscall(struct timekeeper *
vdata->mult = tk->tkr_mono.mult;
vdata->shift = tk->tkr_mono.shift;
- vdata->wall_time_sec = tk->xtime_sec;
- vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec;
-
- vdata->monotonic_time_sec = tk->xtime_sec
- + tk->wall_to_monotonic.tv_sec;
- vdata->monotonic_time_snsec = tk->tkr_mono.xtime_nsec
- + ((u64)tk->wall_to_monotonic.tv_nsec
- << tk->tkr_mono.shift);
- while (vdata->monotonic_time_snsec >=
- (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
- vdata->monotonic_time_snsec -=
- ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
- vdata->monotonic_time_sec++;
+ base = &vdata->basetime[CLOCK_REALTIME];
+ base->sec = (gtod_long_t)tk->xtime_sec;
+ base->nsec = tk->tkr_mono.xtime_nsec;
+
+ base = &vdata->basetime[VGTOD_TAI];
+ base->sec = (gtod_long_t)(tk->xtime_sec + (s64)tk->tai_offset);
+ base->nsec = tk->tkr_mono.xtime_nsec;
+
+ base = &vdata->basetime[CLOCK_MONOTONIC];
+ base->sec = (gtod_long_t)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
+ nsec = tk->tkr_mono.xtime_nsec;
+ nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
+ while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
+ nsec -= ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
+ base->sec++;
}
+ base->nsec = nsec;
- vdata->wall_time_coarse_sec = tk->xtime_sec;
- vdata->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
- tk->tkr_mono.shift);
-
- vdata->monotonic_time_coarse_sec =
- vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
- vdata->monotonic_time_coarse_nsec =
- vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
-
- while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) {
- vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC;
- vdata->monotonic_time_coarse_sec++;
+ base = &vdata->basetime[CLOCK_REALTIME_COARSE];
+ base->sec = (gtod_long_t)tk->xtime_sec;
+ base->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+
+ base = &vdata->basetime[CLOCK_MONOTONIC_COARSE];
+ base->sec = (gtod_long_t)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
+ nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+ nsec += tk->wall_to_monotonic.tv_nsec;
+ while (nsec >= NSEC_PER_SEC) {
+ nsec -= NSEC_PER_SEC;
+ base->sec++;
}
+ base->nsec = nsec;
gtod_write_end(vdata);
}
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -5,33 +5,41 @@
#include <linux/compiler.h>
#include <linux/clocksource.h>
+#include <uapi/linux/time.h>
+
#ifdef BUILD_VDSO32_64
typedef u64 gtod_long_t;
#else
typedef unsigned long gtod_long_t;
#endif
+
+struct vgtod_ts {
+ gtod_long_t sec;
+ u64 nsec;
+};
+
+#define VGTOD_BASES (CLOCK_MONOTONIC_COARSE + 1)
+#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC) | BIT(CLOCK_TAI))
+#define VGTOD_COARSE (BIT(CLOCK_REALTIME_COARSE) | BIT(CLOCK_MONOTONIC_COARSE))
+
+/* Abuse CLOCK_THREAD_CPUTIME_ID for VGTOD CLOCK TAI */
+#define VGTOD_HRES_MASK 0x3
+#define VGTOD_TAI (CLOCK_TAI & VGTOD_HRES_MASK)
+
/*
* vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
* so be carefull by modifying this structure.
*/
struct vsyscall_gtod_data {
- unsigned seq;
+ unsigned int seq;
+
+ int vclock_mode;
+ u64 cycle_last;
+ u64 mask;
+ u32 mult;
+ u32 shift;
- int vclock_mode;
- u64 cycle_last;
- u64 mask;
- u32 mult;
- u32 shift;
-
- /* open coded 'struct timespec' */
- u64 wall_time_snsec;
- gtod_long_t wall_time_sec;
- gtod_long_t monotonic_time_sec;
- u64 monotonic_time_snsec;
- gtod_long_t wall_time_coarse_sec;
- gtod_long_t wall_time_coarse_nsec;
- gtod_long_t monotonic_time_coarse_sec;
- gtod_long_t monotonic_time_coarse_nsec;
+ struct vgtod_ts basetime[VGTOD_BASES];
int tz_minuteswest;
int tz_dsttime;
@@ -44,9 +52,9 @@ static inline bool vclock_was_used(int v
return READ_ONCE(vclocks_used) & (1 << vclock);
}
-static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
+static inline unsigned int gtod_read_begin(const struct vsyscall_gtod_data *s)
{
- unsigned ret;
+ unsigned int ret;
repeat:
ret = READ_ONCE(s->seq);