Re: [PATCH] timekeeping/vsyscall: Prevent math overflow in BOOTTIME update
From: Vincenzo Frascino
Date: Thu Aug 22 2019 - 12:05:05 EST
Hi Thomas,
On 22/08/2019 13:52, Chris Clayton wrote:
> Thanks Thomas.
>
> On 22/08/2019 12:00, Thomas Gleixner wrote:
>> The VDSO update for CLOCK_BOOTTIME has an overflow issue as it shifts the
>> nanoseconds based boot time offset left by the clocksource shift. That
>> overflows once the boot time offset becomes large enough. As a consequence
>> CLOCK_BOOTTIME in the VDSO becomes a random number causing applications to
>> misbehave.
>>
>> Fix it by storing a timespec64 representation of the offset when boot time
>> is adjusted and add that to the MONOTONIC base time value in the vdso data
>> page. Using the timespec64 representation avoids a 64bit division in the
>> update code.
>>
>
> I've tested resume from both suspend and hibernate and this patch fixes the problem I reported.
>
> Tested-by: Chris Clayton <chris2553@xxxxxxxxxxxxxx>
>
I can confirm what was reported by Chris. Please see below the scissors.
With this:
Tested-by: Vincenzo Frascino <vincenzo.frascino@xxxxxxx>
--->8---
Clock test start
clk_id: CLOCK_BOOTTIME
clock_getres: 0 1
clock_gettime:2697 489679147
2019-08-22 16:21:57.911
Clock test end
<...Suspend/Resume...>
Clock test start
clk_id: CLOCK_BOOTTIME
clock_getres: 0 1
clock_gettime:4489 684341925
2019-08-22 16:51:50.106
Clock test end
>> Fixes: 44f57d788e7d ("timekeeping: Provide a generic update_vsyscall() implementation")
>> Reported-by: Chris Clayton <chris2553@xxxxxxxxxxxxxx>
>> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
>> ---
>> include/linux/timekeeper_internal.h | 5 +++++
>> kernel/time/timekeeping.c | 5 +++++
>> kernel/time/vsyscall.c | 22 +++++++++++++---------
>> 3 files changed, 23 insertions(+), 9 deletions(-)
>>
>> --- a/include/linux/timekeeper_internal.h
>> +++ b/include/linux/timekeeper_internal.h
>> @@ -57,6 +57,7 @@ struct tk_read_base {
>> * @cs_was_changed_seq: The sequence number of clocksource change events
>> * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
>> * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
>> + * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
>> * @cycle_interval: Number of clock cycles in one NTP interval
>> * @xtime_interval: Number of clock shifted nano seconds in one NTP
>> * interval.
>> @@ -84,6 +85,9 @@ struct tk_read_base {
>> *
>> * wall_to_monotonic is no longer the boot time, getboottime must be
>> * used instead.
>> + *
>> + * @monotonic_to_boot is a timespec64 representation of @offs_boot to
>> + * accelerate the VDSO update for CLOCK_BOOTTIME.
>> */
>> struct timekeeper {
>> struct tk_read_base tkr_mono;
>> @@ -99,6 +103,7 @@ struct timekeeper {
>> u8 cs_was_changed_seq;
>> ktime_t next_leap_ktime;
>> u64 raw_sec;
>> + struct timespec64 monotonic_to_boot;
>>
>> /* The following members are for timekeeping internal use */
>> u64 cycle_interval;
>> --- a/kernel/time/timekeeping.c
>> +++ b/kernel/time/timekeeping.c
>> @@ -146,6 +146,11 @@ static void tk_set_wall_to_mono(struct t
>> static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
>> {
>> tk->offs_boot = ktime_add(tk->offs_boot, delta);
>> + /*
>> + * Timespec representation for VDSO update to avoid 64bit division
>> + * on every update.
>> + */
>> + tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);
>> }
>>
>> /*
>> --- a/kernel/time/vsyscall.c
>> +++ b/kernel/time/vsyscall.c
>> @@ -17,7 +17,7 @@ static inline void update_vdso_data(stru
>> struct timekeeper *tk)
>> {
>> struct vdso_timestamp *vdso_ts;
>> - u64 nsec;
>> + u64 nsec, sec;
>>
>> vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last;
>> vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask;
>> @@ -45,23 +45,27 @@ static inline void update_vdso_data(stru
>> }
>> vdso_ts->nsec = nsec;
>>
>> - /* CLOCK_MONOTONIC_RAW */
>> - vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
>> - vdso_ts->sec = tk->raw_sec;
>> - vdso_ts->nsec = tk->tkr_raw.xtime_nsec;
>> + /* Copy MONOTONIC time for BOOTTIME */
>> + sec = vdso_ts->sec;
>> + /* Add the boot offset */
>> + sec += tk->monotonic_to_boot.tv_sec;
>> + nsec += (u64)tk->monotonic_to_boot.tv_nsec << tk->tkr_mono.shift;
>>
>> /* CLOCK_BOOTTIME */
>> vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME];
>> - vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
>> - nsec = tk->tkr_mono.xtime_nsec;
>> - nsec += ((u64)(tk->wall_to_monotonic.tv_nsec +
>> - ktime_to_ns(tk->offs_boot)) << tk->tkr_mono.shift);
>> + vdso_ts->sec = sec;
>> +
>> while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
>> nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
>> vdso_ts->sec++;
>> }
>> vdso_ts->nsec = nsec;
>>
>> + /* CLOCK_MONOTONIC_RAW */
>> + vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
>> + vdso_ts->sec = tk->raw_sec;
>> + vdso_ts->nsec = tk->tkr_raw.xtime_nsec;
>> +
>> /* CLOCK_TAI */
>> vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI];
>> vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset;
>>
--
Regards,
Vincenzo