[PATCH 15/16] time: Update timekeeper structure using a local shadow

From: John Stultz
Date: Mon Nov 14 2011 - 23:05:03 EST


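Use a local shadow copy of the timekeeper structure in
update_wall_time(): snapshot the timekeeper under timekeeper.lock,
drop the lock while accumulating and adjusting the copy, then retake
the lock and copy the result back. This reduces the timekeeper.lock
hold time.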

WARNING: This introduces a race, since timekeeper.lock is dropped
between taking the snapshot and writing the result back. The window
might be provably so small as to not be observable, but this patch
needs lots more math and comments to validate that assumption.

CC: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CC: Eric Dumazet <eric.dumazet@xxxxxxxxx>
CC: Richard Cochran <richardcochran@xxxxxxxxx>
Signed-off-by: John Stultz <john.stultz@xxxxxxxxxx>
---
kernel/time/timekeeping.c | 45 +++++++++++++++++++++++++--------------------
1 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 7870a0e..ba595a3 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -940,6 +940,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
static void update_wall_time(void)
{
struct clocksource *clock;
+ struct timekeeper tk;
cycle_t offset;
int shift = 0, maxshift;
unsigned long flags;
@@ -951,10 +952,13 @@ static void update_wall_time(void)
if (unlikely(timekeeping_suspended))
goto out;

- clock = timekeeper.clock;
+ tk = timekeeper;
+ write_sequnlock_irqrestore(&timekeeper.lock, flags);
+
+ clock = tk.clock;

#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
- offset = timekeeper.cycle_interval;
+ offset = tk.cycle_interval;
#else
offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
#endif
@@ -967,19 +971,19 @@ static void update_wall_time(void)
* chunk in one go, and then try to consume the next smaller
* doubled multiple.
*/
- shift = ilog2(offset) - ilog2(timekeeper.cycle_interval);
+ shift = ilog2(offset) - ilog2(tk.cycle_interval);
shift = max(0, shift);
/* Bound shift to one less then what overflows tick_length */
maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
shift = min(shift, maxshift);
- while (offset >= timekeeper.cycle_interval) {
- offset = logarithmic_accumulation(&timekeeper, offset, shift);
- if(offset < timekeeper.cycle_interval<<shift)
+ while (offset >= tk.cycle_interval) {
+ offset = logarithmic_accumulation(&tk, offset, shift);
+ if(offset < tk.cycle_interval<<shift)
shift--;
}

/* correct the clock when NTP error is too big */
- timekeeping_adjust(&timekeeper, offset);
+ timekeeping_adjust(&tk, offset);

/*
* Since in the loop above, we accumulate any amount of time
@@ -997,10 +1001,10 @@ static void update_wall_time(void)
* We'll correct this error next time through this function, when
* xtime_nsec is not as small.
*/
- if (unlikely((s64)timekeeper.xtime_nsec < 0)) {
- s64 neg = -(s64)timekeeper.xtime_nsec;
- timekeeper.xtime_nsec = 0;
- timekeeper.ntp_error += neg << timekeeper.ntp_error_shift;
+ if (unlikely((s64)tk.xtime_nsec < 0)) {
+ s64 neg = -(s64)tk.xtime_nsec;
+ tk.xtime_nsec = 0;
+ tk.ntp_error += neg << tk.ntp_error_shift;
}

/*
@@ -1012,23 +1016,24 @@ static void update_wall_time(void)
* the vsyscall implementations are converted to use xtime_nsec
* (shifted nanoseconds), this can be killed.
*/
- remainder = timekeeper.xtime_nsec & ((1<<timekeeper.shift)-1);
- timekeeper.xtime_nsec -= remainder;
- timekeeper.xtime_nsec += 1<<timekeeper.shift;
- timekeeper.ntp_error += remainder <<
- timekeeper.ntp_error_shift;
+ remainder = tk.xtime_nsec & ((1<<tk.shift)-1);
+ tk.xtime_nsec -= remainder;
+ tk.xtime_nsec += 1<<tk.shift;
+ tk.ntp_error += remainder << tk.ntp_error_shift;

/*
* Finally, make sure that after the rounding
* xtime.tv_nsec isn't larger then NSEC_PER_SEC
*/
- if (unlikely(timekeeper.xtime_nsec >=
- (NSEC_PER_SEC << timekeeper.shift))) {
- timekeeper.xtime_nsec -= NSEC_PER_SEC << timekeeper.shift;
- timekeeper.xtime_sec++;
+ if (unlikely(tk.xtime_nsec >= (NSEC_PER_SEC << tk.shift))) {
+ tk.xtime_nsec -= NSEC_PER_SEC << tk.shift;
+ tk.xtime_sec++;
second_overflow();
}

+ write_seqlock_irqsave(&timekeeper.lock, flags);
+
+ timekeeper = tk;
timekeeping_update(&timekeeper, false);

out:
--
1.7.3.2.146.gca209

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/