Re: [PATCH 2/4] time: add a notifier chain for when the system time is stepped
From: Thomas Gleixner
Date: Mon Jun 24 2013 - 12:30:33 EST
On Mon, 24 Jun 2013, David Vrabel wrote:
> On 22/06/13 00:06, Thomas Gleixner wrote:
> This patch set is fixing the rare case where a guest is started before
> NTP has synced and thus sees an incorrect wallclock time which may cause
> the guest to fail to boot.
You're not fixing it, you are just making the window smaller.
clock_was_set() is called outside of the timekeeper_lock-protected
regions, so what prevents the guest from starting before the notifier is
invoked?
We already have a synchronous notifier in place and the notifier call
itself is not expensive. What's expensive is the hypercall and there
is no way at the moment to figure out whether the update is relevant
for you or just a tick. Though that's trivial information to provide
without imposing another notifier including the surrounding mess on
the core code.
Completely untested patch below.
Thanks,
tglx
---
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index baeeb5c..6e9f838 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -200,9 +200,9 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
-static void update_pvclock_gtod(struct timekeeper *tk)
+static void update_pvclock_gtod(struct timekeeper *tk, bool cws)
{
- raw_notifier_call_chain(&pvclock_gtod_chain, 0, tk);
+ raw_notifier_call_chain(&pvclock_gtod_chain, cws, tk);
}
/**
@@ -216,7 +216,7 @@ int pvclock_gtod_register_notifier(struct notifier_block *nb)
raw_spin_lock_irqsave(&timekeeper_lock, flags);
ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
- update_pvclock_gtod(tk);
+ update_pvclock_gtod(tk, true);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
return ret;
@@ -241,14 +241,15 @@ int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
/* must hold timekeeper_lock */
-static void timekeeping_update(struct timekeeper *tk, bool clearntp, bool mirror)
+static void timekeeping_update(struct timekeeper *tk, bool clearntp,
+ bool mirror, bool cws)
{
if (clearntp) {
tk->ntp_error = 0;
ntp_clear();
}
update_vsyscall(tk);
- update_pvclock_gtod(tk);
+ update_pvclock_gtod(tk, cws);
if (mirror)
memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
@@ -508,7 +509,7 @@ int do_settimeofday(const struct timespec *tv)
tk_set_xtime(tk, tv);
- timekeeping_update(tk, true, true);
+ timekeeping_update(tk, true, true, true);
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -552,7 +553,7 @@ int timekeeping_inject_offset(struct timespec *ts)
tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
error: /* even if we error out, we forwarded the time, so call update */
- timekeeping_update(tk, true, true);
+ timekeeping_update(tk, true, true, true);
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -633,7 +634,7 @@ static int change_clocksource(void *data)
if (old->disable)
old->disable(old);
}
- timekeeping_update(tk, true, true);
+ timekeeping_update(tk, true, true, true);
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -872,7 +873,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
__timekeeping_inject_sleeptime(tk, delta);
- timekeeping_update(tk, true, true);
+ timekeeping_update(tk, true, true, true);
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -954,7 +955,7 @@ static void timekeeping_resume(void)
tk->cycle_last = clock->cycle_last = cycle_now;
tk->ntp_error = 0;
timekeeping_suspended = 0;
- timekeeping_update(tk, false, true);
+ timekeeping_update(tk, false, true, true);
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -1236,9 +1237,10 @@ out_adjust:
* It also calls into the NTP code to handle leapsecond processing.
*
*/
-static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
+static inline bool accumulate_nsecs_to_secs(struct timekeeper *tk)
{
u64 nsecps = (u64)NSEC_PER_SEC << tk->shift;
+ bool ret = false;
while (tk->xtime_nsec >= nsecps) {
int leap;
@@ -1261,8 +1263,10 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
__timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
clock_was_set_delayed();
+ ret = true;
}
}
+ return ret;
}
/**
@@ -1348,6 +1352,7 @@ static void update_wall_time(void)
cycle_t offset;
int shift = 0, maxshift;
unsigned long flags;
+ bool cws;
raw_spin_lock_irqsave(&timekeeper_lock, flags);
@@ -1399,7 +1404,7 @@ static void update_wall_time(void)
* Finally, make sure that after the rounding
* xtime_nsec isn't larger than NSEC_PER_SEC
*/
- accumulate_nsecs_to_secs(tk);
+ cws = accumulate_nsecs_to_secs(tk);
write_seqcount_begin(&timekeeper_seq);
/* Update clock->cycle_last with the new value */
@@ -1415,7 +1420,7 @@ static void update_wall_time(void)
* updating.
*/
memcpy(real_tk, tk, sizeof(*tk));
- timekeeping_update(real_tk, false, false);
+ timekeeping_update(real_tk, false, false, cws);
write_seqcount_end(&timekeeper_seq);
out:
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -1677,6 +1682,7 @@ int do_adjtimex(struct timex *txc)
if (tai != orig_tai) {
__timekeeping_set_tai_offset(tk, tai);
+ update_pvclock_gtod(tk, true);
clock_was_set_delayed();
}
write_seqcount_end(&timekeeper_seq);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/