[RFC][PATCH] Experimental enhanced NTP error accounting patch

From: john stultz
Date: Fri Mar 03 2006 - 23:55:05 EST


Hey Roman,
I'm sure you've got a number of things going on, but since I didn't
hear anything back from you last time I posted this, I figured I'd try
again.

Here is my first pass implementation of your suggested enhanced NTP
error accounting for the generic timekeeping code.

If you could get a chance to review it and let me know if it addresses
the issues you are concerned about, I would really appreciate it.

Currently it is a bit ugly and I've noted in the comments what I'm
concerned with, but hopefully we can find a way to clean it up a bit.

The patch applies against 2.6.16-rc5-mm2.

Again, I really appreciate the time you've taken in reviewing my patches
and explaining your ideas.

thanks
-john


Signed-off-by: John Stultz <johnstul@xxxxxxxxxx>

Documentation/timekeeping.txt | 22 +---
include/asm-generic/timeofday.h | 5 -
include/linux/clocksource.h | 195 +++++++++++++++++++---------------------
kernel/time/timeofday.c | 140 ++++++++++++++--------------
4 files changed, 173 insertions(+), 189 deletions(-)

linux-2.6.16-rc5_timeofday-ntp-error-fix_B20.patch
============================================
diff --git a/Documentation/timekeeping.txt b/Documentation/timekeeping.txt
index 23a5d9d..4ac91b2 100644
--- a/Documentation/timekeeping.txt
+++ b/Documentation/timekeeping.txt
@@ -37,12 +37,6 @@ this value in cycle_last.
cycle_t cycle_last;


-Further since all clocks drift somewhat from each other, we use the adjustment
-values provided via adjtimex() to correct our clocksource frequency for each
-interval. This frequency adjustment value is stored in ntp_adj.
-
-long ntp_adj;
-
Now that we've covered the core global variables for timekeeping, lets look at
how we maintain these values.

@@ -54,7 +48,7 @@ presented as:
timeofday_periodic_hook():
cycle_now = read_clocksource(clock)
cycle_delta = (cycle_now - cycle_last) & clock->mask
- nsec = cyc2ns(clock, cycle_delta, ntp_adj)
+ nsec = cyc2ns(clock, cycle_delta)
system_time += nsec
cycle_last = cycle_now

@@ -62,10 +56,9 @@ timeofday_periodic_hook():

You can see we read the cycle value from the clocksource, calculate a cycle
delta for the interval since we last called timeofday_periodic_hook(), convert
-that cycle delta to a nanosecond interval (for now ignore ntp_adj), add it to
-the system time and finally set our cycle_last value to cycle_now for the next
-interval. Using this simple algorithm we can correctly measure and record the
-passing of time.
+that cycle delta to a nanosecond interval, add it to the system time and finally
+set our cycle_last value to cycle_now for the next interval. Using this simple
+algorithm we can correctly measure and record the passing of time.

But just storing this info isn't very useful, we also want to make it available
to be used elsewhere. So how do we provide a notion of how much time has passed
@@ -77,7 +70,7 @@ timeofday_peridoic_hook().
get_nsec_offset():
cycle_now = read_clocksource(clock)
cycle_delta = (cycle_now - cycle_last) & clock->mask
- nsec = cyc2ns(clock, cycle_delta, ntp_adj)
+ nsec = cyc2ns(clock, cycle_delta)
return nsec

Here you can see, we read the clocksource, calculate a cycle interval, and
@@ -119,14 +112,14 @@ at a safe point, we use the update_callb
see "How to write a clocksource driver" below), this too must be done
in-between intervals in the periodic_hook call. Finally, since the ntp
adjustment made in the cyc2ns conversion is not static, we need to update the
-ntp state machine and get a calculate a new adjustment value.
+ntp state machine and adjust the clocksource mult value.

This adds some extra pseudo code to the timeofday_periodic_hook function:

timeofday_periodic_hook():
cycle_now = read_clocksource(clock)
cycle_delta = (cycle_now - cycle_last) & clock->mask
- nsec = cyc2ns(clock, cycle_delta, ntp_adj)
+ nsec = cyc2ns(clock, cycle_delta)
system_time += nsec
cycle_last = cycle_now

@@ -140,7 +133,6 @@ timeofday_periodic_hook():

ntp_advance(nsec)
ppm = ntp_get_ppm_adjustment()
- ntp_adj = ppm_to_mult_adj(clock, ppm)


Unfortunately, the actual timeofday_periodic_hook code is not as simple as this
diff --git a/include/asm-generic/timeofday.h b/include/asm-generic/timeofday.h
index 79b952f..ddcae35 100644
--- a/include/asm-generic/timeofday.h
+++ b/include/asm-generic/timeofday.h
@@ -20,10 +20,9 @@ extern void sync_persistent_clock(struct

#ifdef CONFIG_GENERIC_TIME_VSYSCALL
extern void arch_update_vsyscall_gtod(struct timespec wall_time,
- cycle_t offset_base, struct clocksource* clock,
- int ntp_adj);
+ cycle_t offset_base, struct clocksource* clock);
#else
-# define arch_update_vsyscall_gtod(x,y,z,w) do { } while(0)
+# define arch_update_vsyscall_gtod(x,y,z) do { } while(0)
#endif /* CONFIG_GENERIC_TIME_VSYSCALL */

#endif /* CONFIG_GENERIC_TIME */
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index bfd61a2..0c5acf8 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -48,6 +48,8 @@ typedef u64 cycle_t;
* @is_continuous: defines if clocksource is free-running.
* @vread: vsyscall read function
* @vdata: vsyscall data value passed to read function
+ * @interval_cycles: Used internally by timekeeping core, please ignore.
+ * @interval_snsecs: Used internally by timekeeping core, please ignore.
*/
struct clocksource {
char *name;
@@ -61,6 +63,10 @@ struct clocksource {
int is_continuous;
cycle_t (*vread)(void *);
void *vdata;
+
+ /* timekeeping specific data, ignore */
+ cycle_t interval_cycles;
+ u64 interval_snsecs;
};


@@ -127,58 +133,18 @@ static inline cycle_t read_clocksource(s
}

/**
- * ppm_to_mult_adj - Converts shifted ppm values to mult adjustment
- * @cs: Pointer to clocksource
- * @ppm: Shifted PPM value
- *
- * Helper which converts a shifted ppm value to clocksource mult_adj value.
- *
- * XXX - this could use some optimization
- */
-static inline int ppm_to_mult_adj(struct clocksource *cs, int ppm)
-{
- u64 mult_adj;
- int ret_adj;
-
- /* The basic math is as follows:
- * cyc * mult/2^shift * (1 + ppm/MILL) = scaled ns
- * We want to precalculate the ppm factor so it can be added
- * to the multiplyer saving the extra multiplication step.
- * cyc * (mult/2^shift + (mult/2^shift) * (ppm/MILL)) =
- * cyc * (mult/2^shift + (mult*ppm/MILL)/2^shift) =
- * cyc * (mult + (mult*ppm/MILL))/2^shift =
- * Thus we want to calculate the value of:
- * mult*ppm/MILL
- */
- mult_adj = abs(ppm);
- mult_adj = (mult_adj * cs->mult)>>SHIFT_USEC;
- mult_adj += 1000000/2; /* round for div*/
- do_div(mult_adj, 1000000);
- if (ppm < 0)
- ret_adj = -(int)mult_adj;
- else
- ret_adj = (int)mult_adj;
-
- return ret_adj;
-}
-
-/**
* cyc2ns - converts clocksource cycles to nanoseconds
* @cs: Pointer to clocksource
- * @ntp_adj: Multiplier adjustment value
* @cycles: Cycles
*
* Uses the clocksource and ntp ajdustment to convert cycle_ts to nanoseconds.
*
* XXX - This could use some mult_lxl_ll() asm optimization
*/
-static inline s64 cyc2ns(struct clocksource *cs, int ntp_adj, cycle_t cycles)
+static inline s64 cyc2ns(struct clocksource *cs, cycle_t cycles)
{
- u64 ret = cycles;
-
- ret *= (cs->mult + ntp_adj);
- ret >>= cs->shift;
-
+ u64 ret = (u64)cycles;
+ ret = (ret * cs->mult) >> cs->shift;
return ret;
}

@@ -195,12 +161,10 @@ static inline s64 cyc2ns(struct clocksou
*
* XXX - This could use some mult_lxl_ll() asm optimization.
*/
-static inline s64 cyc2ns_rem(struct clocksource *cs, int ntp_adj,
- cycle_t cycles, u64* rem)
+static inline s64 cyc2ns_rem(struct clocksource *cs, cycle_t cycles,
+ u64* rem)
{
- u64 ret = cycles;
-
- ret *= (cs->mult + ntp_adj);
+ u64 ret = (u64)cycles * cs->mult;
if (rem) {
ret += *rem;
*rem = ret & ((1<<cs->shift)-1);
@@ -212,27 +176,6 @@ static inline s64 cyc2ns_rem(struct cloc


/**
- * struct clocksource_interval - Fixed interval conversion structure
- *
- * @cycles: A specified number of cycles
- * @nsecs: The number of nanoseconds equivalent to the cycles value
- * @remainder: Non-integer nanosecond remainder stored in (ns<<cs->shift) units
- * @remainder_ns_overflow: Value at which the remainder is equal to
- * one second
- *
- * This is a optimization structure used by cyc2ns_fixed_rem() to avoid the
- * multiply in cyc2ns().
- *
- * Unless you're the timeofday_periodic_hook, you should not be using this!
- */
-struct clocksource_interval {
- cycle_t cycles;
- s64 nsecs;
- u64 remainder;
- u64 remainder_ns_overflow;
-};
-
-/**
* calculate_clocksource_interval - Calculates a clocksource interval struct
*
* @c: Pointer to clocksource.
@@ -244,61 +187,113 @@ struct clocksource_interval {
*
* Unless you're the timeofday_periodic_hook, you should not be using this!
*/
-static inline struct clocksource_interval
-calculate_clocksource_interval(struct clocksource *c, long adj,
- unsigned long length_nsec)
+static inline void calculate_clocksource_interval(struct clocksource *c,
+ unsigned long length_nsec)
{
- struct clocksource_interval ret;
u64 tmp;

/* XXX - All of this could use a whole lot of optimization */
tmp = length_nsec;
tmp <<= c->shift;
- do_div(tmp, c->mult+adj);
+ tmp += c->mult/2;
+ do_div(tmp, c->mult);
+
+ c->interval_cycles = (cycle_t)tmp;
+ if(c->interval_cycles == 0)
+ c->interval_cycles = 1;

- ret.cycles = (cycle_t)tmp;
- if(ret.cycles == 0)
- ret.cycles = 1;
-
- ret.remainder = 0;
- ret.remainder_ns_overflow = 1 << c->shift;
- ret.nsecs = cyc2ns_rem(c, adj, ret.cycles, &ret.remainder);
+ c->interval_snsecs = (u64)c->interval_cycles * c->mult;

+ printk("requested: %lu calculated %llu ns %llu cyc error: %lld\n", length_nsec, c->interval_snsecs>>c->shift, c->interval_cycles, (s64)length_nsec - (c->interval_snsecs>>c->shift) );
+}
+
+
+static inline s64 snsec2nsec_rem(u64 snsec, u32 shift, u64* rem)
+{
+ s64 ret = snsec >> shift;
+ if(rem)
+ *rem = snsec & ((1<<shift)-1);
return ret;
}

/**
- * cyc2ns_fixed_rem -
- * converts clocksource cycles to nanoseconds using fixed intervals
+ * cyc2sns_fixed_error-
+ * converts clocksource cycles to shifted nanoseconds using fixed intervals
*
- * @interval: precalculated clocksource_interval structure
- * @cycles: Number of clocksource cycles
- * @rem: Remainder
+ * @clock: Current clocksource
+ * @cycles: Number of clocksource cycles to accumulate
+ * @ntp_interval: length of the NTP adjusted interval (in shifted nsecs)
+ * @error: pointer to error accumulation variable
*
* Uses a precalculated fixed cycle/nsec interval to convert cycles to
- * nanoseconds. Returns the unaccumulated cycles in the cycles pointer as
- * well as uses and updates the value at the remainder pointer
+ * shifted nanoseconds. Return the unaccumulated cycles in the cycles
+ * pointer, and the error accumulated in the error pointer.
*
* Unless you're the timeofday_periodic_hook, you should not be using this!
*/
-static inline s64 cyc2ns_fixed_rem(struct clocksource_interval interval,
- cycle_t *cycles, u64* rem)
+/* XXX - 4 values in and 3 values out? Terrible! */
+static inline u64 cyc2sns_fixed_error(struct clocksource *clock,
+ cycle_t *cycles, u64 ntp_interval, s64* error)
+{
+ u64 delta_snsec = 0;
+ s64 interval_error = (s64)ntp_interval - clock->interval_snsecs;
+
+ /* accumulate cycles*/
+ while (*cycles > clock->interval_cycles) {
+ *cycles -= clock->interval_cycles;
+ delta_snsec += clock->interval_snsecs;
+ *error += interval_error;
+ }
+
+ return delta_snsec;
+}
+
+/* XXX - This needs a comment */
+static inline int error_aproximation(u64 error, u64 unit)
{
- s64 delta_nsec = 0;
+ static int saved_adj = 0;
+ u64 adjusted_unit = unit << saved_adj;

- while (*cycles > interval.cycles) {
- delta_nsec += interval.nsecs;
- *cycles -= interval.cycles;
- *rem += interval.remainder;
- while(*rem > interval.remainder_ns_overflow) {
- *rem -= interval.remainder_ns_overflow;
- delta_nsec += 1;
- }
+ if (error > (adjusted_unit * 2)) {
+ /* large error, so increment the adjustment factor */
+ saved_adj++;
+ } else if (error > adjusted_unit) {
+ /* just right, don't touch it */
+ } else if (saved_adj) {
+ /* small error, so drop the adjustment factor */
+ saved_adj--;
+ return 0;
}

- return delta_nsec;
+ return saved_adj;
+}
+
+static inline s64 make_ntp_adj(struct clocksource *clock,
+ cycles_t cycles_delta, s64* error)
+{
+ s64 ret = 0;
+ /* check NTP error */
+ if (*error > (s64)clock->interval_cycles / 2 ) {
+ int adj_factor = error_aproximation(*error,
+ clock->interval_cycles);
+ clock->mult += 1 << adj_factor;
+ clock->interval_snsecs += clock->interval_cycles << adj_factor;
+ ret = -(cycles_delta << adj_factor);
+ *error -= (clock->interval_cycles << adj_factor);
+ /* XXX adj error for cycle_delta offset? */
+ } else if ((-(*error)) > (s64)clock->interval_cycles/2) {
+ int adj_factor = error_aproximation(-(*error),
+ clock->interval_cycles);
+ clock->mult -= 1 << adj_factor;
+ clock->interval_snsecs -= clock->interval_cycles << adj_factor;
+ ret = cycles_delta << adj_factor;
+ *error += (clock->interval_cycles << adj_factor);
+ /* XXX adj error for cycle_delta offset? */
+ }
+ return ret;
}

+
/* used to install a new clocksource */
int register_clocksource(struct clocksource*);
void reselect_clocksource(void);
diff --git a/kernel/time/timeofday.c b/kernel/time/timeofday.c
index 8085b86..9b960d8 100644
--- a/kernel/time/timeofday.c
+++ b/kernel/time/timeofday.c
@@ -39,6 +39,13 @@
/* Periodic hook interval */
#define PERIODIC_INTERVAL_MS 50

+/*
+ * INTERVAL_LEN:
+ * This constant is the requested fixed interval period
+ * in nanoseconds.
+ */
+#define INTERVAL_LEN ((PERIODIC_INTERVAL_MS)*1000000)
+
/* [ktime_t based variables]
* system_time:
* Monotonically increasing counter of the number of nanoseconds
@@ -72,30 +79,12 @@ static struct timespec monotonic_time_of
*/
static cycle_t cycle_last;

-/* [clocksource_interval variables]
- * ts_interval:
- * This clocksource_interval is used in the fixed interval
- * cycles to nanosecond calculation.
- * INTERVAL_LEN:
- * This constant is the requested fixed interval period
- * in nanoseconds.
- */
-static struct clocksource_interval ts_interval;
-#define INTERVAL_LEN ((PERIODIC_INTERVAL_MS-1)*1000000)
-
/* [clocksource data]
* clock:
* current clocksource pointer
*/
static struct clocksource *clock;

-/* [NTP adjustment]
- * ntp_adj:
- * value of the current ntp adjustment, stored in
- * clocksource multiplier units.
- */
-static int ntp_adj;
-
/* [locks]
* system_time_lock:
* generic lock for all locally scoped time values
@@ -145,7 +134,7 @@ static void update_legacy_time_values(vo
write_sequnlock_irqrestore(&xtime_lock, flags);

/* since time state has changed, notify vsyscall code */
- arch_update_vsyscall_gtod(wall_time_ts, cycle_last, clock, ntp_adj);
+ arch_update_vsyscall_gtod(wall_time_ts, cycle_last, clock);
}

/**
@@ -167,7 +156,7 @@ static inline s64 __get_nsec_offset(void
cycle_delta = (cycle_now - cycle_last) & clock->mask;

/* convert to nanoseconds: */
- ns_offset = cyc2ns(clock, ntp_adj, cycle_delta);
+ ns_offset = cyc2ns(clock, cycle_delta);

/*
* special case for jiffies tick/offset based systems,
@@ -499,6 +488,7 @@ static int timeofday_init_device(void)

device_initcall(timeofday_init_device);

+
/**
* timeofday_periodic_hook - Does periodic update of timekeeping values.
* @unused: unused value
@@ -514,30 +504,57 @@ static void timeofday_periodic_hook(unsi

cycle_t cycle_now, cycle_delta;
s64 delta_nsec;
- static u64 remainder;

- long leapsecond = 0;
- struct clocksource* next;
+ /* nanoseconds left-shifted by clock->shift */
+ static u64 shifted_nsecs; /* works as a remainder */
+ static s64 ntp_error; /* Error accumulator */
+ s64 ntp_interval; /* interval length from ntp */

+ static s64 second_check;
+ long leapsecond = 0;
int ppm;
- static int ppm_last;

- int something_changed = 0;
+ struct clocksource* next;
struct clocksource old_clock;
- static s64 second_check;

write_seqlock_irqsave(&system_time_lock, flags);
-
- /* read time source & calc time since last call: */
+/**** Accumulation chunk *****/
+ /* read time source & calc cycle delta since last call: */
cycle_now = read_clocksource(clock);
cycle_delta = (cycle_now - cycle_last) & clock->mask;

- delta_nsec = cyc2ns_fixed_rem(ts_interval, &cycle_delta, &remainder);
+
+ /* Calculate ntp_inteval in clock-shifted nanoseconds
+ * XXX - This should all go away w/ Roman's NTP bits
+ */
+ ppm = ntp_get_ppm_adjustment(); /* ppm = shifted usec per sec */
+ ntp_interval = ((s64)INTERVAL_LEN * ppm);
+ /* convert to clock-shifted nanoseconds */
+ ntp_interval = shift_right(ntp_interval,
+ (SHIFT_USEC + 20 - clock->shift));
+ ntp_interval = (((s64)INTERVAL_LEN)<<clock->shift) + ntp_interval;
+
+ /* convert cycle_delta to shifted nanoseconds using fixed intervals */
+ /* XXX - ugh, way too much state going in and out of that function! */
+ shifted_nsecs += cyc2sns_fixed_error(clock, &cycle_delta, ntp_interval,
+ &ntp_error);
+
+ /* add accumulated cycles to cycle_last */
cycle_last = (cycle_now - cycle_delta)&clock->mask;

+ /* check for large NTP error and adjust clock->mult if needed */
+ /* XXX - This has sideeffects, can we make that more clear? */
+ shifted_nsecs += make_ntp_adj(clock, cycle_delta, &ntp_error);
+
+ /* convert shifted nanoseconds to seconds & remainder */
+ delta_nsec = snsec2nsec_rem(shifted_nsecs, clock->shift,
+ &shifted_nsecs);
+
/* update system_time: */
__increment_system_time(delta_nsec);

+/**** NTP book keeping chunk *****/
+ /* XXX - This all might be possible to ditch w/ Roman's bits */
/* advance the ntp state machine by ns interval: */
ntp_advance(delta_nsec);

@@ -555,69 +572,52 @@ static void timeofday_periodic_hook(unsi
if (ntp_synced())
sync_persistent_clock(wall_time_ts);

+/**** Clocksource change chunk ****/
+ old_clock.mult = 0;
/* if necessary, switch clocksources: */
next = get_next_clocksource();
- if (next != clock) {
+ if (unlikely(next != clock)) {
/* immediately set new cycle_last: */
cycle_last = read_clocksource(next);
/* update cycle_now to avoid problems in accumulation later: */
cycle_now = cycle_last;
+
/* swap clocksources: */
- old_clock = *clock;
+ old_clock.mult = clock->mult;
+ old_clock.shift = clock->shift;
clock = next;
printk(KERN_INFO "Time: %s clocksource has been installed.\n",
clock->name);
- ntp_clear();
- ntp_adj = 0;
- remainder = 0;
- something_changed = 1;
- }
-
- /*
- * now is a safe time, so allow clocksource to adjust
- * itself (for example: to make cpufreq changes):
- */
- if (clock->update_callback) {
+ } else if (unlikely(clock->update_callback)) {
/*
- * since clocksource state might change,
- * keep a copy, but only if we've not
- * already changed timesources:
+ * now is a safe time, so allow clocksource to adjust
+ * itself (for example: to make cpufreq changes):
*/
- if (!something_changed)
- old_clock = *clock;
- if (clock->update_callback()) {
- remainder = 0;
- something_changed = 1;
- }
- }
-
- /* check for new PPM adjustment: */
- ppm = ntp_get_ppm_adjustment();
- if (ppm_last != ppm) {
- /* make sure old_clock is set: */
- if (!something_changed)
- old_clock = *clock;
- something_changed = 1;
+ old_clock.mult = clock->mult;
+ old_clock.shift = clock->shift;
+ if (!clock->update_callback())
+ old_clock.mult = 0;
}

/* if something changed, recalculate the ntp adjustment value: */
- if (something_changed) {
+ if (unlikely(old_clock.mult)) {
/* accumulate current leftover cycles using old_clock: */
if (cycle_delta) {
- delta_nsec = cyc2ns_rem(&old_clock, ntp_adj,
- cycle_delta, &remainder);
+ delta_nsec = cyc2ns_rem(&old_clock, cycle_delta,
+ &shifted_nsecs);
cycle_last = cycle_now;
__increment_system_time(delta_nsec);
ntp_advance(delta_nsec);
+ second_check += delta_nsec;
}

- /* recalculate the ntp adjustment and fixed interval values: */
- ppm_last = ppm;
- ntp_adj = ppm_to_mult_adj(clock, ppm);
- ts_interval = calculate_clocksource_interval(clock, ntp_adj,
- INTERVAL_LEN);
+ ntp_clear();
+ shifted_nsecs = 0;
+ ntp_error = 0;
+ calculate_clocksource_interval(clock, INTERVAL_LEN);
}

+/**** Final bits chunk ****/
update_legacy_time_values();

write_sequnlock_irqrestore(&system_time_lock, flags);
@@ -660,6 +660,7 @@ void __init timeofday_init(void)

/* initialize the clock variable: */
clock = get_next_clocksource();
+ calculate_clocksource_interval(clock, INTERVAL_LEN);

/* initialize cycle_last offset base: */
cycle_last = read_clocksource(clock);
@@ -670,10 +671,7 @@ void __init timeofday_init(void)
read_persistent_clock()));

/* clear NTP scaling factor & state machine: */
- ntp_adj = 0;
ntp_clear();
- ts_interval = calculate_clocksource_interval(clock, ntp_adj,
- INTERVAL_LEN);

/* initialize legacy time values: */
update_legacy_time_values();



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/