[PATCH v7 4/7] timekeeping: Drive time_offset skew via per-tick ntp_error transfer
From: David Woodhouse
Date: Sun Jun 21 2026 - 18:05:55 EST
From: David Woodhouse <dwmw@xxxxxxxxxxxx>
Currently, the phase offset of time_offset and time_adjust is delivered
by adjusting tick_length in second_overflow(), and immediately draining
time_offset/time_adjust by the amount that the tick_length adjustment is
*estimated* to cause. This is fairly approximate, in part because it is
not always correct to assume that precisely NTP_INTERVAL_FREQ ticks will
occur between one call to second_overflow() and the next. It could also
over- or under-run in the final second of delivery.
Instead of inflating tick_length, transfer the intended skew directly
into ntp_error each tick to achieve the desired rate.
In second_overflow(), calculate skew_delta which is the per-tick slew
rate, in the same units as time_offset: (ns << NTP_SCALE_SHIFT) / HZ.
In logarithmic_accumulation(), drain up to 'skew_delta' time units from
time_offset into ntp_error to drive the overall effective rate. The new
ntp_drain_skew() function returns the amount which is actually 'claimed'
by time_offset (and in a future patch, time_adjust). Any overrun which
is delivered by the changed 'mult' (as described below) but not claimed
by ntp_drain_skew() will remain in ntp_error to be corrected away in
subsequent ticks.
Simply transferring the precise amount from time_offset to ntp_error
would be sufficient to make the time *eventually* converge; however, the
skew delivered is limited by the choice of { mult, mult+1 } each tick
and thus the convergence would be extremely slow.
In theory we could inflate ntp_err_mult with the magnitude of ntp_error
in the general case — but that would cause overcorrection in a tickless
kernel. Instead, in timekeeping_adjust(), take skew_delta into account
when calculating 'mult', such that the available {mult, mult+1} choices
bracket the overall effective rate *including* the skew, to avoid the
delta just building up in ntp_error.
The effect is that the inflated 'mult' causes ntp_error to grow because
xtime_interval is (e.g.) longer than the true tick_length. But then the
same delta is removed again as it's drained from time_offset.
This gives behaviour equivalent to the old tick_length += delta approach
but with exact per-tick accounting of the time_offset actually imparted
to the clock, and no overrun.
Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
Assisted-by: Kiro:claude-opus-4.8
---
include/linux/timekeeper_internal.h | 1 +
kernel/time/ntp.c | 88 +++++++++++++++++++++++++++--
kernel/time/ntp_internal.h | 2 +
kernel/time/timekeeping.c | 36 ++++++++++--
4 files changed, 118 insertions(+), 9 deletions(-)
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index ec81587a1400..fb37a736ec1c 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -189,6 +189,7 @@ struct timekeeper {
u32 ntp_err_mult;
s64 cs_tick_adj;
u32 skip_second_overflow;
+ s64 skew_delta;
s32 tai_offset;
};
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 3fad82c47c4c..064e68e7a77c 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -31,6 +31,9 @@
* @time_state: State of the clock synchronization
* @time_status: Clock status bits
* @time_offset: Time adjustment in nanoseconds
+ * @skew_delta: Per-tick phase slew rate for the coming second, in
+ * @time_offset units (shifted-ns / HZ). Set by
+ * second_overflow().
* @time_constant: PLL time constant
* @time_maxerror: Maximum error in microseconds holding the NTP sync distance
* (NTP dispersion + delay / 2)
@@ -67,6 +70,7 @@ struct ntp_data {
int time_state;
int time_status;
s64 time_offset;
+ s64 skew_delta;
long time_constant;
long time_maxerror;
long time_esterror;
@@ -349,6 +353,7 @@ static void __ntp_clear(struct ntp_data *ntpdata)
ntpdata->tick_length = ntpdata->tick_length_base;
ntpdata->time_offset = 0;
+ ntpdata->skew_delta = 0;
ntpdata->ntp_next_leap_sec = TIME64_MAX;
/* Clear PPS state variables */
@@ -385,6 +390,55 @@ u64 ntp_tick_length(unsigned int tkid)
return tk_ntp_data[tkid].tick_length;
}
+/**
+ * ntp_get_skew_delta - Read the per-tick skew rate stored for a timekeeper
+ * @tkid:	Timekeeper ID
+ *
+ * Returns: the skew_delta value currently held in the NTP data of @tkid.
+ */
+s64 ntp_get_skew_delta(unsigned int tkid)
+{
+	const struct ntp_data *ntpdata = &tk_ntp_data[tkid];
+
+	return ntpdata->skew_delta;
+}
+
+/*
+ * Sign of @x as +1 or -1. There is no zero result: x == 0 is reported
+ * as +1, so a zero operand compares equal in sign to a positive one.
+ */
+static inline int signof(s64 x)
+{
+	if (x < 0)
+		return -1;
+
+	return 1;
+}
+
+/*
+ * Consume up to @amount from the time_offset of timekeeper @tkid, moving
+ * time_offset towards zero but never past it.
+ *
+ * Returns the part of @amount that was NOT taken from time_offset: all of
+ * @amount when it is zero or its sign differs from that of time_offset,
+ * the excess when @amount is larger in magnitude than time_offset, and 0
+ * when @amount was consumed in full.
+ */
+static s64 ntp_drain_time_offset(unsigned int tkid, s64 amount)
+{
+	struct ntp_data *ntpdata = &tk_ntp_data[tkid];
+	s64 pending = ntpdata->time_offset;
+
+	/* Nothing requested: nothing to drain */
+	if (!amount)
+		return amount;
+
+	/* Draining is only meaningful when both point the same way */
+	if (signof(amount) != signof(pending))
+		return amount;
+
+	/* The whole request fits inside what is still pending */
+	if (abs(amount) <= abs(pending)) {
+		ntpdata->time_offset = pending - amount;
+		return 0;
+	}
+
+	/* Request exceeds what is pending: stop at zero, report the excess */
+	ntpdata->time_offset = 0;
+	return amount - pending;
+}
+
+/*
+ * Account for one accumulation's worth of intentional skew as it is
+ * delivered to the clock.
+ *
+ * @amount is the total intentional skew for this accumulation
+ * (skew_delta << shift), in time_offset units (shifted-ns / HZ).
+ * @shift is not consulted by this function; only @amount is used.
+ *
+ * Returns how much of @amount was actually claimed by the intentional
+ * phase offset in time_offset, in the same units.
+ */
+s64 ntp_drain_skew(unsigned int tkid, s64 amount, unsigned int shift)
+{
+	s64 claimed = amount;
+
+	/* Whatever time_offset could not absorb was not claimed */
+	claimed -= ntp_drain_time_offset(tkid, amount);
+
+	return claimed;
+}
+
/**
* ntp_get_next_leap - Returns the next leapsecond in CLOCK_REALTIME ktime_t
* @tkid: Timekeeper ID
@@ -419,7 +473,6 @@ ktime_t ntp_get_next_leap(unsigned int tkid)
int second_overflow(unsigned int tkid, time64_t secs)
{
struct ntp_data *ntpdata = &tk_ntp_data[tkid];
- s64 delta;
int leap = 0;
s32 rem;
@@ -481,13 +534,38 @@ int second_overflow(unsigned int tkid, time64_t secs)
/* Compute the phase adjustment for the next second */
ntpdata->tick_length = ntpdata->tick_length_base;
- delta = ntp_offset_chunk(ntpdata, ntpdata->time_offset);
- ntpdata->time_offset -= delta;
- ntpdata->tick_length += delta;
-
/* Check PPS signal */
pps_dec_valid(ntpdata);
+ /*
+ * Set the per-tick skew rate for the next second. This is in
+ * the same units as time_offset: (ns << NTP_SCALE_SHIFT) / HZ.
+ * If the result is so low that the skew imparted would round
+ * to zero, pass the bare minimum ±1 to ensure that it *does*
+ * actually drain completely to zero. It won't overshoot because
+ * logarithmic_accumulation() only drains what it can from
+ * time_offset and the rest ends up in ntp_error which drives
+ * the selection of 'mult' immediately each tick.
+ */
+ if (ntpdata->time_offset) {
+ s64 off_chunk = ntp_offset_chunk(ntpdata, ntpdata->time_offset);
+
+ /*
+ * Once the exponential chunk rounds to zero, deliver the last
+ * remaining offset this second so it converges to zero instead
+ * of stalling just above it.
+ */
+ if (!off_chunk)
+ off_chunk = ntpdata->time_offset;
+
+ /* Reduce to per-tick, then floor. */
+ ntpdata->skew_delta = div_s64(off_chunk, NTP_INTERVAL_FREQ);
+ if (!ntpdata->skew_delta)
+ ntpdata->skew_delta = signof(off_chunk);
+ } else {
+ ntpdata->skew_delta = 0;
+ }
+
if (!ntpdata->time_adjust)
goto out;
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index 598e5dd2fc5b..0474a761bafc 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -6,6 +6,8 @@ extern void ntp_init(void);
extern void ntp_clear(unsigned int tkid, s64 cs_tick_adj);
/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
extern u64 ntp_tick_length(unsigned int tkid);
+extern s64 ntp_get_skew_delta(unsigned int tkid);
+extern s64 ntp_drain_skew(unsigned int tkid, s64 amount, unsigned int shift);
extern ktime_t ntp_get_next_leap(unsigned int tkid);
extern int second_overflow(unsigned int tkid, time64_t secs);
extern int ntp_adjtimex(unsigned int tkid, struct __kernel_timex *txc, const struct timespec64 *ts,
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 53961a1fcf47..89b417e22990 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -423,6 +423,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
tk->tkr_raw.mult = clock->mult;
tk->ntp_err_mult = 0;
tk->skip_second_overflow = 0;
+ tk->skew_delta = 0;
tk->cs_id = clock->id;
@@ -2445,17 +2446,26 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
{
u64 ntp_tl = ntp_tick_length(tk->id);
+ s64 skew = ntp_get_skew_delta(tk->id);
u32 mult;
/*
- * Determine the multiplier from the current NTP tick length.
- * Avoid expensive division when the tick length doesn't change.
+ * Determine the multiplier from the current NTP tick length plus
+ * skew_delta. The skew biases mult so that ±1 dithering can deliver
+ * the time_offset slew rate. Recompute when either changes.
*/
- if (likely(tk->ntp_tick == ntp_tl)) {
+ if (likely(tk->ntp_tick == ntp_tl && tk->skew_delta == skew)) {
+ /* Revert to the base mult rate. */
mult = tk->tkr_mono.mult - tk->ntp_err_mult;
} else {
tk->ntp_tick = ntp_tl;
- mult = div64_u64(tk->ntp_tick >> tk->ntp_error_shift,
+ tk->skew_delta = skew;
+ /*
+ * skew_delta is stored pre-divided by HZ (matching time_offset);
+ * scale it back up to the full per-tick rate for the mult bias.
+ */
+ skew *= NTP_INTERVAL_FREQ;
+ mult = div64_u64((tk->ntp_tick + skew) >> tk->ntp_error_shift,
tk->cycle_interval);
}
@@ -2583,6 +2593,24 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
tk->ntp_error += tk->ntp_tick << shift;
tk->ntp_error -= tk->xtime_interval << (tk->ntp_error_shift + shift);
+ /*
+ * When skewing, do so by adjusting ntp_error to impart an extra
+ * target delta into ntp_error per tick, limited to what can be
+ * drained from time_offset to avoid overshoot.
+ *
+ * The base 'mult' value was calculated with the skew taken into
+ * account, such that the per-tick choice of 'mult' vs. 'mult+1'
+ * allows for the desired effective rate and ntp_error does not
+ * grow unbounded.
+ *
+ * Once the full desired phase offset is delivered, any remaining
+ * skew imparted by the adjusted 'mult', accounted above, remains
+ * in ntp_error and will be compensated by the dithering over time.
+ */
+ if (tk->skew_delta)
+ tk->ntp_error += ntp_drain_skew(tk->id, tk->skew_delta << shift,
+ shift) * NTP_INTERVAL_FREQ;
+
return offset;
}
--
2.54.0