[PATCH v5 4/6] timekeeping: Drive time_offset skew via per-tick ntp_error transfer

From: David Woodhouse

Date: Wed Jun 10 2026 - 19:41:07 EST


From: David Woodhouse <dwmw@xxxxxxxxxxxx>

Instead of inflating tick_length to effect the time_offset slew,
transfer the skew to ntp_error per-tick and drain time_offset at the
equivalent per-tick rate:

- ntp_error += (skew_delta << shift) * NTP_INTERVAL_FREQ, for the part actually drained (biases dithering to deliver skew)
- time_offset -= skew_delta << shift (per-tick drain, clamped so it never crosses zero)

skew_delta is in per-tick units (same as time_offset), computed in
second_overflow() as ntp_offset_chunk() / NTP_INTERVAL_FREQ.

Compute mult from (ntp_tick + skew_delta) so the dithering has enough
bandwidth to deliver the skew rate by selecting between mult and mult+1.
This is equivalent to the old tick_length += delta approach but without
modifying tick_length, and with exact per-tick accounting of the
time_offset drain.

Once time_offset is so low that skew_delta would round to zero, impart
the minimum ±1 per tick. It won't overshoot because anything that can't
be drained from time_offset gets accounted to ntp_error and will drive
the selection of mult vs. mult+1 from the very next tick to compensate.

second_overflow() computes skew_delta (the exponential decay rate)
but no longer drains time_offset or inflates tick_length directly.

Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
Assisted-by: Kiro:claude-opus-4.8
---
include/linux/timekeeper_internal.h | 1 +
kernel/time/ntp.c | 48 ++++++++++++++++++++++++++---
kernel/time/ntp_internal.h | 2 ++
kernel/time/timekeeping.c | 45 ++++++++++++++++++++++++---
4 files changed, 87 insertions(+), 9 deletions(-)

diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index ec81587a1400..fb37a736ec1c 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -189,6 +189,7 @@ struct timekeeper {
u32 ntp_err_mult;
s64 cs_tick_adj;
u32 skip_second_overflow;
+ s64 skew_delta;
s32 tai_offset;
};

diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 3fad82c47c4c..f2670e7985b8 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -67,6 +67,7 @@ struct ntp_data {
int time_state;
int time_status;
s64 time_offset;
+ s64 skew_delta;
long time_constant;
long time_maxerror;
long time_esterror;
@@ -385,6 +386,31 @@ u64 ntp_tick_length(unsigned int tkid)
return tk_ntp_data[tkid].tick_length;
}

+s64 ntp_get_skew_delta(unsigned int tkid)
+{
+ return tk_ntp_data[tkid].skew_delta; /* per-tick skew set in second_overflow() */
+}
+
+s64 ntp_drain_time_offset(unsigned int tkid, s64 amount)
+{
+ struct ntp_data *ntpdata = &tk_ntp_data[tkid];
+
+ /* Only drain if amount and time_offset have the same sign */
+ if (!amount || (amount > 0) != (ntpdata->time_offset > 0))
+ return amount; /* nothing drained: hand the whole amount back */
+
+ /* Clamp: take time_offset to zero at most, never past it */
+ if (abs(amount) > abs(ntpdata->time_offset)) {
+ s64 undrained = amount - ntpdata->time_offset;
+
+ ntpdata->time_offset = 0;
+ return undrained; /* the part of amount beyond time_offset */
+ }
+
+ ntpdata->time_offset -= amount;
+ return 0; /* amount was drained in full */
+}
+
/**
* ntp_get_next_leap - Returns the next leapsecond in CLOCK_REALTIME ktime_t
* @tkid: Timekeeper ID
@@ -419,7 +445,6 @@ ktime_t ntp_get_next_leap(unsigned int tkid)
int second_overflow(unsigned int tkid, time64_t secs)
{
struct ntp_data *ntpdata = &tk_ntp_data[tkid];
- s64 delta;
int leap = 0;
s32 rem;

@@ -481,9 +506,24 @@ int second_overflow(unsigned int tkid, time64_t secs)
/* Compute the phase adjustment for the next second */
ntpdata->tick_length = ntpdata->tick_length_base;

- delta = ntp_offset_chunk(ntpdata, ntpdata->time_offset);
- ntpdata->time_offset -= delta;
- ntpdata->tick_length += delta;
+ /*
+ * Set the per-tick skew rate for the tick code. This is in
+ * the same units as time_offset (pre-divided by HZ). If
+ * time_offset is so low that the skew imparted would round to
+ * zero, pass the bare minimum ±1. It won't overshoot because
+ * logarithmic_accumulation() only drains what it can from
+ * time_offset and the rest ends up in ntp_error which drives
+ * the selection of 'mult' immediately each tick.
+ */
+ if (ntpdata->time_offset) {
+ s64 delta = ntp_offset_chunk(ntpdata, ntpdata->time_offset);
+ ntpdata->skew_delta = div_s64(delta, NTP_INTERVAL_FREQ);
+
+ if (!ntpdata->skew_delta)
+ ntpdata->skew_delta = (ntpdata->time_offset > 0) ? 1 : -1;
+ } else {
+ ntpdata->skew_delta = 0;
+ }

/* Check PPS signal */
pps_dec_valid(ntpdata);
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index 598e5dd2fc5b..1e708a2562ea 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -6,6 +6,8 @@ extern void ntp_init(void);
extern void ntp_clear(unsigned int tkid, s64 cs_tick_adj);
/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
extern u64 ntp_tick_length(unsigned int tkid);
+extern s64 ntp_get_skew_delta(unsigned int tkid);
+extern s64 ntp_drain_time_offset(unsigned int tkid, s64 amount);
extern ktime_t ntp_get_next_leap(unsigned int tkid);
extern int second_overflow(unsigned int tkid, time64_t secs);
extern int ntp_adjtimex(unsigned int tkid, struct __kernel_timex *txc, const struct timespec64 *ts,
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index bdafd599413d..b8b0e9d7fc10 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -408,6 +408,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
tk->tkr_raw.mult = clock->mult;
tk->ntp_err_mult = 0;
tk->skip_second_overflow = 0;
+ tk->skew_delta = 0;

tk->cs_id = clock->id;

@@ -2430,18 +2431,27 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
{
u64 ntp_tl = ntp_tick_length(tk->id);
+ s64 skew = ntp_get_skew_delta(tk->id);
u32 mult;

/*
- * Determine the multiplier from the current NTP tick length.
- * Avoid expensive division when the tick length doesn't change.
+ * Determine the multiplier from the current NTP tick length plus
+ * skew_delta. The skew biases mult so that ±1 dithering can deliver
+ * the time_offset slew rate. Recompute when either changes.
*/
- if (likely(tk->ntp_tick == ntp_tl)) {
+ if (likely(tk->ntp_tick == ntp_tl && tk->skew_delta == skew)) {
+ /* Revert to the base mult rate. */
mult = tk->tkr_mono.mult - tk->ntp_err_mult;
} else {
tk->ntp_tick = ntp_tl;
- mult = div64_u64(tk->ntp_tick >> tk->ntp_error_shift,
- tk->cycle_interval);
+ tk->skew_delta = skew;
+ /*
+ * skew_delta is stored pre-divided by HZ (matching time_offset);
+ * scale it back up to the full per-tick rate for the mult bias.
+ */
+ skew *= NTP_INTERVAL_FREQ;
+ mult = div64_u64((tk->ntp_tick + skew) >> tk->ntp_error_shift,
+ tk->cycle_interval);
}

/*
@@ -2568,6 +2578,31 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
tk->ntp_error += tk->ntp_tick << shift;
tk->ntp_error -= tk->xtime_interval << (tk->ntp_error_shift + shift);

+ /*
+ * The above accounting of ntp_error includes the part of clock
+ * skew which was *intentional*, imparted through deliberately
+ * adjusting 'mult' in timekeeping_adjust() taking skew_delta
+ * into account.
+ *
+ * Drain the intentional skew from time_offset, and readjust
+ * ntp_error by the amount that *could* actually be drained.
+ * This ensures that any *overshoot* is correctly left in
+ * ntp_error and will be correctly compensated for over time.
+ */
+ if (tk->skew_delta) {
+ /*
+ * skew_delta is stored pre-divided by HZ, matching time_offset,
+ * so drain it directly. Fold the amount actually drained back
+ * into ntp_error in full clock units (× NTP_INTERVAL_FREQ); any
+ * undrainable overshoot is left in ntp_error to be compensated
+ * by the dithering over subsequent ticks.
+ */
+ s64 drain = tk->skew_delta << shift;
+ s64 unclaimed = ntp_drain_time_offset(tk->id, drain);
+
+ tk->ntp_error += (drain - unclaimed) * NTP_INTERVAL_FREQ;
+ }
+
return offset;
}

--
2.54.0