[PATCH v5 5/6] timekeeping: deliver adjtime() time_adjust via skew_delta
From: David Woodhouse
Date: Wed Jun 10 2026 - 19:41:15 EST
From: David Woodhouse <dwmw@xxxxxxxxxxxx>
The legacy adjtime() slew (ADJ_OFFSET_SINGLESHOT) was the last case in
which tick_length differed from tick_length_base: it slewed the clock by
inflating tick_length directly, which delivered the correction imprecisely
(per-tick truncation lost on the order of microseconds over a multi-second
slew) and hid the end-of-slew over-delivery in the inflated baseline.
Route it through the same per-tick skew_delta mechanism as the
time_offset PLL, so tick_length always equals tick_length_base and the
slew is both delivered and accounted precisely:
- second_overflow() adds the MAX_TICKADJ-per-second linear rate to
  skew_delta (biasing mult), without inflating tick_length or
  decrementing time_adjust itself;
- the per-tick drain in logarithmic_accumulation() apportions the
  delivered skew: ntp_drain_time_adjust() claims the linear share
  (rate-limited to MAX_TICKADJ/s), ntp_drain_time_offset() takes the
  exponential rest, and any overshoot stays in ntp_error.
time_adjust keeps its whole-microsecond API value and full range; the
sub-microsecond remainder being delivered lives in a new time_adjust_frac
(plain shifted-ns), topped up by borrowing whole us from time_adjust as
the drain consumes it. The linear drive is sourced from time_adjust *and*
time_adjust_frac so the final sub-us residual is flushed and the slew
converges exactly rather than stopping ~1us short.
Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
Assisted-by: Kiro:claude-opus-4.8
---
kernel/time/ntp.c | 108 +++++++++++++++++++++++++++++++------
kernel/time/ntp_internal.h | 1 +
kernel/time/timekeeping.c | 14 ++---
3 files changed, 100 insertions(+), 23 deletions(-)
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index f2670e7985b8..7a5ff2a6c93b 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -74,6 +74,7 @@ struct ntp_data {
s64 time_freq;
time64_t time_reftime;
long time_adjust;
+ s64 time_adjust_frac;
s64 ntp_tick_adj;
s64 cs_tick_adj;
time64_t ntp_next_leap_sec;
@@ -107,6 +108,9 @@ static struct ntp_data tk_ntp_data[TIMEKEEPERS_MAX] = {
#define SECS_PER_DAY 86400
#define MAX_TICKADJ 500LL /* usecs */
+/* One microsecond of phase, in plain shifted-ns (ns << NTP_SCALE_SHIFT) */
+#define ONE_US_NS ((s64)NSEC_PER_USEC << NTP_SCALE_SHIFT)
+/* Per-tick MAX_TICKADJ slew, in plain shifted-ns */
#define MAX_TICKADJ_SCALED \
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
#define MAX_TAI_OFFSET 100000
@@ -342,6 +346,7 @@ static void __ntp_clear(struct ntp_data *ntpdata)
{
/* Stop active adjtime() */
ntpdata->time_adjust = 0;
+ ntpdata->time_adjust_frac = 0;
ntpdata->time_status |= STA_UNSYNC;
ntpdata->time_maxerror = NTP_PHASE_LIMIT;
ntpdata->time_esterror = NTP_PHASE_LIMIT;
@@ -411,6 +416,66 @@ s64 ntp_drain_time_offset(unsigned int tkid, s64 amount)
return 0;
}
+/*
+ * Drain the legacy adjtime() correction (time_adjust) as it is delivered.
+ *
+ * @amount is the total intentional per-tick skew for this accumulation
+ * (skew_delta << shift), in time_offset units (shifted_ns / HZ); it covers
+ * both the exponential time_offset slew and the linear adjtime slew. This
+ * function claims only the adjtime share — capped at the MAX_TICKADJ rate —
+ * and returns the remainder for ntp_drain_time_offset().
+ *
+ * time_adjust is in whole us. The sub-us remainder being delivered lives in
+ * time_adjust_frac, in plain shifted-ns (ONE_US_NS per us, not ÷HZ units); we
+ * top it up by borrowing whole us from time_adjust as the drain consumes it.
+ */
+s64 ntp_drain_time_adjust(unsigned int tkid, s64 amount, unsigned int shift)
+{
+ struct ntp_data *ntpdata = &tk_ntp_data[tkid];
+ /* Sign reference: time_adjust if whole us remain, else time_adjust_frac */
+ s64 ref = ntpdata->time_adjust ? (s64)ntpdata->time_adjust
+ : ntpdata->time_adjust_frac;
+ s64 deliver, deficit, claimed;
+
+ if (!amount || !ref || (amount > 0) != (ref > 0))
+ return amount; /* no skew, nothing pending, or signs differ: claim nothing */
+
+ /*
+ * Phase to deliver this accumulation, in plain shifted-ns. @amount is
+ * in ÷HZ units, so multiply by HZ first, then clamp to the MAX_TICKADJ
+ * rate (MAX_TICKADJ_SCALED is the per-tick shifted-ns slew, << @shift
+ * like @amount). Multiply-then-clamp avoids an s64 divide for the cap.
+ */
+ deliver = min(abs(amount) * NTP_INTERVAL_FREQ,
+ (s64)MAX_TICKADJ_SCALED << shift);
+
+ /* Top up time_adjust_frac (the sub-us drawer) from whole-us time_adjust */
+ deficit = deliver - abs(ntpdata->time_adjust_frac);
+ if (deficit > 0 && ntpdata->time_adjust) {
+ long borrow = div64_u64(deficit + ONE_US_NS - 1, ONE_US_NS);
+
+ borrow = min(borrow, (long)abs(ntpdata->time_adjust));
+ if (ntpdata->time_adjust > 0) {
+ ntpdata->time_adjust -= borrow;
+ ntpdata->time_adjust_frac += (s64)borrow * ONE_US_NS;
+ } else {
+ ntpdata->time_adjust += borrow;
+ ntpdata->time_adjust_frac -= (s64)borrow * ONE_US_NS;
+ }
+ }
+
+ /* Never deliver more than the drawer holds; move it toward zero */
+ deliver = min(deliver, abs(ntpdata->time_adjust_frac));
+ if (ntpdata->time_adjust_frac > 0)
+ ntpdata->time_adjust_frac -= deliver;
+ else
+ ntpdata->time_adjust_frac += deliver;
+
+ /* Return the unclaimed remainder in ÷HZ drain units for time_offset */
+ claimed = div_s64(deliver, NTP_INTERVAL_FREQ);
+ return (amount > 0) ? amount - claimed : amount + claimed;
+}
+
/**
* ntp_get_next_leap - Returns the next leapsecond in CLOCK_REALTIME ktime_t
* @tkid: Timekeeper ID
@@ -528,26 +593,33 @@ int second_overflow(unsigned int tkid, time64_t secs)
/* Check PPS signal */
pps_dec_valid(ntpdata);
- if (!ntpdata->time_adjust)
- goto out;
-
- if (ntpdata->time_adjust > MAX_TICKADJ) {
- ntpdata->time_adjust -= MAX_TICKADJ;
- ntpdata->tick_length += MAX_TICKADJ_SCALED;
- goto out;
- }
+ /*
+ * Bias the per-tick skew for any pending adjtime() correction, at up
+ * to MAX_TICKADJ (500us) per second. This only sizes the mult bias
+ * (and hence the per-tick drain budget); time_adjust itself is drained
+ * in logarithmic_accumulation() via ntp_drain_time_adjust(), per tick,
+ * so it is never decremented behind the per-tick accounting's back and
+ * never staged into time_offset (which would smear the exponential).
+ */
+ if (ntpdata->time_adjust || ntpdata->time_adjust_frac) {
+ long adj = clamp(ntpdata->time_adjust,
+ (long)-MAX_TICKADJ, (long)MAX_TICKADJ);
+ /*
+ * Per-second linear phase to drive, in shifted-ns: the
+ * whole-us part plus the sub-us drawer, clamped to the
+ * MAX_TICKADJ rate. Including the drawer ensures the drive
+ * (and hence the drain) continues until the last sub-us
+ * remainder is flushed, not just until time_adjust hits zero.
+ */
+ s64 chunk = (s64)adj * ONE_US_NS + ntpdata->time_adjust_frac;
+ s64 max = (s64)MAX_TICKADJ * ONE_US_NS;
- if (ntpdata->time_adjust < -MAX_TICKADJ) {
- ntpdata->time_adjust += MAX_TICKADJ;
- ntpdata->tick_length -= MAX_TICKADJ_SCALED;
- goto out;
+ chunk = clamp(chunk, -max, max);
+ /* shifted-ns/second -> per-tick skew_delta: divide by HZ*HZ */
+ ntpdata->skew_delta += div_s64(chunk,
+ (s64)NTP_INTERVAL_FREQ * NTP_INTERVAL_FREQ);
}
- ntpdata->tick_length += (s64)(ntpdata->time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
- << NTP_SCALE_SHIFT;
- ntpdata->time_adjust = 0;
-
-out:
return leap;
}
@@ -840,6 +912,7 @@ int ntp_adjtimex(unsigned int tkid, struct __kernel_timex *txc, const struct tim
if (!(txc->modes & ADJ_OFFSET_READONLY)) {
/* adjtime() is independent from ntp_adjtime() */
ntpdata->time_adjust = txc->offset;
+ ntpdata->time_adjust_frac = 0;
ntp_update_frequency(ntpdata);
audit_ntp_set_old(ad, AUDIT_NTP_ADJUST, save_adjust);
@@ -1081,6 +1154,7 @@ static void hardpps_update_phase(struct ntp_data *ntpdata, long error)
NTP_INTERVAL_FREQ);
/* Cancel running adjtime() */
ntpdata->time_adjust = 0;
+ ntpdata->time_adjust_frac = 0;
}
/* Update jitter */
ntpdata->pps_jitter += (jitter - ntpdata->pps_jitter) >> PPS_INTMIN;
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index 1e708a2562ea..1647fd1a0d90 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -8,6 +8,7 @@ extern void ntp_clear(unsigned int tkid, s64 cs_tick_adj);
extern u64 ntp_tick_length(unsigned int tkid);
extern s64 ntp_get_skew_delta(unsigned int tkid);
extern s64 ntp_drain_time_offset(unsigned int tkid, s64 amount);
+extern s64 ntp_drain_time_adjust(unsigned int tkid, s64 amount, unsigned int shift);
extern ktime_t ntp_get_next_leap(unsigned int tkid);
extern int second_overflow(unsigned int tkid, time64_t secs);
extern int ntp_adjtimex(unsigned int tkid, struct __kernel_timex *txc, const struct timespec64 *ts,
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index b8b0e9d7fc10..a9b4f4b8626e 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2591,15 +2591,17 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
*/
if (tk->skew_delta) {
/*
- * skew_delta is stored pre-divided by HZ, matching time_offset,
- * so drain it directly. Fold the amount actually drained back
- * into ntp_error in full clock units (× NTP_INTERVAL_FREQ); any
- * undrainable overshoot is left in ntp_error to be compensated
- * by the dithering over subsequent ticks.
+ * skew_delta (stored ÷HZ, matching time_offset) is the total
+ * intentional skew delivered this accumulation. Apportion it:
+ * the adjtime() linear share goes to time_adjust (capped at
+ * MAX_TICKADJ/s), the exponential rest to time_offset, and any
+ * undrainable overshoot stays in ntp_error (in full clock units,
+ * × NTP_INTERVAL_FREQ) for the dithering to compensate.
*/
s64 drain = tk->skew_delta << shift;
- s64 unclaimed = ntp_drain_time_offset(tk->id, drain);
+ s64 unclaimed = ntp_drain_time_adjust(tk->id, drain, shift);
+ unclaimed = ntp_drain_time_offset(tk->id, unclaimed);
tk->ntp_error += (drain - unclaimed) * NTP_INTERVAL_FREQ;
}
--
2.54.0