[PATCH v6 3/7] timekeeping: Account for clocksource tick quantisation via NTP
From: David Woodhouse
Date: Sun Jun 14 2026 - 10:41:41 EST
From: David Woodhouse <dwmw@xxxxxxxxxxxx>
cycle_interval is an integer number of counter cycles per NTP interval,
so the real time it represents differs from the nominal
NTP_INTERVAL_LENGTH by up to half a counter period. For coarse
clocksources this is significant: the 3.579545 MHz ACPI PM timer at
HZ=1000 rounds 3579.545 cycles up to 3580, making each tick 1.000127 ms
(+127 PPM).
Commit a386b5af8edd ("time: Compensate for rounding on odd-frequency
clocksources") introduced xtime_remainder to compensate for exactly
this, citing the same 127 PPM ACPI PM example. The compensation is
correct and necessary, but it was applied inside the timekeeping
accumulation in timekeeping.c: subtracted in the mult computation in
timekeeping_adjust() and folded into the ntp_error update in
logarithmic_accumulation(). That keeps the base rate correct and leaves
NTP its full symmetric +/-MAXFREQ range (rather than the skewed
+373/-627 PPM the ACPI PM example would otherwise get), but the NTP code
in ntp.c never sees the correction: tick_length is computed without it,
so ntp.c's notion of how long a tick is disagrees with the rate
timekeeping actually produces.
Make the offset an explicit part of the NTP tick_length instead. Add
ntp_data::cs_tick_adj, a fixed per-second addend that
ntp_update_frequency() includes alongside ntp_tick_adj and time_freq.
tk_setup_internals() computes it from the difference between the real
cycle_interval duration and the nominal interval, stores it in the
timekeeper, and hands it to NTP through a new argument to ntp_clear() --
which already recomputes the frequency and is invoked after every
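clocksource (re)configuration. timekeeping_init() now passes
TK_UPDATE_ALL instead of TK_CLOCK_WAS_SET so that the TK_CLEAR_NTP path
runs and ntp_clear() delivers cs_tick_adj at boot; clearing NTP there is
otherwise redundant since ntp_init() has just initialised it.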
ntp.c now computes the true tick rate, giving a single source of truth.
Like ntp_tick_adj, cs_tick_adj stays internal to the kernel: userspace
still sees the nominal 1.000000 ms tick via adjtimex and is unaware of
the addends. timekeeping_adjust() and logarithmic_accumulation() use
ntp_tick / xtime_interval directly, and xtime_remainder is removed.
The base-rate arithmetic is unchanged: ntp_tick becomes
xtime_interval << ntp_error_shift, so the mult division yields the same
base mult and the ntp_error accumulation still nets to zero per tick.
Beyond the cleanup of treating all the tick_length contributions
(nominal interval, ntp_tick_adj, cs_tick_adj, time_freq) consistently
as addends in one place, it also prepares for feed-forward discipline:
a future timekeeping_set_reference() will set tick_length to track an
absolute external reference such as a vmclock, and that path needs
ntp.c to own a tick_length that already reflects the clocksource
quantisation, with no hidden correction applied elsewhere.
Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
Assisted-by: Kiro:claude-opus-4.8
---
include/linux/timekeeper_internal.h | 8 +++++---
kernel/time/ntp.c | 27 ++++++++++++++++++++++---
kernel/time/ntp_internal.h | 2 +-
kernel/time/timekeeping.c | 31 +++++++++++++++++++++--------
4 files changed, 53 insertions(+), 15 deletions(-)
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index e36d11e33e0c..ec81587a1400 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -84,8 +84,6 @@ struct tk_read_base {
* @cycle_interval: Number of clock cycles in one NTP interval
* @xtime_interval: Number of clock shifted nano seconds in one NTP
* interval.
- * @xtime_remainder: Shifted nano seconds left over when rounding
- * @cycle_interval
* @raw_interval: Shifted raw nano seconds accumulated per NTP interval.
* @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
* @ntp_tick: The ntp_tick_length() value currently being
@@ -99,6 +97,10 @@ struct tk_read_base {
* @ntp_error_shift: Shift conversion between clock shifted nano seconds and
* ntp shifted nano seconds.
* @ntp_err_mult: Multiplication factor for scaled math conversion
+ * @cs_tick_adj: Per-second adjustment handed to NTP via ntp_clear()
+ * accounting for the difference between the nominal
+ * NTP interval and the real time taken by the
+ * clocksource's integer @cycle_interval (upscaled).
* @skip_second_overflow: Flag used to avoid updating NTP twice with same second
* @tai_offset: The current UTC to TAI offset in seconds
*
@@ -178,7 +180,6 @@ struct timekeeper {
u64 cycle_interval;
u64 xtime_interval;
- s64 xtime_remainder;
u64 raw_interval;
ktime_t next_leap_ktime;
@@ -186,6 +187,7 @@ struct timekeeper {
s64 ntp_error;
u32 ntp_error_shift;
u32 ntp_err_mult;
+ s64 cs_tick_adj;
u32 skip_second_overflow;
s32 tai_offset;
};
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 97fa99b96dd0..3fad82c47c4c 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -39,6 +39,10 @@
* @time_reftime: Time at last adjustment in seconds
* @time_adjust: Adjustment value
* @ntp_tick_adj: Constant boot-param configurable NTP tick adjustment (upscaled)
+ * @cs_tick_adj: Fixed per-second adjustment compensating for the difference
+ * between the nominal NTP interval and the real time taken
+ * by the clocksource's integer &timekeeper.cycle_interval
+ * (upscaled). Set by the timekeeping core via ntp_clear().
* @ntp_next_leap_sec: Second value of the next pending leapsecond, or TIME64_MAX if no leap
*
* @pps_valid: PPS signal watchdog counter
@@ -70,6 +74,7 @@ struct ntp_data {
time64_t time_reftime;
long time_adjust;
s64 ntp_tick_adj;
+ s64 cs_tick_adj;
time64_t ntp_next_leap_sec;
#ifdef CONFIG_NTP_PPS
int pps_valid;
@@ -255,6 +260,7 @@ static void ntp_update_frequency(struct ntp_data *ntpdata)
second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << NTP_SCALE_SHIFT;
second_length += ntpdata->ntp_tick_adj;
+ second_length += ntpdata->cs_tick_adj;
second_length += ntpdata->time_freq;
new_base = div_u64(second_length, NTP_INTERVAL_FREQ);
@@ -350,11 +356,26 @@ static void __ntp_clear(struct ntp_data *ntpdata)
}
/**
- * ntp_clear - Clears the NTP state variables
- * @tkid: Timekeeper ID to be able to select proper ntp data array member
+ * ntp_clear - Clear NTP state and set the clocksource quantisation adjustment
+ * @tkid: Timekeeper ID
+ * @cs_tick_adj: Per-second adjustment in ns << NTP_SCALE_SHIFT
+ *
+ * The timekeeping core uses an integer number of cycles
+ * (&timekeeper.cycle_interval) per NTP interval, so the real time that
+ * interval represents differs from the nominal NTP_INTERVAL_LENGTH by up to
+ * half a counter period. Passing this fixed offset as @cs_tick_adj makes it
+ * an explicit part of the NTP tick_length computation in ntp.c, instead of
+ * being applied during timekeeping accumulation where the NTP code never
+ * saw it. Like &ntp_data.ntp_tick_adj it stays internal to the kernel;
+ * userspace still sees the nominal tick via adjtimex. NTP retains its full
+ * symmetric +/-MAXFREQ range around the corrected base rate.
+ *
+ * Called whenever the clocksource is (re)configured, which is also when the
+ * rest of the NTP state must be cleared, so the two are done together.
*/
-void ntp_clear(unsigned int tkid)
+void ntp_clear(unsigned int tkid, s64 cs_tick_adj)
{
+ tk_ntp_data[tkid].cs_tick_adj = cs_tick_adj;
__ntp_clear(&tk_ntp_data[tkid]);
}
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index 7084d839c207..598e5dd2fc5b 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -3,7 +3,7 @@
#define _LINUX_NTP_INTERNAL_H
extern void ntp_init(void);
-extern void ntp_clear(unsigned int tkid);
+extern void ntp_clear(unsigned int tkid, s64 cs_tick_adj);
/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
extern u64 ntp_tick_length(unsigned int tkid);
extern ktime_t ntp_get_next_leap(unsigned int tkid);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d847bba0481b..bdafd599413d 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -366,7 +366,6 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
/* Go back from cycles -> shifted ns */
tk->xtime_interval = interval * clock->mult;
- tk->xtime_remainder = ntpinterval - tk->xtime_interval;
tk->raw_interval = interval * clock->mult;
/* if changing clocks, convert xtime_nsec shift units */
@@ -386,7 +385,19 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
tk->ntp_error = 0;
tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
- tk->ntp_tick = ntpinterval << tk->ntp_error_shift;
+
+ /*
+ * cycle_interval is a whole number of counter cycles, so the real
+ * time it represents differs from the nominal NTP_INTERVAL_LENGTH by
+ * up to half a counter period (e.g. +127 PPM on the 3.579545 MHz ACPI
+ * PM timer at HZ=1000). Record this fixed per-tick offset, scaled up
+ * to a per-second value to match the ntp_update_frequency() addends,
+ * so it can be handed to NTP via ntp_clear() and reflected explicitly
+ * in tick_length rather than applied behind NTP's back.
+ */
+ tk->cs_tick_adj = (((s64)tk->xtime_interval - (s64)ntpinterval) <<
+ tk->ntp_error_shift) * NTP_INTERVAL_FREQ;
+ tk->ntp_tick = (u64)tk->xtime_interval << tk->ntp_error_shift;
/*
* The timekeeper keeps its own mult values for the currently
@@ -803,7 +814,7 @@ static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int act
if (action & TK_CLEAR_NTP) {
tk->ntp_error = 0;
- ntp_clear(tk->id);
+ ntp_clear(tk->id, tk->cs_tick_adj);
}
tk_update_leap_state(tk);
@@ -2075,7 +2086,12 @@ void __init timekeeping_init(void)
tk_set_wall_to_mono(tks, wall_to_mono);
- timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET);
+ /*
+ * Use TK_UPDATE_ALL so the NTP layer picks up the clocksource's
+ * cs_tick_adj via ntp_clear(). Clearing NTP here is otherwise
+ * redundant as ntp_init() already initialised it above.
+ */
+ timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
}
/* time in seconds when suspend began for persistent clock */
@@ -2424,8 +2440,8 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
mult = tk->tkr_mono.mult - tk->ntp_err_mult;
} else {
tk->ntp_tick = ntp_tl;
- mult = div64_u64((tk->ntp_tick >> tk->ntp_error_shift) -
- tk->xtime_remainder, tk->cycle_interval);
+ mult = div64_u64(tk->ntp_tick >> tk->ntp_error_shift,
+ tk->cycle_interval);
}
/*
@@ -2550,8 +2566,7 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
/* Accumulate error between NTP and clock interval */
tk->ntp_error += tk->ntp_tick << shift;
- tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) <<
- (tk->ntp_error_shift + shift);
+ tk->ntp_error -= tk->xtime_interval << (tk->ntp_error_shift + shift);
return offset;
}
--
2.54.0