[PATCHv2] rtc: adapt allowed RTC update error

From: Miroslav Lichvar
Date: Wed Dec 02 2020 - 05:03:14 EST


When the system clock is marked as synchronized via adjtimex(), the
kernel is expected to copy the system time to the RTC every 11 minutes.

There are reports that it doesn't always work reliably. It seems the
current requirement for the RTC update to happen within 5 ticks of the
target time in some cases can consistently fail for hours or even days.

It is better to set the RTC with a larger error than let it drift for
too long.

Instead of increasing the constant again, use a static variable to count
the checks and with each failed check increase the allowed error by one
jiffie. Reset the counter when the check finally succeeds. This will
allow the RTC update to keep good accuracy if it can happen in the first
few attempts and it will not take more than a minute if the timing is
consistently bad for any reason.

Signed-off-by: Miroslav Lichvar <mlichvar@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: John Stultz <john.stultz@xxxxxxxxxx>
Cc: Prarit Bhargava <prarit@xxxxxxxxxx>
Cc: Jason Gunthorpe <jgg@xxxxxxxx>
---

Notes:
v2:
- moved the static variable to callers in ntp.c

drivers/rtc/systohc.c | 6 ++++--
include/linux/rtc.h | 14 +++++++++-----
kernel/time/ntp.c | 9 +++++++--
3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/drivers/rtc/systohc.c b/drivers/rtc/systohc.c
index 8b70f0520e13..0777f590cdae 100644
--- a/drivers/rtc/systohc.c
+++ b/drivers/rtc/systohc.c
@@ -5,6 +5,7 @@
/**
* rtc_set_ntp_time - Save NTP synchronized time to the RTC
* @now: Current time of day
+ * @attempt: Number of previous failures used to adjust allowed error
* @target_nsec: pointer for desired now->tv_nsec value
*
* Replacement for the NTP platform function update_persistent_clock64
@@ -18,7 +19,8 @@
*
* If temporary failure is indicated the caller should try again 'soon'
*/
-int rtc_set_ntp_time(struct timespec64 now, unsigned long *target_nsec)
+int rtc_set_ntp_time(struct timespec64 now, unsigned int attempt,
+ unsigned long *target_nsec)
{
struct rtc_device *rtc;
struct rtc_time tm;
@@ -44,7 +46,7 @@ int rtc_set_ntp_time(struct timespec64 now, unsigned long *target_nsec)
* it does not we update target_nsec and return EPROTO to make the ntp
* code try again later.
*/
- ok = rtc_tv_nsec_ok(rtc->set_offset_nsec, &to_set, &now);
+ ok = rtc_tv_nsec_ok(rtc->set_offset_nsec, attempt, &to_set, &now);
if (!ok) {
err = -EPROTO;
goto out_close;
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 22d1575e4991..9f3326b43620 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -165,7 +165,8 @@ int __rtc_register_device(struct module *owner, struct rtc_device *rtc);

extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm);
extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm);
-extern int rtc_set_ntp_time(struct timespec64 now, unsigned long *target_nsec);
+extern int rtc_set_ntp_time(struct timespec64 now, unsigned int attempt,
+ unsigned long *target_nsec);
int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm);
extern int rtc_read_alarm(struct rtc_device *rtc,
struct rtc_wkalrm *alrm);
@@ -213,24 +214,27 @@ static inline bool is_leap_year(unsigned int year)
* a zero in tv_nsecs, such that:
* to_set - set_delay_nsec == now +/- FUZZ
*
+ * The allowed error starts at 5 jiffies on the first attempt and is increased
+ * with each failed attempt to make sure the RTC will be set at some point,
+ * even if the timing is consistently inaccurate.
*/
static inline bool rtc_tv_nsec_ok(s64 set_offset_nsec,
+ unsigned int attempt,
struct timespec64 *to_set,
const struct timespec64 *now)
{
- /* Allowed error in tv_nsec, arbitarily set to 5 jiffies in ns. */
- const unsigned long TIME_SET_NSEC_FUZZ = TICK_NSEC * 5;
+ unsigned long time_set_nsec_fuzz = (5 + attempt) * TICK_NSEC;
struct timespec64 delay = {.tv_sec = 0,
.tv_nsec = set_offset_nsec};

*to_set = timespec64_add(*now, delay);

- if (to_set->tv_nsec < TIME_SET_NSEC_FUZZ) {
+ if (to_set->tv_nsec < time_set_nsec_fuzz) {
to_set->tv_nsec = 0;
return true;
}

- if (to_set->tv_nsec > NSEC_PER_SEC - TIME_SET_NSEC_FUZZ) {
+ if (to_set->tv_nsec > NSEC_PER_SEC - time_set_nsec_fuzz) {
to_set->tv_sec++;
to_set->tv_nsec = 0;
return true;
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 069ca78fb0bf..893bc7ed7845 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -531,6 +531,7 @@ static void sched_sync_hw_clock(struct timespec64 now,

static void sync_rtc_clock(void)
{
+ static unsigned int attempt;
unsigned long target_nsec;
struct timespec64 adjust, now;
int rc;
@@ -548,9 +549,11 @@ static void sync_rtc_clock(void)
* The current RTC in use will provide the target_nsec it wants to be
* called at, and does rtc_tv_nsec_ok internally.
*/
- rc = rtc_set_ntp_time(adjust, &target_nsec);
+ rc = rtc_set_ntp_time(adjust, attempt++, &target_nsec);
if (rc == -ENODEV)
return;
+ if (rc != -EPROTO)
+ attempt = 0;

sched_sync_hw_clock(now, target_nsec, rc);
}
@@ -564,6 +567,7 @@ int __weak update_persistent_clock64(struct timespec64 now64)

static bool sync_cmos_clock(void)
{
+ static unsigned int attempt;
static bool no_cmos;
struct timespec64 now;
struct timespec64 adjust;
@@ -585,7 +589,8 @@ static bool sync_cmos_clock(void)
* implement this legacy API.
*/
ktime_get_real_ts64(&now);
- if (rtc_tv_nsec_ok(-1 * target_nsec, &adjust, &now)) {
+ if (rtc_tv_nsec_ok(-1 * target_nsec, attempt++, &adjust, &now)) {
+ attempt = 0;
if (persistent_clock_is_local)
adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
rc = update_persistent_clock64(adjust);
--
2.26.2