[PATCH RFC] time: validate watchdog clocksource using second best candidate

From: Konstantin Khlebnikov
Date: Wed May 15 2019 - 03:49:27 EST


Timekeeping watchdog verifies doubtful clocksources using more reliable
candidates. For x86 it likely verifies 'tsc' using 'hpet'. But 'hpet'
is far from perfect too. It's better to have second opinion if possible.

We're seeing sudden jumps of hpet counter to 0xffffffff:

timekeeping watchdog on CPU56: Marking clocksource 'tsc' as unstable because the skew is too large:
'hpet' wd_now: ffffffff wd_last: 19ec5720 mask: ffffffff
'tsc' cs_now: 69b8a15f0aed cs_last: 69b862c9947d mask: ffffffffffffffff

Shaohua Li reported the same case three years ago.
His patch backlisted this exact value and re-read hpet counter.

This patch uses second reliable clocksource as backup for validation.
For x86 this is usually 'acpi_pm'. If watchdog and backup are not consent
then other clocksources will not be marked as unstable at this iteration.

In this case watchdog will print something like:
timekeeping watchdog on CPUxx: Ignoring watchdog 'hpet' hiccup because backup watchdog 'acpi_pm' is not consent:
and dump states of both clocksources.

Also this patch prints 'wd_nsec' and 'cs_nsec' for easier debug.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxxxxxx>
Link: https://lore.kernel.org/patchwork/patch/667413/
---
kernel/time/clocksource.c | 67 ++++++++++++++++++++++++++++++++++-----------
1 file changed, 51 insertions(+), 16 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 3bcc19ceb073..c7209c833e97 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -99,6 +99,7 @@ static void clocksource_select(void);

static LIST_HEAD(watchdog_list);
static struct clocksource *watchdog;
+static struct clocksource *watchdog_backup;
static struct timer_list watchdog_timer;
static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
static DEFINE_SPINLOCK(watchdog_lock);
@@ -190,6 +191,7 @@ static void clocksource_watchdog(struct timer_list *unused)
u64 csnow, wdnow, cslast, wdlast, delta;
int64_t wd_nsec, cs_nsec;
int next_cpu, reset_pending;
+ bool backup_consent = true;

spin_lock(&watchdog_lock);
if (!watchdog_running)
@@ -236,16 +238,33 @@ static void clocksource_watchdog(struct timer_list *unused)

/* Check the deviation from the watchdog clocksource. */
if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
- pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
- smp_processor_id(), cs->name);
- pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
- watchdog->name, wdnow, wdlast, watchdog->mask);
- pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
- cs->name, csnow, cslast, cs->mask);
- __clocksource_unstable(cs);
+
+ /* Backup watchdog clocksource is first in the list */
+ if (cs == watchdog_backup)
+ backup_consent = false;
+
+ if (backup_consent)
+ pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
+ smp_processor_id(), cs->name);
+ else
+ pr_warn("timekeeping watchdog on CPU%d: Ignoring watchdog '%s' hiccup because backup watchdog '%s' is not consent:\n",
+ smp_processor_id(), watchdog->name,
+ watchdog_backup->name);
+
+ pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx wd_nsec: %lld\n",
+ watchdog->name, wdnow, wdlast, watchdog->mask,
+ wd_nsec);
+ pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx cs_nsec: %lld\n",
+ cs->name, csnow, cslast, cs->mask, cs_nsec);
+
+ if (backup_consent)
+ __clocksource_unstable(cs);
continue;
}

+ if (cs == watchdog_backup)
+ continue;
+
if (cs == curr_clocksource && cs->tick_stable)
cs->tick_stable(cs);

@@ -345,7 +364,7 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
}
}

-static void clocksource_select_watchdog(bool fallback)
+static void clocksource_select_watchdog(struct clocksource *except)
{
struct clocksource *cs, *old_wd;
unsigned long flags;
@@ -353,26 +372,42 @@ static void clocksource_select_watchdog(bool fallback)
spin_lock_irqsave(&watchdog_lock, flags);
/* save current watchdog */
old_wd = watchdog;
- if (fallback)
+ if (watchdog == except)
watchdog = NULL;

+ if (watchdog_backup) {
+ list_del_init(&watchdog_backup->wd_list);
+ watchdog_backup = NULL;
+ }
+
list_for_each_entry(cs, &clocksource_list, list) {
/* cs is a clocksource to be watched. */
if (cs->flags & CLOCK_SOURCE_MUST_VERIFY)
continue;

/* Skip current if we were requested for a fallback. */
- if (fallback && cs == old_wd)
+ if (cs == except)
continue;

/* Pick the best watchdog. */
- if (!watchdog || cs->rating > watchdog->rating)
+ if (!watchdog || cs->rating > watchdog->rating) {
+ watchdog_backup = watchdog;
watchdog = cs;
+ }
+
+ /* Pick the second best for cross-validation. */
+ if (cs != watchdog &&
+ (!watchdog_backup || cs->rating > watchdog_backup->rating))
+ watchdog_backup = cs;
}
/* If we failed to find a fallback restore the old one. */
if (!watchdog)
watchdog = old_wd;

+ /* Backup watchdog must be first in the list. */
+ if (watchdog_backup)
+ list_add(&watchdog_backup->wd_list, &watchdog_list);
+
/* If we changed the watchdog we need to reset cycles. */
if (watchdog != old_wd)
clocksource_reset_watchdog();
@@ -430,7 +465,7 @@ static int clocksource_watchdog_kthread(void *data)

static bool clocksource_is_watchdog(struct clocksource *cs)
{
- return cs == watchdog;
+ return cs == watchdog || cs == watchdog_backup;
}

#else /* CONFIG_CLOCKSOURCE_WATCHDOG */
@@ -441,7 +476,7 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
}

-static void clocksource_select_watchdog(bool fallback) { }
+static void clocksource_select_watchdog(struct clocksource *except) { }
static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { }
static inline int __clocksource_watchdog_kthread(void) { return 0; }
@@ -933,7 +968,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
clocksource_watchdog_unlock(&flags);

clocksource_select();
- clocksource_select_watchdog(false);
+ clocksource_select_watchdog(NULL);
__clocksource_suspend_select(cs);
mutex_unlock(&clocksource_mutex);
return 0;
@@ -962,7 +997,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
clocksource_watchdog_unlock(&flags);

clocksource_select();
- clocksource_select_watchdog(false);
+ clocksource_select_watchdog(NULL);
clocksource_suspend_select(false);
mutex_unlock(&clocksource_mutex);
}
@@ -977,7 +1012,7 @@ static int clocksource_unbind(struct clocksource *cs)

if (clocksource_is_watchdog(cs)) {
/* Select and try to install a replacement watchdog. */
- clocksource_select_watchdog(true);
+ clocksource_select_watchdog(cs);
if (clocksource_is_watchdog(cs))
return -EBUSY;
}