[tip:timers/urgent] clocksource: Revert "Remove kthread"

From: tip-bot for Peter Zijlstra
Date: Thu Sep 06 2018 - 06:48:29 EST


Commit-ID: 760902b24960679c2e8592de3a56359d2c205731
Gitweb: https://git.kernel.org/tip/760902b24960679c2e8592de3a56359d2c205731
Author: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate: Wed, 5 Sep 2018 10:41:58 +0200
Committer: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CommitDate: Thu, 6 Sep 2018 12:42:28 +0200

clocksource: Revert "Remove kthread"

I turns out that the silly spawn kthread from worker was actually needed.

clocksource_watchdog_kthread() cannot be called directly from
clocksource_watchdog_work(), because clocksource_select() calls
timekeeping_notify() which uses stop_machine(). One cannot use
stop_machine() from a workqueue() due lock inversions wrt CPU hotplug.

Revert the patch but add a comment that explain why we jump through such
apparently silly hoops.

Fixes: 7197e77abcb6 ("clocksource: Remove kthread")
Reported-by: Siegfried Metz <frame@xxxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Tested-by: Niklas Cassel <niklas.cassel@xxxxxxxxxx>
Tested-by: Kevin Shanahan <kevin@xxxxxxxxxxxxxx>
Tested-by: viktor_jaegerskuepper@xxxxxxxxxx
Tested-by: Siegfried Metz <frame@xxxxxxxxxxx>
Cc: rafael.j.wysocki@xxxxxxxxx
Cc: len.brown@xxxxxxxxx
Cc: diego.viola@xxxxxxxxx
Cc: rui.zhang@xxxxxxxxx
Cc: bjorn.andersson@xxxxxxxxxx
Link: https://lkml.kernel.org/r/20180905084158.GR24124@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
---
kernel/time/clocksource.c | 40 ++++++++++++++++++++++++++++++----------
1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index f74fb00d8064..0e6e97a01942 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -133,19 +133,40 @@ static void inline clocksource_watchdog_unlock(unsigned long *flags)
spin_unlock_irqrestore(&watchdog_lock, *flags);
}

+static int clocksource_watchdog_kthread(void *data);
+static void __clocksource_change_rating(struct clocksource *cs, int rating);
+
/*
* Interval: 0.5sec Threshold: 0.0625s
*/
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)

+static void clocksource_watchdog_work(struct work_struct *work)
+{
+ /*
+ * We cannot directly run clocksource_watchdog_kthread() here, because
+ * clocksource_select() calls timekeeping_notify() which uses
+ * stop_machine(). One cannot use stop_machine() from a workqueue() due
+ * lock inversions wrt CPU hotplug.
+ *
+ * Also, we only ever run this work once or twice during the lifetime
+ * of the kernel, so there is no point in creating a more permanent
+ * kthread for this.
+ *
+ * If kthread_run fails the next watchdog scan over the
+ * watchdog_list will find the unstable clock again.
+ */
+ kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
+}
+
static void __clocksource_unstable(struct clocksource *cs)
{
cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
cs->flags |= CLOCK_SOURCE_UNSTABLE;

/*
- * If the clocksource is registered clocksource_watchdog_work() will
+ * If the clocksource is registered clocksource_watchdog_kthread() will
* re-rate and re-select.
*/
if (list_empty(&cs->list)) {
@@ -156,7 +177,7 @@ static void __clocksource_unstable(struct clocksource *cs)
if (cs->mark_unstable)
cs->mark_unstable(cs);

- /* kick clocksource_watchdog_work() */
+ /* kick clocksource_watchdog_kthread() */
if (finished_booting)
schedule_work(&watchdog_work);
}
@@ -166,7 +187,7 @@ static void __clocksource_unstable(struct clocksource *cs)
* @cs: clocksource to be marked unstable
*
* This function is called by the x86 TSC code to mark clocksources as unstable;
- * it defers demotion and re-selection to a work.
+ * it defers demotion and re-selection to a kthread.
*/
void clocksource_mark_unstable(struct clocksource *cs)
{
@@ -391,9 +412,7 @@ static void clocksource_dequeue_watchdog(struct clocksource *cs)
}
}

-static void __clocksource_change_rating(struct clocksource *cs, int rating);
-
-static int __clocksource_watchdog_work(void)
+static int __clocksource_watchdog_kthread(void)
{
struct clocksource *cs, *tmp;
unsigned long flags;
@@ -418,12 +437,13 @@ static int __clocksource_watchdog_work(void)
return select;
}

-static void clocksource_watchdog_work(struct work_struct *work)
+static int clocksource_watchdog_kthread(void *data)
{
mutex_lock(&clocksource_mutex);
- if (__clocksource_watchdog_work())
+ if (__clocksource_watchdog_kthread())
clocksource_select();
mutex_unlock(&clocksource_mutex);
+ return 0;
}

static bool clocksource_is_watchdog(struct clocksource *cs)
@@ -442,7 +462,7 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
static void clocksource_select_watchdog(bool fallback) { }
static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { }
-static inline int __clocksource_watchdog_work(void) { return 0; }
+static inline int __clocksource_watchdog_kthread(void) { return 0; }
static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
void clocksource_mark_unstable(struct clocksource *cs) { }

@@ -810,7 +830,7 @@ static int __init clocksource_done_booting(void)
/*
* Run the watchdog first to eliminate unstable clock sources
*/
- __clocksource_watchdog_work();
+ __clocksource_watchdog_kthread();
clocksource_select();
mutex_unlock(&clocksource_mutex);
return 0;