[tip:x86/timers] x86/tsc: Introduce early tsc clocksource

From: tip-bot for Peter Zijlstra
Date: Sun Jan 14 2018 - 14:25:12 EST


Commit-ID: aa83c45762a242acce9b35020363225a7b59d7c9
Gitweb: https://git.kernel.org/tip/aa83c45762a242acce9b35020363225a7b59d7c9
Author: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate: Fri, 22 Dec 2017 10:20:13 +0100
Committer: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CommitDate: Sun, 14 Jan 2018 20:18:23 +0100

x86/tsc: Introduce early tsc clocksource

Without TSC_KNOWN_FREQ the TSC clocksource is registered so late that the
kernel first switches to the HPET. Using HPET on large CPU count machines is
undesirable.

Therefore register a tsc-early clocksource using the preliminary tsc_khz
from quick calibration. Then when the final TSC calibration is done, it
can switch to the tuned frequency.

The only notably problem is that the real tsc clocksource must be marked
with CLOCK_SOURCE_VALID_FOR_HRES, otherwise it will not be selected when
unregistering tsc-early. tsc-early cannot be left registered, because then
the clocksource code would fall back to it when we tsc clocksource is
marked unstable later.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: len.brown@xxxxxxxxx
Cc: rui.zhang@xxxxxxxxx
Cc: Len Brown <lenb@xxxxxxxxxx>
Link: https://lkml.kernel.org/r/20171222092243.431585460@xxxxxxxxxxxxx

---
arch/x86/kernel/tsc.c | 43 +++++++++++++++++++++++++++++++++----------
1 file changed, 33 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a2c9dd8..fb43027 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1006,8 +1006,6 @@ static void __init detect_art(void)

/* clocksource code */

-static struct clocksource clocksource_tsc;
-
static void tsc_resume(struct clocksource *cs)
{
tsc_verify_tsc_adjust(true);
@@ -1058,12 +1056,31 @@ static void tsc_cs_tick_stable(struct clocksource *cs)
/*
* .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
*/
+static struct clocksource clocksource_tsc_early = {
+ .name = "tsc-early",
+ .rating = 299,
+ .read = read_tsc,
+ .mask = CLOCKSOURCE_MASK(64),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS |
+ CLOCK_SOURCE_MUST_VERIFY,
+ .archdata = { .vclock_mode = VCLOCK_TSC },
+ .resume = tsc_resume,
+ .mark_unstable = tsc_cs_mark_unstable,
+ .tick_stable = tsc_cs_tick_stable,
+};
+
+/*
+ * Must mark VALID_FOR_HRES early such that when we unregister tsc_early
+ * this one will immediately take over. We will only register if TSC has
+ * been found good.
+ */
static struct clocksource clocksource_tsc = {
.name = "tsc",
.rating = 300,
.read = read_tsc,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
+ CLOCK_SOURCE_VALID_FOR_HRES |
CLOCK_SOURCE_MUST_VERIFY,
.archdata = { .vclock_mode = VCLOCK_TSC },
.resume = tsc_resume,
@@ -1187,8 +1204,8 @@ static void tsc_refine_calibration_work(struct work_struct *work)
int cpu;

/* Don't bother refining TSC on unstable systems */
- if (check_tsc_unstable())
- goto out;
+ if (tsc_unstable)
+ return;

/*
* Since the work is started early in boot, we may be
@@ -1240,9 +1257,13 @@ static void tsc_refine_calibration_work(struct work_struct *work)
set_cyc2ns_scale(tsc_khz, cpu, tsc_stop);

out:
+ if (tsc_unstable)
+ return;
+
if (boot_cpu_has(X86_FEATURE_ART))
art_related_clocksource = &clocksource_tsc;
clocksource_register_khz(&clocksource_tsc, tsc_khz);
+ clocksource_unregister(&clocksource_tsc_early);
}


@@ -1251,13 +1272,11 @@ static int __init init_tsc_clocksource(void)
if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz)
return 0;

+ if (check_tsc_unstable())
+ return 0;
+
if (tsc_clocksource_reliable)
clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
- /* lower the rating if we already know its unstable: */
- if (check_tsc_unstable()) {
- clocksource_tsc.rating = 0;
- clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
- }

if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
@@ -1270,6 +1289,7 @@ static int __init init_tsc_clocksource(void)
if (boot_cpu_has(X86_FEATURE_ART))
art_related_clocksource = &clocksource_tsc;
clocksource_register_khz(&clocksource_tsc, tsc_khz);
+ clocksource_unregister(&clocksource_tsc_early);
return 0;
}

@@ -1374,9 +1394,12 @@ void __init tsc_init(void)

check_system_tsc_reliable();

- if (unsynchronized_tsc())
+ if (unsynchronized_tsc()) {
mark_tsc_unstable("TSCs unsynchronized");
+ return;
+ }

+ clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
detect_art();
}