[PATCH -v2] x86: Kill notsc

From: Borislav Petkov
Date: Thu Oct 22 2015 - 14:51:35 EST


On Wed, Oct 21, 2015 at 09:01:09PM +0200, Peter Zijlstra wrote:
> I _think_ so, but it's late. It would result in sched_clock() being 0
> until you hit that other bit, but that should be fine (maybe).

Ok, here's a v2, it boots fine here:

---
From: Borislav Petkov <bp@xxxxxxx>
Date: Sun, 18 Oct 2015 16:05:32 +0200
Subject: [PATCH -v2] x86: Kill notsc

Kill the "notsc" cmdline option and all the glue around it. The two boxes
worldwide which don't have a TSC should disable X86_TSC. Thus, make
native_sched_clock() use the TSC unconditionally when X86_TSC is enabled,
even if the TSC is marked unstable: the scheduler clock tolerates small
errors, so that's fine there. This gets rid of the static key too and
makes the function even simpler and faster, which is a Good Thing(tm).

The jiffies-fallback is for the !X86_TSC case.

Also, we need to initialize the cycles-to-nanoseconds machinery early,
before sched_init() -> init_idle() calls native_sched_clock(); otherwise
we explode, see link below.

Signed-off-by: Borislav Petkov <bp@xxxxxxx>
Link: http://lkml.kernel.org/r/20151021175803.GF3575@xxxxxxx
---
Documentation/kernel-parameters.txt | 2 -
Documentation/x86/x86_64/boot-options.txt | 5 --
arch/x86/include/asm/tsc.h | 4 +-
arch/x86/kernel/apic/apic.c | 2 +-
arch/x86/kernel/setup.c | 1 +
arch/x86/kernel/tsc.c | 91 +++++++++++--------------------
6 files changed, 37 insertions(+), 68 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 22a4b687ea5b..6d2504bd17ce 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2534,8 +2534,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.

nosync [HW,M68K] Disables sync negotiation for all devices.

- notsc [BUGS=X86-32] Disable Time Stamp Counter
-
nousb [USB] Disable the USB subsystem

nowatchdog [KNL] Disable both lockup detectors, i.e.
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 68ed3114c363..0e43d94d9567 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -88,11 +88,6 @@ APICs

Timing

- notsc
- Don't use the CPU time stamp counter to read the wall time.
- This can be used to work around timing problems on multiprocessor systems
- with not properly synchronized CPUs.
-
nohpet
Don't use the HPET timer.

diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 6d7c5479bcea..41fe8a08b497 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -30,10 +30,10 @@ static inline cycles_t get_cycles(void)
}

extern void tsc_init(void);
+extern void __init early_tsc_init(void);
extern void mark_tsc_unstable(char *reason);
extern int unsynchronized_tsc(void);
extern int check_tsc_unstable(void);
-extern int check_tsc_disabled(void);
extern unsigned long native_calibrate_tsc(void);
extern unsigned long long native_sched_clock_from_tsc(u64 tsc);

@@ -46,7 +46,7 @@ extern int tsc_clocksource_reliable;
extern void check_tsc_sync_source(int cpu);
extern void check_tsc_sync_target(void);

-extern int notsc_setup(char *);
+extern int notsc_setup(void);
extern void tsc_save_sched_clock_state(void);
extern void tsc_restore_sched_clock_state(void);

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 24e94ce454e2..19df3d640c15 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -152,7 +152,7 @@ static int apic_calibrate_pmtmr __initdata;
static __init int setup_apicpmtimer(char *s)
{
apic_calibrate_pmtmr = 1;
- notsc_setup(NULL);
+ notsc_setup();
return 0;
}
__setup("apicpmtimer", setup_apicpmtimer);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index fdb7f2a2d328..a8e2ab8cad5b 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1234,6 +1234,7 @@ void __init setup_arch(char **cmdline_p)
if (efi_enabled(EFI_BOOT))
efi_apply_memmap_quirks();
#endif
+ early_tsc_init();
}

#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c3f7602cd038..6211df475426 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -34,13 +34,6 @@ EXPORT_SYMBOL(tsc_khz);
*/
static int __read_mostly tsc_unstable;

-/* native_sched_clock() is called before tsc_init(), so
- we must start with the TSC soft disabled to prevent
- erroneous rdtsc usage on !cpu_has_tsc processors */
-static int __read_mostly tsc_disabled = -1;
-
-static DEFINE_STATIC_KEY_FALSE(__use_tsc);
-
int tsc_clocksource_reliable;

/*
@@ -270,29 +263,38 @@ done:
sched_clock_idle_wakeup_event(0);
local_irq_restore(flags);
}
+
+void __init early_tsc_init(void)
+{
+ int cpu;
+
+ /*
+ * Init the cycles to ns conversion machinery early: init_idle(), run
+ * from sched_init(), calls sched_clock() which needs it.
+ */
+ for_each_possible_cpu(cpu)
+ cyc2ns_init(cpu);
+}
+
/*
* Scheduler clock - returns current time in nanosec units.
*/
u64 native_sched_clock(void)
{
- if (static_branch_likely(&__use_tsc)) {
- u64 tsc_now = rdtsc();
-
- /* return the value in ns */
- return cycles_2_ns(tsc_now);
- }
-
+#ifdef CONFIG_X86_TSC
+ /* return the value in ns */
+ return cycles_2_ns(rdtsc());
+#else
/*
- * Fall back to jiffies if there's no TSC available:
- * ( But note that we still use it if the TSC is marked
- * unstable. We do this because unlike Time Of Day,
- * the scheduler clock tolerates small errors and it's
- * very important for it to be as fast as the platform
- * can achieve it. )
+ * Fall back to jiffies if there's no TSC available: ( But note that we
+ * still use it if the TSC is marked unstable. We do this because unlike
+ * Time Of Day, the scheduler clock tolerates small errors and it's very
+ * important for it to be as fast as the platform can achieve it. )
+ *
+ * No locking - a rare wrong value is not a big deal:
*/
-
- /* No locking but a rare wrong value is not a big deal: */
return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
+#endif
}

/*
@@ -321,32 +323,15 @@ int check_tsc_unstable(void)
}
EXPORT_SYMBOL_GPL(check_tsc_unstable);

-int check_tsc_disabled(void)
-{
- return tsc_disabled;
-}
-EXPORT_SYMBOL_GPL(check_tsc_disabled);
-
-#ifdef CONFIG_X86_TSC
-int __init notsc_setup(char *str)
-{
- pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n");
- tsc_disabled = 1;
- return 1;
-}
-#else
-/*
- * disable flag for tsc. Takes effect by clearing the TSC cpu flag
- * in cpu/common.c
- */
-int __init notsc_setup(char *str)
+/* Disable the TSC feature flag to avoid further TSC use. */
+int __init notsc_setup(void)
{
+#ifndef CONFIG_X86_TSC
setup_clear_cpu_cap(X86_FEATURE_TSC);
return 1;
-}
#endif
-
-__setup("notsc", notsc_setup);
+ return 0;
+}

static int no_sched_irq_time;

@@ -1139,7 +1124,7 @@ out:

static int __init init_tsc_clocksource(void)
{
- if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz)
+ if (!cpu_has_tsc || !tsc_khz)
return 0;

if (tsc_clocksource_reliable)
@@ -1178,7 +1163,7 @@ void __init tsc_init(void)

x86_init.timers.tsc_pre_init();

- if (!cpu_has_tsc) {
+ if (notsc_setup()) {
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return;
}
@@ -1202,18 +1187,8 @@ void __init tsc_init(void)
* speed as the bootup CPU. (cpufreq notifiers will fix this
* up if their speed diverges)
*/
- for_each_possible_cpu(cpu) {
- cyc2ns_init(cpu);
+ for_each_possible_cpu(cpu)
set_cyc2ns_scale(cpu_khz, cpu);
- }
-
- if (tsc_disabled > 0)
- return;
-
- /* now allow native_sched_clock() to use rdtsc */
-
- tsc_disabled = 0;
- static_branch_enable(&__use_tsc);

if (!no_sched_irq_time)
enable_sched_clock_irqtime();
@@ -1241,7 +1216,7 @@ unsigned long calibrate_delay_is_known(void)
{
int i, cpu = smp_processor_id();

- if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
+ if (!cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
return 0;

for_each_online_cpu(i)
--
2.3.5

--
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/