[PATCH 1/3] x86/kernel: Add option that TSC ADJUST on Socket 0 being non-zero is valid

From: mike.travis
Date: Thu Sep 21 2017 - 16:22:26 EST


Add a flag to indicate that a non-zero TSC ADJUST value is valid on
Socket 0. This is required on multi-chassis systems in which the Time
Stamp Counters of the individual chassis are started asynchronously.
The UV architecture is an example of this.

In this scenario the UV system BIOS will adjust all the TSC ADJUST values
forward so there are no negative ADJUST values. As a result, the TSC
ADJUST value on socket 0 is not necessarily zero.

This procedure results in TSC skew rates that are far lower than those
produced by the current kernel TSC adjustment functions, which force the
TSC ADJUST to be zero on Socket 0. Note also that an assumption of
zero for TSC ADJUST was also made by not initializing the adjust values.

Signed-off-by: Mike Travis <mike.travis@xxxxxxx>
Reviewed-by: Dimitri Sivanich <dimitri.sivanich@xxxxxxx>
Reviewed-by: Russ Anderson <russ.anderson@xxxxxxx>
Reviewed-by: Andrew Banman <andrew.abanman@xxxxxxx>
---
arch/x86/include/asm/tsc.h | 2 ++
arch/x86/kernel/tsc.c | 19 +++++++++++++++++++
arch/x86/kernel/tsc_sync.c | 25 +++++++++++++++++++++----
3 files changed, 42 insertions(+), 4 deletions(-)

--- linux.orig/arch/x86/include/asm/tsc.h
+++ linux/arch/x86/include/asm/tsc.h
@@ -35,6 +35,8 @@ extern void tsc_init(void);
extern void mark_tsc_unstable(char *reason);
extern int unsynchronized_tsc(void);
extern int check_tsc_unstable(void);
+extern int check_tsc_socket0_nonzero(void);
+extern void mark_tsc_socket0_nonzero(char *reason);
extern unsigned long native_calibrate_cpu(void);
extern unsigned long native_calibrate_tsc(void);
extern unsigned long long native_sched_clock_from_tsc(u64 tsc);
--- linux.orig/arch/x86/kernel/tsc.c
+++ linux/arch/x86/kernel/tsc.c
@@ -37,6 +37,11 @@ EXPORT_SYMBOL(tsc_khz);
*/
static int __read_mostly tsc_unstable;

+/*
+ * TSC ADJUST on socket 0 being non-zero may be correct as set by BIOS
+ */
+static int __read_mostly tsc_socket0_nonzero;
+
/* native_sched_clock() is called before tsc_init(), so
we must start with the TSC soft disabled to prevent
erroneous rdtsc usage on !boot_cpu_has(X86_FEATURE_TSC) processors */
@@ -244,6 +249,20 @@ int check_tsc_unstable(void)
}
EXPORT_SYMBOL_GPL(check_tsc_unstable);

+void mark_tsc_socket0_nonzero(char *reason)
+{
+ tsc_socket0_nonzero = 1;
+ pr_info("Marking TSC non-zero value valid for socket 0 due to %s\n",
+ reason);
+}
+EXPORT_SYMBOL_GPL(mark_tsc_socket0_nonzero);
+
+int check_tsc_socket0_nonzero(void)
+{
+ return tsc_socket0_nonzero;
+}
+EXPORT_SYMBOL_GPL(check_tsc_socket0_nonzero);
+
#ifdef CONFIG_X86_TSC
int __init notsc_setup(char *str)
{
--- linux.orig/arch/x86/kernel/tsc_sync.c
+++ linux/arch/x86/kernel/tsc_sync.c
@@ -71,12 +71,22 @@ static void tsc_sanitize_first_cpu(struc
* non zero. We don't do that on non boot cpus because physical
* hotplug should have set the ADJUST register to a value > 0 so
* the TSC is in sync with the already running cpus.
+ *
+ * Also don't force the ADJUST value from non-zero to zero if that
+ * is a valid value for socket 0 as determined by the system BIOS.
+ * This is required where multiple chassis are started asynchronously
+ * with each other and socket 0 may not have a TSC ADJUST value of 0.
*/
if (bootcpu && bootval != 0) {
- pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n", cpu,
- bootval);
- wrmsrl(MSR_IA32_TSC_ADJUST, 0);
- bootval = 0;
+ if (!check_tsc_socket0_nonzero()) {
+ pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n",
+ cpu, bootval);
+ wrmsrl(MSR_IA32_TSC_ADJUST, 0);
+ bootval = 0;
+ } else {
+ pr_info("TSC ADJUST: CPU%u: %lld NOT forced to 0\n",
+ cpu, bootval);
+ }
}
cur->adjusted = bootval;
}
@@ -118,6 +128,13 @@ bool tsc_store_and_check_tsc_adjust(bool
cur->warned = false;

/*
+ * If a non-zero TSC value for socket 0 is valid then the default
+ * adjusted value cannot be assumed to be zero.
+ */
+ if (check_tsc_socket0_nonzero())
+ cur->adjusted = bootval;
+
+ /*
* Check whether this CPU is the first in a package to come up. In
* this case do not check the boot value against another package
* because the new package might have been physically hotplugged,

--