[PATCH 09/10] x86 tsc: enumerate SKL cpu_khz and tsc_khz via CPUID

From: Len Brown
Date: Fri Jun 17 2016 - 01:24:01 EST


From: Len Brown <len.brown@xxxxxxxxx>

Skylake CPU base-frequency and TSC frequency may differ
by up to 2%.

Enumerate CPU and TSC frequencies separately, allowing
cpu_khz and tsc_khz to differ.

The existing CPU frequency calibration mechanism is unchanged.
However, CPUID extensions are preferred, when available.

CPUID.0x16 is preferred over MSR and timer calibration
for CPU frequency discovery.

For TSC frequency discovery, CPUID.0x15 takes precedence;
when it yields no result, tsc_khz falls back to cpu_khz.

Signed-off-by: Len Brown <len.brown@xxxxxxxxx>
---
arch/x86/include/asm/tsc.h | 1 +
arch/x86/include/asm/x86_init.h | 4 ++-
arch/x86/kernel/tsc.c | 75 +++++++++++++++++++++++++++++++++++++----
arch/x86/kernel/x86_init.c | 1 +
4 files changed, 73 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index db1f779..a30591e 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -36,6 +36,7 @@ extern void mark_tsc_unstable(char *reason);
extern int unsynchronized_tsc(void);
extern int check_tsc_unstable(void);
extern int check_tsc_disabled(void);
+extern unsigned long native_calibrate_cpu(void);
extern unsigned long native_calibrate_tsc(void);
extern unsigned long long native_sched_clock_from_tsc(u64 tsc);

diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 4dcdf74..08a08a8 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -181,7 +181,8 @@ struct x86_legacy_features {

/**
* struct x86_platform_ops - platform specific runtime functions
- * @calibrate_tsc: calibrate TSC
+ * @calibrate_cpu: calibrate CPU
+ * @calibrate_tsc: calibrate TSC, if different from CPU
* @get_wallclock: get time from HW clock like RTC etc.
* @set_wallclock: set time back to HW clock
* @is_untracked_pat_range exclude from PAT logic
@@ -200,6 +201,7 @@ struct x86_legacy_features {
* semantics.
*/
struct x86_platform_ops {
+ unsigned long (*calibrate_cpu)(void);
unsigned long (*calibrate_tsc)(void);
void (*get_wallclock)(struct timespec *ts);
int (*set_wallclock)(const struct timespec *ts);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 35a3976..e1496b7 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -239,7 +239,7 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
return ns;
}

-static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
+static void set_cyc2ns_scale(unsigned long khz, int cpu)
{
unsigned long long tsc_now, ns_now;
struct cyc2ns_data *data;
@@ -248,7 +248,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
local_irq_save(flags);
sched_clock_idle_sleep_event();

- if (!cpu_khz)
+ if (!khz)
goto done;

data = cyc2ns_write_begin(cpu);
@@ -261,7 +261,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
* time function is continuous; see the comment near struct
* cyc2ns_data.
*/
- clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, cpu_khz,
+ clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, khz,
NSEC_PER_MSEC, 0);

/*
@@ -665,15 +665,72 @@ success:
}

/**
- * native_calibrate_tsc - calibrate the tsc on boot
+ * native_calibrate_tsc - TSC frequency in kHz from CPUID.0x15
+ * Returns 0 if the leaf is absent or unusable, or the result would wrap.
*/
unsigned long native_calibrate_tsc(void)
{
+	unsigned int eax_denominator = 0, ebx_numerator = 0, ecx_hz = 0;
+	unsigned int edx = 0, crystal_khz;
+
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+	    boot_cpu_data.cpuid_level < 0x15)
+		return 0;
+
+	/* CPUID 15H TSC/Crystal ratio, plus optionally Crystal Hz */
+	cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx);
+	if (ebx_numerator == 0 || eax_denominator == 0)
+		return 0;
+
+	crystal_khz = ecx_hz / 1000;
+	if (crystal_khz == 0) {
+		switch (boot_cpu_data.x86_model) {
+		case 0x4E:	/* SKL */
+		case 0x5E:	/* SKL */
+			crystal_khz = 24000;	/* 24 MHz */
+			break;
+		default:
+			return 0;	/* crystal unknown; caller uses cpu_khz */
+		}
+	}
+
+	/* The product below is 32-bit; never return a wrapped frequency. */
+	if (crystal_khz > UINT_MAX / ebx_numerator)
+		return 0;
+	return crystal_khz * ebx_numerator / eax_denominator;
+}
+
+/*
+ * CPU base frequency in kHz as reported by CPUID.0x16, else 0.
+ */
+static unsigned long cpu_khz_from_cpuid(void)
+{
+	unsigned int base_mhz = 0, max_mhz = 0, bus_mhz = 0, edx = 0;
+
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+	    boot_cpu_data.cpuid_level < 0x16)
+		return 0;
+
+	cpuid(0x16, &base_mhz, &max_mhz, &bus_mhz, &edx);
+
+	/* Only the base frequency (EAX) is used; scale MHz to kHz. */
+	return base_mhz * 1000;
+}
+
+/**
+ * native_calibrate_cpu - calibrate the cpu on boot
+ */
+unsigned long native_calibrate_cpu(void)
+{
u64 tsc1, tsc2, delta, ref1, ref2;
unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
unsigned long flags, latch, ms, fast_calibrate;
int hpet = is_hpet_enabled(), i, loopmin;

+ fast_calibrate = cpu_khz_from_cpuid();
+ if (fast_calibrate)
+ return fast_calibrate;
+
fast_calibrate = cpu_khz_from_msr();
if (fast_calibrate)
return fast_calibrate;
@@ -834,8 +891,10 @@ int recalibrate_cpu_khz(void)
if (!boot_cpu_has(X86_FEATURE_TSC))
return -ENODEV;

+ cpu_khz = x86_platform.calibrate_cpu();
tsc_khz = x86_platform.calibrate_tsc();
- cpu_khz = tsc_khz;
+ if (tsc_khz == 0)
+ tsc_khz = cpu_khz;
cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
cpu_khz_old, cpu_khz);

@@ -1241,8 +1300,10 @@ void __init tsc_init(void)
return;
}

+ cpu_khz = x86_platform.calibrate_cpu();
tsc_khz = x86_platform.calibrate_tsc();
- cpu_khz = tsc_khz;
+ if (tsc_khz == 0)
+ tsc_khz = cpu_khz;

if (!tsc_khz) {
mark_tsc_unstable("could not calculate TSC khz");
@@ -1262,7 +1323,7 @@ void __init tsc_init(void)
*/
for_each_possible_cpu(cpu) {
cyc2ns_init(cpu);
- set_cyc2ns_scale(cpu_khz, cpu);
+ set_cyc2ns_scale(tsc_khz, cpu);
}

if (tsc_disabled > 0)
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index dad5fe9..58b4592 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -92,6 +92,7 @@ static void default_nmi_init(void) { };
static int default_i8042_detect(void) { return 1; };

struct x86_platform_ops x86_platform = {
+ .calibrate_cpu = native_calibrate_cpu,
.calibrate_tsc = native_calibrate_tsc,
.get_wallclock = mach_get_cmos_time,
.set_wallclock = mach_set_rtc_mmss,
--
2.9.0