[PATCH v2] acpi: Issue _OSC call for native thermal interrupt handling

From: Srinivas Pandruvada
Date: Wed Mar 16 2016 - 18:48:42 EST


There are several reports of freeze on enabling HWP (Hardware PStates)
feature on Skylake based systems by Intel P states driver. The root
cause is identified as the HWP interrupts causing BIOS code to freeze.
HWP interrupts uses thermal LVT.
Linux natively handles thermal interrupts, but in Skylake based systems
SMM will take control of thermal interrupts. This is a problem for several
reasons:
- It is freezing in BIOS when tries to handle thermal interrupt, which
will require BIOS upgrade
- With SMM handling thermal we loose all the reporting features of
Linux arch/x86/kernel/cpu/mcheck/therm_throt driver
- Some thermal drivers like x86-package-temp driver depends on the thermal
threshold interrupts
- The HWP interrupts are useful for debugging and tuning performance

So we need native handling of thermal interrupts. To inform SMM that
OS will handle thermal interrupts, we need to use _OSC under processor
scope very early in ACPI initialization flow. This needs to be done
before SMM code path looks for _OSC capabilities. The bit 12 of
_OSC in processor scope defines whether OS will handle thermal interrupts.
When bit 12 is set to 1, OS will handle thermal interrupts.
Refer to this document for details on _OSC
http://www.intel.com/content/www/us/en/standards/processor-vendor-
specific-acpi-specification.html

This change introduces a new function acpi_early_processor_set_osc(),
which walks acpi name space and finds acpi processor object and
set capability via _OSC method to take over thermal LVT.

Also this change writes HWP status bits to 0 to clear any HWP status
bits in intel_thermal_interrupt().

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@xxxxxxxxxxxxxxx>
---
v2:
Unnecessary newline was introduced, removed that in acpi_processor.c


arch/x86/kernel/cpu/mcheck/therm_throt.c | 8 +++++++
drivers/acpi/acpi_processor.c | 41 ++++++++++++++++++++++++++++++++
drivers/acpi/bus.c | 3 +++
drivers/acpi/internal.h | 2 ++
4 files changed, 54 insertions(+)

diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 2c5aaf8..4599012 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -79,6 +79,8 @@ static atomic_t therm_throt_en = ATOMIC_INIT(0);

static u32 lvtthmr_init __read_mostly;

+static bool thermal_hwp_interrupt_support;
+
#ifdef CONFIG_SYSFS
#define define_therm_throt_device_one_ro(_name) \
static DEVICE_ATTR(_name, 0444, \
@@ -385,6 +387,9 @@ static void intel_thermal_interrupt(void)
{
__u64 msr_val;

+ if (thermal_hwp_interrupt_support)
+ wrmsrl_safe(MSR_HWP_STATUS, 0);
+
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);

/* Check for violation of core thermal thresholds*/
@@ -553,6 +558,9 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);

+ if (static_cpu_has(X86_FEATURE_HWP))
+ thermal_hwp_interrupt_support = true;
+
/* Unmask the thermal vector: */
l = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index 6979186..d88c463 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -491,6 +491,47 @@ static void acpi_processor_remove(struct acpi_device *device)
}
#endif /* CONFIG_ACPI_HOTPLUG_CPU */

+static bool acpi_hwp_native_thermal_lvt_set;
+static acpi_status acpi_set_hwp_native_thermal_lvt_osc(acpi_handle handle,
+ u32 lvl, void *context,
+ void **rv)
+{
+ u8 sb_uuid_str[] = "4077A616-290C-47BE-9EBD-D87058713953";
+ u32 capbuf[2];
+ struct acpi_osc_context osc_context = {
+ .uuid_str = sb_uuid_str,
+ .rev = 1,
+ .cap.length = 8,
+ .cap.pointer = capbuf,
+ };
+
+ if (acpi_hwp_native_thermal_lvt_set)
+ return AE_OK;
+
+ if (!static_cpu_has(X86_FEATURE_HWP))
+ return AE_OK;
+
+ capbuf[0] = 0x0000;
+ capbuf[1] = 0x1000; /* set bit 12 */
+
+ if (ACPI_SUCCESS(acpi_run_osc(handle, &osc_context))) {
+ acpi_hwp_native_thermal_lvt_set = true;
+ kfree(osc_context.ret.pointer);
+ }
+
+ return AE_OK;
+}
+
+void acpi_early_processor_set_osc(void)
+{
+ acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
+ ACPI_UINT32_MAX,
+ acpi_set_hwp_native_thermal_lvt_osc,
+ NULL, NULL, NULL);
+ acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID,
+ acpi_set_hwp_native_thermal_lvt_osc, NULL, NULL);
+}
+
/*
* The following ACPI IDs are known to be suitable for representing as
* processor devices.
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 891c42d..7e73aea 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1005,6 +1005,9 @@ static int __init acpi_bus_init(void)
goto error1;
}

+ /* Set capability bits for _OSC under processor scope */
+ acpi_early_processor_set_osc();
+
/*
* _OSC method may exist in module level code,
* so it must be run after ACPI_FULL_INITIALIZATION
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 1e6833a..5c787ac 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -138,6 +138,8 @@ void acpi_early_processor_set_pdc(void);
static inline void acpi_early_processor_set_pdc(void) {}
#endif

+void acpi_early_processor_set_osc(void);
+
/* --------------------------------------------------------------------------
Embedded Controller
-------------------------------------------------------------------------- */
--
2.5.0