[PATCH] cpufreq: intel_pstate: Clear HWP desired on suspend/shutdown and offline

From: Rafael J. Wysocki
Date: Wed Nov 03 2021 - 14:43:54 EST


From: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>

Commit a365ab6b9dfb ("cpufreq: intel_pstate: Implement the
->adjust_perf() callback") caused intel_pstate to use nonzero HWP
desired values in certain usage scenarios, but it did not prevent
them from being leaked into the confugirations in which HWP desired
is expected to be 0.

The failing scenarios are switching the driver from the passive
mode to the active mode and starting a new kernel via kexec() while
intel_pstate is running in the passive mode.

To address this issue, ensure that HWP desired will be cleared on
offline and suspend/shutdown.

Fixes: a365ab6b9dfb ("cpufreq: intel_pstate: Implement the ->adjust_perf() callback")
Reported-by: Julia Lawall <julia.lawall@xxxxxxxx>
Tested-by: Julia Lawall <julia.lawall@xxxxxxxx>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>
---
drivers/cpufreq/intel_pstate.c | 32 ++++++++++++++++++++++++++++++--
1 file changed, 30 insertions(+), 2 deletions(-)

Index: linux-pm/drivers/cpufreq/intel_pstate.c
===================================================================
--- linux-pm.orig/drivers/cpufreq/intel_pstate.c
+++ linux-pm/drivers/cpufreq/intel_pstate.c
@@ -1006,9 +1006,16 @@ static void intel_pstate_hwp_offline(str
*/
value &= ~GENMASK_ULL(31, 24);
value |= HWP_ENERGY_PERF_PREFERENCE(cpu->epp_cached);
- WRITE_ONCE(cpu->hwp_req_cached, value);
}

+ /*
+ * Clear the desired perf field in the cached HWP request value to
+ * prevent nonzero desired values from being leaked into the active
+ * mode.
+ */
+ value &= ~HWP_DESIRED_PERF(~0L);
+ WRITE_ONCE(cpu->hwp_req_cached, value);
+
value &= ~GENMASK_ULL(31, 0);
min_perf = HWP_LOWEST_PERF(READ_ONCE(cpu->hwp_cap_cached));

@@ -3003,6 +3010,27 @@ static int intel_cpufreq_cpu_exit(struct
return intel_pstate_cpu_exit(policy);
}

+static int intel_cpufreq_suspend(struct cpufreq_policy *policy)
+{
+ intel_pstate_suspend(policy);
+
+ if (hwp_active) {
+ struct cpudata *cpu = all_cpu_data[policy->cpu];
+ u64 value = READ_ONCE(cpu->hwp_req_cached);
+
+ /*
+ * Clear the desired perf field in MSR_HWP_REQUEST in case
+ * intel_cpufreq_adjust_perf() is in use and the last value
+ * written by it may not be suitable.
+ */
+ value &= ~HWP_DESIRED_PERF(~0L);
+ wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
+ WRITE_ONCE(cpu->hwp_req_cached, value);
+ }
+
+ return 0;
+}
+
static struct cpufreq_driver intel_cpufreq = {
.flags = CPUFREQ_CONST_LOOPS,
.verify = intel_cpufreq_verify_policy,
@@ -3012,7 +3040,7 @@ static struct cpufreq_driver intel_cpufr
.exit = intel_cpufreq_cpu_exit,
.offline = intel_cpufreq_cpu_offline,
.online = intel_pstate_cpu_online,
- .suspend = intel_pstate_suspend,
+ .suspend = intel_cpufreq_suspend,
.resume = intel_pstate_resume,
.update_limits = intel_pstate_update_limits,
.name = "intel_cpufreq",