Re: [PATCH 2/2] cpufreq/amd-pstate: Add support for the "Requested CPU Min frequency" BIOS option
From: Dhananjay Ugwekar
Date: Tue Apr 15 2025 - 23:29:52 EST
On 4/16/2025 2:24 AM, Mario Limonciello wrote:
> On 4/15/2025 5:21 AM, Dhananjay Ugwekar wrote:
>> Initialize lower frequency limit to the "Requested CPU Min frequency"
>> BIOS option (if it is set) value as part of the driver->init()
>> callback. The BIOS specified value is passed by the PMFW as min_perf in
>> CPPC_REQ MSR. To ensure that we don't mistake a stale min_perf value in
>> CPPC_REQ for the "Requested CPU Min frequency" during a kexec wakeup,
>> reset the CPPC_REQ.min_perf value back to the BIOS specified one in the
>> offline, exit and suspend callbacks.
>>
>> amd_pstate_target() and amd_pstate_epp_update_limit() which are invoked
>> as part of the resume() and online() callbacks will take care of restoring
>> the CPPC_REQ back to the latest sane values.
>>
>> Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@xxxxxxx>
>
> I'm generally fine with this, but I have one nit below.
>
>> ---
>> drivers/cpufreq/amd-pstate.c | 62 ++++++++++++++++++++++++++++--------
>> drivers/cpufreq/amd-pstate.h | 2 ++
>> 2 files changed, 51 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
>> index 02de51001eba..d94fd2a38990 100644
>> --- a/drivers/cpufreq/amd-pstate.c
>> +++ b/drivers/cpufreq/amd-pstate.c
>> @@ -389,7 +389,8 @@ static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy)
>> static int msr_init_perf(struct amd_cpudata *cpudata)
>> {
>> union perf_cached perf = READ_ONCE(cpudata->perf);
>> - u64 cap1, numerator;
>> + u64 cap1, numerator, cppc_req;
>> + u8 min_perf;
>> int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
>> &cap1);
>> @@ -400,6 +401,22 @@ static int msr_init_perf(struct amd_cpudata *cpudata)
>> if (ret)
>> return ret;
>> + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req);
>> + if (ret)
>> + return ret;
>> +
>> + WRITE_ONCE(cpudata->cppc_req_cached, cppc_req);
>> + min_perf = FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cppc_req);
>> +
>> + /*
>> + * Clear out the min_perf part to check if the rest of the MSR is 0; if so, this is an
>> + * indication that the min_perf value is the one specified through the BIOS option
>> + */
>> + cppc_req &= ~(AMD_CPPC_MIN_PERF_MASK);
>> +
>> + if (!cppc_req && min_perf)
>> + perf.bios_min_perf = min_perf;
>
> To avoid the risk of garbage in perf.bios_min_perf leading to hard-to-root-cause bugs, could we initialize it to 0 in the non-bios_min_perf case?
>
> something like this:
>
> cppc_req &= ~(AMD_CPPC_MIN_PERF_MASK);
> perf.bios_min_perf = (!cppc_req && min_perf) ? min_perf : 0;
Agreed, better to be safe, will amend
>
>> +
>> perf.highest_perf = numerator;
>> perf.max_limit_perf = numerator;
>> perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
>> @@ -580,20 +597,26 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
>> {
>> /*
>> * Initialize lower frequency limit (i.e.policy->min) with
>> - * lowest_nonlinear_frequency which is the most energy efficient
>> - * frequency. Override the initial value set by cpufreq core and
>> - * amd-pstate qos_requests.
>> + * lowest_nonlinear_frequency or the min frequency specified in BIOS (if set).
>> + * Override the initial value set by cpufreq core and amd-pstate qos_requests.
>> */
>> if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
>> struct cpufreq_policy *policy __free(put_cpufreq_policy) =
>> cpufreq_cpu_get(policy_data->cpu);
>> struct amd_cpudata *cpudata;
>> + union perf_cached perf;
>> if (!policy)
>> return -EINVAL;
>> cpudata = policy->driver_data;
>> - policy_data->min = cpudata->lowest_nonlinear_freq;
>> + perf = READ_ONCE(cpudata->perf);
>> +
>> + if (perf.bios_min_perf)
>> + policy_data->min = perf_to_freq(perf, cpudata->nominal_freq,
>> + perf.bios_min_perf);
>> + else
>> + policy_data->min = cpudata->lowest_nonlinear_freq;
>> }
>> cpufreq_verify_within_cpu_limits(policy_data);
>> @@ -1041,6 +1064,9 @@ static void amd_pstate_cpu_exit(struct cpufreq_policy *policy)
>> {
>> struct amd_cpudata *cpudata = policy->driver_data;
>> + /* Reset CPPC_REQ MSR to the BIOS value */
>> + amd_pstate_update_perf(policy, cpudata->perf.bios_min_perf, 0U, 0U, 0U, false);
>> +
>> freq_qos_remove_request(&cpudata->req[1]);
>> freq_qos_remove_request(&cpudata->req[0]);
>> policy->fast_switch_possible = false;
>> @@ -1428,7 +1454,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>> struct amd_cpudata *cpudata;
>> union perf_cached perf;
>> struct device *dev;
>> - u64 value;
>> int ret;
>> /*
>> @@ -1493,12 +1518,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>> cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
>> }
>> - if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
>> - ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
>> - if (ret)
>> - return ret;
>> - WRITE_ONCE(cpudata->cppc_req_cached, value);
>> - }
>> ret = amd_pstate_set_epp(policy, cpudata->epp_default);
>> if (ret)
>> return ret;
>> @@ -1518,6 +1537,9 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
>> struct amd_cpudata *cpudata = policy->driver_data;
>> if (cpudata) {
>> + /* Reset CPPC_REQ MSR to the BIOS value */
>> + amd_pstate_update_perf(policy, cpudata->perf.bios_min_perf, 0U, 0U, 0U, false);
>> +
>> kfree(cpudata);
>> policy->driver_data = NULL;
>> }
>> @@ -1575,13 +1597,27 @@ static int amd_pstate_cpu_online(struct cpufreq_policy *policy)
>> static int amd_pstate_cpu_offline(struct cpufreq_policy *policy)
>> {
>> - return 0;
>> + struct amd_cpudata *cpudata = policy->driver_data;
>> +
>> + /*
>> + * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
>> + * min_perf value across kexec reboots. If this CPU is just onlined normally after this, the
>> + * limits, epp and desired perf will get reset to the cached values in cpudata struct
>> + */
>> + return amd_pstate_update_perf(policy, cpudata->perf.bios_min_perf, 0U, 0U, 0U, false);
>> }
>> static int amd_pstate_suspend(struct cpufreq_policy *policy)
>> {
>> struct amd_cpudata *cpudata = policy->driver_data;
>> + /*
>> + * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
>> + * min_perf value across kexec reboots. If this CPU is just resumed back without kexec,
>> + * the limits, epp and desired perf will get reset to the cached values in cpudata struct
>> + */
>> + amd_pstate_update_perf(policy, cpudata->perf.bios_min_perf, 0U, 0U, 0U, false);
>> +
>> /* invalidate to ensure it's rewritten during resume */
>> cpudata->cppc_req_cached = 0;
>> diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
>> index fbe1c08d3f06..2f7ae364d331 100644
>> --- a/drivers/cpufreq/amd-pstate.h
>> +++ b/drivers/cpufreq/amd-pstate.h
>> @@ -30,6 +30,7 @@
>> * @lowest_perf: the absolute lowest performance level of the processor
>> * @min_limit_perf: Cached value of the performance corresponding to policy->min
>> * @max_limit_perf: Cached value of the performance corresponding to policy->max
>> + * @bios_min_perf: Cached perf value corresponding to the "Requested CPU Min Frequency" BIOS option
>> */
>> union perf_cached {
>> struct {
>> @@ -39,6 +40,7 @@ union perf_cached {
>> u8 lowest_perf;
>> u8 min_limit_perf;
>> u8 max_limit_perf;
>> + u8 bios_min_perf;
>> };
>> u64 val;
>> };
>