Re: [PATCH 2/2] cpufreq/amd-pstate: Add support for the "Requested CPU Min frequency" BIOS option

From: Dhananjay Ugwekar
Date: Tue Apr 15 2025 - 23:29:52 EST


On 4/16/2025 2:24 AM, Mario Limonciello wrote:
> On 4/15/2025 5:21 AM, Dhananjay Ugwekar wrote:
>> Initialize lower frequency limit to the "Requested CPU Min frequency"
>> BIOS option (if it is set) value as part of the driver->init()
>> callback. The BIOS specified value is passed by the PMFW as min_perf in
>> CPPC_REQ MSR. To ensure that we don't mistake a stale min_perf value in
>> CPPC_REQ value as the "Requested CPU Min frequency" during a kexec wakeup,
>> reset the CPPC_REQ.min_perf value back to the BIOS specified one in the
>> offline, exit and suspend callbacks.
>>
>> amd_pstate_target() and amd_pstate_epp_update_limit() which are invoked
>> as part of the resume() and online() callbacks will take care of restoring
>> the CPPC_REQ back to the latest sane values.
>>
>> Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@xxxxxxx>
>
> I'm generally fine with this, but I have one nit below.
>
>> ---
>>   drivers/cpufreq/amd-pstate.c | 62 ++++++++++++++++++++++++++++--------
>>   drivers/cpufreq/amd-pstate.h |  2 ++
>>   2 files changed, 51 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
>> index 02de51001eba..d94fd2a38990 100644
>> --- a/drivers/cpufreq/amd-pstate.c
>> +++ b/drivers/cpufreq/amd-pstate.c
>> @@ -389,7 +389,8 @@ static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy)
>>   static int msr_init_perf(struct amd_cpudata *cpudata)
>>   {
>>       union perf_cached perf = READ_ONCE(cpudata->perf);
>> -    u64 cap1, numerator;
>> +    u64 cap1, numerator, cppc_req;
>> +    u8 min_perf;
>>         int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
>>                        &cap1);
>> @@ -400,6 +401,22 @@ static int msr_init_perf(struct amd_cpudata *cpudata)
>>       if (ret)
>>           return ret;
>>   +    ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req);
>> +    if (ret)
>> +        return ret;
>> +
>> +    WRITE_ONCE(cpudata->cppc_req_cached, cppc_req);
>> +    min_perf = FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cppc_req);
>> +
>> +    /*
>> +     * Clear out the min_perf part to check if the rest of the MSR is 0, if yes, this is an
>> +     * indication that the min_perf value is the one specified through the BIOS option
>> +     */
>> +    cppc_req &= ~(AMD_CPPC_MIN_PERF_MASK);
>> +
>> +    if (!cppc_req && min_perf)
>> +        perf.bios_min_perf = min_perf;
>
> To avoid a risk of garbage being in perf.bios_min_perf leading to hard to root cause bugs could we initialize this to 0 in the non bios_min_perf case?
>
> something like this:
>
> cppc_req &= ~(AMD_CPPC_MIN_PERF_MASK);
> perf.bios_min_perf = (!cppc_req && min_perf) ? min_perf : 0;

Agreed, better to be safe, will amend

>
>> +
>>       perf.highest_perf = numerator;
>>       perf.max_limit_perf = numerator;
>>       perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
>> @@ -580,20 +597,26 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
>>   {
>>       /*
>>        * Initialize lower frequency limit (i.e.policy->min) with
>> -     * lowest_nonlinear_frequency which is the most energy efficient
>> -     * frequency. Override the initial value set by cpufreq core and
>> -     * amd-pstate qos_requests.
>> +     * lowest_nonlinear_frequency or the min frequency (if) specified in BIOS,
>> +     * Override the initial value set by cpufreq core and amd-pstate qos_requests.
>>        */
>>       if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
>>           struct cpufreq_policy *policy __free(put_cpufreq_policy) =
>>                             cpufreq_cpu_get(policy_data->cpu);
>>           struct amd_cpudata *cpudata;
>> +        union perf_cached perf;
>>             if (!policy)
>>               return -EINVAL;
>>             cpudata = policy->driver_data;
>> -        policy_data->min = cpudata->lowest_nonlinear_freq;
>> +        perf = READ_ONCE(cpudata->perf);
>> +
>> +        if (perf.bios_min_perf)
>> +            policy_data->min = perf_to_freq(perf, cpudata->nominal_freq,
>> +                            perf.bios_min_perf);
>> +        else
>> +            policy_data->min = cpudata->lowest_nonlinear_freq;
>>       }
>>         cpufreq_verify_within_cpu_limits(policy_data);
>> @@ -1041,6 +1064,9 @@ static void amd_pstate_cpu_exit(struct cpufreq_policy *policy)
>>   {
>>       struct amd_cpudata *cpudata = policy->driver_data;
>>   +    /* Reset CPPC_REQ MSR to the BIOS value */
>> +    amd_pstate_update_perf(policy, cpudata->perf.bios_min_perf, 0U, 0U, 0U, false);
>> +
>>       freq_qos_remove_request(&cpudata->req[1]);
>>       freq_qos_remove_request(&cpudata->req[0]);
>>       policy->fast_switch_possible = false;
>> @@ -1428,7 +1454,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>>       struct amd_cpudata *cpudata;
>>       union perf_cached perf;
>>       struct device *dev;
>> -    u64 value;
>>       int ret;
>>         /*
>> @@ -1493,12 +1518,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>>           cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
>>       }
>>   -    if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
>> -        ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
>> -        if (ret)
>> -            return ret;
>> -        WRITE_ONCE(cpudata->cppc_req_cached, value);
>> -    }
>>       ret = amd_pstate_set_epp(policy, cpudata->epp_default);
>>       if (ret)
>>           return ret;
>> @@ -1518,6 +1537,9 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
>>       struct amd_cpudata *cpudata = policy->driver_data;
>>         if (cpudata) {
>> +        /* Reset CPPC_REQ MSR to the BIOS value */
>> +        amd_pstate_update_perf(policy, cpudata->perf.bios_min_perf, 0U, 0U, 0U, false);
>> +
>>           kfree(cpudata);
>>           policy->driver_data = NULL;
>>       }
>> @@ -1575,13 +1597,27 @@ static int amd_pstate_cpu_online(struct cpufreq_policy *policy)
>>     static int amd_pstate_cpu_offline(struct cpufreq_policy *policy)
>>   {
>> -    return 0;
>> +    struct amd_cpudata *cpudata = policy->driver_data;
>> +
>> +    /*
>> +     * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
>> +     * min_perf value across kexec reboots. If this CPU is just onlined normally after this, the
>> +     * limits, epp and desired perf will get reset to the cached values in cpudata struct
>> +     */
>> +    return amd_pstate_update_perf(policy, cpudata->perf.bios_min_perf, 0U, 0U, 0U, false);
>>   }
>>     static int amd_pstate_suspend(struct cpufreq_policy *policy)
>>   {
>>       struct amd_cpudata *cpudata = policy->driver_data;
>>   +    /*
>> +     * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
>> +     * min_perf value across kexec reboots. If this CPU is just resumed back without kexec,
>> +     * the limits, epp and desired perf will get reset to the cached values in cpudata struct
>> +     */
>> +    amd_pstate_update_perf(policy, cpudata->perf.bios_min_perf, 0U, 0U, 0U, false);
>> +
>>       /* invalidate to ensure it's rewritten during resume */
>>       cpudata->cppc_req_cached = 0;
>>   diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
>> index fbe1c08d3f06..2f7ae364d331 100644
>> --- a/drivers/cpufreq/amd-pstate.h
>> +++ b/drivers/cpufreq/amd-pstate.h
>> @@ -30,6 +30,7 @@
>>    * @lowest_perf: the absolute lowest performance level of the processor
>>    * @min_limit_perf: Cached value of the performance corresponding to policy->min
>>    * @max_limit_perf: Cached value of the performance corresponding to policy->max
>> + * @bios_min_perf: Cached perf value corresponding to the "Requested CPU Min Frequency" BIOS option
>>    */
>>   union perf_cached {
>>       struct {
>> @@ -39,6 +40,7 @@ union perf_cached {
>>           u8    lowest_perf;
>>           u8    min_limit_perf;
>>           u8    max_limit_perf;
>> +        u8    bios_min_perf;
>>       };
>>       u64    val;
>>   };
>