RE: [PATCH v6 04/11] cpufreq: amd_pstate: implement Pstate EPP support for the AMD processors

From: Yuan, Perry
Date: Thu Dec 08 2022 - 10:51:40 EST


[AMD Official Use Only - General]

Hi Mario.

> -----Original Message-----
> From: Limonciello, Mario <Mario.Limonciello@xxxxxxx>
> Sent: Saturday, December 3, 2022 1:36 AM
> To: Yuan, Perry <Perry.Yuan@xxxxxxx>; rafael.j.wysocki@xxxxxxxxx; Huang,
> Ray <Ray.Huang@xxxxxxx>; viresh.kumar@xxxxxxxxxx
> Cc: Sharma, Deepak <Deepak.Sharma@xxxxxxx>; Fontenot, Nathan
> <Nathan.Fontenot@xxxxxxx>; Deucher, Alexander
> <Alexander.Deucher@xxxxxxx>; Huang, Shimmer
> <Shimmer.Huang@xxxxxxx>; Du, Xiaojian <Xiaojian.Du@xxxxxxx>; Meng,
> Li (Jassmine) <Li.Meng@xxxxxxx>; Karny, Wyes <Wyes.Karny@xxxxxxx>;
> linux-pm@xxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx
> Subject: Re: [PATCH v6 04/11] cpufreq: amd_pstate: implement Pstate EPP
> support for the AMD processors
>
> On 12/2/2022 01:47, Perry Yuan wrote:
> > From: Perry Yuan <Perry.Yuan@xxxxxxx>
> >
> > Add EPP driver support for AMD SoCs which support a dedicated MSR for
> > CPPC. EPP is used by the DPM controller to configure the frequency
> > that a core operates at during short periods of activity.
> >
> > The SoC EPP targets are configured on a scale from 0 to 255 where 0
> > represents maximum performance and 255 represents maximum
> efficiency.
> >
> > The amd-pstate driver exports profile string names to userspace that
> > are tied to specific EPP values.
> >
> > The balance_performance string (0x80) provides the best balance for
> > efficiency versus power on most systems, but users can choose other
> > strings to meet their needs as well.
> >
> > $ cat
> >
> /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_
> p
> > references default performance balance_performance balance_power
> power
> >
> > $ cat
> >
> /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preferenc
> e
> > balance_performance
> >
> > To enable the driver,it needs to add `amd_pstate=active` to kernel
> > command line and kernel will load the active mode epp driver
> >
> > Signed-off-by: Perry Yuan <Perry.Yuan@xxxxxxx>
> > ---
> > drivers/cpufreq/amd-pstate.c | 643
> ++++++++++++++++++++++++++++++++++-
> > include/linux/amd-pstate.h | 35 ++
> > 2 files changed, 672 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/cpufreq/amd-pstate.c
> > b/drivers/cpufreq/amd-pstate.c index 204e39006dda..5989d4d25d0f
> 100644
> > --- a/drivers/cpufreq/amd-pstate.c
> > +++ b/drivers/cpufreq/amd-pstate.c
> > @@ -37,9 +37,12 @@
> > #include <linux/uaccess.h>
> > #include <linux/static_call.h>
> > #include <linux/amd-pstate.h>
> > +#include <linux/cpufreq_common.h>
> >
> > +#ifdef CONFIG_ACPI
> > #include <acpi/processor.h>
> > #include <acpi/cppc_acpi.h>
> > +#endif
> >
> > #include <asm/msr.h>
> > #include <asm/processor.h>
> > @@ -59,9 +62,130 @@
> > * we disable it by default to go acpi-cpufreq on these processors and add
> a
> > * module parameter to be able to enable it manually for debugging.
> > */
> > -static struct cpufreq_driver amd_pstate_driver;
> > +static bool cppc_active;
> > static int cppc_load __initdata;
> >
> > +static struct cpufreq_driver *default_pstate_driver; static struct
> > +amd_cpudata **all_cpu_data; static struct amd_pstate_params
> > +global_params;
> > +
> > +static DEFINE_MUTEX(amd_pstate_limits_lock);
> > +static DEFINE_MUTEX(amd_pstate_driver_lock);
> > +
> > +static bool cppc_boost __read_mostly; struct kobject
> > +*amd_pstate_kobj;
> > +
> > +#ifdef CONFIG_ACPI_CPPC_LIB
> Rather than making this conditional I would think that amd-pstate should
> probably depend on or select CONFIG_ACPI_CPPC_LIB, no?

Yes, the Kconfig select CONFIG_ACPI_CPPC_LIB , I removed the checking in V7.
Thanks for your review.

Perry.

>
> > +static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64
> > +cppc_req_cached) {
> > + s16 epp;
> > + struct cppc_perf_caps perf_caps;
> > + int ret;
> > +
> > + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > + if (!cppc_req_cached) {
> > + epp = rdmsrl_on_cpu(cpudata->cpu,
> MSR_AMD_CPPC_REQ,
> > + &cppc_req_cached);
> > + if (epp)
> > + return epp;
> > + }
> > + epp = (cppc_req_cached >> 24) & 0xFF;
> > + } else {
> > + ret = cppc_get_epp_caps(cpudata->cpu, &perf_caps);
> > + if (ret < 0) {
> > + pr_debug("Could not retrieve energy perf value
> (%d)\n", ret);
> > + return -EIO;
> > + }
> > + epp = (s16) perf_caps.energy_perf;
> > + }
> > +
> > + return epp;
> > +}
> > +#endif
> > +
> > +static int amd_pstate_get_energy_pref_index(struct amd_cpudata
> > +*cpudata) {
> > + s16 epp;
> > + int index = -EINVAL;
> > +
> > + epp = amd_pstate_get_epp(cpudata, 0);
> > + if (epp < 0)
> > + return epp;
> > +
> > + switch (epp) {
> > + case HWP_EPP_PERFORMANCE:
> > + index = EPP_INDEX_PERFORMANCE;
> > + break;
> > + case HWP_EPP_BALANCE_PERFORMANCE:
> > + index = EPP_INDEX_BALANCE_PERFORMANCE;
> > + break;
> > + case HWP_EPP_BALANCE_POWERSAVE:
> > + index = EPP_INDEX_BALANCE_POWERSAVE;
> > + break;
> > + case HWP_EPP_POWERSAVE:
> > + index = EPP_INDEX_POWERSAVE;
> > + break;
> > + default:
> > + break;
> > + }
> > +
> > + return index;
> > +}
> > +
> > +#ifdef CONFIG_ACPI_CPPC_LIB
> > +static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) {
> > + int ret;
> > + struct cppc_perf_ctrls perf_ctrls;
> > +
> > + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > + u64 value = READ_ONCE(cpudata->cppc_req_cached);
> > +
> > + value &= ~GENMASK_ULL(31, 24);
> > + value |= (u64)epp << 24;
> > + WRITE_ONCE(cpudata->cppc_req_cached, value);
> > +
> > + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
> value);
> > + if (!ret)
> > + cpudata->epp_cached = epp;
> > + } else {
> > + perf_ctrls.energy_perf = epp;
> > + ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
> > + if (ret) {
> > + pr_debug("failed to set energy perf value (%d)\n",
> ret);
> > + return ret;
> > + }
> > + cpudata->epp_cached = epp;
> > + }
> > +
> > + return ret;
> > +}
> > +
> > +static int amd_pstate_set_energy_pref_index(struct amd_cpudata
> *cpudata,
> > + int pref_index)
> > +{
> > + int epp = -EINVAL;
> > + int ret;
> > +
> > + if (!pref_index) {
> > + pr_debug("EPP pref_index is invalid\n");
> > + return -EINVAL;
> > + }
> > +
> > + if (epp == -EINVAL)
> > + epp = epp_values[pref_index];
> > +
> > + if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
> {
> > + pr_debug("EPP cannot be set under performance policy\n");
> > + return -EBUSY;
> > + }
> > +
> > + ret = amd_pstate_set_epp(cpudata, epp);
> > +
> > + return ret;
> > +}
> > +#endif
> > +
> > static inline int pstate_enable(bool enable)
> > {
> > return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable); @@ -70,11
> +194,21
> > @@ static inline int pstate_enable(bool enable)
> > static int cppc_enable(bool enable)
> > {
> > int cpu, ret = 0;
> > + struct cppc_perf_ctrls perf_ctrls;
> >
> > for_each_present_cpu(cpu) {
> > ret = cppc_set_enable(cpu, enable);
> > if (ret)
> > return ret;
> > +
> > + /* Enable autonomous mode for EPP */
> > + if (!cppc_active) {
> > + /* Set desired perf as zero to allow EPP firmware
> control */
> > + perf_ctrls.desired_perf = 0;
> > + ret = cppc_set_perf(cpu, &perf_ctrls);
> > + if (ret)
> > + return ret;
> > + }
> > }
> >
> > return ret;
> > @@ -417,7 +551,7 @@ static void amd_pstate_boost_init(struct
> amd_cpudata *cpudata)
> > return;
> >
> > cpudata->boost_supported = true;
> > - amd_pstate_driver.boost_enabled = true;
> > + default_pstate_driver->boost_enabled = true;
> > }
> >
> > static void amd_perf_ctl_reset(unsigned int cpu) @@ -591,10 +725,61
> > @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy
> *policy,
> > return sprintf(&buf[0], "%u\n", perf);
> > }
> >
> > +static ssize_t show_energy_performance_available_preferences(
> > + struct cpufreq_policy *policy, char *buf) {
> > + int i = 0;
> > + int ret = 0;
> > +
> > + while (energy_perf_strings[i] != NULL)
> > + ret += sysfs_emit(buf, "%s", energy_perf_strings[i++]);
> > +
> > + ret += sysfs_emit(buf, "\n");
> > +
> > + return ret;
> > +}
> > +
> > +static ssize_t store_energy_performance_preference(
> > + struct cpufreq_policy *policy, const char *buf, size_t count) {
> > + struct amd_cpudata *cpudata = policy->driver_data;
> > + char str_preference[21];
> > + ssize_t ret;
> > +
> > + ret = sscanf(buf, "%20s", str_preference);
> > + if (ret != 1)
> > + return -EINVAL;
> > +
> > + ret = match_string(energy_perf_strings, -1, str_preference);
> > + if (ret < 0)
> > + return -EINVAL;
> > +
> > + mutex_lock(&amd_pstate_limits_lock);
> > + ret = amd_pstate_set_energy_pref_index(cpudata, ret);
> > + mutex_unlock(&amd_pstate_limits_lock);
> > +
> > + return ret ?: count;
> > +}
> > +
> > +static ssize_t show_energy_performance_preference(
> > + struct cpufreq_policy *policy, char *buf) {
> > + struct amd_cpudata *cpudata = policy->driver_data;
> > + int preference;
> > +
> > + preference = amd_pstate_get_energy_pref_index(cpudata);
> > + if (preference < 0)
> > + return preference;
> > +
> > + return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
>
> You've got an extra space after "return".

Fixed in V7.

>
> > +}
> > +
> > cpufreq_freq_attr_ro(amd_pstate_max_freq);
> > cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
> >
> > cpufreq_freq_attr_ro(amd_pstate_highest_perf);
> > +cpufreq_freq_attr_rw(energy_performance_preference);
> > +cpufreq_freq_attr_ro(energy_performance_available_preferences);
> >
> > static struct freq_attr *amd_pstate_attr[] = {
> > &amd_pstate_max_freq,
> > @@ -603,6 +788,429 @@ static struct freq_attr *amd_pstate_attr[] = {
> > NULL,
> > };
> >
> > +static struct freq_attr *amd_pstate_epp_attr[] = {
> > + &amd_pstate_max_freq,
> > + &amd_pstate_lowest_nonlinear_freq,
> > + &amd_pstate_highest_perf,
> > + &energy_performance_preference,
> > + &energy_performance_available_preferences,
> > + NULL,
> > +};
> > +
> > +static inline void update_boost_state(void) {
> > + u64 misc_en;
> > + struct amd_cpudata *cpudata;
> > +
> > + cpudata = all_cpu_data[0];
> > + rdmsrl(MSR_K7_HWCR, misc_en);
> > + global_params.cppc_boost_disabled = misc_en & BIT_ULL(25); }
> > +
> > +static bool amd_pstate_acpi_pm_profile_server(void)
> > +{
> > + if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
> > + acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
> > + return true;
> > +
> > + return false;
> > +}
>
> Right now this is only used to control boost, but I wonder if this should instad
> also control the default EPP policy as all?

It should be able to control the EPP profile as well if needed.
Currently the default EPP profile is balance_performance for all CPUs.
If we want to set different profile for server CPU, there some are codes can be added here.

>
> > +
> > +static int amd_pstate_init_cpu(unsigned int cpunum) {
> > + struct amd_cpudata *cpudata;
> > +
> > + cpudata = all_cpu_data[cpunum];
> > + if (!cpudata) {
> > + cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
> > + if (!cpudata)
> > + return -ENOMEM;
> > + WRITE_ONCE(all_cpu_data[cpunum], cpudata);
> > +
> > + cpudata->cpu = cpunum;
> > +
> > + if (cppc_active) {
> > + if (amd_pstate_acpi_pm_profile_server())
> > + cppc_boost = true;
> > + }
> > +
> > + }
> > + cpudata->epp_powersave = -EINVAL;
> > + cpudata->epp_policy = 0; > + pr_debug("controlling: cpu %d\n",
> cpunum);
> > + return 0;
> > +}
> > +
> > +static int __amd_pstate_cpu_init(struct cpufreq_policy *policy) {
> > + int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
> > + struct amd_cpudata *cpudata;
> > + struct device *dev;
> > + int rc;
> > + u64 value;
> > +
> > + rc = amd_pstate_init_cpu(policy->cpu);
> > + if (rc)
> > + return rc;
> > +
> > + cpudata = all_cpu_data[policy->cpu];
> > +
> > + dev = get_cpu_device(policy->cpu);
> > + if (!dev)
> > + goto free_cpudata1;
> > +
> > + rc = amd_pstate_init_perf(cpudata);
> > + if (rc)
> > + goto free_cpudata1;
> > +
> > + min_freq = amd_get_min_freq(cpudata);
> > + max_freq = amd_get_max_freq(cpudata);
> > + nominal_freq = amd_get_nominal_freq(cpudata);
> > + lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
> > + if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
> > + dev_err(dev, "min_freq(%d) or max_freq(%d) value is
> incorrect\n",
> > + min_freq, max_freq);
> > + ret = -EINVAL;
> > + goto free_cpudata1;
> > + }
> > +
> > + policy->min = min_freq;
> > + policy->max = max_freq;
> > +
> > + policy->cpuinfo.min_freq = min_freq;
> > + policy->cpuinfo.max_freq = max_freq;
> > + /* It will be updated by governor */
> > + policy->cur = policy->cpuinfo.min_freq;
> > +
> > + /* Initial processor data capability frequencies */
> > + cpudata->max_freq = max_freq;
> > + cpudata->min_freq = min_freq;
> > + cpudata->nominal_freq = nominal_freq;
> > + cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
> > +
> > + policy->driver_data = cpudata;
> > +
> > + update_boost_state();
> > + cpudata->epp_cached = amd_pstate_get_epp(cpudata, value);
> > +
> > + policy->min = policy->cpuinfo.min_freq;
> > + policy->max = policy->cpuinfo.max_freq;
> > +
> > + if (boot_cpu_has(X86_FEATURE_CPPC))
> > + policy->fast_switch_possible = true;
> > +
> > + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
> &value);
> > + if (ret)
> > + return ret;
> > + WRITE_ONCE(cpudata->cppc_req_cached, value);
> > +
> > + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
> &value);
> > + if (ret)
> > + return ret;
> > + WRITE_ONCE(cpudata->cppc_cap1_cached, value);
> > + }
> > + amd_pstate_boost_init(cpudata);
> > +
> > + return 0;
> > +
> > +free_cpudata1:
> > + kfree(cpudata);
> > + return ret;
> > +}
> > +
> > +static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) {
> > + int ret;
> > +
> > + ret = __amd_pstate_cpu_init(policy);
> > + if (ret)
> > + return ret;
> > + /*
> > + * Set the policy to powersave to provide a valid fallback value in case
> > + * the default cpufreq governor is neither powersave nor
> performance.
> > + */
> > + policy->policy = CPUFREQ_POLICY_POWERSAVE;
> > +
> > + return 0;
> > +}
> > +
> > +static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) {
> > + pr_debug("CPU %d exiting\n", policy->cpu);
> > + policy->fast_switch_possible = false;
> > + return 0;
> > +}
> > +
> > +static void amd_pstate_update_max_freq(unsigned int cpu) {
> > + struct cpufreq_policy *policy = policy = cpufreq_cpu_get(cpu);
> > +
> > + if (!policy)
> > + return;
> > +
> > + refresh_frequency_limits(policy);
> > + cpufreq_cpu_put(policy);
> > +}
> > +
> > +static void amd_pstate_epp_update_limits(unsigned int cpu) {
> > + mutex_lock(&amd_pstate_driver_lock);
> > + update_boost_state();
> > + if (global_params.cppc_boost_disabled) {
> > + for_each_possible_cpu(cpu)
> > + amd_pstate_update_max_freq(cpu);
> > + } else {
> > + cpufreq_update_policy(cpu);
> > + }
> > + mutex_unlock(&amd_pstate_driver_lock);
> > +}
> > +
> > +static int cppc_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
> > +
> > +static inline void amd_pstate_boost_up(struct amd_cpudata *cpudata) {
> > + u64 hwp_req = READ_ONCE(cpudata->cppc_req_cached);
> > + u64 hwp_cap = READ_ONCE(cpudata->cppc_cap1_cached);
> > + u32 max_limit = (hwp_req & 0xff);
> > + u32 min_limit = (hwp_req & 0xff00) >> 8;
> > + u32 boost_level1;
> > +
> > + /* If max and min are equal or already at max, nothing to boost */
> > + if (max_limit == min_limit)
> > + return;
> > +
> > + /* Set boost max and min to initial value */
> > + if (!cpudata->cppc_boost_min)
> > + cpudata->cppc_boost_min = min_limit;
> > +
> > + boost_level1 = ((AMD_CPPC_NOMINAL_PERF(hwp_cap) +
> min_limit) >> 1);
> > +
> > + if (cpudata->cppc_boost_min < boost_level1)
> > + cpudata->cppc_boost_min = boost_level1;
> > + else if (cpudata->cppc_boost_min <
> AMD_CPPC_NOMINAL_PERF(hwp_cap))
> > + cpudata->cppc_boost_min =
> AMD_CPPC_NOMINAL_PERF(hwp_cap);
> > + else if (cpudata->cppc_boost_min ==
> AMD_CPPC_NOMINAL_PERF(hwp_cap))
> > + cpudata->cppc_boost_min = max_limit;
> > + else
> > + return;
> > +
> > + hwp_req &= ~AMD_CPPC_MIN_PERF(~0L);
> > + hwp_req |= AMD_CPPC_MIN_PERF(cpudata->cppc_boost_min);
> > + wrmsrl(MSR_AMD_CPPC_REQ, hwp_req);
> > + cpudata->last_update = cpudata->sample.time; }
> > +
> > +static inline void amd_pstate_boost_down(struct amd_cpudata *cpudata)
> > +{
> > + bool expired;
> > +
> > + if (cpudata->cppc_boost_min) {
> > + expired = time_after64(cpudata->sample.time, cpudata-
> >last_update +
> > + cppc_boost_hold_time_ns);
> > +
> > + if (expired) {
> > + wrmsrl(MSR_AMD_CPPC_REQ, cpudata-
> >cppc_req_cached);
> > + cpudata->cppc_boost_min = 0;
> > + }
> > + }
> > +
> > + cpudata->last_update = cpudata->sample.time; }
> > +
> > +static inline void amd_pstate_boost_update_util(struct amd_cpudata
> *cpudata,
> > + u64 time)
> > +{
> > + cpudata->sample.time = time;
> > + if (smp_processor_id() != cpudata->cpu)
> > + return;
> > +
> > + if (cpudata->sched_flags & SCHED_CPUFREQ_IOWAIT) {
> > + bool do_io = false;
> > +
> > + cpudata->sched_flags = 0;
> > + /*
> > + * Set iowait_boost flag and update time. Since IO WAIT flag
> > + * is set all the time, we can't just conclude that there is
> > + * some IO bound activity is scheduled on this CPU with just
> > + * one occurrence. If we receive at least two in two
> > + * consecutive ticks, then we treat as boost candidate.
> > + * This is leveraged from Intel Pstate driver.
> > + */
> > + if (time_before64(time, cpudata->last_io_update + 2 *
> TICK_NSEC))
> > + do_io = true;
> > +
> > + cpudata->last_io_update = time;
> > +
> > + if (do_io)
> > + amd_pstate_boost_up(cpudata);
> > +
> > + } else {
> > + amd_pstate_boost_down(cpudata);
> > + }
> > +}
> > +
> > +static inline void amd_pstate_cppc_update_hook(struct update_util_data
> *data,
> > + u64 time, unsigned int flags)
> > +{
> > + struct amd_cpudata *cpudata = container_of(data,
> > + struct amd_cpudata, update_util);
> > +
> > + cpudata->sched_flags |= flags;
> > +
> > + if (smp_processor_id() == cpudata->cpu)
> > + amd_pstate_boost_update_util(cpudata, time); }
> > +
> > +static void amd_pstate_clear_update_util_hook(unsigned int cpu) {
> > + struct amd_cpudata *cpudata = all_cpu_data[cpu];
> > +
> > + if (!cpudata->update_util_set)
> > + return;
> > +
> > + cpufreq_remove_update_util_hook(cpu);
> > + cpudata->update_util_set = false;
> > + synchronize_rcu();
> > +}
> > +
> > +static void amd_pstate_set_update_util_hook(unsigned int cpu_num) {
> > + struct amd_cpudata *cpudata = all_cpu_data[cpu_num];
> > +
> > + if (!cppc_boost) {
> > + if (cpudata->update_util_set)
> > + amd_pstate_clear_update_util_hook(cpudata->cpu);
> > + return;
> > + }
> > +
> > + if (cpudata->update_util_set)
> > + return;
> > +
> > + cpudata->sample.time = 0;
> > + cpufreq_add_update_util_hook(cpu_num, &cpudata->update_util,
> > +
> amd_pstate_cppc_update_hook);
> > + cpudata->update_util_set = true;
> > +}
> > +
> > +static void amd_pstate_epp_init(unsigned int cpu) {
> > + struct amd_cpudata *cpudata = all_cpu_data[cpu];
> > + u32 max_perf, min_perf;
> > + u64 value;
> > + s16 epp;
> > + int ret;
> > +
> > + max_perf = READ_ONCE(cpudata->highest_perf);
> > + min_perf = READ_ONCE(cpudata->lowest_perf);
> > +
> > + value = READ_ONCE(cpudata->cppc_req_cached);
> > +
> > + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
> > + min_perf = max_perf;
> > +
> > + /* Initial min/max values for CPPC Performance Controls Register */
> > + value &= ~AMD_CPPC_MIN_PERF(~0L);
> > + value |= AMD_CPPC_MIN_PERF(min_perf);
> > +
> > + value &= ~AMD_CPPC_MAX_PERF(~0L);
> > + value |= AMD_CPPC_MAX_PERF(max_perf);
> > +
> > + /* CPPC EPP feature require to set zero to the desire perf bit */
> > + value &= ~AMD_CPPC_DES_PERF(~0L);
> > + value |= AMD_CPPC_DES_PERF(0);
> > +
> > + if (cpudata->epp_policy == cpudata->policy)
> > + goto skip_epp;
> > +
> > + cpudata->epp_policy = cpudata->policy;
> > +
> > + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> > + epp = amd_pstate_get_epp(cpudata, value);
> > + cpudata->epp_powersave = epp;
> > + if (epp < 0)
> > + goto skip_epp;
> > + /* force the epp value to be zero for performance policy */
> > + epp = 0;
> > + } else {
> > + if (cpudata->epp_powersave < 0)
> > + goto skip_epp;
> > + /* Get BIOS pre-defined epp value */
> > + epp = amd_pstate_get_epp(cpudata, value);
> > + if (epp)
> > + goto skip_epp;
> > + epp = cpudata->epp_powersave;
> > + }
> > + /* Set initial EPP value */
> > + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > + value &= ~GENMASK_ULL(31, 24);
> > + value |= (u64)epp << 24;
> > + }
> > +
> > +skip_epp:
> > + WRITE_ONCE(cpudata->cppc_req_cached, value);
> > + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
> > + if (!ret)
> > + cpudata->epp_cached = epp;
> > +}
> > +
> > +static void amd_pstate_set_max_limits(struct amd_cpudata *cpudata) {
> > + u64 hwp_cap = READ_ONCE(cpudata->cppc_cap1_cached);
> > + u64 hwp_req = READ_ONCE(cpudata->cppc_req_cached);
> > + u32 max_limit = (hwp_cap >> 24) & 0xff;
> > +
> > + hwp_req &= ~AMD_CPPC_MIN_PERF(~0L);
> > + hwp_req |= AMD_CPPC_MIN_PERF(max_limit);
> > + wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, hwp_req); }
> > +
> > +static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) {
> > + struct amd_cpudata *cpudata;
> > +
> > + if (!policy->cpuinfo.max_freq)
> > + return -ENODEV;
> > +
> > + pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
> > + policy->cpuinfo.max_freq, policy->max);
> > +
> > + cpudata = all_cpu_data[policy->cpu];
> > + cpudata->policy = policy->policy;
> > +
> > + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > + mutex_lock(&amd_pstate_limits_lock);
> > +
> > + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> > + amd_pstate_clear_update_util_hook(policy->cpu);
> > + amd_pstate_set_max_limits(cpudata);
> > + } else {
> > + amd_pstate_set_update_util_hook(policy->cpu);
> > + }
> > +
> > + if (boot_cpu_has(X86_FEATURE_CPPC))
> > + amd_pstate_epp_init(policy->cpu);
> > +
> > + mutex_unlock(&amd_pstate_limits_lock);
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static void amd_pstate_verify_cpu_policy(struct amd_cpudata *cpudata,
> > + struct cpufreq_policy_data *policy)
> {
> > + update_boost_state();
> > + cpufreq_verify_within_cpu_limits(policy);
> > +}
> > +
> > +static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data
> > +*policy) {
> > + amd_pstate_verify_cpu_policy(all_cpu_data[policy->cpu], policy);
> > + pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy-
> >min);
> > + return 0;
> > +}
> > +
> > static struct cpufreq_driver amd_pstate_driver = {
> > .flags = CPUFREQ_CONST_LOOPS |
> CPUFREQ_NEED_UPDATE_LIMITS,
> > .verify = amd_pstate_verify,
> > @@ -616,8 +1224,20 @@ static struct cpufreq_driver amd_pstate_driver =
> {
> > .attr = amd_pstate_attr,
> > };
> >
> > +static struct cpufreq_driver amd_pstate_epp_driver = {
> > + .flags = CPUFREQ_CONST_LOOPS,
> > + .verify = amd_pstate_epp_verify_policy,
> > + .setpolicy = amd_pstate_epp_set_policy,
> > + .init = amd_pstate_epp_cpu_init,
> > + .exit = amd_pstate_epp_cpu_exit,
> > + .update_limits = amd_pstate_epp_update_limits,
> > + .name = "amd_pstate_epp",
> > + .attr = amd_pstate_epp_attr,
> > +};
> > +
> > static int __init amd_pstate_init(void)
> > {
> > + static struct amd_cpudata **cpudata;
> > int ret;
> >
> > if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) @@ -644,7
> +1264,8
> > @@ static int __init amd_pstate_init(void)
> > /* capability check */
> > if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > pr_debug("AMD CPPC MSR based functionality is
> supported\n");
> > - amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
> > + if (!cppc_active)
> > + default_pstate_driver->adjust_perf =
> amd_pstate_adjust_perf;
> > } else {
> > pr_debug("AMD CPPC shared memory based functionality is
> supported\n");
> > static_call_update(amd_pstate_enable, cppc_enable); @@ -
> 652,6
> > +1273,10 @@ static int __init amd_pstate_init(void)
> > static_call_update(amd_pstate_update_perf,
> cppc_update_perf);
> > }
> >
> > + cpudata = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
> > + if (!cpudata)
> > + return -ENOMEM;
> > + WRITE_ONCE(all_cpu_data, cpudata);
> > /* enable amd pstate feature */
> > ret = amd_pstate_enable(true);
> > if (ret) {
> > @@ -659,9 +1284,9 @@ static int __init amd_pstate_init(void)
> > return ret;
> > }
> >
> > - ret = cpufreq_register_driver(&amd_pstate_driver);
> > + ret = cpufreq_register_driver(default_pstate_driver);
> > if (ret)
> > - pr_err("failed to register amd_pstate_driver with
> return %d\n",
> > + pr_err("failed to register amd pstate driver with
> return %d\n",
> > ret);
> >
> > return ret;
> > @@ -676,8 +1301,14 @@ static int __init amd_pstate_param(char *str)
> > if (!strcmp(str, "disable")) {
> > cppc_load = 0;
> > pr_info("driver is explicitly disabled\n");
> > - } else if (!strcmp(str, "passive"))
> > + } else if (!strcmp(str, "passive")) {
> > cppc_load = 1;
> > + default_pstate_driver = &amd_pstate_driver;
> > + } else if (!strcmp(str, "active")) {
> > + cppc_active = 1;
> > + cppc_load = 1;
> > + default_pstate_driver = &amd_pstate_epp_driver;
> > + }
> >
> > return 0;
> > }
> > diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
> > index 1c4b8659f171..888af62040f1 100644
> > --- a/include/linux/amd-pstate.h
> > +++ b/include/linux/amd-pstate.h
> > @@ -25,6 +25,7 @@ struct amd_aperf_mperf {
> > u64 aperf;
> > u64 mperf;
> > u64 tsc;
> > + u64 time;
> > };
> >
> > /**
> > @@ -47,6 +48,18 @@ struct amd_aperf_mperf {
> > * @prev: Last Aperf/Mperf/tsc count value read from register
> > * @freq: current cpu frequency value
> > * @boost_supported: check whether the Processor or SBIOS supports
> > boost mode
> > + * @epp_powersave: Last saved CPPC energy performance preference
> > + when policy switched to performance
> > + * @epp_policy: Last saved policy used to set energy-performance
> > +preference
> > + * @epp_cached: Cached CPPC energy-performance preference value
> > + * @policy: Cpufreq policy value
> > + * @sched_flags: Store scheduler flags for possible cross CPU update
> > + * @update_util_set: CPUFreq utility callback is set
> > + * @last_update: Time stamp of the last performance state update
> > + * @cppc_boost_min: Last CPPC boosted min performance state
> > + * @cppc_cap1_cached: Cached value of the last CPPC Capabilities MSR
> > + * @update_util: Cpufreq utility callback information
> > + * @sample: the stored performance sample
> > *
> > * The amd_cpudata is key private data for each CPU thread in AMD P-
> State, and
> > * represents all the attributes and goals that AMD P-State requests at
> runtime.
> > @@ -72,6 +85,28 @@ struct amd_cpudata {
> >
> > u64 freq;
> > bool boost_supported;
> > +
> > + /* EPP feature related attributes*/
> > + s16 epp_powersave;
> > + s16 epp_policy;
> > + s16 epp_cached;
> > + u32 policy;
> > + u32 sched_flags;
> > + bool update_util_set;
> > + u64 last_update;
> > + u64 last_io_update;
> > + u32 cppc_boost_min;
> > + u64 cppc_cap1_cached;
> > + struct update_util_data update_util;
> > + struct amd_aperf_mperf sample;
> > +};
> > +
> > +/**
> > + * struct amd_pstate_params - global parameters for the performance
> > +control
> > + * @ cppc_boost_disabled wheher the core performance boost disabled
> > +*/ struct amd_pstate_params {
> > + bool cppc_boost_disabled;
> > };
> >
> > #endif /* _LINUX_AMD_PSTATE_H */