Re: [PATCH v3 4/5] cpufreq: intel_pstate: Add ->offline and ->online callbacks

From: Rafael J. Wysocki
Date: Mon Aug 31 2020 - 14:16:10 EST


On Thu, Aug 27, 2020 at 5:28 PM Rafael J. Wysocki <rjw@xxxxxxxxxxxxx> wrote:
>
> From: "Rafael J. Wysocki" <rafael.j.wysocki@xxxxxxxxx>
>
> Add ->offline and ->online driver callbacks to prepare for taking a
> CPU offline and to restore its working configuration when it goes
> back online, respectively, to avoid invoking the ->init callback on
> every CPU online which is quite a bit of unnecessary overhead.
>
> Define ->offline and ->online so that they can be used in the
> passive mode as well as in the active mode and because ->offline
> will do the majority of ->stop_cpu work, the passive mode does
> not need that callback any more, so drop it from there.
>
> Also modify the active mode ->suspend and ->resume callbacks to
> prevent them from interfering with the new ->offline and ->online
> ones in case the latter are invoked withing the system-wide suspend
> and resume code flow and make the passive mode use them too.
>
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>
> ---
>
> -> v2: Rearrange intel_pstate_init_cpu() to restore some of the previous
> behavior of it to retain the current active-mode EPP management.
>
> v2 -> v3:
> * Fold the previous [5/5] in, rework intel_pstate_resume(), add
> intel_pstate_suspend().
> * Drop intel_pstate_hwp_save_state() and drop epp_saved from struct cpudata.
> * Update the changelog.
>
> ---
> drivers/cpufreq/intel_pstate.c | 139 +++++++++++++++++++++------------
> 1 file changed, 91 insertions(+), 48 deletions(-)
>
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index b308c39b6204..a265ccbcbbd7 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -219,14 +219,13 @@ struct global_params {
> * @epp_policy: Last saved policy used to set EPP/EPB
> * @epp_default: Power on default HWP energy performance
> * preference/bias
> - * @epp_saved: Saved EPP/EPB during system suspend or CPU offline
> - * operation
> * @epp_cached Cached HWP energy-performance preference value
> * @hwp_req_cached: Cached value of the last HWP Request MSR
> * @hwp_cap_cached: Cached value of the last HWP Capabilities MSR
> * @last_io_update: Last time when IO wake flag was set
> * @sched_flags: Store scheduler flags for possible cross CPU update
> * @hwp_boost_min: Last HWP boosted min performance
> + * @suspended: Whether or not the driver has been suspended.
> *
> * This structure stores per CPU instance data for all CPUs.
> */
> @@ -258,13 +257,13 @@ struct cpudata {
> s16 epp_powersave;
> s16 epp_policy;
> s16 epp_default;
> - s16 epp_saved;
> s16 epp_cached;
> u64 hwp_req_cached;
> u64 hwp_cap_cached;
> u64 last_io_update;
> unsigned int sched_flags;
> u32 hwp_boost_min;
> + bool suspended;
> };
>
> static struct cpudata **all_cpu_data;
> @@ -871,12 +870,6 @@ static void intel_pstate_hwp_set(unsigned int cpu)
>
> cpu_data->epp_policy = cpu_data->policy;
>
> - if (cpu_data->epp_saved >= 0) {
> - epp = cpu_data->epp_saved;
> - cpu_data->epp_saved = -EINVAL;
> - goto update_epp;
> - }
> -
> if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
> epp = intel_pstate_get_epp(cpu_data, value);
> cpu_data->epp_powersave = epp;
> @@ -903,7 +896,6 @@ static void intel_pstate_hwp_set(unsigned int cpu)
>
> epp = cpu_data->epp_powersave;
> }
> -update_epp:
> if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
> value &= ~GENMASK_ULL(31, 24);
> value |= (u64)epp << 24;
> @@ -915,14 +907,24 @@ static void intel_pstate_hwp_set(unsigned int cpu)
> wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
> }
>
> -static void intel_pstate_hwp_force_min_perf(int cpu)
> +static void intel_pstate_hwp_offline(struct cpudata *cpu)
> {
> - u64 value;
> + u64 value = READ_ONCE(cpu->hwp_req_cached);
> int min_perf;
>
> - value = all_cpu_data[cpu]->hwp_req_cached;
> + if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
> + /*
> + * In case the EPP has been set to "performance" by the
> + * active mode "performance" scaling algorithm, replace that
> + * temporary value with the cached EPP one.
> + */
> + value &= ~GENMASK_ULL(31, 24);
> + value |= HWP_ENERGY_PERF_PREFERENCE(cpu->epp_cached);
> + WRITE_ONCE(cpu->hwp_req_cached, value);
> + }
> +
> value &= ~GENMASK_ULL(31, 0);
> - min_perf = HWP_LOWEST_PERF(all_cpu_data[cpu]->hwp_cap_cached);
> + min_perf = HWP_LOWEST_PERF(cpu->hwp_cap_cached);
>
> /* Set hwp_max = hwp_min */
> value |= HWP_MAX_PERF(min_perf);
> @@ -932,19 +934,7 @@ static void intel_pstate_hwp_force_min_perf(int cpu)
> if (boot_cpu_has(X86_FEATURE_HWP_EPP))
> value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE);
>
> - wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
> -}
> -
> -static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy)
> -{
> - struct cpudata *cpu_data = all_cpu_data[policy->cpu];
> -
> - if (!hwp_active)
> - return 0;
> -
> - cpu_data->epp_saved = intel_pstate_get_epp(cpu_data, 0);
> -
> - return 0;
> + wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
> }
>
> #define POWER_CTL_EE_ENABLE 1
> @@ -971,8 +961,22 @@ static void set_power_ctl_ee_state(bool input)
>
> static void intel_pstate_hwp_enable(struct cpudata *cpudata);
>
> +static int intel_pstate_suspend(struct cpufreq_policy *policy)
> +{
> + struct cpudata *cpu = all_cpu_data[policy->cpu];
> +
> + pr_debug("CPU %d suspending\n", cpu->cpu);
> +
> + cpu->suspended = true;
> +
> + return 0;
> +}
> +
> static int intel_pstate_resume(struct cpufreq_policy *policy)
> {
> + struct cpudata *cpu = all_cpu_data[policy->cpu];
> +
> + pr_debug("CPU %d resuming\n", cpu->cpu);
>
> /* Only restore if the system default is changed */
> if (power_ctl_ee_state == POWER_CTL_EE_ENABLE)
> @@ -980,18 +984,22 @@ static int intel_pstate_resume(struct cpufreq_policy *policy)
> else if (power_ctl_ee_state == POWER_CTL_EE_DISABLE)
> set_power_ctl_ee_state(false);
>
> - if (!hwp_active)
> - return 0;
> + if (hwp_active) {
> + mutex_lock(&intel_pstate_limits_lock);
>
> - mutex_lock(&intel_pstate_limits_lock);
> + /*
> + * Enable for all CPUs, because the boot CPU may not be the
> + * first one to resume.
> + */
> + intel_pstate_hwp_enable(cpu);
>
> - if (policy->cpu == 0)
> - intel_pstate_hwp_enable(all_cpu_data[policy->cpu]);
> + wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST,
> + READ_ONCE(cpu->hwp_req_cached));
>
> - all_cpu_data[policy->cpu]->epp_policy = 0;
> - intel_pstate_hwp_set(policy->cpu);
> + mutex_unlock(&intel_pstate_limits_lock);
> + }
>
> - mutex_unlock(&intel_pstate_limits_lock);
> + cpu->suspended = false;
>
> return 0;
> }
> @@ -1440,7 +1448,6 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
> wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
>
> wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
> - cpudata->epp_policy = 0;
> if (cpudata->epp_default == -EINVAL)
> cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
> }
> @@ -2111,7 +2118,6 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
>
> cpu->epp_default = -EINVAL;
> cpu->epp_powersave = -EINVAL;
> - cpu->epp_saved = -EINVAL;
> }
>
> cpu = all_cpu_data[cpunum];
> @@ -2122,6 +2128,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
> const struct x86_cpu_id *id;
>
> intel_pstate_hwp_enable(cpu);
> + cpu->epp_policy = 0;
>
> id = x86_match_cpu(intel_pstate_hwp_boost_ids);
> if (id && intel_pstate_acpi_pm_profile_server())
> @@ -2308,28 +2315,59 @@ static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy)
> return 0;
> }
>
> -static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
> +static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
> {
> + struct cpudata *cpu = all_cpu_data[policy->cpu];
> +
> + pr_debug("CPU %d going offline\n", cpu->cpu);
> +
> + if (cpu->suspended)
> + return 0;
> +
> + intel_pstate_exit_perf_limits(policy);
> +
> + /*
> + * If the CPU is an SMT thread and it goes offline with the performance
> + * settings different from the minimum, it will prevent its sibling
> + * from getting to lower performance levels, so force the minimum
> + * performance on CPU offline to prevent that from happening.
> + */
> if (hwp_active)
> - intel_pstate_hwp_force_min_perf(policy->cpu);
> + intel_pstate_hwp_offline(cpu);
> else
> - intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
> + intel_pstate_set_min_pstate(cpu);
> +
> + return 0;
> }
>
> -static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
> +static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
> {
> - pr_debug("CPU %d exiting\n", policy->cpu);
> + struct cpudata *cpu = all_cpu_data[policy->cpu];
> +
> + pr_debug("CPU %d going online\n", cpu->cpu);
> +
> + if (cpu->suspended)

_cpu_online() needs to enable HWP if "suspended", or the MSR accesses
in _set_policy() will trigger warnings when resuming from ACPI S3.

This has been fixed in the intel_pstate-testing branch already and I
will send an update of the patch tomorrow.

Thanks!