On 2/18/2025 3:36 AM, Mario Limonciello wrote:
From: Mario Limonciello <mario.limonciello@xxxxxxx>Can we use cur_perf.highest_perf here ?
By storing perf values in a union all the writes and reads can
be done atomically, removing the need for some concurrency protections.
While making this change, also drop the cached frequency values,
using inline helpers to calculate them on demand from perf value.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@xxxxxxx>
Signed-off-by: Mario Limonciello <mario.limonciello@xxxxxxx>
---
v3:
* Pick up tag
v2:
* cache perf variable in unit tests
* Drop unnecessary check from amd_pstate_update_min_max_limit()
* Consistency with READ_ONCE()
* Drop unneeded policy checks
* add kdoc
---
drivers/cpufreq/amd-pstate-ut.c | 18 +--
drivers/cpufreq/amd-pstate.c | 195 ++++++++++++++++++--------------
drivers/cpufreq/amd-pstate.h | 49 +++++---
3 files changed, 151 insertions(+), 111 deletions(-)
diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c
index 445278cf40b61..ba3e06f349c6d 100644
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -129,6 +129,7 @@ static void amd_pstate_ut_check_perf(u32 index)
struct cppc_perf_caps cppc_perf;
struct cpufreq_policy *policy = NULL;
struct amd_cpudata *cpudata = NULL;
+ union perf_cached cur_perf;
for_each_possible_cpu(cpu) {
policy = cpufreq_cpu_get(cpu);
@@ -162,19 +163,20 @@ static void amd_pstate_ut_check_perf(u32 index)
lowest_perf = AMD_CPPC_LOWEST_PERF(cap1);
}
- if (highest_perf != READ_ONCE(cpudata->highest_perf) && !cpudata->hw_prefcore) {
+ cur_perf = READ_ONCE(cpudata->perf);
+ if (highest_perf != cur_perf.highest_perf && !cpudata->hw_prefcore) {
pr_err("%s cpu%d highest=%d %d highest perf doesn't match\n",
- __func__, cpu, highest_perf, cpudata->highest_perf);
+ __func__, cpu, highest_perf, cpudata->perf.highest_perf);
goto skip_test;Can we use cur_perf.(nominal/lowest_nonlinear/lowest)_perf here as well ?
}
- if ((nominal_perf != READ_ONCE(cpudata->nominal_perf)) ||
- (lowest_nonlinear_perf != READ_ONCE(cpudata->lowest_nonlinear_perf)) ||
- (lowest_perf != READ_ONCE(cpudata->lowest_perf))) {
+ if (nominal_perf != cur_perf.nominal_perf ||
+ (lowest_nonlinear_perf != cur_perf.lowest_nonlinear_perf) ||
+ (lowest_perf != cur_perf.lowest_perf)) {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d nominal=%d %d lowest_nonlinear=%d %d lowest=%d %d, they should be equal!\n",
- __func__, cpu, nominal_perf, cpudata->nominal_perf,
- lowest_nonlinear_perf, cpudata->lowest_nonlinear_perf,
- lowest_perf, cpudata->lowest_perf);
+ __func__, cpu, nominal_perf, cpudata->perf.nominal_perf,
+ lowest_nonlinear_perf, cpudata->perf.lowest_nonlinear_perf,
+ lowest_perf, cpudata->perf.lowest_perf);
goto skip_test;[Snip]
}
@@ -888,25 +896,24 @@ static u32 amd_pstate_get_transition_latency(unsigned int cpu)
}
/*
- * amd_pstate_init_freq: Initialize the max_freq, min_freq,
- * nominal_freq and lowest_nonlinear_freq for
- * the @cpudata object.
+ * amd_pstate_init_freq: Initialize the nominal_freq and lowest_nonlinear_freq
+ * for the @cpudata object.
*
- * Requires: highest_perf, lowest_perf, nominal_perf and
- * lowest_nonlinear_perf members of @cpudata to be
- * initialized.
+ * Requires: all perf members of @cpudata to be initialized.
*
- * Returns 0 on success, non-zero value on failure.
+ * Returns 0 on success, non-zero value on failure.
*/
static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
{
- int ret;
u32 min_freq, nominal_freq, lowest_nonlinear_freq;
struct cppc_perf_caps cppc_perf;
+ union perf_cached perf;
+ int ret;
ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
if (ret)
return ret;
+ perf = READ_ONCE(cpudata->perf);
if (quirks && quirks->nominal_freq)
nominal_freq = quirks->nominal_freq;
@@ -918,6 +925,7 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
if (quirks && quirks->lowest_freq) {
min_freq = quirks->lowest_freq;
+ perf.lowest_perf = freq_to_perf(perf, nominal_freq, min_freq);
I think we forgot to write back this value to the cpudata->perf variable
} else[Snip]
min_freq = cppc_perf.lowest_freq;
min_freq *= 1000;
@@ -934,7 +942,7 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
return -EINVAL;
}
- lowest_nonlinear_freq = perf_to_freq(cpudata, cpudata->lowest_nonlinear_perf);
+ lowest_nonlinear_freq = perf_to_freq(perf, nominal_freq, perf.lowest_nonlinear_perf);
WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq) {
diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
index 0149933692458..8421c83c07919 100644
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -13,6 +13,34 @@
/*********************************************************************
* AMD P-state INTERFACE *
*********************************************************************/
+
+/**
+ * union perf_cached - A union to cache performance-related data.
+ * @highest_perf: the maximum performance an individual processor may reach,
+ * assuming ideal conditions
+ * For platforms that do not support the preferred core feature, the
+ * highest_pef may be configured with 166 or 255, to avoid max frequency
s/highest_pef/highest_perf/
Also I think this statement is bit confusing, how about,
"For platforms that support the preferred core feature, the highest_perf value maybe
configured to any value in the range 166-256 by the firmware (because the preferred
core ranking is encoded in the highest_perf value). To maintain consistency across
all platforms, we split the highest_perf and preferred core ranking values into
cpudata->perf.highest_perf and cpudata->prefcore_ranking."
+ * calculated wrongly. we take the fixed value as the highest_perf.
+ * @nominal_perf: the maximum sustained performance level of the processor,
+ * assuming ideal operating conditions
+ * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
+ * savings are achieved
+ * @lowest_perf: the absolute lowest performance level of the processor
+ * @min_limit_perf: Cached value of the performance corresponding to policy->min
+ * @max_limit_perf: Cached value of the performance corresponding to policy->max
+ */
+union perf_cached {
+ struct {
+ u8 highest_perf;
+ u8 nominal_perf;
+ u8 lowest_nonlinear_perf;
+ u8 lowest_perf;
+ u8 min_limit_perf;
+ u8 max_limit_perf;
Just a thought, how about adding the "u8 desired_perf" (last requested) and "u8 prefcore_ranking"
in this. We can pursue it as a separate patch if you want.
I think there is value in adding desired_perf atleast, so that when we are caching the
min, max limits in the perf_cached variable, desired perf level is also atomically
updated into the shared cpudata structure.
Thanks,
Dhananjay
+ };
+ u64 val;
+};
+
/**
* struct amd_aperf_mperf
* @aperf: actual performance frequency clock count
@@ -30,20 +58,9 @@ struct amd_aperf_mperf {
* @cpu: CPU number
* @req: constraint request to apply
* @cppc_req_cached: cached performance request hints
- * @highest_perf: the maximum performance an individual processor may reach,
- * assuming ideal conditions
- * For platforms that do not support the preferred core feature, the
- * highest_pef may be configured with 166 or 255, to avoid max frequency
- * calculated wrongly. we take the fixed value as the highest_perf.
- * @nominal_perf: the maximum sustained performance level of the processor,
- * assuming ideal operating conditions
- * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
- * savings are achieved
- * @lowest_perf: the absolute lowest performance level of the processor
+ * @perf: cached performance-related data
* @prefcore_ranking: the preferred core ranking, the higher value indicates a higher
* priority.
- * @min_limit_perf: Cached value of the performance corresponding to policy->min
- * @max_limit_perf: Cached value of the performance corresponding to policy->max
* @min_limit_freq: Cached value of policy->min (in khz)
* @max_limit_freq: Cached value of policy->max (in khz)
* @nominal_freq: the frequency (in khz) that mapped to nominal_perf
@@ -68,13 +85,9 @@ struct amd_cpudata {
struct freq_qos_request req[2];
u64 cppc_req_cached;
- u8 highest_perf;
- u8 nominal_perf;
- u8 lowest_nonlinear_perf;
- u8 lowest_perf;
+ union perf_cached perf;
+
u8 prefcore_ranking;
- u8 min_limit_perf;
- u8 max_limit_perf;
u32 min_limit_freq;
u32 max_limit_freq;
u32 nominal_freq;