[PATCH v4 3/3] arch_topology: Add topology_update_cpu_capacity() for runtime updates

From: Xueqin Luo

Date: Mon Jun 08 2026 - 06:17:43 EST


When the CPPC Highest Performance register changes at runtime
(e.g. via ACPI Notify(0x85)), the scheduler's view of CPU capacity
and the frequency invariance engine's reference values become stale,
as topology_init_cpu_capacity_cppc() is only called once during boot.

Keep raw_capacity allocated after a successful CPPC init instead of
freeing it (the error path still frees it), and introduce
topology_update_cpu_capacity() to update per-CPU raw_capacity,
capacity_freq_ref, and the normalized CPU capacity scale at runtime.

Call this new function from cppc_cpufreq_update_limits() so that
highest_perf changes are immediately reflected in the topology
subsystem.

Signed-off-by: Xueqin Luo <luoxueqin@xxxxxxxxxx>
---
drivers/base/arch_topology.c | 63 ++++++++++++++++++++++++++++++++++
drivers/cpufreq/cppc_cpufreq.c | 7 ++++
include/linux/arch_topology.h | 6 ++++
3 files changed, 76 insertions(+)

diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 8c5e47c28d9a..c9594540b6f2 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -229,6 +229,7 @@ static void update_topology_flags_workfn(struct work_struct *work)
}

static u32 *raw_capacity;
+static DEFINE_MUTEX(raw_capacity_lock);

static int free_raw_capacity(void)
{
@@ -372,13 +373,75 @@ static inline void topology_init_cpu_capacity_cppc(void)
schedule_work(&update_topology_flags_work);
pr_debug("cpu_capacity: cpu_capacity initialization done\n");

+ /*
+ * Keep raw_capacity for runtime updates via
+ * topology_update_cpu_capacity().
+ */
+ return;
+
exit:
free_raw_capacity();
}
+
void acpi_processor_init_invariance_cppc(void)
{
topology_init_cpu_capacity_cppc();
}
+
+/**
+ * topology_update_cpu_capacity - Update CPU capacity after highest_perf change
+ * @cpu: CPU whose highest performance changed
+ * @perf_caps: Updated CPPC performance capabilities for @cpu
+ *
+ * Store the new highest_perf of @cpu in raw_capacity[], refresh its
+ * capacity_freq_ref and pass it to freq_inv_set_max_ratio(), then
+ * re-normalize the capacity of every possible CPU against the largest raw
+ * value. Returns without changes if raw_capacity is not allocated, @cpu is
+ * out of range, or highest_perf is 0 (so the divisor below is never 0).
+ */
+void topology_update_cpu_capacity(unsigned int cpu,
+				  struct cppc_perf_caps *perf_caps)
+{
+	u64 highest_perf = perf_caps->highest_perf;
+	u64 capacity, capacity_scale = 0;
+	int c;
+
+	if (!raw_capacity || cpu >= num_possible_cpus() || !highest_perf)
+		return;
+
+	/*
+	 * Serialize concurrent updates, e.g. from several CPUs receiving
+	 * Notify(0x85) at the same time. The normalization loops below read
+	 * raw_capacity[] for all CPUs, so an unserialized writer could make
+	 * them normalize against a mix of old and new values.
+	 */
+	guard(mutex)(&raw_capacity_lock);
+
+	pr_debug("cpu_capacity: CPU%u cpu_capacity=%u -> %llu (raw)\n",
+		 cpu, raw_capacity[cpu], highest_perf);
+
+	raw_capacity[cpu] = highest_perf;
+	per_cpu(capacity_freq_ref, cpu) =
+		cppc_perf_to_khz(perf_caps, highest_perf);
+	freq_inv_set_max_ratio(cpu,
+			       per_cpu(capacity_freq_ref, cpu) * HZ_PER_KHZ);
+
+	/* Re-normalize all CPUs: capacity is relative to the largest raw value. */
+	for_each_possible_cpu(c)
+		capacity_scale = max_t(u64, capacity_scale, raw_capacity[c]);
+
+	for_each_possible_cpu(c) {
+		capacity = raw_capacity[c];
+		capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
+				     capacity_scale);
+		topology_set_cpu_scale(c, capacity);
+		pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
+			 c, topology_get_cpu_scale(c));
+	}
+
+	schedule_work(&update_topology_flags_work);
+}
+EXPORT_SYMBOL_GPL(topology_update_cpu_capacity);
#endif

#ifdef CONFIG_CPU_FREQ
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 80a44a9a29fa..9fe96fa44435 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -901,6 +901,13 @@ static void cppc_cpufreq_update_limits(struct cpufreq_policy *policy)
policy->boost_supported = highest_perf >
cpu_data->perf_caps.nominal_perf;

+ /*
+ * Update the topology capacity to reflect the changed
+ * highest performance so that the scheduler and the
+ * frequency invariance engine use up-to-date values.
+ */
+ topology_update_cpu_capacity(policy->cpu, &cpu_data->perf_caps);
+
policy->cpuinfo.max_freq = cppc_perf_to_khz(&cpu_data->perf_caps,
policy->boost_enabled ?
highest_perf : cpu_data->perf_caps.nominal_perf);
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index ebd7f8935f96..fc0dbab76b36 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -11,6 +11,12 @@
void topology_normalize_cpu_scale(void);
int topology_update_cpu_topology(void);

+#ifdef CONFIG_ACPI_CPPC_LIB
+struct cppc_perf_caps;
+void topology_update_cpu_capacity(unsigned int cpu,
+ struct cppc_perf_caps *perf_caps);
+#endif
+
struct device_node;
bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu);

--
2.43.0