[PATCH v1] cpufreq: intel_pstate: Adjust the .adjust_perf() driver callback

From: Rafael J. Wysocki

Date: Wed Jun 17 2026 - 12:14:55 EST


From: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>

In some cases, the processor may not actually stick to the "desired"
performance level programmed through the driver's .adjust_perf()
callback and may go above it, which may not be desirable (for instance,
there may be a UCLAMP_MAX limit set for the task currently running on
the given CPU which should be respected).

Address that by adjusting the .adjust_perf() callback to take an
additional argument, max_perf, representing the maximum allowed
performance level of the CPU and update the intel_pstate driver to
take that argument into account as appropriate.

Accordingly, adjust cpufreq_driver_adjust_perf() and the other existing
user of .adjust_perf(), which is the amd-pstate driver (but the behavior
of that driver is not changed).

While at it, also update the cpufreq_driver_adjust_perf()
documentation to reflect this change and some previous code
changes that have not been taken into account in it.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>
---
drivers/cpufreq/amd-pstate.c | 1 +
drivers/cpufreq/cpufreq.c | 14 +++++++++-----
drivers/cpufreq/intel_pstate.c | 9 ++++++++-
include/linux/cpufreq.h | 2 ++
kernel/sched/cpufreq_schedutil.c | 4 +++-
5 files changed, 23 insertions(+), 7 deletions(-)

--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -781,6 +781,7 @@ static unsigned int amd_pstate_fast_swit
static void amd_pstate_adjust_perf(struct cpufreq_policy *policy,
unsigned long _min_perf,
unsigned long target_perf,
+ unsigned long _max_perf,
unsigned long capacity)
{
u8 max_perf, min_perf, des_perf, cap_perf;
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2252,14 +2252,17 @@ EXPORT_SYMBOL_GPL(cpufreq_driver_fast_sw
* @policy: cpufreq policy object of the target CPU.
* @min_perf: Minimum (required) performance level (units of @capacity).
* @target_perf: Target (desired) performance level (units of @capacity).
+ * @max_perf: Maximum (allowed) performance level (units of @capacity).
* @capacity: Capacity of the target CPU.
*
- * Carry out a fast performance level switch of @cpu without sleeping.
+ * Carry out a fast performance level adjustment for the CPU represented by
+ * @policy without sleeping.
*
* The driver's ->adjust_perf() callback invoked by this function must be
- * suitable for being called from within RCU-sched read-side critical sections
- * and it is expected to select a suitable performance level equal to or above
- * @min_perf and preferably equal to or below @target_perf.
+ * suitable for calling from within RCU-sched read-side critical sections and
+ * it is expected to program the processor to select suitable performance
+ * levels between @min_perf and @max_perf inclusive and preferably close to
+ * @target_perf going forward for the CPU represented by @policy.
*
* This function must not be called if policy->fast_switch_enabled is unset.
*
@@ -2271,9 +2274,10 @@ EXPORT_SYMBOL_GPL(cpufreq_driver_fast_sw
void cpufreq_driver_adjust_perf(struct cpufreq_policy *policy,
unsigned long min_perf,
unsigned long target_perf,
+ unsigned long max_perf,
unsigned long capacity)
{
- cpufreq_driver->adjust_perf(policy, min_perf, target_perf, capacity);
+ cpufreq_driver->adjust_perf(policy, min_perf, target_perf, max_perf, capacity);
}

/**
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -3241,6 +3241,7 @@ static unsigned int intel_cpufreq_fast_s
static void intel_cpufreq_adjust_perf(struct cpufreq_policy *policy,
unsigned long min_perf,
unsigned long target_perf,
+ unsigned long max_perf,
unsigned long capacity)
{
struct cpudata *cpu = all_cpu_data[policy->cpu];
@@ -3271,7 +3272,13 @@ static void intel_cpufreq_adjust_perf(st
if (min_pstate > cpu->max_perf_ratio)
min_pstate = cpu->max_perf_ratio;

- max_pstate = min(cap_pstate, cpu->max_perf_ratio);
+ max_pstate = cap_pstate;
+ if (max_perf < capacity)
+ max_pstate = DIV_ROUND_UP(cap_pstate * max_perf, capacity);
+
+ if (max_pstate > cpu->max_perf_ratio)
+ max_pstate = cpu->max_perf_ratio;
+
if (max_pstate < min_pstate)
max_pstate = min_pstate;

--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -379,6 +379,7 @@ struct cpufreq_driver {
void (*adjust_perf)(struct cpufreq_policy *policy,
unsigned long min_perf,
unsigned long target_perf,
+ unsigned long max_perf,
unsigned long capacity);

/*
@@ -624,6 +625,7 @@ unsigned int cpufreq_driver_fast_switch(
void cpufreq_driver_adjust_perf(struct cpufreq_policy *policy,
unsigned long min_perf,
unsigned long target_perf,
+ unsigned long max_perf,
unsigned long capacity);
bool cpufreq_driver_has_adjust_perf(void);
int cpufreq_driver_target(struct cpufreq_policy *policy,
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -50,6 +50,7 @@ struct sugov_cpu {

unsigned long util;
unsigned long bw_min;
+ unsigned long bw_max;

/* The field below is for single-CPU policies only: */
#ifdef CONFIG_NO_HZ_COMMON
@@ -232,6 +233,7 @@ static void sugov_get_util(struct sugov_
util = effective_cpu_util(sg_cpu->cpu, util, &min, &max);
util = max(util, boost);
sg_cpu->bw_min = min;
+ sg_cpu->bw_max = max;
sg_cpu->util = sugov_effective_cpu_perf(sg_cpu->cpu, util, min, max);
}

@@ -484,7 +486,7 @@ static void sugov_update_single_perf(str
sg_cpu->util = prev_util;

cpufreq_driver_adjust_perf(sg_policy->policy, sg_cpu->bw_min,
- sg_cpu->util, max_cap);
+ sg_cpu->util, sg_cpu->bw_max, max_cap);

sg_policy->last_freq_update_time = time;
}