Re: [PATCH v2] cpufreq: intel_pstate: Adjust the .adjust_perf() driver callback

From: Mario Limonciello

Date: Sun Jun 21 2026 - 15:07:31 EST


On 6/19/26 08:43, Zhongqiu Han wrote:
On 6/19/2026 10:52 PM, Rafael J. Wysocki wrote:
From: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>

In some cases, the processor may not actually stick to the "desired"
performance level programmed through the driver's .adjust_perf()
callback and may go above it, which may not be desirable (for instance,
there may be a UCLAMP_MAX limit set for the task currently running on
the given CPU which should be respected).

Address that by adjusting the .adjust_perf() callback to take an
additional argument, max_perf, representing the maximum allowed
performance level of the CPU and update the intel_pstate driver to
take that argument into account as appropriate.

Accordingly, adjust cpufreq_driver_adjust_perf() and the other existing
user of .adjust_perf(), which is the amd-pstate driver (but the behavior
of that driver is not changed).

While at it, also update the cpufreq_driver_adjust_perf()
documentation to reflect this change and some previous code
changes that have not been taken into account in it.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>
Acked-by: Viresh Kumar <viresh.kumar@xxxxxxxxxx>

Looks good to me. Thanks

Reviewed-by: Zhongqiu Han <zhongqiu.han@xxxxxxxxxxxxxxxx>

Reviewed-by: Mario Limonciello (AMD) <superm1@xxxxxxxxxx>

---

This is an update of

https://lore.kernel.org/linux-pm/14060154.uLZWGnKmhe@rafael.j.wysocki/

sent mainly because v1 inadvertently omitted the Rust bindings update.
It also fixes a few typos that were present in v1.

Thanks!

---
  drivers/cpufreq/amd-pstate.c     |    1 +
  drivers/cpufreq/cpufreq.c        |   14 +++++++++-----
  drivers/cpufreq/intel_pstate.c   |    9 ++++++++-
  include/linux/cpufreq.h          |    2 ++
  kernel/sched/cpufreq_schedutil.c |    4 +++-
  rust/kernel/cpufreq.rs           |    6 ++++--
  6 files changed, 27 insertions(+), 9 deletions(-)

--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -781,6 +781,7 @@ static unsigned int amd_pstate_fast_swit
  static void amd_pstate_adjust_perf(struct cpufreq_policy *policy,
                     unsigned long _min_perf,
                     unsigned long target_perf,
+                   unsigned long _max_perf,
                     unsigned long capacity)
  {
      u8 max_perf, min_perf, des_perf, cap_perf;
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2252,14 +2252,17 @@ EXPORT_SYMBOL_GPL(cpufreq_driver_fast_sw
   * @policy: cpufreq policy object of the target CPU.
   * @min_perf: Minimum (required) performance level (units of @capacity).
   * @target_perf: Target (desired) performance level (units of @capacity).
+ * @max_perf: Maximum (allowed) performance level (units of @capacity).
   * @capacity: Capacity of the target CPU.
   *
- * Carry out a fast performance level switch of @cpu without sleeping.
+ * Carry out a fast performance level adjustment for the CPU represented by
+ * @policy without sleeping.
   *
   * The driver's ->adjust_perf() callback invoked by this function must be
- * suitable for being called from within RCU-sched read-side critical sections
- * and it is expected to select a suitable performance level equal to or above
- * @min_perf and preferably equal to or below @target_perf.
+ * suitable for calling from within RCU-sched read-side critical sections and
+ * it is expected to program the processor to select suitable performance
+ * levels between @min_perf and @max_perf inclusive and preferably close to
+ * @target_perf going forward for the CPU represented by @policy.
   *
   * This function must not be called if policy->fast_switch_enabled is unset.
   *
@@ -2271,9 +2274,10 @@ EXPORT_SYMBOL_GPL(cpufreq_driver_fast_sw
  void cpufreq_driver_adjust_perf(struct cpufreq_policy *policy,
                   unsigned long min_perf,
                   unsigned long target_perf,
+                 unsigned long max_perf,
                   unsigned long capacity)
  {
-    cpufreq_driver->adjust_perf(policy, min_perf, target_perf, capacity);
+    cpufreq_driver->adjust_perf(policy, min_perf, target_perf, max_perf, capacity);
  }
  /**
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -3241,6 +3241,7 @@ static unsigned int intel_cpufreq_fast_s
  static void intel_cpufreq_adjust_perf(struct cpufreq_policy *policy,
                        unsigned long min_perf,
                        unsigned long target_perf,
+                      unsigned long max_perf,
                        unsigned long capacity)
  {
      struct cpudata *cpu = all_cpu_data[policy->cpu];
@@ -3271,7 +3272,13 @@ static void intel_cpufreq_adjust_perf(st
      if (min_pstate > cpu->max_perf_ratio)
          min_pstate = cpu->max_perf_ratio;
-    max_pstate = min(cap_pstate, cpu->max_perf_ratio);
+    max_pstate = cap_pstate;
+    if (max_perf < capacity)
+        max_pstate = DIV_ROUND_UP(cap_pstate * max_perf, capacity);
+
+    if (max_pstate > cpu->max_perf_ratio)
+        max_pstate = cpu->max_perf_ratio;
+
      if (max_pstate < min_pstate)
          max_pstate = min_pstate;
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -379,6 +379,7 @@ struct cpufreq_driver {
      void        (*adjust_perf)(struct cpufreq_policy *policy,
                         unsigned long min_perf,
                         unsigned long target_perf,
+                       unsigned long max_perf,
                         unsigned long capacity);
      /*
@@ -624,6 +625,7 @@ unsigned int cpufreq_driver_fast_switch(
  void cpufreq_driver_adjust_perf(struct cpufreq_policy *policy,
                  unsigned long min_perf,
                  unsigned long target_perf,
+                unsigned long max_perf,
                  unsigned long capacity);
  bool cpufreq_driver_has_adjust_perf(void);
  int cpufreq_driver_target(struct cpufreq_policy *policy,
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -50,6 +50,7 @@ struct sugov_cpu {
      unsigned long        util;
      unsigned long        bw_min;
+    unsigned long        bw_max;
      /* The field below is for single-CPU policies only: */
  #ifdef CONFIG_NO_HZ_COMMON
@@ -232,6 +233,7 @@ static void sugov_get_util(struct sugov_
      util = effective_cpu_util(sg_cpu->cpu, util, &min, &max);
      util = max(util, boost);
      sg_cpu->bw_min = min;
+    sg_cpu->bw_max = max;
      sg_cpu->util = sugov_effective_cpu_perf(sg_cpu->cpu, util, min, max);
  }
@@ -484,7 +486,7 @@ static void sugov_update_single_perf(str
          sg_cpu->util = prev_util;
      cpufreq_driver_adjust_perf(sg_policy->policy, sg_cpu->bw_min,
-                   sg_cpu->util, max_cap);
+                   sg_cpu->util, sg_cpu->bw_max, max_cap);
      sg_policy->last_freq_update_time = time;
  }
--- a/rust/kernel/cpufreq.rs
+++ b/rust/kernel/cpufreq.rs
@@ -792,7 +792,8 @@ pub trait Driver {
      }
      /// Driver's `adjust_perf` callback.
-    fn adjust_perf(_policy: &mut Policy, _min_perf: usize, _target_perf: usize, _capacity: usize) {
+    fn adjust_perf(_policy: &mut Policy, _min_perf: usize, _target_perf: usize,
+                   _max_perf: usize, _capacity: usize) {
          build_error!(VTABLE_DEFAULT_ERROR)
      }
@@ -1262,12 +1263,13 @@ impl<T: Driver> Registration<T> {
          ptr: *mut bindings::cpufreq_policy,
          min_perf: c_ulong,
          target_perf: c_ulong,
+        max_perf: c_ulong,
          capacity: c_ulong,
      ) {
          // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the
          // lifetime of `policy`.
          let policy = unsafe { Policy::from_raw_mut(ptr) };
-        T::adjust_perf(policy, min_perf, target_perf, capacity);
+        T::adjust_perf(policy, min_perf, target_perf, max_perf, capacity);
      }
      /// Driver's `get_intermediate` callback.