Re: [PATCH v4 2/6] x86,sched: Add support for frequency invariance on SKYLAKE_X

From: Peter Zijlstra
Date: Wed Dec 18 2019 - 15:06:40 EST


On Wed, Nov 13, 2019 at 01:46:50PM +0100, Giovanni Gherdovich wrote:
> The scheduler needs the ratio freq_curr/freq_max for frequency-invariant
> accounting. On SKYLAKE_X CPUs set freq_max to the highest frequency that can
> be sustained by a group of at least 4 cores.
>
> From the changelog of commit 31e07522be56 ("tools/power turbostat: fix
> decoding for GLM, DNV, SKX turbo-ratio limits"):
>
> > Newer processors do not hard-code the number of cpus in each bin
> > to {1, 2, 3, 4, 5, 6, 7, 8}. Rather, they can specify any number
> > of CPUS in each of the 8 bins:
> >
> > eg.
> >
> > ...
> > 36 * 100.0 = 3600.0 MHz max turbo 4 active cores
> > 37 * 100.0 = 3700.0 MHz max turbo 3 active cores
> > 38 * 100.0 = 3800.0 MHz max turbo 2 active cores
> > 39 * 100.0 = 3900.0 MHz max turbo 1 active cores
> >
> > could now look something like this:
> >
> > ...
> > 36 * 100.0 = 3600.0 MHz max turbo 16 active cores
> > 37 * 100.0 = 3700.0 MHz max turbo 8 active cores
> > 38 * 100.0 = 3800.0 MHz max turbo 4 active cores
> > 39 * 100.0 = 3900.0 MHz max turbo 2 active cores
>
> This encoding of turbo levels applies to both SKYLAKE_X and GOLDMONT/GOLDMONT_D,
> but we treat these two classes in separate commits because their freq_max
> values need to be different. For SKX we prefer a lower freq_max in the ratio
> freq_curr/freq_max, allowing load and utilization to overshoot and the
> schedutil governor to be more performance-oriented. Models from the Atom
> series (such as GOLDMONT*) are handled in a forthcoming commit as they have to
> favor power-efficiency over performance.

Can we at least use a single function to decode both? A little like the
below. I'm not married to the naming, but I think it is a little silly
to have 2 different functions to decode the exact same MSRs.

(one could even go as far as to make a boot param to override the {1,4}
default core count for these things)

---

Index: linux-2.6/arch/x86/kernel/smpboot.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/smpboot.c
+++ linux-2.6/arch/x86/kernel/smpboot.c
@@ -1863,27 +1863,6 @@ static const struct x86_cpu_id has_glm_t
{}
};

-static bool glm_set_cpu_max_freq(u64 *ratio, u64 *turbo_ratio)
-{
- int err;
-
- if (!x86_match_cpu(has_glm_turbo_ratio_limits))
- return false;
-
- err = rdmsrl_safe(MSR_PLATFORM_INFO, ratio);
- if (err)
- return false;
-
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, turbo_ratio);
- if (err)
- return false;
-
- *ratio = (*ratio >> 8) & 0xFF; /* max P state ratio */
- *turbo_ratio = *turbo_ratio & 0xFF; /* highest turbo ratio */
-
- return true;
-}
-
static int get_knl_turbo_ratio(u64 *turbo_ratio)
{
u64 msr;
@@ -1933,53 +1912,35 @@ static bool knl_set_cpu_max_freq(u64 *ra
return true;
}

-static int get_turbo_ratio_group(u64 *turbo_ratio)
+static bool skx_set_cpu_max_freq(u64 *ratio, u64 *turbo_ratio, int size)
{
- u64 ratio, core_counts;
- u32 group_size = 0;
- int err, i, found = 0;
+ u64 ratios, counts;
+ u32 group_size;
+ int err, i;

- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratio);
- if (err)
- return err;
-
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &core_counts);
+ err = rdmsrl_safe(MSR_PLATFORM_INFO, ratio);
if (err)
- return err;
-
- for (i = 0; i < 64; i += 8) {
- group_size = (core_counts >> i) & 0xFF;
- if (group_size >= 4) {
- *turbo_ratio = (ratio >> i) & 0xFF;
- found = 1;
- break;
- }
- }
-
- if (!found)
- return 1;
-
- return 0;
-}
-
-static bool skx_set_cpu_max_freq(u64 *ratio, u64 *turbo_ratio)
-{
- int err;
-
- if (!x86_match_cpu(has_skx_turbo_ratio_limits))
return false;

- err = rdmsrl_safe(MSR_PLATFORM_INFO, ratio);
+ *ratio = (*ratio >> 8) & 0xFF; /* max P state ratio */
+
+ err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
if (err)
return false;

- err = get_turbo_ratio_group(turbo_ratio); /* 4C (circa) turbo ratio */
+ err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
if (err)
return false;

- *ratio = (*ratio >> 8) & 0xFF; /* max P state ratio */
+ for (i = 0; i < 64; i += 8) {
+ group_size = (counts >> i) & 0xFF;
+ if (group_size >= size) {
+ *turbo_ratio = (ratios >> i) & 0xFF;
+ return true;
+ }
+ }

- return true;
+ return false;
}

static bool core_set_cpu_max_freq(u64 *ratio, u64 *turbo_ratio)
@@ -2010,13 +1971,15 @@ static void intel_set_cpu_max_freq(void)
if (slv_set_cpu_max_freq(&ratio, &turbo_ratio))
goto set_value;

- if (glm_set_cpu_max_freq(&ratio, &turbo_ratio))
+ if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
+ skx_set_cpu_max_freq(&ratio, &turbo_ratio, 1))
goto set_value;

if (knl_set_cpu_max_freq(&ratio, &turbo_ratio))
goto set_value;

- if (skx_set_cpu_max_freq(&ratio, &turbo_ratio))
+ if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
+ skx_set_cpu_max_freq(&ratio, &turbo_ratio, 4))
goto set_value;

core_set_cpu_max_freq(&ratio, &turbo_ratio);