Re: [PATCH v4 3/6] x86,sched: Add support for frequency invariance on XEON_PHI_KNL/KNM

From: Peter Zijlstra
Date: Wed Dec 18 2019 - 15:22:55 EST


On Wed, Nov 13, 2019 at 01:46:51PM +0100, Giovanni Gherdovich wrote:
> The scheduler needs the ratio freq_curr/freq_max for frequency-invariant
> accounting. On Xeon Phi CPUs set freq_max to the second-highest frequency
> reported by the CPU.
>
> Xeon Phi CPUs such as Knights Landing and Knights Mill typically have either
> one or two turbo frequencies; in the former case that's 100 MHz above the base
> frequency, in the latter case the two levels are 100 MHz and 200 MHz above
> base frequency.
>
> We set freq_max to the second-highest frequency reported by the CPU. This
> could be the base frequency (if only one turbo level is available) or the first
> turbo level (if two levels are available). The rationale is to compromise
> between power efficiency and performance -- going straight to max turbo would
> favor efficiency and blindly using base freq would favor performance.
>
> For reference, this is how MSR_TURBO_RATIO_LIMIT must be parsed on a Xeon Phi
> to get the available frequencies (taken from a comment in turbostat's sources):
>
> [0] -- Reserved
> [7:1] -- Base value of number of active cores of bucket 1.
> [15:8] -- Base value of freq ratio of bucket 1.
> [20:16] -- +ve delta of number of active cores of bucket 2.
>            i.e. active cores of bucket 2 =
>            active cores of bucket 1 + delta
> [23:21] -- Negative delta of freq ratio of bucket 2.
>            i.e. freq ratio of bucket 2 =
>            freq ratio of bucket 1 - delta
> [28:24] -- +ve delta of number of active cores of bucket 3.
> [31:29] -- -ve delta of freq ratio of bucket 3.
> [36:32] -- +ve delta of number of active cores of bucket 4.
> [39:37] -- -ve delta of freq ratio of bucket 4.
> [44:40] -- +ve delta of number of active cores of bucket 5.
> [47:45] -- -ve delta of freq ratio of bucket 5.
> [52:48] -- +ve delta of number of active cores of bucket 6.
> [55:53] -- -ve delta of freq ratio of bucket 6.
> [60:56] -- +ve delta of number of active cores of bucket 7.
> [63:61] -- -ve delta of freq ratio of bucket 7.

Does it make sense to write a complete decoder and pass a @size
parameter just like the skx/glm case?

(I've no idea on the 4 I passed in, probably wants to be something else)

---
Index: linux-2.6/arch/x86/kernel/smpboot.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/smpboot.c
+++ linux-2.6/arch/x86/kernel/smpboot.c
@@ -1863,36 +1863,12 @@ static const struct x86_cpu_id has_glm_t
{}
};

-static int get_knl_turbo_ratio(u64 *turbo_ratio)
+static bool knl_set_cpu_max_freq(u64 *ratio, u64 *turbo_ratio, int size)
{
+ int delta_cores, delta_fratio;
+ int cores, fratio;
+ int err, i;
u64 msr;
- u32 ratio, delta_ratio;
- int err, i, found = 0;
-
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
- if (err)
- return err;
-
- ratio = (msr >> 8) & 0xFF;
-
- for (i = 16; i < 64; i += 8) {
- delta_ratio = (msr >> (i + 5)) & 0x7;
- if (delta_ratio) {
- *turbo_ratio = ratio - delta_ratio;
- found = 1;
- break;
- }
- }
-
- if (!found)
- return 1;
-
- return 0;
-}
-
-static bool knl_set_cpu_max_freq(u64 *ratio, u64 *turbo_ratio)
-{
- int err;

if (!x86_match_cpu(has_knl_turbo_ratio_limits))
return false;
@@ -1901,15 +1877,32 @@ static bool knl_set_cpu_max_freq(u64 *ra
if (err)
return false;

- /* second highest turbo ratio */
- err = get_knl_turbo_ratio(turbo_ratio);
+ *ratio = (*ratio >> 8) & 0xFF; /* max P state ratio */
+
+ err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
if (err)
return false;

- /* max P state ratio */
- *ratio = (*ratio >> 8) & 0xFF;
+ cores = (msr >> 1) & 0x7F;
+ fratio = (msr >> 8) & 0xFF;

- return true;
+ i = 16;
+ do {
+ if (cores >= size) {
+ *turbo_ratio = fratio;
+ return true;
+ }
+
+ delta_cores = (msr >> i) & 0x1F;
+ delta_fratio = (msr >> (i + 5)) & 0x07;
+
+ cores += delta_cores;
+ fratio -= delta_fratio;
+
+ i += 8;
+ } while (i < 64);
+
+ return false;
}

static bool skx_set_cpu_max_freq(u64 *ratio, u64 *turbo_ratio, int size)
@@ -1975,7 +1968,7 @@ static void intel_set_cpu_max_freq(void)
skx_set_cpu_max_freq(&ratio, &turbo_ratio, 1))
goto set_value;

- if (knl_set_cpu_max_freq(&ratio, &turbo_ratio))
+ if (knl_set_cpu_max_freq(&ratio, &turbo_ratio, 4))
goto set_value;

if (x86_match_cpu(has_skx_turbo_ratio_limits) &&