[PATCH v4 2/2] x86/xstate: Make AVX512 status tracking more accurate
From: Noah Goldstein
Date: Wed Oct 27 2021 - 13:17:44 EST
Split the timestamps for tracking the AVX512 status into
'avx512_ZMM_Hi256_timestamp' and 'avx512_Hi16_ZMM_timestamp'. They are
used for tracking XFEATURE_ZMM_Hi256 and XFEATURE_Hi16_ZMM use
respectively.
The purpose of tracking the AVX512 status is to convey information
about possible frequency throttling. The current implementation has
false positives on XFEATURE_OPMASK use and on any use of the hi16 xmm
and ymm registers (which are included in the XFEATURE_Hi16_ZMM set),
as neither causes frequency throttling.
The implementation splits the relevant xfeature sets to add more
clarity to the output. The 'avx512_ZMM_Hi256_timestamp' will not have
false positives, so its value is at least indicative of frequency
throttling. 'avx512_Hi16_ZMM_timestamp' is kept separate because it
can still indicate frequency throttling from zmm16...zmm31 use, but
also has false positives. Since existing code may rely on
"AVX512_elapsed_ms" as a catch-all output, both xfeature sets are used
to compute it (taking whichever, if any, of the two is in use).
Signed-off-by: Noah Goldstein <goldstein.w.n@xxxxxxxxx>
---
arch/x86/include/asm/fpu/types.h | 16 ++++++++--
arch/x86/kernel/fpu/xstate.c | 53 +++++++++++++++++++++++++++-----
2 files changed, 58 insertions(+), 11 deletions(-)
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index f5a38a5f3ae1..cb10909fa3da 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -330,11 +330,21 @@ struct fpu {
unsigned int last_cpu;
/*
- * @avx512_timestamp:
+ * @avx512_ZMM_Hi256_timestamp:
*
- * Records the timestamp of AVX512 use during last context switch.
+ * Records the timestamp of AVX512 use in the ZMM_Hi256 xfeature
+ * set. This includes zmm0...zmm15.
*/
- unsigned long avx512_timestamp;
+ unsigned long avx512_ZMM_Hi256_timestamp;
+
+ /*
+ * @avx512_Hi16_ZMM_timestamp:
+ *
+ * Records the timestamp of AVX512 use in the Hi16_ZMM xfeature
+ * set. This includes usage of any of the hi16 xmm, ymm, or zmm
+ * registers.
+ */
+ unsigned long avx512_Hi16_ZMM_timestamp;
/*
* @state:
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 00b495914be2..3bb1a425ce56 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1246,7 +1246,7 @@ void xrstors(struct xregs_state *xstate, u64 mask)
}
/*
- * Track of the state of desired avx architecture features.
+ * Keep track of the state of the desired AVX-related xfeatures.
*/
void fpu_update_avx_timestamp(struct fpu *fpu)
{
@@ -1254,18 +1254,28 @@ void fpu_update_avx_timestamp(struct fpu *fpu)
* AVX512 state is tracked here because its use is known to slow
* the max clock speed of the core.
*/
- if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
- fpu->avx512_timestamp = jiffies;
+
+ /*
+ * Store a separate state for ZMM_Hi256 and Hi16_ZMM xfeature use.
+ * If ZMM_Hi256 is used the machine has certainly used a zmm
+ * register. Hi16_ZMM, however, has false positives on usage of
+ * hi16 xmm and ymm registers.
+ */
+ if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_ZMM_Hi256)
+ fpu->avx512_ZMM_Hi256_timestamp = jiffies;
+ if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_Hi16_ZMM)
+ fpu->avx512_Hi16_ZMM_timestamp = jiffies;
}
+
#ifdef CONFIG_PROC_PID_ARCH_STATUS
+
/*
- * Report the amount of time elapsed in millisecond since last AVX512
- * use in the task.
+ * Helper function for computing proper output for avx512_status
+ * timestamp.
*/
-static void avx512_status(struct seq_file *m, struct task_struct *task)
+static long avx_status_compute_delta(unsigned long timestamp)
{
- unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
long delta;
if (!timestamp) {
@@ -1282,8 +1292,35 @@ static void avx512_status(struct seq_file *m, struct task_struct *task)
delta = LONG_MAX;
delta = jiffies_to_msecs(delta);
}
+ return delta;
+}
- seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
+/*
+ * Report the time elapsed, in milliseconds, since the task last used
+ * AVX512 state: a unified value (the most recent use of either tracked
+ * xfeature set) followed by one value per set.
+ */
+static void avx512_status(struct seq_file *m, struct task_struct *task)
+{
+	unsigned long ts_ZMM_Hi256, ts_Hi16_ZMM;
+	long delta_ZMM_Hi256, delta_Hi16_ZMM, delta_unified;
+
+	ts_ZMM_Hi256 = READ_ONCE(task->thread.fpu.avx512_ZMM_Hi256_timestamp);
+	delta_ZMM_Hi256 = avx_status_compute_delta(ts_ZMM_Hi256);
+
+	ts_Hi16_ZMM = READ_ONCE(task->thread.fpu.avx512_Hi16_ZMM_timestamp);
+	delta_Hi16_ZMM = avx_status_compute_delta(ts_Hi16_ZMM);
+
+	/* Most recent use of either set; a zero timestamp means unused. */
+	delta_unified = ts_Hi16_ZMM ? delta_Hi16_ZMM : delta_ZMM_Hi256;
+	if (ts_Hi16_ZMM && ts_ZMM_Hi256 && delta_ZMM_Hi256 < delta_Hi16_ZMM)
+		delta_unified = delta_ZMM_Hi256;
+
+	seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta_unified);
+	seq_putc(m, '\n');
+	seq_put_decimal_ll(m, "AVX512_ZMM_Hi256_elapsed_ms:\t", delta_ZMM_Hi256);
+	seq_putc(m, '\n');
+	seq_put_decimal_ll(m, "AVX512_Hi16_ZMM_elapsed_ms:\t", delta_Hi16_ZMM);
 	seq_putc(m, '\n');
 }
--
2.25.1