Expand the PMU counters test to verify that LLC references and misses have
non-zero counts when the code being executed while the LLC event(s) is
active is evicted via CFLUSH{,OPT}. Note, CLFLUSH{,OPT} requires a fence
of some kind to ensure the cache lines are flushed before execution
continues. Use MFENCE for simplicity (performance is not a concern).
Suggested-by: Jim Mattson <jmattson@xxxxxxxxxx>
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
.../selftests/kvm/x86_64/pmu_counters_test.c | 59 +++++++++++++------
1 file changed, 40 insertions(+), 19 deletions(-)
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
index b9c073d3ade9..90381382c51f 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
@@ -14,9 +14,9 @@
/*
* Number of "extra" instructions that will be counted, i.e. the number of
* instructions that are needed to set up the loop and then disabled the
- * counter. 2 MOV, 2 XOR, 1 WRMSR.
+ * counter. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, 2 MOV, 2 XOR, 1 WRMSR.
*/
-#define NUM_EXTRA_INSNS 5
+#define NUM_EXTRA_INSNS 7
#define NUM_INSNS_RETIRED (NUM_BRANCHES + NUM_EXTRA_INSNS)
static uint8_t kvm_pmu_version;
@@ -107,6 +107,12 @@ static void guest_assert_event_count(uint8_t idx,
case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
GUEST_ASSERT_EQ(count, NUM_BRANCHES);
break;
+ case INTEL_ARCH_LLC_REFERENCES_INDEX:
+ case INTEL_ARCH_LLC_MISSES_INDEX:
+ if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
+ !this_cpu_has(X86_FEATURE_CLFLUSH))
+ break;
+ fallthrough;
case INTEL_ARCH_CPU_CYCLES_INDEX:
case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
GUEST_ASSERT_NE(count, 0);
@@ -123,29 +129,44 @@ static void guest_assert_event_count(uint8_t idx,
GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
}
+/*
+ * Enable and disable the PMC in a monolithic asm blob to ensure that the
+ * compiler can't insert _any_ code into the measured sequence. Note, ECX
+ * doesn't need to be clobbered as the input value, @pmc_msr, is restored
+ * before the end of the sequence.
+ *
+ * If CLFUSH{,OPT} is supported, flush the cacheline containing (at least) the
+ * start of the loop to force LLC references and misses, i.e. to allow testing
+ * that those events actually count.
+ */
+#define GUEST_MEASURE_EVENT(_msr, _value, clflush) \
+do { \
+ __asm__ __volatile__("wrmsr\n\t" \
+ clflush "\n\t" \
+ "mfence\n\t" \
+ "1: mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t" \
+ "loop .\n\t" \
+ "mov %%edi, %%ecx\n\t" \
+ "xor %%eax, %%eax\n\t" \
+ "xor %%edx, %%edx\n\t" \
+ "wrmsr\n\t" \
+ :: "a"((uint32_t)_value), "d"(_value >> 32), \
+ "c"(_msr), "D"(_msr) \
+ ); \
+} while (0)
+
static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
uint32_t pmc, uint32_t pmc_msr,
uint32_t ctrl_msr, uint64_t ctrl_msr_value)
{
wrmsr(pmc_msr, 0);
- /*
- * Enable and disable the PMC in a monolithic asm blob to ensure that
- * the compiler can't insert _any_ code into the measured sequence.
- * Note, ECX doesn't need to be clobbered as the input value, @pmc_msr,
- * is restored before the end of the sequence.
- */
- __asm__ __volatile__("wrmsr\n\t"
- "mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t"
- "loop .\n\t"
- "mov %%edi, %%ecx\n\t"
- "xor %%eax, %%eax\n\t"
- "xor %%edx, %%edx\n\t"
- "wrmsr\n\t"
- :: "a"((uint32_t)ctrl_msr_value),
- "d"(ctrl_msr_value >> 32),
- "c"(ctrl_msr), "D"(ctrl_msr)
- );
+ if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))
+ GUEST_MEASURE_EVENT(ctrl_msr, ctrl_msr_value, "clflushopt 1f");
+ else if (this_cpu_has(X86_FEATURE_CLFLUSH))
+ GUEST_MEASURE_EVENT(ctrl_msr, ctrl_msr_value, "clflush 1f");
+ else
+ GUEST_MEASURE_EVENT(ctrl_msr, ctrl_msr_value, "nop");
guest_assert_event_count(idx, event, pmc, pmc_msr);
}