[kvm-unit-tests Patch v4 17/17] x86: pmu: Optimize emulated instruction validation

From: Dapeng Mi
Date: Thu Apr 18 2024 - 23:51:05 EST


For CPUs that support the PERF_GLOBAL_CTRL MSR, the validation of
emulated instructions can be improved to check against precise counts for
the instructions and branches events instead of a rough range.

Move the enabling and disabling of the PERF_GLOBAL_CTRL MSR into the
kvm_fep_asm() blob so that the instructions and branches events can be
verified against precise counts.

Signed-off-by: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>
---
x86/pmu.c | 108 ++++++++++++++++++++++++++++++++----------------------
1 file changed, 65 insertions(+), 43 deletions(-)

diff --git a/x86/pmu.c b/x86/pmu.c
index e0da522c004b..dd83f157b35c 100644
--- a/x86/pmu.c
+++ b/x86/pmu.c
@@ -13,11 +13,6 @@

#define N 1000000

-// These values match the number of instructions and branches in the
-// assembly block in check_emulated_instr().
-#define EXPECTED_INSTR 17
-#define EXPECTED_BRNCH 5
-
#define IBPB_JMP_INSTRNS 7
#define IBPB_JMP_BRANCHES 1
#define IBPB_JMP_ASM(_wrmsr) \
@@ -54,6 +49,40 @@ do { \
: "edi"); \
} while (0)

+/* the number of instructions and branches of the kvm_fep_asm() blob */
+#define KVM_FEP_INSTR 22 /* NOTE(review): blob text holds 23 insns incl. both _wrmsr slots; presumably one slot is uncounted - confirm */
+#define KVM_FEP_BRNCH 5 /* presumably the five KVM_FEP "jne 1f" lines of the blob */
+
+/*
+ * Runs a fixed blob between two _wrmsr slots; the visible callers pass "wrmsr" or "nop".
+ * KVM_FEP is a magic prefix that forces emulation so each 'KVM_FEP "jne 1f\n"' just counts as a single instruction.
+ */
+#define kvm_fep_asm(_wrmsr) \
+do { \
+ asm volatile( \
+ _wrmsr "\n\t" /* leading slot, runs with the caller's eax/edx/ecx */ \
+ "mov %%ecx, %%edi;\n\t" /* stash ecx in edi; cpuid below overwrites ecx */ \
+ "mov $0x0, %%eax;\n\t" \
+ "cmp $0x0, %%eax;\n\t" /* eax == 0, so none of the jne below is taken */ \
+ KVM_FEP "jne 1f\n\t" \
+ KVM_FEP "jne 1f\n\t" \
+ KVM_FEP "jne 1f\n\t" \
+ KVM_FEP "jne 1f\n\t" \
+ KVM_FEP "jne 1f\n\t" \
+ "mov $0xa, %%eax; cpuid;\n\t" /* cpuid leaf 0xa, issued five times */ \
+ "mov $0xa, %%eax; cpuid;\n\t" \
+ "mov $0xa, %%eax; cpuid;\n\t" \
+ "mov $0xa, %%eax; cpuid;\n\t" \
+ "mov $0xa, %%eax; cpuid;\n\t" \
+ "1: mov %%edi, %%ecx; \n\t" /* put the stashed ecx back for the trailing slot */ \
+ "xor %%eax, %%eax; \n\t" /* trailing slot runs with eax = 0 ... */ \
+ "xor %%edx, %%edx;\n\t" /* ... and edx = 0 */ \
+ _wrmsr "\n\t" /* trailing slot */ \
+ : \
+ : "a"(eax), "d"(edx), "c"(ecx) /* locals of the expanding scope; NOTE(review): input-only, yet eax/edx/ecx are written above - confirm vs GCC asm rules */ \
+ : "ebx", "edi"); /* edi is the stash register; ebx is written by cpuid */ \
+} while (0)
+
typedef struct {
uint32_t ctr;
uint32_t idx;
@@ -639,6 +668,7 @@ static void check_running_counter_wrmsr(void)

static void check_emulated_instr(void)
{
+ u32 eax, edx, ecx;
uint64_t status, instr_start, brnch_start;
uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1;
unsigned int branch_idx = pmu.is_intel ?
@@ -646,6 +676,7 @@ static void check_emulated_instr(void)
unsigned int instruction_idx = pmu.is_intel ?
INTEL_INSTRUCTIONS_IDX :
AMD_INSTRUCTIONS_IDX;
+
pmu_counter_t brnch_cnt = {
.ctr = MSR_GP_COUNTERx(0),
/* branch instructions */
@@ -661,55 +692,46 @@ static void check_emulated_instr(void)
if (this_cpu_has_perf_global_status())
pmu_clear_global_status();

- start_event(&brnch_cnt);
- start_event(&instr_cnt);
+ __start_event(&brnch_cnt, 0);
+ __start_event(&instr_cnt, 0);

- brnch_start = -EXPECTED_BRNCH;
- instr_start = -EXPECTED_INSTR;
+ brnch_start = -KVM_FEP_BRNCH;
+ instr_start = -KVM_FEP_INSTR;
wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width);
wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width);
- // KVM_FEP is a magic prefix that forces emulation so
- // 'KVM_FEP "jne label\n"' just counts as a single instruction.
- asm volatile(
- "mov $0x0, %%eax\n"
- "cmp $0x0, %%eax\n"
- KVM_FEP "jne label\n"
- KVM_FEP "jne label\n"
- KVM_FEP "jne label\n"
- KVM_FEP "jne label\n"
- KVM_FEP "jne label\n"
- "mov $0xa, %%eax\n"
- "cpuid\n"
- "mov $0xa, %%eax\n"
- "cpuid\n"
- "mov $0xa, %%eax\n"
- "cpuid\n"
- "mov $0xa, %%eax\n"
- "cpuid\n"
- "mov $0xa, %%eax\n"
- "cpuid\n"
- "label:\n"
- :
- :
- : "eax", "ebx", "ecx", "edx");

- if (this_cpu_has_perf_global_ctrl())
- wrmsr(pmu.msr_global_ctl, 0);
+ if (this_cpu_has_perf_global_ctrl()) {
+ eax = BIT(0) | BIT(1);
+ ecx = pmu.msr_global_ctl;
+ edx = 0;
+ kvm_fep_asm("wrmsr");
+ } else {
+ eax = ecx = edx = 0;
+ kvm_fep_asm("nop");
+ }

- stop_event(&brnch_cnt);
- stop_event(&instr_cnt);
+ __stop_event(&brnch_cnt);
+ __stop_event(&instr_cnt);

// Check that the end count - start count is at least the expected
// number of instructions and branches.
- report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
- "instruction count");
- report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
- "branch count");
+ if (this_cpu_has_perf_global_ctrl()) {
+ report(instr_cnt.count - instr_start == KVM_FEP_INSTR,
+ "instruction count");
+ report(brnch_cnt.count - brnch_start == KVM_FEP_BRNCH,
+ "branch count");
+ } else {
+ report(instr_cnt.count - instr_start >= KVM_FEP_INSTR,
+ "instruction count");
+ report(brnch_cnt.count - brnch_start >= KVM_FEP_BRNCH,
+ "branch count");
+ }
+
if (this_cpu_has_perf_global_status()) {
// Additionally check that those counters overflowed properly.
status = rdmsr(pmu.msr_global_status);
- report(status & 1, "branch counter overflow");
- report(status & 2, "instruction counter overflow");
+ report(status & BIT_ULL(0), "branch counter overflow");
+ report(status & BIT_ULL(1), "instruction counter overflow");
}

report_prefix_pop();
--
2.34.1