[kvm-unit-tests patch v6 13/18] x86: pmu: Improve instruction and branches events verification

From: Dapeng Mi
Date: Sat Sep 14 2024 - 03:05:05 EST


If HW supports the GLOBAL_CTRL MSR, enabling and disabling the PMCs is
moved into __precise_loop(). Thus, the instructions and branches events
can be verified against a precise count instead of a rough range.

Note that some intermittent failures are seen on AMD processors using
PerfMonV2, due to variance in counts. This probably has to do with the
way instructions leading to a VM-Entry or VM-Exit are accounted for
when counting retired instructions and branches.

https://lore.kernel.org/all/6d512a14-ace1-41a3-801e-0beb41425734@xxxxxxx/

So only enable this precise check for Intel processors.
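For illustration only, here is a minimal, self-contained C sketch of the
verification change (not kvm-unit-tests code; the struct, helper and the
slack value in the "rough" window are made up for this example). It shows
how a [min, max] range check collapses into an exact comparison once the
expected count is precise, mirroring what adjust_events_range() does by
setting min == max:

#include <stdbool.h>
#include <stdint.h>

struct event_window {
	const char *name;
	uint64_t min;
	uint64_t max;
};

/* Range check; with min == max this degenerates to an exact match. */
static bool check_event(const struct event_window *e, uint64_t count)
{
	return count >= e->min && count <= e->max;
}

int main(void)
{
	const uint64_t n = 1000000;   /* loop iterations, stands in for N */
	const uint64_t extra = 3 + 3; /* enable + disable GLOBAL_CTRL instructions */

	/* Rough range: enable/disable happens outside the measured block,
	 * so an unknown number of extra instructions leaks into the count
	 * (the slack of 100 is purely illustrative). */
	struct event_window rough   = { "instructions", n * 10, n * 10 + 100 };

	/* Precise case: enable/disable is part of the measured block, so
	 * the expected count is exact and min == max. */
	struct event_window precise = { "instructions", n * 10 + extra, n * 10 + extra };

	uint64_t measured = n * 10 + extra;

	return check_event(&rough, measured) && check_event(&precise, measured) ? 0 : 1;
}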

Signed-off-by: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>
---
x86/pmu.c | 37 +++++++++++++++++++++++++++++++++++++
1 file changed, 37 insertions(+)

diff --git a/x86/pmu.c b/x86/pmu.c
index 270f11b9..13c7c45d 100644
--- a/x86/pmu.c
+++ b/x86/pmu.c
@@ -19,6 +19,11 @@
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

+
+/* Enable GLOBAL_CTRL + disable GLOBAL_CTRL instructions */
+#define EXTRA_INSTRNS (3 + 3)
+#define LOOP_INSTRNS (N * 10 + EXTRA_INSTRNS)
+#define LOOP_BRANCHES (N)
#define LOOP_ASM(_wrmsr) \
_wrmsr "\n\t" \
"mov %%ecx, %%edi; mov %%ebx, %%ecx;\n\t" \
@@ -123,6 +128,30 @@ static inline void loop(u64 cntrs)
__precise_loop(cntrs);
}

+static void adjust_events_range(struct pmu_event *gp_events,
+ int instruction_idx, int branch_idx)
+{
+ /*
+ * If HW supports the GLOBAL_CTRL MSR, enabling and disabling PMCs is
+ * moved into __precise_loop(). Thus, instructions and branches events
+ * can be verified against a precise count instead of a rough range.
+ *
+ * We see some intermittent failures on AMD processors using PerfMonV2
+ * due to variance in counts. This probably has to do with the way
+ * instructions leading to a VM-Entry or VM-Exit are accounted when
+ * counting retired instructions and branches. Thus only enable the
+ * precise validation for Intel processors.
+ */
+ if (pmu.is_intel && this_cpu_has_perf_global_ctrl()) {
+ /* instructions event */
+ gp_events[instruction_idx].min = LOOP_INSTRNS;
+ gp_events[instruction_idx].max = LOOP_INSTRNS;
+ /* branches event */
+ gp_events[branch_idx].min = LOOP_BRANCHES;
+ gp_events[branch_idx].max = LOOP_BRANCHES;
+ }
+}
+
volatile uint64_t irq_received;

static void cnt_overflow(isr_regs_t *regs)
@@ -832,6 +861,9 @@ static void check_invalid_rdpmc_gp(void)

int main(int ac, char **av)
{
+ int instruction_idx;
+ int branch_idx;
+
setup_vm();
handle_irq(PMI_VECTOR, cnt_overflow);
buf = malloc(N*64);
@@ -845,13 +877,18 @@ int main(int ac, char **av)
}
gp_events = (struct pmu_event *)intel_gp_events;
gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
+ instruction_idx = INTEL_INSTRUCTIONS_IDX;
+ branch_idx = INTEL_BRANCHES_IDX;
report_prefix_push("Intel");
set_ref_cycle_expectations();
} else {
gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
gp_events = (struct pmu_event *)amd_gp_events;
+ instruction_idx = AMD_INSTRUCTIONS_IDX;
+ branch_idx = AMD_BRANCHES_IDX;
report_prefix_push("AMD");
}
+ adjust_events_range(gp_events, instruction_idx, branch_idx);

printf("PMU version: %d\n", pmu.version);
printf("GP counters: %d\n", pmu.nr_gp_counters);
--
2.40.1