[PATCH] KVM: x86/pmu: Add PEBS support for Intel Sapphire Rapids

From: Like Xu
Date: Wed Sep 21 2022 - 02:48:52 EST


From: Like Xu <likexu@xxxxxxxxxxx>

Virtualization support for SPR PEBS is now officially available in the
Intel SDM (June 2022) and has been validated on late-stepping machines:

Compared to Ice Lake Server, the PDIR counter available (Fixed 0) on SPR
is unchanged, but the capability is enhanced to Instruction-Accurate PDIR
(PDIR++), where PEBS is taken on the next instruction after the one that
caused the overflow. Also, it introduces a new Precise Distribution (PDist)
facility that eliminates the skid when a precise event is programmed
on general programmable counter 0.

For guest usage, KVM will raise attr.precise_ip to 3 in both cases
mentioned above, requesting the correct hardware counter (PDIR++
or PDist) from the perf sub-system on the host as usual.

Cc: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Signed-off-by: Like Xu <likexu@xxxxxxxxxxx>
---
arch/x86/events/intel/core.c | 1 +
arch/x86/kvm/pmu.c | 17 ++++++++++++++---
2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 2db93498ff71..804540ba4599 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6288,6 +6288,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints;
x86_pmu.extra_regs = intel_spr_extra_regs;
x86_pmu.limit_period = spr_limit_period;
+ x86_pmu.pebs_ept = 1;
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.pebs_block = true;
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 02f9e4f245bd..81e9d7c2332d 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -28,9 +28,18 @@
struct x86_pmu_capability __read_mostly kvm_pmu_cap;
EXPORT_SYMBOL_GPL(kvm_pmu_cap);

-static const struct x86_cpu_id vmx_icl_pebs_cpu[] = {
+/* Precise Distribution of Instructions Retired (PDIR) */
+static const struct x86_cpu_id vmx_pebs_pdir_cpu[] = {
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, NULL),
+ /* Instruction-Accurate PDIR (PDIR++) */
+ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, NULL),
+ {}
+};
+
+/* Precise Distribution (PDist) */
+static const struct x86_cpu_id vmx_pebs_pdist_cpu[] = {
+ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, NULL),
{}
};

@@ -181,12 +190,14 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
* the accuracy of the PEBS profiling result, because the "event IP"
* in the PEBS record is calibrated on the guest side.
*
- * On Icelake everything is fine. Other hardware (GLC+, TNT+) that
+ * On Icelake everything is fine. Other hardware (TNT+) that
* could possibly care here is unsupported and needs changes.
*/
attr.precise_ip = 1;
- if (x86_match_cpu(vmx_icl_pebs_cpu) && pmc->idx == 32)
+ if ((pmc->idx == 32 && x86_match_cpu(vmx_pebs_pdir_cpu)) ||
+ (pmc->idx == 0 && x86_match_cpu(vmx_pebs_pdist_cpu))) {
attr.precise_ip = 3;
+ }
}

event = perf_event_create_kernel_counter(&attr, -1, current,
--
2.37.3