[PATCH v4 09/16] KVM: x86/pmu: Add PEBS_DATA_CFG MSR emulation to support adaptive PEBS

From: Like Xu
Date: Mon Mar 29 2021 - 01:51:05 EST


If IA32_PERF_CAPABILITIES.PEBS_BASELINE [bit 14] is set, adaptive
PEBS is supported, and so are the PEBS_DATA_CFG MSR and the adaptive
record enable bits (IA32_PERFEVTSELx.Adaptive_Record and
IA32_FIXED_CTR_CTRL.FCx_Adaptive_Record).
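
For reference, the host-side capability check can be sketched as
below; cpu_has_adaptive_pebs() is a made-up helper for illustration,
while the MSR and bit definitions are the existing ones this series
already relies on (PERF_CAP_PEBS_BASELINE in msr-index.h):

#include <linux/bits.h>
#include <asm/msr.h>

/* Illustrative helper, not part of this patch: true if the CPU
 * advertises adaptive PEBS via IA32_PERF_CAPABILITIES[14]. */
static bool cpu_has_adaptive_pebs(void)
{
	u64 caps;

	rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
	return !!(caps & PERF_CAP_PEBS_BASELINE);
}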

Adaptive PEBS provides software the capability to configure the PEBS
records to capture only the data of interest, keeping the record size
compact. An overflow of PMCx results in generation of an adaptive PEBS
record with state information based on the selections specified in
MSR_PEBS_DATA_CFG (Memory Info [bit 0], GPRs [bit 1], XMMs [bit 2],
LBRs [bit 3], and LBR Entries [bits 31:24]). By default, the PEBS
record contains only the Basic group.
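
That layout is mirrored by the host PMU's PEBS_DATACFG_* definitions
in arch/x86/include/asm/perf_event.h; the VALID_MASK macro below is
only a sketch showing where the 0xff00000f constant used later in this
patch comes from:

#define PEBS_DATACFG_MEMINFO	BIT_ULL(0)	/* Memory Info group */
#define PEBS_DATACFG_GP		BIT_ULL(1)	/* GPRs group */
#define PEBS_DATACFG_XMMS	BIT_ULL(2)	/* XMMs group */
#define PEBS_DATACFG_LBRS	BIT_ULL(3)	/* LBR group */
#define PEBS_DATACFG_LBR_SHIFT	24		/* LBR Entries, bits 31:24 */

/* Illustrative only: all architecturally defined bits; the rest are
 * reserved. Evaluates to 0xff00000f, i.e. ~pebs_data_cfg_mask below. */
#define PEBS_DATACFG_VALID_MASK						\
	(PEBS_DATACFG_MEMINFO | PEBS_DATACFG_GP | PEBS_DATACFG_XMMS |	\
	 PEBS_DATACFG_LBRS | (0xffull << PEBS_DATACFG_LBR_SHIFT))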

When guest adaptive PEBS is enabled, the MSR_PEBS_DATA_CFG MSR is
added to the perf_guest_switch_msr array returned by
intel_guest_get_msrs() and switched during VMX transitions, just like
the CORE_PERF_GLOBAL_CTRL MSR.
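
For context, KVM consumes that array around VM-entry roughly as in the
abridged sketch below (based on atomic_switch_perf_msrs() in
arch/x86/kvm/vmx/vmx.c; the data argument reflects the
perf_guest_get_msrs() signature change made earlier in this series,
and error handling is omitted):

static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
{
	int i, nr_msrs;
	struct perf_guest_switch_msr *msrs;

	/* Ask perf for the host/guest values of the switched MSRs. */
	msrs = perf_guest_get_msrs(&nr_msrs, (void *)vcpu_to_pmu(&vmx->vcpu));
	if (!msrs)
		return;

	for (i = 0; i < nr_msrs; i++)
		if (msrs[i].host == msrs[i].guest)
			clear_atomic_switch_msr(vmx, msrs[i].msr);
		else
			/* Loaded atomically across VM-entry/VM-exit. */
			add_atomic_switch_msr(vmx, msrs[i].msr,
					      msrs[i].guest, msrs[i].host,
					      false);
}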

Co-developed-by: Luwei Kang <luwei.kang@xxxxxxxxx>
Signed-off-by: Luwei Kang <luwei.kang@xxxxxxxxx>
Signed-off-by: Like Xu <like.xu@xxxxxxxxxxxxxxx>
---
arch/x86/events/intel/core.c | 11 ++++++++++-
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/kvm/vmx/pmu_intel.c | 16 ++++++++++++++++
3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 7f3821a59b84..3bbdfc4f6931 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3844,6 +3844,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
struct kvm_pmu *pmu = (struct kvm_pmu *)data;
+ bool baseline = x86_pmu.intel_cap.pebs_baseline;

arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
@@ -3863,6 +3864,12 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
arr[2].host = (unsigned long)ds;
arr[2].guest = pmu->ds_area;

+ if (baseline) {
+ arr[3].msr = MSR_PEBS_DATA_CFG;
+ arr[3].host = cpuc->pebs_data_cfg;
+ arr[3].guest = pmu->pebs_data_cfg;
+ }
+
/*
* If PMU counter has PEBS enabled it is not enough to
* disable counter on a guest entry since PEBS memory
@@ -3879,9 +3886,11 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
else {
arr[1].guest = arr[1].host;
arr[2].guest = arr[2].host;
+ if (baseline)
+ arr[3].guest = arr[3].host;
}

- *nr = 3;
+ *nr = baseline ? 4 : 3;
}

return arr;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2275cc144f58..94366da2dfee 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -463,6 +463,8 @@ struct kvm_pmu {
u64 ds_area;
u64 pebs_enable;
u64 pebs_enable_mask;
+ u64 pebs_data_cfg;
+ u64 pebs_data_cfg_mask;

/*
* The gate to release perf_events not marked in
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 77d30106abca..7f18c760dbae 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -226,6 +226,9 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
case MSR_IA32_DS_AREA:
ret = guest_cpuid_has(vcpu, X86_FEATURE_DS);
break;
+ case MSR_PEBS_DATA_CFG:
+ ret = vcpu->arch.perf_capabilities & PERF_CAP_PEBS_BASELINE;
+ break;
default:
ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
@@ -379,6 +382,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_DS_AREA:
msr_info->data = pmu->ds_area;
return 0;
+ case MSR_PEBS_DATA_CFG:
+ msr_info->data = pmu->pebs_data_cfg;
+ return 0;
default:
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
(pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -452,6 +458,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
pmu->ds_area = data;
return 0;
+ case MSR_PEBS_DATA_CFG:
+ if (pmu->pebs_data_cfg == data)
+ return 0;
+ if (!(data & pmu->pebs_data_cfg_mask)) {
+ pmu->pebs_data_cfg = data;
+ return 0;
+ }
+ break;
default:
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
(pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -505,6 +519,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->reserved_bits = 0xffffffff00200000ull;
pmu->fixed_ctr_ctrl_mask = ~0ull;
pmu->pebs_enable_mask = ~0ull;
+ pmu->pebs_data_cfg_mask = ~0ull;

entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
if (!entry)
@@ -579,6 +594,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
for (i = 0; i < pmu->nr_arch_fixed_counters; i++)
pmu->fixed_ctr_ctrl_mask &=
~(1ULL << (INTEL_PMC_IDX_FIXED + i * 4));
+ pmu->pebs_data_cfg_mask = ~0xff00000full;
} else
pmu->pebs_enable_mask = ~((1ull << pmu->nr_arch_gp_counters) - 1);
} else {
--
2.29.2