[PATCH v4 04/10] KVM/x86: intel_pmu_lbr_enable
From: Wei Wang
Date: Wed Dec 26 2018 - 05:01:01 EST
The LBR stack is microarchitecture specific: for example, SKX has 32 LBR
stack entries while HSW has 16, so a HSW guest running on a SKX
machine may not get accurate perf results. For now, refuse to enable
guest LBR when the guest and the host have different numbers of LBR
stack entries.
Signed-off-by: Wei Wang <wei.w.wang@xxxxxxxxx>
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
---
arch/x86/kvm/cpuid.h | 8 ++++
arch/x86/kvm/pmu.c | 8 ++++
arch/x86/kvm/pmu.h | 2 +
arch/x86/kvm/pmu_intel.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 3 +-
5 files changed, 136 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 9a327d5..92bdc7d 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -123,6 +123,14 @@ static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu)
return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx;
}
+/*
+ * Report whether the guest CPUID advertises an Intel vendor.
+ *
+ * Looks up the guest CPUID entry (0, 0) and compares its EBX value with the
+ * GenuineIntel constant; only EBX is checked. Returns false when no such
+ * entry exists.
+ */
+static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *leaf0 = kvm_find_cpuid_entry(vcpu, 0, 0);
+
+	if (!leaf0)
+		return false;
+
+	return leaf0->ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx;
+}
+
static inline int guest_cpuid_family(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 58ead7d..b438ffa 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -299,6 +299,14 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
return 0;
}
+/*
+ * Ask the vendor PMU code whether LBR can be enabled for the guest.
+ *
+ * Returns false when @vcpu is NULL, when the guest CPUID vendor is not
+ * Intel, or when the active pmu_ops do not implement ->lbr_enable;
+ * otherwise returns the hook's verdict.
+ */
+bool kvm_pmu_lbr_enable(struct kvm_vcpu *vcpu)
+{
+	const struct kvm_pmu_ops *ops = kvm_x86_ops->pmu_ops;
+
+	/*
+	 * The caller hands in kvm->vcpus[0], which may still be NULL if the
+	 * capability is enabled before any vcpu has been created.
+	 */
+	if (!vcpu)
+		return false;
+
+	/*
+	 * A guest may report an Intel vendor while the host uses pmu_ops
+	 * that leave ->lbr_enable unset (only intel_pmu_ops sets it), so
+	 * never call the hook without checking it first.
+	 */
+	if (!ops->lbr_enable || !guest_cpuid_is_intel(vcpu))
+		return false;
+
+	return ops->lbr_enable(vcpu);
+}
+
void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
if (lapic_in_kernel(vcpu))
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index ba8898e..5f3c7a4 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -28,6 +28,7 @@ struct kvm_pmu_ops {
struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx);
int (*is_valid_msr_idx)(struct kvm_vcpu *vcpu, unsigned idx);
bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
+ bool (*lbr_enable)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
void (*refresh)(struct kvm_vcpu *vcpu);
@@ -106,6 +107,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
+bool kvm_pmu_lbr_enable(struct kvm_vcpu *vcpu);
void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c
index 5ab4a36..c04cb6d 100644
--- a/arch/x86/kvm/pmu_intel.c
+++ b/arch/x86/kvm/pmu_intel.c
@@ -15,6 +15,7 @@
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <asm/perf_event.h>
+#include <asm/intel-family.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
@@ -164,6 +165,121 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
return ret;
}
+/*
+ * Map a guest CPU model number to the number of LBR stack entries assumed
+ * for that model, or 0 if the model is not listed. Each group is annotated
+ * with the intel_pmu_lbr_init_*() routine it was derived from.
+ */
+static unsigned int intel_pmu_lbr_nr(u8 model)
+{
+	switch (model) {
+	case INTEL_FAM6_CORE2_MEROM:
+	case INTEL_FAM6_CORE2_MEROM_L:
+	case INTEL_FAM6_CORE2_PENRYN:
+	case INTEL_FAM6_CORE2_DUNNINGTON:
+		/* intel_pmu_lbr_init_core() */
+		return 4;
+	case INTEL_FAM6_NEHALEM:
+	case INTEL_FAM6_NEHALEM_EP:
+	case INTEL_FAM6_NEHALEM_EX:
+		/* intel_pmu_lbr_init_nhm() */
+		return 16;
+	case INTEL_FAM6_ATOM_BONNELL:
+	case INTEL_FAM6_ATOM_BONNELL_MID:
+	case INTEL_FAM6_ATOM_SALTWELL:
+	case INTEL_FAM6_ATOM_SALTWELL_MID:
+	case INTEL_FAM6_ATOM_SALTWELL_TABLET:
+		/* intel_pmu_lbr_init_atom() */
+		return 8;
+	case INTEL_FAM6_ATOM_SILVERMONT:
+	case INTEL_FAM6_ATOM_SILVERMONT_X:
+	case INTEL_FAM6_ATOM_SILVERMONT_MID:
+	case INTEL_FAM6_ATOM_AIRMONT:
+	case INTEL_FAM6_ATOM_AIRMONT_MID:
+		/* intel_pmu_lbr_init_slm() */
+		return 8;
+	case INTEL_FAM6_ATOM_GOLDMONT:
+	case INTEL_FAM6_ATOM_GOLDMONT_X:
+		/* intel_pmu_lbr_init_skl() */
+		return 32;
+	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+		/* intel_pmu_lbr_init_skl() */
+		return 32;
+	case INTEL_FAM6_WESTMERE:
+	case INTEL_FAM6_WESTMERE_EP:
+	case INTEL_FAM6_WESTMERE_EX:
+		/* intel_pmu_lbr_init_nhm() */
+		return 16;
+	case INTEL_FAM6_SANDYBRIDGE:
+	case INTEL_FAM6_SANDYBRIDGE_X:
+		/* intel_pmu_lbr_init_snb() */
+		return 16;
+	case INTEL_FAM6_IVYBRIDGE:
+	case INTEL_FAM6_IVYBRIDGE_X:
+		/* intel_pmu_lbr_init_snb() */
+		return 16;
+	case INTEL_FAM6_HASWELL_CORE:
+	case INTEL_FAM6_HASWELL_X:
+	case INTEL_FAM6_HASWELL_ULT:
+	case INTEL_FAM6_HASWELL_GT3E:
+		/* intel_pmu_lbr_init_hsw() */
+		return 16;
+	case INTEL_FAM6_BROADWELL_CORE:
+	case INTEL_FAM6_BROADWELL_XEON_D:
+	case INTEL_FAM6_BROADWELL_GT3E:
+	case INTEL_FAM6_BROADWELL_X:
+		/* intel_pmu_lbr_init_hsw() */
+		return 16;
+	case INTEL_FAM6_XEON_PHI_KNL:
+	case INTEL_FAM6_XEON_PHI_KNM:
+		/* intel_pmu_lbr_init_knl() */
+		return 8;
+	case INTEL_FAM6_SKYLAKE_MOBILE:
+	case INTEL_FAM6_SKYLAKE_DESKTOP:
+	case INTEL_FAM6_SKYLAKE_X:
+	case INTEL_FAM6_KABYLAKE_MOBILE:
+	case INTEL_FAM6_KABYLAKE_DESKTOP:
+		/* intel_pmu_lbr_init_skl() */
+		return 32;
+	default:
+		/* Unknown model: the caller must treat 0 as "unsupported". */
+		return 0;
+	}
+}
+
+/*
+ * Decide whether the LBR feature may be enabled for the guest.
+ *
+ * x86_perf_get_lbr_stack() is handed kvm->arch.lbr_stack (presumably to
+ * fill in the host's LBR layout). Returns true only if that call returns
+ * 0, the guest CPU family equals the host's, the guest model is listed in
+ * intel_pmu_lbr_nr(), and its LBR depth equals kvm->arch.lbr_stack.nr.
+ */
+static bool intel_pmu_lbr_enable(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	u8 vcpu_model = guest_cpuid_model(vcpu);
+	unsigned int vcpu_lbr_nr;
+
+	if (x86_perf_get_lbr_stack(&kvm->arch.lbr_stack))
+		return false;
+
+	if (guest_cpuid_family(vcpu) != boot_cpu_data.x86)
+		return false;
+
+	/*
+	 * People may run vcpus of an older model on physical cpus of a newer
+	 * model, for example a BDW guest on a SKX machine (the other way
+	 * around is not possible). The BDW guest may not get accurate results
+	 * on a SKX machine, as it only reads 16 entries of the LBR stack
+	 * while 32 entries are being recorded. So for now LBR enabling is
+	 * refused when the vcpu and the physical cpu see a different number
+	 * of LBR stack entries.
+	 */
+	vcpu_lbr_nr = intel_pmu_lbr_nr(vcpu_model);
+	if (!vcpu_lbr_nr) {
+		/*
+		 * Reject unknown models explicitly instead of relying on the
+		 * host depth being non-zero. Rate-limit the message, since
+		 * this path can be reached repeatedly through the ioctl.
+		 */
+		pr_warn_ratelimited("%s: vcpu model not supported %d\n",
+				    __func__, vcpu_model);
+		return false;
+	}
+
+	return vcpu_lbr_nr == kvm->arch.lbr_stack.nr;
+}
+
static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -350,6 +466,7 @@ struct kvm_pmu_ops intel_pmu_ops = {
.msr_idx_to_pmc = intel_msr_idx_to_pmc,
.is_valid_msr_idx = intel_is_valid_msr_idx,
.is_valid_msr = intel_is_valid_msr,
+ .lbr_enable = intel_pmu_lbr_enable,
.get_msr = intel_pmu_get_msr,
.set_msr = intel_pmu_set_msr,
.refresh = intel_pmu_refresh,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 50efee4..02e29fd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4505,8 +4505,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
break;
case KVM_CAP_X86_GUEST_LBR:
r = -EINVAL;
- if (cap->args[0] &&
- x86_perf_get_lbr_stack(&kvm->arch.lbr_stack)) {
+ if (cap->args[0] && !kvm_pmu_lbr_enable(kvm->vcpus[0])) {
pr_err("Failed to enable the guest lbr feature\n");
break;
}
--
2.7.4