[PATCH] perf/x86/intel/pt: Don't die on VMXON

From: Alexander Shishkin
Date: Fri Apr 01 2016 - 12:27:49 EST


Some versions of Intel PT do not support tracing across VMXON, more
specifically, VMXON will clear TraceEn control bit and any attempt to
set it before VMXOFF will throw a #GP, which in the current state of
things will crash the kernel. Namely,

$ perf record -e intel_pt// kvm -nographic

on such a machine will kill it.

To avoid this, notify the intel_pt driver before VMXON and after
VMXOFF so that it knows when not to enable itself.

Signed-off-by: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
---
arch/x86/events/intel/pt.c | 71 +++++++++++++++++++++++++++++++++------
arch/x86/events/intel/pt.h | 2 ++
arch/x86/include/asm/perf_event.h | 4 +++
arch/x86/kvm/vmx.c | 4 +++
4 files changed, 70 insertions(+), 11 deletions(-)

diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 127f58c179..a16f2ebc12 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -136,9 +136,17 @@ static int __init pt_pmu_hw_init(void)
struct dev_ext_attribute *de_attrs;
struct attribute **attrs;
size_t size;
+ u64 reg;
int ret;
long i;

+ if (test_cpu_cap(&boot_cpu_data, X86_FEATURE_VMX)) {
+ /* Intel SDM, 36.5 "Tracing post-VMXON" */
+ rdmsrl(MSR_IA32_VMX_MISC, reg);
+ if (reg & BIT(14))
+ pt_pmu.vmx = true;
+ }
+
attrs = NULL;

for (i = 0; i < PT_CPUID_LEAVES; i++) {
@@ -269,20 +277,23 @@ static void pt_config(struct perf_event *event)

reg |= (event->attr.config & PT_CONFIG_MASK);

+ event->hw.config = reg;
wrmsrl(MSR_IA32_RTIT_CTL, reg);
}

-static void pt_config_start(bool start)
+static void pt_config_stop(struct perf_event *event)
{
- u64 ctl;
+ u64 ctl = READ_ONCE(event->hw.config);

- rdmsrl(MSR_IA32_RTIT_CTL, ctl);
- if (start)
- ctl |= RTIT_CTL_TRACEEN;
- else
- ctl &= ~RTIT_CTL_TRACEEN;
+ /* may be already stopped by a PMI*/
+ if (!(ctl & RTIT_CTL_TRACEEN))
+ return;
+
+ ctl ^= RTIT_CTL_TRACEEN;
wrmsrl(MSR_IA32_RTIT_CTL, ctl);

+ WRITE_ONCE(event->hw.config, ctl);
+
/*
* A wrmsr that disables trace generation serializes other PT
* registers and causes all data packets to be written to memory,
@@ -291,8 +302,7 @@ static void pt_config_start(bool start)
* The below WMB, separating data store and aux_head store matches
* the consumer's RMB that separates aux_head load and data load.
*/
- if (!start)
- wmb();
+ wmb();
}

static void pt_config_buffer(void *buf, unsigned int topa_idx,
@@ -922,11 +932,17 @@ void intel_pt_interrupt(void)
if (!ACCESS_ONCE(pt->handle_nmi))
return;

- pt_config_start(false);
+ /*
+ * If VMX is on and PT does not support it, don't touch anything.
+ */
+ if (ACCESS_ONCE(pt->vmx_on))
+ return;

if (!event)
return;

+ pt_config_stop(event);
+
buf = perf_get_aux(&pt->handle);
if (!buf)
return;
@@ -963,6 +979,35 @@ void intel_pt_interrupt(void)
}
}

+void intel_pt_vmxon(int entry)
+{
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+ struct perf_event *event;
+ unsigned long flags;
+
+ /* PT plays nice with VMX, do nothing */
+ if (pt_pmu.vmx)
+ return;
+
+ /*
+ * VMX entry will clear RTIT_CTL.TraceEn; we need to make
+ * sure to not try to set it while VMX is on. Disable
+ * interrupts to avoid racing with pmu callbacks;
+ * concurrent PMI should be handled fine.
+ */
+ local_irq_save(flags);
+ WRITE_ONCE(pt->vmx_on, entry);
+
+ if (entry) {
+ /* prevent pt_config_stop() from writing RTIT_CTL */
+ event = pt->handle.event;
+ if (event)
+ event->hw.config = 0;
+ }
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(intel_pt_vmxon);
+
/*
* PMU callbacks
*/
@@ -973,6 +1018,9 @@ static void pt_event_start(struct perf_event *event, int mode)
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf;

+ if (ACCESS_ONCE(pt->vmx_on))
+ return;
+
buf = perf_aux_output_begin(&pt->handle, event);
if (!buf)
goto fail_stop;
@@ -1007,7 +1055,8 @@ static void pt_event_stop(struct perf_event *event, int mode)
* see comment in intel_pt_interrupt().
*/
ACCESS_ONCE(pt->handle_nmi) = 0;
- pt_config_start(false);
+
+ pt_config_stop(event);

if (event->hw.state == PERF_HES_STOPPED)
return;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 336878a5d2..b0731630cd 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -65,6 +65,7 @@ enum pt_capabilities {
struct pt_pmu {
struct pmu pmu;
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+ bool vmx;
};

/**
@@ -111,6 +112,7 @@ struct pt_buffer {
struct pt {
struct perf_output_handle handle;
int handle_nmi;
+ int vmx_on;
};

#endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 5a2ed3ed2f..8d34c39982 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -285,6 +285,10 @@ static inline void perf_events_lapic_init(void) { }
static inline void perf_check_microcode(void) { }
#endif

+#ifdef CONFIG_CPU_SUP_INTEL
+ extern void intel_pt_vmxon(int entry);
+#endif
+
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
extern void amd_pmu_enable_virt(void);
extern void amd_pmu_disable_virt(void);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1735ae9d68..744ea43b79 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3075,6 +3075,8 @@ static __init int vmx_disabled_by_bios(void)

static void kvm_cpu_vmxon(u64 addr)
{
+ intel_pt_vmxon(1);
+
asm volatile (ASM_VMX_VMXON_RAX
: : "a"(&addr), "m"(addr)
: "memory", "cc");
@@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
static void kvm_cpu_vmxoff(void)
{
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+
+ intel_pt_vmxon(0);
}

static void hardware_disable(void)
--
2.8.0.rc3