[PATCH 3/9] KVM: x86: add Intel processor trace virtualization mode
From: Luwei Kang
Date: Mon Oct 16 2017 - 21:56:22 EST
From: Chao Peng <chao.p.peng@xxxxxxxxxxxxxxx>
PT virtualization can be work in one of 4 possible modes:
a. system-wide: trace both host/guest and output to host buffer;
b. host-only: only trace host and output to host buffer;
c. guest-only: only trace guest and output to guest buffer;
d. host-guest: trace host/guest simultaneous and output to their
respective buffer.
Signed-off-by: Chao Peng <chao.p.peng@xxxxxxxxxxxxxxx>
Signed-off-by: Luwei Kang <luwei.kang@xxxxxxxxx>
---
arch/x86/include/asm/intel_pt.h | 7 ++++
arch/x86/include/asm/msr-index.h | 1 +
arch/x86/include/asm/vmx.h | 6 +++
arch/x86/kvm/vmx.c | 88 ++++++++++++++++++++++++++++++++++++++--
4 files changed, 98 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h
index 73c8942..b5dd33c 100644
--- a/arch/x86/include/asm/intel_pt.h
+++ b/arch/x86/include/asm/intel_pt.h
@@ -1,6 +1,13 @@
#ifndef _ASM_X86_INTEL_PT_H
#define _ASM_X86_INTEL_PT_H
+enum pt_mode {
+ PT_MODE_SYSTEM = 0,
+ PT_MODE_HOST,
+ PT_MODE_GUEST,
+ PT_MODE_HOST_GUEST,
+};
+
enum pt_capabilities {
PT_CAP_max_subleaf = 0,
PT_CAP_cr3_filtering,
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 80b26e1..57433e4 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -751,6 +751,7 @@
#define VMX_BASIC_INOUT 0x0040000000000000LLU
/* MSR_IA32_VMX_MISC bits */
+#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14)
#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
/* AMD-V MSRs */
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index caec841..80e3e22 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -76,7 +76,9 @@
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
#define SECONDARY_EXEC_RDSEED 0x00010000
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
+#define SECONDARY_EXEC_PT_CONCEAL_VMX 0x00080000
#define SECONDARY_EXEC_XSAVES 0x00100000
+#define SECONDARY_EXEC_PT_USE_GPA 0x01000000
#define SECONDARY_EXEC_TSC_SCALING 0x02000000
#define PIN_BASED_EXT_INTR_MASK 0x00000001
@@ -97,6 +99,8 @@
#define VM_EXIT_LOAD_IA32_EFER 0x00200000
#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000
#define VM_EXIT_CLEAR_BNDCFGS 0x00800000
+#define VM_EXIT_PT_SUPPRESS_PIP 0x01000000
+#define VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000
#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
@@ -108,6 +112,8 @@
#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
#define VM_ENTRY_LOAD_IA32_EFER 0x00008000
#define VM_ENTRY_LOAD_BNDCFGS 0x00010000
+#define VM_ENTRY_PT_SUPPRESS_PIP 0x00020000
+#define VM_ENTRY_LOAD_IA32_RTIT_CTL 0x00040000
#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 95a0160..f0cae7c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -50,6 +50,7 @@
#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include <asm/mmu_context.h>
+#include <asm/intel_pt.h>
#include "trace.h"
#include "pmu.h"
@@ -178,6 +179,10 @@
static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
module_param(ple_window_max, int, S_IRUGO);
+/* Default is host guest mode. */
+static int __read_mostly pt_mode = PT_MODE_HOST_GUEST;
+module_param(pt_mode, int, S_IRUGO);
+
extern const ulong vmx_return;
#define NR_AUTOLOAD_MSRS 8
@@ -1321,6 +1326,19 @@ static inline bool cpu_has_vmx_vmfunc(void)
SECONDARY_EXEC_ENABLE_VMFUNC;
}
+static inline bool cpu_has_vmx_intel_pt(void)
+{
+ u64 vmx_msr;
+
+ rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
+ return vmx_msr & MSR_IA32_VMX_MISC_INTEL_PT;
+}
+
+static inline bool cpu_has_vmx_pt_use_gpa(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_PT_USE_GPA;
+}
+
static inline bool report_flexpriority(void)
{
return flexpriority_enabled;
@@ -3661,6 +3679,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_RDRAND |
SECONDARY_EXEC_ENABLE_PML |
SECONDARY_EXEC_TSC_SCALING |
+ SECONDARY_EXEC_PT_USE_GPA |
+ SECONDARY_EXEC_PT_CONCEAL_VMX |
SECONDARY_EXEC_ENABLE_VMFUNC;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
@@ -3694,7 +3714,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
#endif
opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
- VM_EXIT_CLEAR_BNDCFGS;
+ VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_PT_SUPPRESS_PIP |
+ VM_EXIT_CLEAR_IA32_RTIT_CTL;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
&_vmexit_control) < 0)
return -EIO;
@@ -3713,11 +3734,25 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
_pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
- opt = VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS;
+ opt = VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS |
+ VM_ENTRY_PT_SUPPRESS_PIP | VM_ENTRY_LOAD_IA32_RTIT_CTL;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
&_vmentry_control) < 0)
return -EIO;
+ /*
+ * If one of them is set to 1, all must be set to 1. This ensures that
+ * Intel PT output will not switch between using GPAs and PPAs for
+ * output without first being disabled.
+ */
+ if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_PT_USE_GPA) ||
+ !(_vmexit_control & VM_EXIT_CLEAR_IA32_RTIT_CTL) ||
+ !(_vmentry_control & VM_ENTRY_LOAD_IA32_RTIT_CTL)) {
+ _cpu_based_2nd_exec_control &= ~SECONDARY_EXEC_PT_USE_GPA;
+ _vmexit_control &= ~VM_EXIT_CLEAR_IA32_RTIT_CTL;
+ _vmentry_control &= ~VM_ENTRY_LOAD_IA32_RTIT_CTL;
+ }
+
rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
/* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
@@ -5279,6 +5314,38 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
return exec_control;
}
+static u32 vmx_vmexit_control(struct vcpu_vmx *vmx)
+{
+ u32 vmexit_control = vmcs_config.vmexit_ctrl;
+
+ /*
+ * Enable VMX-specific packet information and disable VMCS
+ * controls for IA32_RTIT_CTL MSR in system mode.
+ */
+ if (pt_mode == PT_MODE_SYSTEM)
+ vmexit_control &= ~(
+ VM_EXIT_PT_SUPPRESS_PIP |
+ VM_EXIT_CLEAR_IA32_RTIT_CTL);
+
+ return vmexit_control;
+}
+
+static u32 vmx_vmentry_control(struct vcpu_vmx *vmx)
+{
+ u32 vmentry_control = vmcs_config.vmentry_ctrl;
+
+ /*
+ * Enable VMX-specific packet information and disable VMCS
+ * controls for IA32_RTIT_CTL MSR in system mode.
+ */
+ if (pt_mode == PT_MODE_SYSTEM)
+ vmentry_control &= ~(
+ VM_ENTRY_PT_SUPPRESS_PIP |
+ VM_ENTRY_LOAD_IA32_RTIT_CTL);
+
+ return vmentry_control;
+}
+
static bool vmx_rdrand_supported(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -5409,6 +5476,15 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
}
}
+ /*
+ * Enable VMX-specific packet information and disable VMCS
+ * controls for IA32_RTIT_CTL MSR in system mode.
+ */
+ if (pt_mode == PT_MODE_SYSTEM)
+ exec_control &= ~(
+ SECONDARY_EXEC_PT_CONCEAL_VMX |
+ SECONDARY_EXEC_PT_USE_GPA);
+
vmx->secondary_exec_control = exec_control;
}
@@ -5521,10 +5597,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
}
- vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
+ vm_exit_controls_init(vmx, vmx_vmexit_control(vmx));
/* 22.2.1, 20.8.1 */
- vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl);
+ vm_entry_controls_init(vmx, vmx_vmentry_control(vmx));
vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS;
vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS);
@@ -6865,6 +6941,10 @@ static __init int hardware_setup(void)
kvm_mce_cap_supported |= MCG_LMCE_P;
+ if (!enable_ept || !pt_cap_get(PT_CAP_topa_output) ||
+ !cpu_has_vmx_intel_pt() || !cpu_has_vmx_pt_use_gpa())
+ pt_mode = PT_MODE_SYSTEM;
+
return alloc_kvm_area();
out:
--
1.8.3.1