Re: [PATCH] KVM: VMX: enable LBR virtualization

From: Jian Zhou
Date: Mon Oct 12 2015 - 08:10:42 EST

Next message: Matt Fleming: "Re: [PATCH v8 2/2] efi: a misc char interface for user to update efi firmware"
Previous message: Clemens Ladisch: "Re: [PATCH v3 1/4] usb: gadget: f_midi: free usb request when done"
Next in thread: Paolo Bonzini: "Re: [PATCH] KVM: VMX: enable LBR virtualization"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

ping...

> Use the VMX MSR store/load mechanism and the MSR intercept bitmap
> to implement LBR virtualization.
>
> Signed-off-by: Jian Zhou <jianjay.zhou@xxxxxxxxxx>
> Signed-off-by: Stephen He <herongguang.he@xxxxxxxxxx>
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 2beee03..244f68c 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -887,6 +887,12 @@ struct kvm_x86_ops {
> gfn_t offset, unsigned long mask);
> /* pmu operations of sub-arch */
> const struct kvm_pmu_ops *pmu_ops;
> +
> + void (*vmcs_write64)(unsigned long field, u64 value);
> + u64 (*vmcs_read64)(unsigned long field);
> +
> + int (*add_atomic_switch_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 guest_val, u64 host_val);
> + void (*disable_intercept_guest_msr)(struct kvm_vcpu *vcpu, u32 msr);
> };
>
> struct kvm_arch_async_pf {
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 06ef490..2305308 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -159,7 +159,7 @@ module_param(ple_window_max, int, S_IRUGO);
>
> extern const ulong vmx_return;
>
> -#define NR_AUTOLOAD_MSRS 8
> +#define NR_AUTOLOAD_MSRS 256
> #define VMCS02_POOL_SIZE 1
>
> struct vmcs {
> @@ -1630,6 +1630,7 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
> --m->nr;
> m->guest[i] = m->guest[m->nr];
> m->host[i] = m->host[m->nr];
> + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, m->nr);
> vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
> vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
> }
> @@ -1645,7 +1646,7 @@ static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
> vm_exit_controls_setbit(vmx, exit);
> }
>
> -static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
> +static int add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
> u64 guest_val, u64 host_val)
> {
> unsigned i;
> @@ -1660,7 +1661,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
> GUEST_IA32_EFER,
> HOST_IA32_EFER,
> guest_val, host_val);
> - return;
> + return 0;
> }
> break;
> case MSR_CORE_PERF_GLOBAL_CTRL:
> @@ -1671,7 +1672,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
> GUEST_IA32_PERF_GLOBAL_CTRL,
> HOST_IA32_PERF_GLOBAL_CTRL,
> guest_val, host_val);
> - return;
> + return 0;
> }
> break;
> }
> @@ -1683,9 +1684,10 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
> if (i == NR_AUTOLOAD_MSRS) {
> printk_once(KERN_WARNING "Not enough msr switch entries. "
> "Can't add msr %x\n", msr);
> - return;
> + return -ENOSPC;
> } else if (i == m->nr) {
> ++m->nr;
> + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, m->nr);
> vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
> vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
> }
> @@ -1694,6 +1696,15 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
> m->guest[i].value = guest_val;
> m->host[i].index = msr;
> m->host[i].value = host_val;
> +
> + return 0;
> +}
> +
> +static int vmx_add_atomic_switch_msr(struct kvm_vcpu *vcpu, u32 msr, u64 guest_val, u64 host_val)
> +{
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> + return add_atomic_switch_msr(vmx, msr, guest_val, host_val);
> }
>
> static void reload_tss(void)
> @@ -4332,6 +4343,20 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
> msr, MSR_TYPE_W);
> }
>
> +static void vmx_disable_intercept_guest_msr(struct kvm_vcpu *vcpu, u32 msr)
> +{
> + if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
> + vmx_disable_intercept_msr_read_x2apic(msr);
> + vmx_disable_intercept_msr_write_x2apic(msr);
> + }
> + else {
> + if (is_long_mode(vcpu))
> + vmx_disable_intercept_for_msr(msr, true);
> + else
> + vmx_disable_intercept_for_msr(msr, false);
> + }
> +}
> +
> static int vmx_vm_has_apicv(struct kvm *kvm)
> {
> return enable_apicv && irqchip_in_kernel(kvm);
> @@ -4654,6 +4679,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
> #endif
>
> vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
> + vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autoload.guest));
> vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
> vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
> vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
> @@ -10409,6 +10435,12 @@ static struct kvm_x86_ops vmx_x86_ops = {
> .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
>
> .pmu_ops = &intel_pmu_ops,
> +
> + .vmcs_write64 = vmcs_write64,
> + .vmcs_read64 = vmcs_read64,
> +
> + .add_atomic_switch_msr = vmx_add_atomic_switch_msr,
> + .disable_intercept_guest_msr = vmx_disable_intercept_guest_msr,
> };
>
> static int __init vmx_init(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 92511d4..f1fcd7c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -176,6 +176,113 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
>
> u64 __read_mostly host_xcr0;
>
> +/* Netburst (P4) last-branch recording */
> +#define MSR_P4_LER_FROM_LIP 0x000001d7
> +#define MSR_P4_LER_TO_LIP 0x000001d8
> +#define MSR_P4_LASTBRANCH_TOS 0x000001da
> +#define MSR_P4_LASTBRANCH_0 0x000001db
> +#define NUM_MSR_P4_LASTBRANCH 4
> +#define MSR_P4_LASTBRANCH_0_FROM_LIP 0x00000680
> +#define MSR_P4_LASTBRANCH_0_TO_LIP 0x000006c0
> +#define NUM_MSR_P4_LASTBRANCH_FROM_TO 16
> +
> +/* Pentium M (and Core) last-branch recording */
> +#define MSR_PM_LASTBRANCH_TOS 0x000001c9
> +#define MSR_PM_LASTBRANCH_0 0x00000040
> +#define NUM_MSR_PM_LASTBRANCH 8
> +
> +/* Core 2 and Atom last-branch recording */
> +#define MSR_C2_LASTBRANCH_TOS 0x000001c9
> +#define MSR_C2_LASTBRANCH_0_FROM_IP 0x00000040
> +#define MSR_C2_LASTBRANCH_0_TO_IP 0x00000060
> +#define NUM_MSR_C2_LASTBRANCH_FROM_TO 4
> +#define NUM_MSR_ATOM_LASTBRANCH_FROM_TO 8
> +
> +struct lbr_info {
> + u32 base, count;
> +} p4_lbr[] = {
> + { MSR_LBR_SELECT, 1 },
> + { MSR_P4_LER_FROM_LIP, 1 },
> + { MSR_P4_LER_TO_LIP, 1 },
> + { MSR_P4_LASTBRANCH_TOS, 1 },
> + { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
> + { MSR_P4_LASTBRANCH_0_TO_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
> + { 0, 0 }
> +}, c2_lbr[] = {
> + { MSR_LBR_SELECT, 1 },
> + { MSR_IA32_LASTINTFROMIP, 1 },
> + { MSR_IA32_LASTINTTOIP, 1 },
> + { MSR_C2_LASTBRANCH_TOS, 1 },
> + { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
> + { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
> + { 0, 0 }
> +}, nh_lbr[] = {
> + { MSR_LBR_SELECT, 1 },
> + { MSR_IA32_LASTINTFROMIP, 1 },
> + { MSR_IA32_LASTINTTOIP, 1 },
> + { MSR_C2_LASTBRANCH_TOS, 1 },
> + { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
> + { MSR_P4_LASTBRANCH_0_TO_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
> + { 0, 0 }
> +}, at_lbr[] = {
> + { MSR_LBR_SELECT, 1 },
> + { MSR_IA32_LASTINTFROMIP, 1 },
> + { MSR_IA32_LASTINTTOIP, 1 },
> + { MSR_C2_LASTBRANCH_TOS, 1 },
> + { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
> + { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
> + { 0, 0 }
> +};
> +
> +static const struct lbr_info *last_branch_msr_get(void)
> +{
> + switch ( boot_cpu_data.x86 )
> + {
> + case 6:
> + switch ( boot_cpu_data.x86_model )
> + {
> + /* Core2 Duo */
> + case 15:
> + /* Enhanced Core */
> + case 23:
> + return c2_lbr;
> + break;
> + /* Nehalem */
> + case 26: case 30: case 31: case 46:
> + /* Westmere */
> + case 37: case 44: case 47:
> + /* Sandy Bridge */
> + case 42: case 45:
> + /* Ivy Bridge */
> + case 58: case 62:
> + /* Haswell */
> + case 60: case 63: case 69: case 70:
> + /* future */
> + case 61: case 78:
> + return nh_lbr;
> + break;
> + /* Atom */
> + case 28: case 38: case 39: case 53: case 54:
> + /* Silvermont */
> + case 55: case 74: case 77: case 90: case 93:
> + return at_lbr;
> + break;
> + }
> + break;
> + case 15:
> + switch ( boot_cpu_data.x86_model )
> + {
> + /* Pentium4/Xeon with em64t */
> + case 3: case 4: case 6:
> + return p4_lbr;
> + break;
> + }
> + break;
> + }
> +
> + return NULL;
> +}
> +
> static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
>
> static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
> @@ -1917,6 +2024,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> bool pr = false;
> u32 msr = msr_info->index;
> u64 data = msr_info->data;
> + u64 supported = 0;
> + static const struct lbr_info *lbr = NULL;
> + int i = 0;
> + int value = 0;
>
> switch (msr) {
> case MSR_AMD64_NB_CFG:
> @@ -1948,16 +2059,34 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> }
> break;
> case MSR_IA32_DEBUGCTLMSR:
> - if (!data) {
> - /* We support the non-activated case already */
> - break;
> - } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
> - /* Values other than LBR and BTF are vendor-specific,
> - thus reserved and should throw a #GP */
> + supported = DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
> +
> + if (data & ~supported) {
> + /* Values other than LBR, BTF and FREEZE_LBRS_ON_PMI are not supported,
> + * thus reserved and should throw a #GP */
> + vcpu_unimpl(vcpu, "unsupported MSR_IA32_DEBUGCTLMSR wrmsr: 0x%llx\n", data);
> return 1;
> }
> - vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
> - __func__, data);
> +
> + if (data & DEBUGCTLMSR_LBR) {
> + lbr = last_branch_msr_get();
> + if (lbr == NULL)
> + break;
> +
> + for (; (value == 0) && lbr->count; lbr++)
> + for (i = 0; (value == 0) && (i < lbr->count); i++)
> + if ((value = kvm_x86_ops->add_atomic_switch_msr(vcpu, lbr->base + i, 0, 0)) == 0)
> + kvm_x86_ops->disable_intercept_guest_msr(vcpu, lbr->base + i);
> + }
> +
> + if (value == 0) {
> + kvm_x86_ops->vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> + }
> + else {
> + /* throw a #GP */
> + return 1;
> + }
> +
> break;
> case 0x200 ... 0x2ff:
> return kvm_mtrr_set_msr(vcpu, msr, data);
> @@ -2178,9 +2307,11 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> {
> switch (msr_info->index) {
> + case MSR_IA32_DEBUGCTLMSR:
> + msr_info->data = kvm_x86_ops->vmcs_read64(GUEST_IA32_DEBUGCTL);
> + break;
> case MSR_IA32_PLATFORM_ID:
> case MSR_IA32_EBL_CR_POWERON:
> - case MSR_IA32_DEBUGCTLMSR:
> case MSR_IA32_LASTBRANCHFROMIP:
> case MSR_IA32_LASTBRANCHTOIP:
> case MSR_IA32_LASTINTFROMIP:
> --
> 1.7.12.4
>
>
>
> .
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Next message: Matt Fleming: "Re: [PATCH v8 2/2] efi: a misc char interface for user to update efi firmware"
Previous message: Clemens Ladisch: "Re: [PATCH v3 1/4] usb: gadget: f_midi: free usb request when done"
Next in thread: Paolo Bonzini: "Re: [PATCH] KVM: VMX: enable LBR virtualization"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]