Re: [PATCH] KVM: VMX: enable LBR virtualization

From: Paolo Bonzini
Date: Mon Oct 12 2015 - 08:44:15 EST

On 12/10/2015 14:10, Jian Zhou wrote:
> ping...

I think your expectations for review round-trip time are a bit too
optimistic: I have only worked 4 hours since you posted the patch.
But it was on my list anyway, so let's do it.

First of all, you should move the implementation entirely into vmx.c,
because these new hooks are not acceptable:

>> + void (*vmcs_write64)(unsigned long field, u64 value);
>> + u64 (*vmcs_read64)(unsigned long field);
>> +
>> + int (*add_atomic_switch_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 guest_val, u64 host_val);
>> + void (*disable_intercept_guest_msr)(struct kvm_vcpu *vcpu, u32 msr);

x86.c must not have any knowledge of VMX internals such as the VMCS.
Also, AMD has its own implementation of LBR virtualization, so the
common code has to stay vendor-neutral.

In addition, the MSR numbers may differ between the guest and the host,
because it is possible to emulate e.g. a Core CPU on a Core 2 CPU. So I
recommend against using the atomic switch mechanism for the from/to MSRs.

Instead, if GUEST_DEBUGCTL.LBR = 1 you can read the from/to MSRs into an
array stored in struct kvm_vcpu_arch at vmexit time. Guest reads of the
from/to MSRs should cause a vmexit in the first implementation; any
optimization of this can be done separately.
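
Something along these lines would do. This is only a rough sketch:
vmx_save_guest_lbrs(), the vcpu->arch.lbr field and its type are names
I am making up for the example, and I am assuming the MSR_LBR_TOS /
MSR_LBR_NHM_* constants used by the perf LBR code are reachable from
vmx.c:

#define KVM_LBR_MAX_ENTRIES	16

struct kvm_lbr_state {
	u64 select;
	u64 tos;
	u64 from[KVM_LBR_MAX_ENTRIES];
	u64 to[KVM_LBR_MAX_ENTRIES];
};

/*
 * Called on vmexit, while the MSRs still hold the guest's LBR stack,
 * but only if the guest actually enabled LBRs.
 */
static void vmx_save_guest_lbrs(struct kvm_vcpu *vcpu)
{
	struct kvm_lbr_state *lbr = &vcpu->arch.lbr;	/* made-up field */
	int i;

	if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR))
		return;

	rdmsrl(MSR_LBR_SELECT, lbr->select);
	rdmsrl(MSR_LBR_TOS, lbr->tos);
	for (i = 0; i < KVM_LBR_MAX_ENTRIES; i++) {
		rdmsrl(MSR_LBR_NHM_FROM + i, lbr->from[i]);
		rdmsrl(MSR_LBR_NHM_TO + i, lbr->to[i]);
	}
}

The vmexit path would call this before anything on the host side can
clobber the LBR stack.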

As a benefit, this will force you to implement a mechanism for passing
the contents of the MSRs to userspace and reading them back, which is
necessary for debugging and for migration. You will also have to add
support for the feature to QEMU so that virtual machines that use LBRs
can be migrated.
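
The same cached values can then back both guest RDMSR exits and
KVM_GET_MSRS from userspace; you would probably also add the MSRs to
msrs_to_save in x86.c so that they show up in KVM_GET_MSR_INDEX_LIST
and QEMU knows to migrate them.  A sketch of the read path, reusing the
made-up vcpu->arch.lbr field from above:

/*
 * Rough sketch only: serve LBR reads from the values cached at vmexit
 * time, whether the read comes from a guest RDMSR exit or from
 * userspace via KVM_GET_MSRS.  Returns 0 if the MSR was handled.
 */
static int kvm_get_lbr_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
{
	struct kvm_lbr_state *lbr = &vcpu->arch.lbr;	/* made-up field */

	if (index >= MSR_LBR_NHM_FROM &&
	    index < MSR_LBR_NHM_FROM + KVM_LBR_MAX_ENTRIES) {
		*data = lbr->from[index - MSR_LBR_NHM_FROM];
		return 0;
	}
	if (index >= MSR_LBR_NHM_TO &&
	    index < MSR_LBR_NHM_TO + KVM_LBR_MAX_ENTRIES) {
		*data = lbr->to[index - MSR_LBR_NHM_TO];
		return 0;
	}
	return 1;
}

kvm_get_msr_common() could call this before falling through to its
default case.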

>> +/* Core 2 and Atom last-branch recording */
>> +#define MSR_C2_LASTBRANCH_TOS 0x000001c9
>> +#define MSR_C2_LASTBRANCH_0_FROM_IP 0x00000040
>> +#define MSR_C2_LASTBRANCH_0_TO_IP 0x00000060
>> +#define NUM_MSR_C2_LASTBRANCH_FROM_TO 4
>> +#define NUM_MSR_ATOM_LASTBRANCH_FROM_TO 8
>> +
>> +struct lbr_info {
>> + u32 base, count;
>> +} p4_lbr[] = {
>> + { MSR_LBR_SELECT, 1 },
>> + { MSR_P4_LER_FROM_LIP, 1 },
>> + { MSR_P4_LER_TO_LIP, 1 },
>> + { MSR_P4_LASTBRANCH_TOS, 1 },
>> + { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
>> + { MSR_P4_LASTBRANCH_0_TO_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
>> + { 0, 0 }
>> +}, c2_lbr[] = {
>> + { MSR_LBR_SELECT, 1 },
>> + { MSR_IA32_LASTINTFROMIP, 1 },
>> + { MSR_IA32_LASTINTTOIP, 1 },
>> + { MSR_C2_LASTBRANCH_TOS, 1 },
>> + { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
>> + { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
>> + { 0, 0 }
>> +}, nh_lbr[] = {
>> + { MSR_LBR_SELECT, 1 },
>> + { MSR_IA32_LASTINTFROMIP, 1 },
>> + { MSR_IA32_LASTINTTOIP, 1 },
>> + { MSR_C2_LASTBRANCH_TOS, 1 },

Nehalem has 16 LBR records, not 4. I suggest that you reorganize the
tables so that it is easy to match them against tables in the Intel SDM.

Note that you have to compute the number of records according to the
_guest_ CPUID, not the host CPUID. For simplicity I suggest that you
only enable LBR virtualization if the host machine has 16 LBR entries,
and make it possible to disable it through a kvm_intel module parameter.
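
The module parameter part can be as small as this (the "lbrv" name is
just an example):

/* Rough sketch: let the admin turn the feature off with kvm_intel.lbrv=0. */
static bool __read_mostly lbrv = true;
module_param(lbrv, bool, 0444);

hardware_setup() could then clear lbrv when the host does not have the
16-entry LBR stack, and the rest of the LBR code would check it before
doing anything.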

Thanks,

Paolo

>> + { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
>> + { MSR_P4_LASTBRANCH_0_TO_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
>> + { 0, 0 }
>> +}, at_lbr[] = {
>> + { MSR_LBR_SELECT, 1 },
>> + { MSR_IA32_LASTINTFROMIP, 1 },
>> + { MSR_IA32_LASTINTTOIP, 1 },
>> + { MSR_C2_LASTBRANCH_TOS, 1 },
>> + { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
>> + { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
>> + { 0, 0 }
>> +};
>> +
>> +static const struct lbr_info *last_branch_msr_get(void)
>> +{
>> + switch ( boot_cpu_data.x86 )
>> + {
>> + case 6:
>> + switch ( boot_cpu_data.x86_model )
>> + {
>> + /* Core2 Duo */
>> + case 15:
>> + /* Enhanced Core */
>> + case 23:
>> + return c2_lbr;
>> + break;
>> + /* Nehalem */
>> + case 26: case 30: case 31: case 46:
>> + /* Westmere */
>> + case 37: case 44: case 47:
>> + /* Sandy Bridge */
>> + case 42: case 45:
>> + /* Ivy Bridge */
>> + case 58: case 62:
>> + /* Haswell */
>> + case 60: case 63: case 69: case 70:
>> + /* future */
>> + case 61: case 78:
>> + return nh_lbr;
>> + break;
>> + /* Atom */
>> + case 28: case 38: case 39: case 53: case 54:
>> + /* Silvermont */
>> + case 55: case 74: case 77: case 90: case 93:
>> + return at_lbr;
>> + break;
>> + }
>> + break;
>> + case 15:
>> + switch ( boot_cpu_data.x86_model )
>> + {
>> + /* Pentium4/Xeon with em64t */
>> + case 3: case 4: case 6:
>> + return p4_lbr;
>> + break;
>> + }
>> + break;
>> + }
>> +
>> + return NULL;
>> +}
>> +
>> static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
>>
>> static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
>> @@ -1917,6 +2024,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> bool pr = false;
>> u32 msr = msr_info->index;
>> u64 data = msr_info->data;
>> + u64 supported = 0;
>> + static const struct lbr_info *lbr = NULL;
>> + int i = 0;
>> + int value = 0;
>>
>> switch (msr) {
>> case MSR_AMD64_NB_CFG:
>> @@ -1948,16 +2059,34 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> }
>> break;
>> case MSR_IA32_DEBUGCTLMSR:
>> - if (!data) {
>> - /* We support the non-activated case already */
>> - break;
>> - } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
>> - /* Values other than LBR and BTF are vendor-specific,
>> - thus reserved and should throw a #GP */
>> + supported = DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
>> +
>> + if (data & ~supported) {
>> + /* Values other than LBR, BTF and FREEZE_LBRS_ON_PMI are not supported,
>> + * thus reserved and should throw a #GP */
>> + vcpu_unimpl(vcpu, "unsupported MSR_IA32_DEBUGCTLMSR wrmsr: 0x%llx\n", data);
>> return 1;
>> }
>> - vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
>> - __func__, data);
>> +
>> + if (data & DEBUGCTLMSR_LBR) {
>> + lbr = last_branch_msr_get();
>> + if (lbr == NULL)
>> + break;
>> +
>> + for (; (value == 0) && lbr->count; lbr++)
>> + for (i = 0; (value == 0) && (i < lbr->count); i++)
>> + if ((value = kvm_x86_ops->add_atomic_switch_msr(vcpu, lbr->base + i, 0, 0)) == 0)
>> + kvm_x86_ops->disable_intercept_guest_msr(vcpu, lbr->base + i);
>> + }
>> +
>> + if (value == 0) {
>> + kvm_x86_ops->vmcs_write64(GUEST_IA32_DEBUGCTL, data);
>> + }
>> + else {
>> + /* throw a #GP */
>> + return 1;
>> + }
>> +
>> break;
>> case 0x200 ... 0x2ff:
>> return kvm_mtrr_set_msr(vcpu, msr, data);
>> @@ -2178,9 +2307,11 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>> int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> {
>> switch (msr_info->index) {
>> + case MSR_IA32_DEBUGCTLMSR:
>> + msr_info->data = kvm_x86_ops->vmcs_read64(GUEST_IA32_DEBUGCTL);
>> + break;
>> case MSR_IA32_PLATFORM_ID:
>> case MSR_IA32_EBL_CR_POWERON:
>> - case MSR_IA32_DEBUGCTLMSR:
>> case MSR_IA32_LASTBRANCHFROMIP:
>> case MSR_IA32_LASTBRANCHTOIP:
>> case MSR_IA32_LASTINTFROMIP:
>> --
>> 1.7.12.4