Re: [PATCH] KVM: nVMX: nested VPID emulation

From: Jan Kiszka
Date: Mon Sep 14 2015 - 10:54:38 EST


On 2015-09-14 14:52, Wanpeng Li wrote:
> VPID is used to tag address space and avoid a TLB flush. Currently L0 use
> the same VPID to run L1 and all its guests. KVM flushes VPID when switching
> between L1 and L2.
>
> This patch advertises VPID to the L1 hypervisor, then address space of L1 and
> L2 can be separately treated and avoid TLB flush when swithing between L1 and
> L2. This patch gets ~3x performance improvement for lmbench 8p/64k ctxsw.
>
> Signed-off-by: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
> ---
> arch/x86/kvm/vmx.c | 39 ++++++++++++++++++++++++++++++++-------
> 1 file changed, 32 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index da1590e..06bc31e 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -1157,6 +1157,11 @@ static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
> return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
> }
>
> +static inline bool nested_cpu_has_vpid(struct vmcs12 *vmcs12)
> +{
> + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VPID);
> +}
> +
> static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12)
> {
> return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT);
> @@ -2471,6 +2476,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
> SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
> SECONDARY_EXEC_RDTSCP |
> SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
> + SECONDARY_EXEC_ENABLE_VPID |
> SECONDARY_EXEC_APIC_REGISTER_VIRT |
> SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
> SECONDARY_EXEC_WBINVD_EXITING |
> @@ -4160,7 +4166,7 @@ static void allocate_vpid(struct vcpu_vmx *vmx)
> int vpid;
>
> vmx->vpid = 0;
> - if (!enable_vpid)
> + if (!enable_vpid || is_guest_mode(&vmx->vcpu))
> return;
> spin_lock(&vmx_vpid_lock);
> vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
> @@ -6738,6 +6744,14 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
> }
> vmcs12 = kmap(page);
> vmcs12->launch_state = 0;
> + if (enable_vpid) {
> + if (nested_cpu_has_vpid(vmcs12)) {
> + spin_lock(&vmx_vpid_lock);
> + if (vmcs12->virtual_processor_id != 0)
> + __clear_bit(vmcs12->virtual_processor_id, vmx_vpid_bitmap);
> + spin_unlock(&vmx_vpid_lock);

Maybe enhance free_vpid (and also allocate_vpid) to work generically and
let the caller decide where to take the vpid from or where to store it?

> + }
> + }
> kunmap(page);
> nested_release_page(page);
>
> @@ -9189,6 +9203,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> u32 exec_control;
> + int vpid;
>
> vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
> vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
> @@ -9438,13 +9453,21 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> else
> vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
>
> +
> if (enable_vpid) {
> - /*
> - * Trivially support vpid by letting L2s share their parent
> - * L1's vpid. TODO: move to a more elaborate solution, giving
> - * each L2 its own vpid and exposing the vpid feature to L1.
> - */
> - vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
> + if (nested_cpu_has_vpid(vmcs12)) {
> + if (vmcs12->virtual_processor_id == 0) {
> + spin_lock(&vmx_vpid_lock);
> + vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
> + if (vpid < VMX_NR_VPIDS)
> + __set_bit(vpid, vmx_vpid_bitmap);
> + spin_unlock(&vmx_vpid_lock);
> + vmcs_write16(VIRTUAL_PROCESSOR_ID, vpid);

It's a bit non-obvious that vpid == VMX_NR_VPIDS (no free vpids) will
lead to vpid == 0 when writing VIRTUAL_PROCESSOR_ID. You should leave at
least a comment. Or generalize allocate_vpid as that one is already
clearer in this regard.

> + } else
> + vmcs_write16(VIRTUAL_PROCESSOR_ID, vmcs12->virtual_processor_id);
> + } else
> + vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
> +
> vmx_flush_tlb(vcpu);
> }
>
> @@ -9973,6 +9996,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
> vmcs12_save_pending_event(vcpu, vmcs12);
> }
>
> + if (nested_cpu_has_vpid(vmcs12))
> + vmcs12->virtual_processor_id = vmcs_read16(VIRTUAL_PROCESSOR_ID);
> /*
> * Drop what we picked up for L2 via vmx_complete_interrupts. It is
> * preserved above and would only end up incorrectly in L1.
>

Last but not least: the guest can now easily exhaust the host's pool of
vpid by simply spawning plenty of VCPUs for L2, no? Is this acceptable
or should there be some limit?

Jan

--
Siemens AG, Corporate Technology, CT RTC ITP SES-DE
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/