Re: [PATCH v6 9/9] KVM: VMX: Optimize memory allocation for PID-pointer table
From: Maxim Levitsky
Date: Fri Feb 25 2022 - 12:30:08 EST
On Fri, 2022-02-25 at 16:22 +0800, Zeng Guang wrote:
> Current kvm allocates 8 pages in advance for Posted Interrupt Descriptor
> pointer (PID-pointer) table to accommodate vCPUs with APIC ID up to
> KVM_MAX_VCPU_IDS - 1. This policy wastes some memory because most
> VMs have fewer than 512 vCPUs and thus need just one page.
>
> If the user hypervisor specifies the maximum practical vCPU ID prior to
> vCPU creation, IPIv can allocate only the essential memory for the
> PID-pointer table and reduce the memory footprint of VMs.
>
> Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> Signed-off-by: Zeng Guang <guang.zeng@xxxxxxxxx>
> ---
> arch/x86/kvm/vmx/vmx.c | 45 ++++++++++++++++++++++++++++--------------
> 1 file changed, 30 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 0cb141c277ef..22bfb4953289 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -230,9 +230,6 @@ static const struct {
> };
>
> #define L1D_CACHE_ORDER 4
> -
> -/* PID(Posted-Interrupt Descriptor)-pointer table entry is 64-bit long */
> -#define MAX_PID_TABLE_ORDER get_order(KVM_MAX_VCPU_IDS * sizeof(u64))
> #define PID_TABLE_ENTRY_VALID 1
>
> static void *vmx_l1d_flush_pages;
> @@ -4434,6 +4431,24 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
> return exec_control;
> }
>
> +static int vmx_alloc_pid_table(struct kvm_vmx *kvm_vmx)
> +{
> + struct page *pages;
> +
> + if(kvm_vmx->pid_table)
> + return 0;
> +
> + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO,
> + get_order(kvm_vmx->kvm.arch.max_vcpu_id * sizeof(u64)));
> +
> + if (!pages)
> + return -ENOMEM;
> +
> + kvm_vmx->pid_table = (void *)page_address(pages);
> + kvm_vmx->pid_last_index = kvm_vmx->kvm.arch.max_vcpu_id - 1;
> + return 0;
> +}
> +
> #define VMX_XSS_EXIT_BITMAP 0
>
> static void init_vmcs(struct vcpu_vmx *vmx)
> @@ -7159,6 +7174,16 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
> goto free_vmcs;
> }
>
> + if (enable_ipiv && kvm_vcpu_apicv_active(vcpu)) {
> + struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
> +
> + mutex_lock(&vcpu->kvm->lock);
> + err = vmx_alloc_pid_table(kvm_vmx);
> + mutex_unlock(&vcpu->kvm->lock);
> + if (err)
> + goto free_vmcs;
> + }
This could be dangerous: if APICv is temporarily inhibited at vCPU
creation time, this code won't run and we will end up without a PID table.
I think kvm_vcpu_apicv_active() should simply be dropped
from this condition.
Best regards,
Maxim Levitsky
> +
> return 0;
>
> free_vmcs:
> @@ -7202,17 +7227,6 @@ static int vmx_vm_init(struct kvm *kvm)
> }
> }
>
> - if (enable_ipiv) {
> - struct page *pages;
> -
> - pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, MAX_PID_TABLE_ORDER);
> - if (!pages)
> - return -ENOMEM;
> -
> - to_kvm_vmx(kvm)->pid_table = (void *)page_address(pages);
> - to_kvm_vmx(kvm)->pid_last_index = KVM_MAX_VCPU_IDS - 1;
> - }
> -
> return 0;
> }
>
> @@ -7809,7 +7823,8 @@ static void vmx_vm_destroy(struct kvm *kvm)
> struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
>
> if (kvm_vmx->pid_table)
> - free_pages((unsigned long)kvm_vmx->pid_table, MAX_PID_TABLE_ORDER);
> + free_pages((unsigned long)kvm_vmx->pid_table,
> + get_order((kvm_vmx->pid_last_index + 1) * sizeof(u64)));
> }
>
> static struct kvm_x86_ops vmx_x86_ops __initdata = {