[PATCH 3/4] KVM: X86: Refactor kvm_arch_vcpu_create

From: Xiaoyao Li
Date: Tue Oct 15 2019 - 12:55:45 EST


The current x86 arch vcpu creation flow is a bit messy: the vcpu's data
structure allocation and the vcpu initialization are interleaved, which
makes the code hard to read.

Separate vcpu_create from vcpu_init, just like ARM does: first call the
vcpu_create related functions to allocate the vcpu's data structures,
then call the vcpu_init related functions to initialize the vcpu.
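
With this change the creation path roughly becomes (the generic
kvm_vcpu_init() ends up calling kvm_arch_vcpu_init()):

  kvm_arch_vcpu_create()
      kvm_x86_ops->vcpu_create()        /* allocation only */
      kvm_vcpu_init()
          kvm_arch_vcpu_init()
              kvm_x86_ops->vcpu_init()  /* vendor-specific initialization */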

Signed-off-by: Xiaoyao Li <xiaoyao.li@xxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/svm.c | 63 +++++++++---------
arch/x86/kvm/vmx/vmx.c | 109 ++++++++++++++++----------------
arch/x86/kvm/x86.c | 16 +++++
4 files changed, 102 insertions(+), 87 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5d8056ff7390..d574c2391145 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1015,6 +1015,7 @@ struct kvm_x86_ops {

/* Create, but do not attach this VCPU */
struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
+ int (*vcpu_init)(struct kvm_vcpu *vcpu);
void (*vcpu_free)(struct kvm_vcpu *vcpu);
void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e479ea9bc9da..d35ef5456462 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2138,6 +2138,32 @@ static int avic_init_vcpu(struct vcpu_svm *svm)
return ret;
}

+static int svm_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ int err;
+
+ err = avic_init_vcpu(svm);
+ if (err)
+ return err;
+
+ /* We initialize this flag to true to make sure that the is_running
+ * bit would be set the first time the vcpu is loaded.
+ */
+ svm->avic_is_running = true;
+
+ svm_vcpu_init_msrpm(svm->msrpm);
+ svm_vcpu_init_msrpm(svm->nested.msrpm);
+
+ clear_page(svm->vmcb);
+ svm->asid_generation = 0;
+ init_vmcb(svm);
+
+ svm_init_osvw(vcpu);
+
+ return 0;
+}
+
static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
{
struct vcpu_svm *svm;
@@ -2150,17 +2176,15 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
BUILD_BUG_ON_MSG(offsetof(struct vcpu_svm, vcpu) != 0,
"struct kvm_vcpu must be at offset 0 for arch usercopy region");

+ err = -ENOMEM;
svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
- if (!svm) {
- err = -ENOMEM;
+ if (!svm)
goto out;
- }

svm->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
GFP_KERNEL_ACCOUNT);
if (!svm->vcpu.arch.user_fpu) {
printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
- err = -ENOMEM;
goto free_partial_svm;
}

@@ -2168,18 +2192,12 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
GFP_KERNEL_ACCOUNT);
if (!svm->vcpu.arch.guest_fpu) {
printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
- err = -ENOMEM;
goto free_user_fpu;
}

- err = kvm_vcpu_init(&svm->vcpu, kvm, id);
- if (err)
- goto free_svm;
-
- err = -ENOMEM;
page = alloc_page(GFP_KERNEL_ACCOUNT);
if (!page)
- goto uninit;
+ goto free_svm;

msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
if (!msrpm_pages)
@@ -2193,43 +2211,22 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
if (!hsave_page)
goto free_page3;

- err = avic_init_vcpu(svm);
- if (err)
- goto free_page4;
-
- /* We initialize this flag to true to make sure that the is_running
- * bit would be set the first time the vcpu is loaded.
- */
- svm->avic_is_running = true;
-
svm->nested.hsave = page_address(hsave_page);

svm->msrpm = page_address(msrpm_pages);
- svm_vcpu_init_msrpm(svm->msrpm);
-
svm->nested.msrpm = page_address(nested_msrpm_pages);
- svm_vcpu_init_msrpm(svm->nested.msrpm);

svm->vmcb = page_address(page);
- clear_page(svm->vmcb);
svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT);
- svm->asid_generation = 0;
- init_vmcb(svm);
-
- svm_init_osvw(&svm->vcpu);

return &svm->vcpu;

-free_page4:
- __free_page(hsave_page);
free_page3:
__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
free_page2:
__free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
free_page1:
__free_page(page);
-uninit:
- kvm_vcpu_uninit(&svm->vcpu);
free_svm:
kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
free_user_fpu:
@@ -2263,7 +2260,6 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
__free_page(virt_to_page(svm->nested.hsave));
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
- kvm_vcpu_uninit(vcpu);
kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu);
kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
kmem_cache_free(kvm_vcpu_cache, svm);
@@ -7242,6 +7238,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {

.vcpu_create = svm_create_vcpu,
.vcpu_free = svm_free_vcpu,
+ .vcpu_init = svm_vcpu_init,
.vcpu_reset = svm_vcpu_reset,

.vm_alloc = svm_vm_alloc,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 7051511c27c2..2f54a3bcb6a5 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6705,30 +6705,78 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
nested_vmx_free_vcpu(vcpu);
free_loaded_vmcs(vmx->loaded_vmcs);
kfree(vmx->guest_msrs);
- kvm_vcpu_uninit(vcpu);
kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
kmem_cache_free(kvm_vcpu_cache, vmx);
}

+static int vmx_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ int cpu;
+ int err;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ cpu = get_cpu();
+ vmx_vcpu_load(vcpu, cpu);
+ vmx->vcpu.cpu = cpu;
+ vmx_vmcs_setup(vmx);
+ vmx_vcpu_put(vcpu);
+ put_cpu();
+
+ if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
+ err = alloc_apic_access_page(vcpu->kvm);
+ if (err)
+ return -ENOMEM;
+ }
+
+ if (enable_ept && !enable_unrestricted_guest) {
+ err = init_rmode_identity_map(vcpu->kvm);
+ if (err)
+ return -ENOMEM;
+ }
+
+ if (nested)
+ nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
+ vmx_capability.ept,
+ kvm_vcpu_apicv_active(&vmx->vcpu));
+ else
+ memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
+
+
+ vmx->nested.posted_intr_nv = -1;
+ vmx->nested.current_vmptr = -1ull;
+
+ vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
+
+ /*
+ * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
+ * or POSTED_INTR_WAKEUP_VECTOR.
+ */
+ vmx->pi_desc.nv = POSTED_INTR_VECTOR;
+ vmx->pi_desc.sn = 1;
+
+ vmx->ept_pointer = INVALID_PAGE;
+
+ return 0;
+}
+
static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
{
int err;
struct vcpu_vmx *vmx;
- int cpu;

BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
"struct kvm_vcpu must be at offset 0 for arch usercopy region");

+ err = -ENOMEM;
vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
if (!vmx)
- return ERR_PTR(-ENOMEM);
+ goto out;

vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
GFP_KERNEL_ACCOUNT);
if (!vmx->vcpu.arch.user_fpu) {
printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
- err = -ENOMEM;
goto free_partial_vcpu;
}

@@ -6736,18 +6784,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
GFP_KERNEL_ACCOUNT);
if (!vmx->vcpu.arch.guest_fpu) {
printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
- err = -ENOMEM;
goto free_user_fpu;
}

vmx->vpid = allocate_vpid();

- err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
- if (err)
- goto free_vcpu;
-
- err = -ENOMEM;
-
/*
* If PML is turned on, failure on enabling PML just results in failure
* of creating the vcpu, therefore we can simplify PML logic (by
@@ -6757,7 +6798,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
if (enable_pml) {
vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!vmx->pml_pg)
- goto uninit_vcpu;
+ goto free_vcpu;
}

vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
@@ -6772,55 +6813,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
goto free_msrs;

vmx->loaded_vmcs = &vmx->vmcs01;
- cpu = get_cpu();
- vmx_vcpu_load(&vmx->vcpu, cpu);
- vmx->vcpu.cpu = cpu;
- vmx_vmcs_setup(vmx);
- vmx_vcpu_put(&vmx->vcpu);
- put_cpu();
- if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
- err = alloc_apic_access_page(kvm);
- if (err)
- goto free_vmcs;
- }
-
- if (enable_ept && !enable_unrestricted_guest) {
- err = init_rmode_identity_map(kvm);
- if (err)
- goto free_vmcs;
- }
-
- if (nested)
- nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
- vmx_capability.ept,
- kvm_vcpu_apicv_active(&vmx->vcpu));
- else
- memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
-
- vmx->nested.posted_intr_nv = -1;
- vmx->nested.current_vmptr = -1ull;
-
- vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
-
- /*
- * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
- * or POSTED_INTR_WAKEUP_VECTOR.
- */
- vmx->pi_desc.nv = POSTED_INTR_VECTOR;
- vmx->pi_desc.sn = 1;
-
- vmx->ept_pointer = INVALID_PAGE;

return &vmx->vcpu;

-free_vmcs:
- free_loaded_vmcs(vmx->loaded_vmcs);
free_msrs:
kfree(vmx->guest_msrs);
free_pml:
vmx_destroy_pml_buffer(vmx);
-uninit_vcpu:
- kvm_vcpu_uninit(&vmx->vcpu);
free_vcpu:
free_vpid(vmx->vpid);
kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
@@ -6828,6 +6827,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
free_partial_vcpu:
kmem_cache_free(kvm_vcpu_cache, vmx);
+out:
return ERR_PTR(err);
}

@@ -7764,6 +7764,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {

.vcpu_create = vmx_create_vcpu,
.vcpu_free = vmx_free_vcpu,
+ .vcpu_init = vmx_vcpu_init,
.vcpu_reset = vmx_vcpu_reset,

.prepare_guest_switch = vmx_prepare_switch_to_guest,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f26f8be4e621..fd9f1a1f0f01 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9016,6 +9016,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)

kvmclock_reset(vcpu);

+ kvm_vcpu_uninit(vcpu);
kvm_x86_ops->vcpu_free(vcpu);
free_cpumask_var(wbinvd_dirty_mask);
}
@@ -9024,6 +9025,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
unsigned int id)
{
struct kvm_vcpu *vcpu;
+ int err;

if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
printk_once(KERN_WARNING
@@ -9031,6 +9033,14 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
"guest TSC will not be reliable\n");

vcpu = kvm_x86_ops->vcpu_create(kvm, id);
+ if (IS_ERR(vcpu))
+ return vcpu;
+
+ err = kvm_vcpu_init(vcpu, kvm, id);
+ if (err) {
+ kvm_x86_ops->vcpu_free(vcpu);
+ return ERR_PTR(err);
+ }

return vcpu;
}
@@ -9083,6 +9093,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_mmu_unload(vcpu);
vcpu_put(vcpu);

+ kvm_vcpu_uninit(vcpu);
kvm_x86_ops->vcpu_free(vcpu);
}

@@ -9379,8 +9390,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)

kvm_hv_vcpu_init(vcpu);

+ if (kvm_x86_ops->vcpu_init(vcpu))
+ goto fail_free_wbinvd_dirty_mask;
+
return 0;

+fail_free_wbinvd_dirty_mask:
+ free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
fail_free_mce_banks:
kfree(vcpu->arch.mce_banks);
fail_free_lapic:
--
2.19.1