Re: [PATCH RFC 2/7] KVM: nVMX: modify vmcs12 fields to match Hyper-V enlightened VMCS
From: Jim Mattson
Date: Mon Dec 18 2017 - 16:28:48 EST
At this point in time, I don't think you can just blithely change the
virtual VMCS layout and revision number. Existing VMs using the old
layout and revision number must continue to work on versions of kvm
past this point. You could tie the layout and revision number changes
to KVM_CAP_HYPERV_ENLIGHTENED_VMCS if you like, but kvm must be able
to continue to service VMs using the previous layout and revision
number in perpetuity.
On Mon, Dec 18, 2017 at 12:23 PM, Jim Mattson <jmattson@xxxxxxxxxx> wrote:
> Yikes! This breaks migration to/from older versions of kvm. Will you
> be submitting another change to handle dynamic conversion between
> formats?
>
> On Mon, Dec 18, 2017 at 9:17 AM, Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> wrote:
>> From: Ladi Prosek <lprosek@xxxxxxxxxx>
>>
>> Reorders existing fields and adds fields specific to Hyper-V. The layout
>> now matches Hyper-V TLFS 5.0b 16.11.2 Enlightened VMCS.
>>
>> Fields used by KVM but missing from Hyper-V are placed in the second half
>> of the VMCS page to minimize the chances they will clash with future
>> enlightened VMCS versions.
>>
>> Signed-off-by: Ladi Prosek <lprosek@xxxxxxxxxx>
>> Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
>> ---
>> [Vitaly]: Update VMCS12_REVISION to some new arbitrary number.
>> ---
>> arch/x86/kvm/vmx.c | 321 +++++++++++++++++++++++++++++++----------------------
>> 1 file changed, 187 insertions(+), 134 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 8eba631c4dbd..cd5f29a57880 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -239,159 +239,212 @@ struct __packed vmcs12 {
>> u32 revision_id;
>> u32 abort;
>>
>> + union {
>> + u64 hv_vmcs[255];
>> + struct {
>> + u16 host_es_selector;
>> + u16 host_cs_selector;
>> + u16 host_ss_selector;
>> + u16 host_ds_selector;
>> + u16 host_fs_selector;
>> + u16 host_gs_selector;
>> + u16 host_tr_selector;
>> +
>> + u64 host_ia32_pat;
>> + u64 host_ia32_efer;
>> +
>> + /*
>> + * To allow migration of L1 (complete with its L2
>> + * guests) between machines of different natural widths
>> + * (32 or 64 bit), we cannot have unsigned long fields
>> + * with no explicit size. We use u64 (aliased
>> + * natural_width) instead. Luckily, x86 is
>> + * little-endian.
>> + */
>> + natural_width host_cr0;
>> + natural_width host_cr3;
>> + natural_width host_cr4;
>> +
>> + natural_width host_ia32_sysenter_esp;
>> + natural_width host_ia32_sysenter_eip;
>> + natural_width host_rip;
>> + u32 host_ia32_sysenter_cs;
>> +
>> + u32 pin_based_vm_exec_control;
>> + u32 vm_exit_controls;
>> + u32 secondary_vm_exec_control;
>> +
>> + u64 io_bitmap_a;
>> + u64 io_bitmap_b;
>> + u64 msr_bitmap;
>> +
>> + u16 guest_es_selector;
>> + u16 guest_cs_selector;
>> + u16 guest_ss_selector;
>> + u16 guest_ds_selector;
>> + u16 guest_fs_selector;
>> + u16 guest_gs_selector;
>> + u16 guest_ldtr_selector;
>> + u16 guest_tr_selector;
>> +
>> + u32 guest_es_limit;
>> + u32 guest_cs_limit;
>> + u32 guest_ss_limit;
>> + u32 guest_ds_limit;
>> + u32 guest_fs_limit;
>> + u32 guest_gs_limit;
>> + u32 guest_ldtr_limit;
>> + u32 guest_tr_limit;
>> + u32 guest_gdtr_limit;
>> + u32 guest_idtr_limit;
>> +
>> + u32 guest_es_ar_bytes;
>> + u32 guest_cs_ar_bytes;
>> + u32 guest_ss_ar_bytes;
>> + u32 guest_ds_ar_bytes;
>> + u32 guest_fs_ar_bytes;
>> + u32 guest_gs_ar_bytes;
>> + u32 guest_ldtr_ar_bytes;
>> + u32 guest_tr_ar_bytes;
>> +
>> + natural_width guest_es_base;
>> + natural_width guest_cs_base;
>> + natural_width guest_ss_base;
>> + natural_width guest_ds_base;
>> + natural_width guest_fs_base;
>> + natural_width guest_gs_base;
>> + natural_width guest_ldtr_base;
>> + natural_width guest_tr_base;
>> + natural_width guest_gdtr_base;
>> + natural_width guest_idtr_base;
>> +
>> + u64 padding64_1[3];
>> +
>> + u64 vm_exit_msr_store_addr;
>> + u64 vm_exit_msr_load_addr;
>> + u64 vm_entry_msr_load_addr;
>> +
>> + natural_width cr3_target_value0;
>> + natural_width cr3_target_value1;
>> + natural_width cr3_target_value2;
>> + natural_width cr3_target_value3;
>> +
>> + u32 page_fault_error_code_mask;
>> + u32 page_fault_error_code_match;
>> +
>> + u32 cr3_target_count;
>> + u32 vm_exit_msr_store_count;
>> + u32 vm_exit_msr_load_count;
>> + u32 vm_entry_msr_load_count;
>> +
>> + u64 tsc_offset;
>> + u64 virtual_apic_page_addr;
>> + u64 vmcs_link_pointer;
>> +
>> + u64 guest_ia32_debugctl;
>> + u64 guest_ia32_pat;
>> + u64 guest_ia32_efer;
>> +
>> + u64 guest_pdptr0;
>> + u64 guest_pdptr1;
>> + u64 guest_pdptr2;
>> + u64 guest_pdptr3;
>> +
>> + natural_width guest_pending_dbg_exceptions;
>> + natural_width guest_sysenter_esp;
>> + natural_width guest_sysenter_eip;
>> +
>> + u32 guest_activity_state;
>> + u32 guest_sysenter_cs;
>> +
>> + natural_width cr0_guest_host_mask;
>> + natural_width cr4_guest_host_mask;
>> + natural_width cr0_read_shadow;
>> + natural_width cr4_read_shadow;
>> + natural_width guest_cr0;
>> + natural_width guest_cr3;
>> + natural_width guest_cr4;
>> + natural_width guest_dr7;
>> +
>> + natural_width host_fs_base;
>> + natural_width host_gs_base;
>> + natural_width host_tr_base;
>> + natural_width host_gdtr_base;
>> + natural_width host_idtr_base;
>> + natural_width host_rsp;
>> +
>> + u64 ept_pointer;
>> +
>> + u16 virtual_processor_id;
>> + u16 padding16[3];
>> +
>> + u64 padding64_2[5];
>> + u64 guest_physical_address;
>> +
>> + u32 vm_instruction_error;
>> + u32 vm_exit_reason;
>> + u32 vm_exit_intr_info;
>> + u32 vm_exit_intr_error_code;
>> + u32 idt_vectoring_info_field;
>> + u32 idt_vectoring_error_code;
>> + u32 vm_exit_instruction_len;
>> + u32 vmx_instruction_info;
>> +
>> + natural_width exit_qualification;
>> + natural_width padding64_3[4];
>> +
>> + natural_width guest_linear_address;
>> + natural_width guest_rsp;
>> + natural_width guest_rflags;
>> +
>> + u32 guest_interruptibility_info;
>> + u32 cpu_based_vm_exec_control;
>> + u32 exception_bitmap;
>> + u32 vm_entry_controls;
>> + u32 vm_entry_intr_info_field;
>> + u32 vm_entry_exception_error_code;
>> + u32 vm_entry_instruction_len;
>> + u32 tpr_threshold;
>> +
>> + natural_width guest_rip;
>> +
>> + u32 hv_clean_fields;
>> + u32 hv_padding_32;
>> + u32 hv_synthetic_controls;
>> + u32 hv_enlightenments_control;
>> + u32 hv_vp_id;
>> +
>> + u64 hv_vm_id;
>> + u64 partition_assist_page;
>> + u64 padding64_4[4];
>> + u64 guest_bndcfgs;
>> + u64 padding64_5[7];
>> + u64 xss_exit_bitmap;
>> + u64 padding64_6[7];
>> + };
>> + };
>> +
>> + /* Synthetic and KVM-specific fields: */
>> u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
>> u32 padding[7]; /* room for future expansion */
>>
>> - u64 io_bitmap_a;
>> - u64 io_bitmap_b;
>> - u64 msr_bitmap;
>> - u64 vm_exit_msr_store_addr;
>> - u64 vm_exit_msr_load_addr;
>> - u64 vm_entry_msr_load_addr;
>> - u64 tsc_offset;
>> - u64 virtual_apic_page_addr;
>> u64 apic_access_addr;
>> u64 posted_intr_desc_addr;
>> u64 vm_function_control;
>> - u64 ept_pointer;
>> u64 eoi_exit_bitmap0;
>> u64 eoi_exit_bitmap1;
>> u64 eoi_exit_bitmap2;
>> u64 eoi_exit_bitmap3;
>> u64 eptp_list_address;
>> - u64 xss_exit_bitmap;
>> - u64 guest_physical_address;
>> - u64 vmcs_link_pointer;
>> u64 pml_address;
>> - u64 guest_ia32_debugctl;
>> - u64 guest_ia32_pat;
>> - u64 guest_ia32_efer;
>> u64 guest_ia32_perf_global_ctrl;
>> - u64 guest_pdptr0;
>> - u64 guest_pdptr1;
>> - u64 guest_pdptr2;
>> - u64 guest_pdptr3;
>> - u64 guest_bndcfgs;
>> - u64 host_ia32_pat;
>> - u64 host_ia32_efer;
>> u64 host_ia32_perf_global_ctrl;
>> u64 padding64[8]; /* room for future expansion */
>> - /*
>> - * To allow migration of L1 (complete with its L2 guests) between
>> - * machines of different natural widths (32 or 64 bit), we cannot have
>> - * unsigned long fields with no explict size. We use u64 (aliased
>> - * natural_width) instead. Luckily, x86 is little-endian.
>> - */
>> - natural_width cr0_guest_host_mask;
>> - natural_width cr4_guest_host_mask;
>> - natural_width cr0_read_shadow;
>> - natural_width cr4_read_shadow;
>> - natural_width cr3_target_value0;
>> - natural_width cr3_target_value1;
>> - natural_width cr3_target_value2;
>> - natural_width cr3_target_value3;
>> - natural_width exit_qualification;
>> - natural_width guest_linear_address;
>> - natural_width guest_cr0;
>> - natural_width guest_cr3;
>> - natural_width guest_cr4;
>> - natural_width guest_es_base;
>> - natural_width guest_cs_base;
>> - natural_width guest_ss_base;
>> - natural_width guest_ds_base;
>> - natural_width guest_fs_base;
>> - natural_width guest_gs_base;
>> - natural_width guest_ldtr_base;
>> - natural_width guest_tr_base;
>> - natural_width guest_gdtr_base;
>> - natural_width guest_idtr_base;
>> - natural_width guest_dr7;
>> - natural_width guest_rsp;
>> - natural_width guest_rip;
>> - natural_width guest_rflags;
>> - natural_width guest_pending_dbg_exceptions;
>> - natural_width guest_sysenter_esp;
>> - natural_width guest_sysenter_eip;
>> - natural_width host_cr0;
>> - natural_width host_cr3;
>> - natural_width host_cr4;
>> - natural_width host_fs_base;
>> - natural_width host_gs_base;
>> - natural_width host_tr_base;
>> - natural_width host_gdtr_base;
>> - natural_width host_idtr_base;
>> - natural_width host_ia32_sysenter_esp;
>> - natural_width host_ia32_sysenter_eip;
>> - natural_width host_rsp;
>> - natural_width host_rip;
>> - natural_width paddingl[8]; /* room for future expansion */
>> - u32 pin_based_vm_exec_control;
>> - u32 cpu_based_vm_exec_control;
>> - u32 exception_bitmap;
>> - u32 page_fault_error_code_mask;
>> - u32 page_fault_error_code_match;
>> - u32 cr3_target_count;
>> - u32 vm_exit_controls;
>> - u32 vm_exit_msr_store_count;
>> - u32 vm_exit_msr_load_count;
>> - u32 vm_entry_controls;
>> - u32 vm_entry_msr_load_count;
>> - u32 vm_entry_intr_info_field;
>> - u32 vm_entry_exception_error_code;
>> - u32 vm_entry_instruction_len;
>> - u32 tpr_threshold;
>> - u32 secondary_vm_exec_control;
>> - u32 vm_instruction_error;
>> - u32 vm_exit_reason;
>> - u32 vm_exit_intr_info;
>> - u32 vm_exit_intr_error_code;
>> - u32 idt_vectoring_info_field;
>> - u32 idt_vectoring_error_code;
>> - u32 vm_exit_instruction_len;
>> - u32 vmx_instruction_info;
>> - u32 guest_es_limit;
>> - u32 guest_cs_limit;
>> - u32 guest_ss_limit;
>> - u32 guest_ds_limit;
>> - u32 guest_fs_limit;
>> - u32 guest_gs_limit;
>> - u32 guest_ldtr_limit;
>> - u32 guest_tr_limit;
>> - u32 guest_gdtr_limit;
>> - u32 guest_idtr_limit;
>> - u32 guest_es_ar_bytes;
>> - u32 guest_cs_ar_bytes;
>> - u32 guest_ss_ar_bytes;
>> - u32 guest_ds_ar_bytes;
>> - u32 guest_fs_ar_bytes;
>> - u32 guest_gs_ar_bytes;
>> - u32 guest_ldtr_ar_bytes;
>> - u32 guest_tr_ar_bytes;
>> - u32 guest_interruptibility_info;
>> - u32 guest_activity_state;
>> - u32 guest_sysenter_cs;
>> - u32 host_ia32_sysenter_cs;
>> u32 vmx_preemption_timer_value;
>> u32 padding32[7]; /* room for future expansion */
>> - u16 virtual_processor_id;
>> u16 posted_intr_nv;
>> - u16 guest_es_selector;
>> - u16 guest_cs_selector;
>> - u16 guest_ss_selector;
>> - u16 guest_ds_selector;
>> - u16 guest_fs_selector;
>> - u16 guest_gs_selector;
>> - u16 guest_ldtr_selector;
>> - u16 guest_tr_selector;
>> u16 guest_intr_status;
>> u16 guest_pml_index;
>> - u16 host_es_selector;
>> - u16 host_cs_selector;
>> - u16 host_ss_selector;
>> - u16 host_ds_selector;
>> - u16 host_fs_selector;
>> - u16 host_gs_selector;
>> - u16 host_tr_selector;
>> };
>>
>> /*
>> @@ -399,7 +452,7 @@ struct __packed vmcs12 {
>> * layout of struct vmcs12 is changed. MSR_IA32_VMX_BASIC returns this id, and
>> * VMPTRLD verifies that the VMCS region that L1 is loading contains this id.
>> */
>> -#define VMCS12_REVISION 0x11e57ed0
>> +#define VMCS12_REVISION 0x11e586b1
>>
>> /*
>> * VMCS12_SIZE is the number of bytes L1 should allocate for the VMXON region
>> --
>> 2.14.3
>>