Re: [PATCH v2 12/25] KVM: VMX: Handle FRED event data

From: Sean Christopherson
Date: Wed Jun 12 2024 - 18:52:35 EST


On Wed, Feb 07, 2024, Xin Li wrote:
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 4889754415b5..6b796c5c9c2b 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -256,8 +256,12 @@ enum vmcs_field {
> PID_POINTER_TABLE_HIGH = 0x00002043,
> SECONDARY_VM_EXIT_CONTROLS = 0x00002044,
> SECONDARY_VM_EXIT_CONTROLS_HIGH = 0x00002045,
> + INJECTED_EVENT_DATA = 0x00002052,
> + INJECTED_EVENT_DATA_HIGH = 0x00002053,
> GUEST_PHYSICAL_ADDRESS = 0x00002400,
> GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
> + ORIGINAL_EVENT_DATA = 0x00002404,
> + ORIGINAL_EVENT_DATA_HIGH = 0x00002405,

Are these the actual names from the SDM? E.g. is there no FRED_ prefix to clue
in readers that they are FRED specific? (unless they aren't FRED specific?)

> VMCS_LINK_POINTER = 0x00002800,
> VMCS_LINK_POINTER_HIGH = 0x00002801,
> GUEST_IA32_DEBUGCTL = 0x00002802,
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index ee61d2c25cb0..f622fb90a098 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1871,9 +1871,29 @@ static void vmx_inject_exception(struct kvm_vcpu *vcpu)
> vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
> vmx->vcpu.arch.event_exit_inst_len);
> intr_info |= INTR_TYPE_SOFT_EXCEPTION;
> - } else
> + } else {
> intr_info |= INTR_TYPE_HARD_EXCEPTION;
>
> + if (kvm_is_fred_enabled(vcpu)) {
> + u64 event_data = 0;
> +
> + if (is_debug(intr_info))
> + /*
> + * Compared to DR6, FRED #DB event data saved on
> + * the stack frame have bits 4 ~ 11 and 16 ~ 31
> + * inverted, i.e.,
> + * fred_db_event_data = dr6 ^ 0xFFFF0FF0UL
> + */
> + event_data = vcpu->arch.dr6 ^ DR6_RESERVED;
> + else if (is_page_fault(intr_info))
> + event_data = vcpu->arch.cr2;
> + else if (is_nm_fault(intr_info))
> + event_data = to_vmx(vcpu)->fred_xfd_event_data;
> +
> + vmcs_write64(INJECTED_EVENT_DATA, event_data);
> + }
> + }
> +
> vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
>
> vmx_clear_hlt(vcpu);
> @@ -7082,8 +7102,11 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
> *
> * Queuing exception is done in vmx_handle_exit. See comment there.
> */
> - if (vcpu->arch.guest_fpu.fpstate->xfd)
> + if (vcpu->arch.guest_fpu.fpstate->xfd) {
> rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
> + to_vmx(vcpu)->fred_xfd_event_data = vcpu->arch.cr0 & X86_CR0_TS

kvm_is_cr0_bit_set(), don't read vcpu->arch.cr0 directly.

> + ? 0 : vcpu->arch.guest_fpu.xfd_err;

Maybe this?

	if (kvm_is_cr0_bit_set(vcpu, X86_CR0_TS))
		to_vmx(vcpu)->fred_xfd_event_data = 0;
	else
		to_vmx(vcpu)->fred_xfd_event_data = vcpu->arch.guest_fpu.xfd_err;

Hmm, but why does this need to be cached _now_? I.e. why does fred_xfd_event_data
need to exist? Wouldn't it be simpler and more robust to use vcpu->arch.guest_fpu.xfd_err
directly in vmx_inject_exception()?

> + }
> }
>
> static void handle_exception_irqoff(struct vcpu_vmx *vmx)
> @@ -7199,29 +7222,28 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
> vmx->loaded_vmcs->entry_time));
> }
>
> -static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
> - u32 idt_vectoring_info,
> - int instr_len_field,
> - int error_code_field)
> +static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, bool vectoring)
> {
> - u8 vector;
> - int type;
> - bool idtv_info_valid;
> -
> - idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
> + u32 event_id = vectoring ? to_vmx(vcpu)->idt_vectoring_info
> + : vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);


Preferred style for ternary operators is:

	u32 event_id = vectoring ? to_vmx(vcpu)->idt_vectoring_info :
				   vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);

That said, I don't think this is a net positive versus passing in all params.
The bare true/false is somewhat inscrutable, and in this code, it's hard to
understand why KVM looks at X instead of Y without the context of the caller.

> + int instr_len_field = vectoring ? VM_EXIT_INSTRUCTION_LEN
> + : VM_ENTRY_INSTRUCTION_LEN;
> + int error_code_field = vectoring ? IDT_VECTORING_ERROR_CODE
> + : VM_ENTRY_EXCEPTION_ERROR_CODE;
> + int event_data_field = vectoring ? ORIGINAL_EVENT_DATA
> + : INJECTED_EVENT_DATA;
> + u8 vector = event_id & INTR_INFO_VECTOR_MASK;
> + int type = event_id & INTR_INFO_INTR_TYPE_MASK;
>
> vcpu->arch.nmi_injected = false;
> kvm_clear_exception_queue(vcpu);
> kvm_clear_interrupt_queue(vcpu);
>
> - if (!idtv_info_valid)
> + if (!(event_id & INTR_INFO_VALID_MASK))
> return;
>
> kvm_make_request(KVM_REQ_EVENT, vcpu);
>
> - vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
> - type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
> -
> switch (type) {
> case INTR_TYPE_NMI_INTR:
> vcpu->arch.nmi_injected = true;
> @@ -7236,10 +7258,31 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
> vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
> fallthrough;
> case INTR_TYPE_HARD_EXCEPTION:
> - if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
> - u32 err = vmcs_read32(error_code_field);
> - kvm_requeue_exception_e(vcpu, vector, err);
> - } else
> + if (kvm_is_fred_enabled(vcpu)) {
> + /* Save event data for being used as injected-event data */
> + u64 event_data = vmcs_read64(event_data_field);
> +
> + switch (vector) {
> + case DB_VECTOR:
> + /* %dr6 should be equal to (event_data ^ DR6_RESERVED) */

DR6, no need to use assembly syntax, but I'd just drop this comment, as well as
the CR2 comment. They add no insight beyond what the code literally does.

> + vcpu->arch.dr6 = event_data ^ DR6_RESERVED;
> + break;
> + case NM_VECTOR:
> + to_vmx(vcpu)->fred_xfd_event_data = event_data;
> + break;
> + case PF_VECTOR:
> + /* %cr2 should be equal to event_data */
> + vcpu->arch.cr2 = event_data;
> + break;
> + default:
> + WARN_ON(event_data != 0);
> + break;
> + }
> + }