[PATCH 10/18] KVM: x86: Keep track of instruction length during faults

From: Nicolas Saenz Julienne
Date: Sun Jun 09 2024 - 11:56:31 EST


Both VMX and SVM provide the length of the instruction
being run at the time of the page fault. Save it within 'struct
kvm_page_fault', as it'll become useful in the future.

Signed-off-by: Nicolas Saenz Julienne <nsaenz@xxxxxxxxxx>
---
arch/x86/kvm/mmu/mmu.c | 11 ++++++++---
arch/x86/kvm/mmu/mmu_internal.h | 5 ++++-
arch/x86/kvm/vmx/vmx.c | 16 ++++++++++++++--
3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 8d74bdef68c1d..39b113afefdfc 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4271,7 +4271,8 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
work->arch.cr3 != kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu))
return;

- kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, work->arch.error_code, true, NULL);
+ kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, work->arch.error_code,
+ true, NULL, 0);
}

static inline u8 kvm_max_level_for_order(int order)
@@ -5887,7 +5888,7 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err

if (r == RET_PF_INVALID) {
r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, error_code, false,
- &emulation_type);
+ &emulation_type, insn_len);
if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm))
return -EIO;
}
@@ -5924,8 +5925,12 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu))
emulation_type |= EMULTYPE_ALLOW_RETRY_PF;
emulate:
+ /*
+ * x86_emulate_instruction() expects insn to contain data if
+ * insn_len > 0.
+ */
return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn,
- insn_len);
+ insn ? insn_len : 0);
}
EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);

diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index ce2fcd19ba6be..a0cde1a0e39b0 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -192,6 +192,7 @@ struct kvm_page_fault {
const gpa_t addr;
const u64 error_code;
const bool prefetch;
+ const u8 insn_len;

/* Derived from error_code. */
const bool exec;
@@ -288,11 +289,13 @@ static inline void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
}

static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
- u64 err, bool prefetch, int *emulation_type)
+ u64 err, bool prefetch,
+ int *emulation_type, u8 insn_len)
{
struct kvm_page_fault fault = {
.addr = cr2_or_gpa,
.error_code = err,
+ .insn_len = insn_len,
.exec = err & PFERR_FETCH_MASK,
.write = err & PFERR_WRITE_MASK,
.present = err & PFERR_PRESENT_MASK,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ac0682fece604..9ba38e0b0c7a8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5807,11 +5807,13 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
if (unlikely(allow_smaller_maxphyaddr && !kvm_vcpu_is_legal_gpa(vcpu, gpa)))
return kvm_emulate_instruction(vcpu, 0);

- return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
+ return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL,
+ vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
}

static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
{
+ u8 insn_len = 0;
gpa_t gpa;

if (vmx_check_emulate_instruction(vcpu, EMULTYPE_PF, NULL, 0))
@@ -5828,7 +5830,17 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}

- return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
+ /*
+ * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on
+ * undefined behavior: Intel's SDM doesn't mandate the VMCS field be
+ * set when EPT misconfig occurs. In practice, real hardware updates
+ * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors
+ * (namely Hyper-V) don't set it due to it being undefined behavior.
+ */
+ if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
+ insn_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+
+ return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, insn_len);
}

static int handle_nmi_window(struct kvm_vcpu *vcpu)
--
2.40.1