[PATCH 4/4] KVM: x86: allow overwriting L2 reinjected exception with L1 vmexit
From: Paolo Bonzini
Date: Wed Aug 23 2017 - 16:44:38 EST
A reinjected exception is already recorded in either the IDT-vectored
event information fields or the EXITINTINFO fields; if the handling of
an exception in L0 causes a vmexit, we don't really need to keep the
reinjected exception in vcpu->arch.exception.
Teach kvm_multiple_exception to recognize this scenario through a
new kvm_x86_ops callback.
Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 2 +
arch/x86/kvm/svm.c | 79 +++++++++++++++++++-------------------
arch/x86/kvm/vmx.c | 84 +++++++++++++++++++++--------------------
arch/x86/kvm/x86.c | 9 +++++
4 files changed, 95 insertions(+), 79 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6db0ed9cf59e..643308143bea 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -962,6 +962,8 @@ struct kvm_x86_ops {
unsigned char *hypercall_addr);
void (*set_irq)(struct kvm_vcpu *vcpu);
void (*set_nmi)(struct kvm_vcpu *vcpu);
+ int (*nested_check_exception)(struct kvm_vcpu *vcpu,
+ struct kvm_queued_exception *ex);
void (*queue_exception)(struct kvm_vcpu *vcpu);
void (*cancel_injection)(struct kvm_vcpu *vcpu);
int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7e190b21a30b..32c8d8f62985 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -291,8 +291,6 @@ struct amd_svm_iommu_ir {
static int nested_svm_exit_handled(struct vcpu_svm *svm);
static int nested_svm_intercept(struct vcpu_svm *svm);
static int nested_svm_vmexit(struct vcpu_svm *svm);
-static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
- bool has_error_code, u32 error_code);
enum {
VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
@@ -650,6 +648,42 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
svm_set_interrupt_shadow(vcpu, 0);
}
+static void nested_svm_queue_exception(struct kvm_vcpu *vcpu,
+ struct kvm_queued_exception *ex)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + ex->nr;
+ svm->vmcb->control.exit_code_hi = 0;
+ svm->vmcb->control.exit_info_1 = ex->error_code;
+
+ /*
+ * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception.
+ * The fix is to add the ancillary datum (CR2 or DR6) to structs
+ * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be
+ * written only when inject_pending_event runs (DR6 would written here
+ * too). This should be conditional on a new capability---if the
+ * capability is disabled, kvm_multiple_exception would write the
+ * ancillary information to CR2 or DR6, for backwards ABI-compatibility.
+ */
+ if (ex->nested_apf)
+ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
+ else
+ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+
+ svm->nested.exit_required = true;
+}
+
+static int nested_svm_check_exception(struct kvm_vcpu *vcpu,
+ struct kvm_queued_exception *ex)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ unsigned int nr = ex->nr;
+
+ return ((svm->nested.intercept_exceptions & (1 << nr)) ||
+ (nr == PF_VECTOR && ex->nested_apf));
+}
+
static void svm_queue_exception(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -662,9 +696,11 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
* If we are within a nested VM we'd better #VMEXIT and let the guest
* handle the exception
*/
- if (!reinject &&
- nested_svm_check_exception(svm, nr, has_error_code, error_code))
+ if (!reinject && is_guest_mode(vcpu) &&
+ nested_svm_check_exception(vcpu, &vcpu->arch.exception)) {
+ nested_svm_queue_exception(vcpu, &vcpu->arch.exception);
return;
+ }
if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
@@ -2428,40 +2464,6 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm)
return 0;
}
-static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
- bool has_error_code, u32 error_code)
-{
- int vmexit;
-
- if (!is_guest_mode(&svm->vcpu))
- return 0;
-
- vmexit = nested_svm_intercept(svm);
- if (vmexit != NESTED_EXIT_DONE)
- return 0;
-
- svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
- svm->vmcb->control.exit_code_hi = 0;
- svm->vmcb->control.exit_info_1 = error_code;
-
- /*
- * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception.
- * The fix is to add the ancillary datum (CR2 or DR6) to structs
- * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be
- * written only when inject_pending_event runs (DR6 would written here
- * too). This should be conditional on a new capability---if the
- * capability is disabled, kvm_multiple_exception would write the
- * ancillary information to CR2 or DR6, for backwards ABI-compatibility.
- */
- if (svm->vcpu.arch.exception.nested_apf)
- svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
- else
- svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
-
- svm->nested.exit_required = true;
- return vmexit;
-}
-
/* This function returns true if it is save to enable the irq window */
static inline bool nested_svm_intr(struct vcpu_svm *svm)
{
@@ -5448,6 +5450,7 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu)
.patch_hypercall = svm_patch_hypercall,
.set_irq = svm_set_irq,
.set_nmi = svm_inject_nmi,
+ .nested_check_exception = nested_svm_check_exception,
.queue_exception = svm_queue_exception,
.cancel_injection = svm_cancel_injection,
.interrupt_allowed = svm_interrupt_allowed,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f8ef38094acc..ddabed8425b3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2438,15 +2438,41 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
vmx_set_interrupt_shadow(vcpu, 0);
}
-static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
- unsigned long exit_qual)
+static void nested_vmx_queue_exception(struct kvm_vcpu *vcpu,
+ struct kvm_queued_exception *ex)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- unsigned int nr = vcpu->arch.exception.nr;
- u32 intr_info = nr | INTR_INFO_VALID_MASK;
+ unsigned int nr = ex->nr;
+ u32 intr_info = nr | INTR_INFO_VALID_MASK;
+ unsigned long exit_qual = 0;
- if (vcpu->arch.exception.has_error_code) {
- vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
+ /*
+ * FIXME: we must not write CR2 or DR6 when L1 intercepts an L2 #PF
+ * or #DB exception.
+ *
+ * The fix is to add the ancillary datum (CR2 or DR6) to structs
+ * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6
+ * can be written only when inject_pending_event runs. This should be
+ * conditional on a new capability---if the capability is disabled,
+ * kvm_multiple_exception would write the ancillary information to
+ * CR2 or DR6, for backwards ABI-compatibility.
+ */
+ switch (nr) {
+ case PF_VECTOR:
+ if (ex->nested_apf)
+ exit_qual = vcpu->arch.apf.nested_apf_token;
+ else
+ exit_qual = vcpu->arch.cr2;
+ break;
+ case DB_VECTOR:
+ exit_qual = vcpu->arch.dr6;
+ break;
+ default:
+ exit_qual = 0;
+ }
+
+ if (ex->has_error_code) {
+ vmcs12->vm_exit_intr_error_code = ex->error_code;
intr_info |= INTR_INFO_DELIVER_CODE_MASK;
}
@@ -2466,43 +2492,16 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
* KVM wants to inject page-faults which it got to the guest. This function
* checks whether in a nested guest, we need to inject them to L1 or L2.
*/
-static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
+static int nested_vmx_check_exception(struct kvm_vcpu *vcpu,
+ struct kvm_queued_exception *ex)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- unsigned int nr = vcpu->arch.exception.nr;
+ unsigned int nr = ex->nr;
- if (nr == PF_VECTOR) {
- if (vcpu->arch.exception.nested_apf) {
- nested_vmx_inject_exception_vmexit(vcpu,
- vcpu->arch.apf.nested_apf_token);
- return 1;
- }
- /*
- * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception.
- * The fix is to add the ancillary datum (CR2 or DR6) to structs
- * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6
- * can be written only when inject_pending_event runs. This should be
- * conditional on a new capability---if the capability is disabled,
- * kvm_multiple_exception would write the ancillary information to
- * CR2 or DR6, for backwards ABI-compatibility.
- */
- if (nested_vmx_is_page_fault_vmexit(vmcs12,
- vcpu->arch.exception.error_code)) {
- nested_vmx_inject_exception_vmexit(vcpu, vcpu->arch.cr2);
- return 1;
- }
- } else {
- unsigned long exit_qual = 0;
- if (nr == DB_VECTOR)
- exit_qual = vcpu->arch.dr6;
-
- if (vmcs12->exception_bitmap & (1u << nr)) {
- nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
- return 1;
- }
- }
-
- return 0;
+ return (nr == PF_VECTOR
+ ? (ex->nested_apf ||
+ nested_vmx_is_page_fault_vmexit(vmcs12, ex->error_code))
+ : vmcs12->exception_bitmap & (1u << nr));
}
static void vmx_queue_exception(struct kvm_vcpu *vcpu)
@@ -2515,8 +2514,10 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
u32 intr_info = nr | INTR_INFO_VALID_MASK;
if (!reinject && is_guest_mode(vcpu) &&
- nested_vmx_check_exception(vcpu))
+ nested_vmx_check_exception(vcpu, &vcpu->arch.exception)) {
+ nested_vmx_queue_exception(vcpu, &vcpu->arch.exception);
return;
+ }
if (has_error_code) {
vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
@@ -11886,6 +11887,7 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
.patch_hypercall = vmx_patch_hypercall,
.set_irq = vmx_inject_irq,
.set_nmi = vmx_inject_nmi,
+ .nested_check_exception = nested_vmx_check_exception,
.queue_exception = vmx_queue_exception,
.cancel_injection = vmx_cancel_injection,
.interrupt_allowed = vmx_interrupt_allowed,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 88b91114c5a8..55c709531eb9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -390,6 +390,15 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
if (!vcpu->arch.exception.pending)
goto queue;
+ /*
+ * If the previous exception was recorded during an L2->L0
+ * exit, we can overwrite it with one that causes an L0->L1
+ * nested vmexit.
+ */
+ if (vcpu->arch.exception.reinject && is_guest_mode(vcpu) &&
+ kvm_x86_ops->nested_check_exception(vcpu, &ex))
+ goto queue;
+
/* to check exception */
prev_nr = vcpu->arch.exception.nr;
if (prev_nr == DF_VECTOR) {
--
1.8.3.1