[PATCH v4 3/4] KVM: x86: On emulation failure, convey the exit reason, etc. to userspace

From: David Edmondson
Date: Fri Aug 13 2021 - 03:12:36 EST


Should instruction emulation fail, include the VM exit reason, etc. in
the emulation_failure data passed to userspace, in order that the VMM
can report it as a debugging aid when describing the failure.

Suggested-by: Joao Martins <joao.m.martins@xxxxxxxxxx>
Signed-off-by: David Edmondson <david.edmondson@xxxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 3 ++
arch/x86/kvm/vmx/vmx.c | 5 +--
arch/x86/kvm/x86.c | 73 ++++++++++++++++++++++++++-------
include/uapi/linux/kvm.h | 7 ++++
4 files changed, 70 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e3c0788bcdc2..da2d8f3a2019 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1630,6 +1630,9 @@ extern u64 kvm_mce_cap_supported;
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
void *insn, int insn_len);
+void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu,
+ u64 *data, u8 ndata);
+void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu);

void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6e5706ecce0b..9d14f68651f1 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5367,10 +5367,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)

if (vmx->emulation_required && !vmx->rmode.vm86_active &&
vcpu->arch.exception.pending) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror =
- KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
+ kvm_prepare_emulation_failure_exit(vcpu);
return 0;
}

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e5d5c5ed7dd4..35639391de7b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7465,29 +7465,78 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
}
EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);

-static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
+static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data,
+ u8 ndata, u8 *insn_bytes, u8 insn_size)
{
- struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
- u32 insn_size = ctxt->fetch.end - ctxt->fetch.data;
struct kvm_run *run = vcpu->run;
+ u8 ndata_start;
+ u64 info[5];
+
+ /*
+ * Zero the whole array used to retrieve the exit info, casting to u32
+ * for select entries will leave some chunks uninitialized.
+ */
+ memset(&info, 0, sizeof(info));
+
+ static_call(kvm_x86_get_exit_info)(vcpu, (u32 *)&info[0], &info[1],
+ &info[2], (u32 *)&info[3],
+ (u32 *)&info[4]);

run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
run->emulation_failure.suberror = KVM_INTERNAL_ERROR_EMULATION;
- run->emulation_failure.ndata = 0;
+
+ /*
+ * There's currently space for 13 entries, but 5 are used for the exit
+ * reason and info. Restrict to 4 to reduce the maintenance burden
+ * when expanding kvm_run.emulation_failure in the future.
+ */
+ if (WARN_ON_ONCE(ndata > 4))
+ ndata = 4;
+
+ /* Always include the flags as a 'data' entry. */
+ ndata_start = 1;
run->emulation_failure.flags = 0;

if (insn_size) {
- run->emulation_failure.ndata = 3;
+ ndata_start += (sizeof(run->emulation_failure.insn_size) +
+ sizeof(run->emulation_failure.insn_bytes)) /
+ sizeof(u64);
run->emulation_failure.flags |=
KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES;
run->emulation_failure.insn_size = insn_size;
memset(run->emulation_failure.insn_bytes, 0x90,
sizeof(run->emulation_failure.insn_bytes));
- memcpy(run->emulation_failure.insn_bytes,
- ctxt->fetch.data, insn_size);
+ memcpy(run->emulation_failure.insn_bytes, insn_bytes, insn_size);
}
+
+ memcpy(&run->internal.data[ndata_start], info, sizeof(info));
+ memcpy(&run->internal.data[ndata_start + ARRAY_SIZE(info)], data,
+ ndata * sizeof(u64));
+
+ run->emulation_failure.ndata = ndata_start + ARRAY_SIZE(info) + ndata;
}

+static void prepare_emulation_ctxt_failure_exit(struct kvm_vcpu *vcpu)
+{
+ struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
+
+ prepare_emulation_failure_exit(vcpu, NULL, 0, ctxt->fetch.data,
+ ctxt->fetch.end - ctxt->fetch.data);
+}
+
+void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data,
+ u8 ndata)
+{
+ prepare_emulation_failure_exit(vcpu, data, ndata, NULL, 0);
+}
+EXPORT_SYMBOL_GPL(__kvm_prepare_emulation_failure_exit);
+
+void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
+{
+ __kvm_prepare_emulation_failure_exit(vcpu, NULL, 0);
+}
+EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit);
+
static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
{
struct kvm *kvm = vcpu->kvm;
@@ -7502,16 +7551,14 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)

if (kvm->arch.exit_on_emulation_error ||
(emulation_type & EMULTYPE_SKIP)) {
- prepare_emulation_failure_exit(vcpu);
+ prepare_emulation_ctxt_failure_exit(vcpu);
return 0;
}

kvm_queue_exception(vcpu, UD_VECTOR);

if (!is_guest_mode(vcpu) && static_call(kvm_x86_get_cpl)(vcpu) == 0) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
+ prepare_emulation_ctxt_failure_exit(vcpu);
return 0;
}

@@ -12104,9 +12151,7 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
* doesn't seem to be a real use-case behind such requests, just return
* KVM_EXIT_INTERNAL_ERROR for now.
*/
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
+ kvm_prepare_emulation_failure_exit(vcpu);

return 0;
}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6c79c1ce3703..e86cc2de7b5c 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -397,6 +397,12 @@ struct kvm_run {
* "ndata" is correct, that new fields are enumerated in "flags",
* and that each flag enumerates fields that are 64-bit aligned
* and sized (so that ndata+internal.data[] is valid/accurate).
+ *
+ * Space beyond the defined fields may be used to
+ * store arbitrary debug information relating to the
+ * emulation failure. It is accounted for in "ndata"
+ * but otherwise unspecified and is not represented in
+ * "flags".
*/
struct {
__u32 suberror;
@@ -408,6 +414,7 @@ struct kvm_run {
__u8 insn_bytes[15];
};
};
+ /* Arbitrary debug data may follow. */
} emulation_failure;
/* KVM_EXIT_OSI */
struct {
--
2.30.2