[PATCH v3 8/8] DO NOT MERGE: KVM: selftests: Reproduce nested RIP restore bug
From: Yosry Ahmed
Date: Tue Feb 24 2026 - 20:03:38 EST
Update svm_nested_soft_inject_test such that L1 syncs to userspace
before running L2. The test then enables single-stepping and steps
through guest code until VMRUN is executed, and saves/restores the VM
immediately after (before L2 runs).
This reproduces a bug in save/restore where L2's RIP is not used
correctly to construct the vmcb02 at the destination.
Signed-off-by: Yosry Ahmed <yosry@xxxxxxxxxx>
---
.../testing/selftests/kvm/lib/x86/processor.c | 8 +-
.../kvm/x86/svm_nested_soft_inject_test.c | 74 +++++++++++++++----
2 files changed, 65 insertions(+), 17 deletions(-)
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index fab18e9be66c9..7e0213a88697d 100644
--- a/tools/testing/selftests/kvm/lib/x86/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -1275,6 +1275,8 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu)
return state;
}
+#define LOAD_REGS_BEFORE_NESTED 1 /* 1: vcpu_load_state() sets regs before nested state; 0: after it */
+
void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state)
{
vcpu_sregs_set(vcpu, &state->sregs);
@@ -1287,10 +1289,14 @@ void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state)
vcpu_events_set(vcpu, &state->events);
vcpu_mp_state_set(vcpu, &state->mp_state);
vcpu_debugregs_set(vcpu, &state->debugregs);
- vcpu_regs_set(vcpu, &state->regs);
+ if (LOAD_REGS_BEFORE_NESTED)
+ vcpu_regs_set(vcpu, &state->regs);
if (state->nested.size)
vcpu_nested_state_set(vcpu, &state->nested);
+
+ if (!LOAD_REGS_BEFORE_NESTED)
+ vcpu_regs_set(vcpu, &state->regs);
}
void kvm_x86_state_cleanup(struct kvm_x86_state *state)
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
index 4bd1655f9e6d0..dfefd8eed392a 100644
--- a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
@@ -101,6 +101,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
vmcb->control.next_rip = vmcb->save.rip;
}
+ GUEST_SYNC(true);
run_guest(vmcb, svm->vmcb_gpa);
__GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
"Expected VMMCAL #VMEXIT, got '0x%lx', info1 = '0x%lx, info2 = '0x%lx'",
@@ -131,6 +132,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
/* The return address pushed on stack, skip over UD2 */
vmcb->control.next_rip = vmcb->save.rip + 2;
+ GUEST_SYNC(true);
run_guest(vmcb, svm->vmcb_gpa);
__GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
"Expected HLT #VMEXIT, got '0x%lx', info1 = '0x%lx, info2 = '0x%lx'",
@@ -140,6 +142,24 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
GUEST_DONE();
}
+/*
+ * Snapshot @vcpu's state, release @vm, recreate it with a single vCPU and
+ * load the snapshot into that new vCPU.
+ *
+ * Returns the recreated vCPU; the snapshot is freed before returning.
+ */
+static struct kvm_vcpu *save_and_restore_vm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+	struct kvm_x86_state *snapshot;
+	struct kvm_vcpu *restored;
+
+	snapshot = vcpu_save_state(vcpu);
+	kvm_vm_release(vm);
+
+	restored = vm_recreate_with_one_vcpu(vm);
+	vcpu_load_state(restored, snapshot);
+	kvm_x86_state_cleanup(snapshot);
+
+	return restored;
+}
+
+/*
+ * Return true if @vcpu's saved state carries nested state with
+ * KVM_STATE_NESTED_RUN_PENDING set.  The test uses this while single-stepping
+ * L1 to detect that VMRUN has been executed but L2 has not run yet.
+ *
+ * The snapshot is taken only to inspect the nested flags, so it is freed
+ * before returning; this helper is called once per single-stepped instruction.
+ */
+static bool is_nested_run_pending(struct kvm_vcpu *vcpu)
+{
+	struct kvm_x86_state *state = vcpu_save_state(vcpu);
+	bool pending;
+
+	pending = state->nested.size &&
+		  (state->nested.flags & KVM_STATE_NESTED_RUN_PENDING);
+
+	/* Release the snapshot instead of leaking it on every call. */
+	kvm_x86_state_cleanup(state);
+
+	return pending;
+}
+
static void run_test(bool is_nmi)
{
struct kvm_vcpu *vcpu;
@@ -173,22 +193,44 @@ static void run_test(bool is_nmi)
memset(&debug, 0, sizeof(debug));
vcpu_guest_debug_set(vcpu, &debug);
- struct ucall uc;
-
- alarm(2);
- vcpu_run(vcpu);
- alarm(0);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- /* NOT REACHED */
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ for (;;) {
+ struct kvm_guest_debug debug;
+ struct ucall uc;
+
+ alarm(2);
+ vcpu_run(vcpu);
+ alarm(0);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ /*
+ * L1 syncs before calling run_guest(), single-step over
+ * all instructions until VMRUN, and save+restore right
+ * after it (before L2 actually runs).
+ */
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+ vcpu_guest_debug_set(vcpu, &debug);
+
+ do {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+ } while (!is_nested_run_pending(vcpu));
+
+ memset(&debug, 0, sizeof(debug));
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu = save_and_restore_vm(vm, vcpu);
+ break;
+
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ /* NOT REACHED */
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
}
done:
kvm_vm_free(vm);
--
2.53.0.414.gf7e9f6c205-goog