Re: [PATCH v3 7/8] KVM: nSVM: Delay setting soft IRQ RIP tracking fields until vCPU run
From: Sean Christopherson
Date: Wed Mar 04 2026 - 13:35:01 EST
On Wed, Mar 04, 2026, Yosry Ahmed wrote:
> On Tue, Feb 24, 2026 at 5:00 PM Yosry Ahmed <yosry@xxxxxxxxxx> wrote:
> > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> > index ded4372f2d499..7948e601ea784 100644
> > --- a/arch/x86/kvm/svm/svm.c
> > +++ b/arch/x86/kvm/svm/svm.c
> > @@ -3670,11 +3670,21 @@ static int pre_svm_run(struct kvm_vcpu *vcpu)
> > * This is done here (as opposed to when preparing vmcb02) to use the
> > * most up-to-date value of RIP regardless of the order of restoring
> > * registers and nested state in the vCPU save+restore path.
> > + *
> > + * Similarly, initialize svm->soft_int_* fields here to use the most
> > + * up-to-date values of RIP and CS base, regardless of restore order.
> > */
> > if (is_guest_mode(vcpu) && svm->nested.nested_run_pending) {
> > if (boot_cpu_has(X86_FEATURE_NRIPS) &&
> > !guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
> > svm->vmcb->control.next_rip = kvm_rip_read(vcpu);
> > +
> > + if (svm->soft_int_injected) {
> > + svm->soft_int_csbase = svm->vmcb->save.cs.base;
> > + svm->soft_int_old_rip = kvm_rip_read(vcpu);
> > + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
> > + svm->soft_int_next_rip = kvm_rip_read(vcpu);
> > + }
>
> AI found a bug here. These fields will be left uninitialized if we
> cancel injection before pre_svm_run() (e.g. due to
> kvm_vcpu_exit_request()). I was going to suggest moving this to
> pre-run, but this leaves a larger gap where RIP can be updated from
> under us. Sean has a better fixup in progress.
With comments to explain the madness, this should work as fixup. It's gross and
brittle, but the only alternative I see is to add a flag to differentiate the
save/restore case from the VMRUN case. Which isn't terrible, but IMO most of
the brittleness comes from the disaster that is the architecture.
Given that the soft int reinjection code will be inherently brittle, and that
the save/restore scenario will be _extremely_ rare, I think it's worth the extra
bit of nastiness so that _if_ there's a bug, it's at least slightly more likely
we'll find it via the normal VMRUN path.
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 258aa3bfb84b..2bfbaf92d3e5 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3755,6 +3755,16 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
return svm_invoke_exit_handler(vcpu, svm->vmcb->control.exit_code);
}
+static void svm_set_nested_run_soft_int_state(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ svm->soft_int_csbase = svm->vmcb->save.cs.base;
+ svm->soft_int_old_rip = kvm_rip_read(vcpu);
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
+ svm->soft_int_next_rip = kvm_rip_read(vcpu);
+}
+
static int pre_svm_run(struct kvm_vcpu *vcpu)
{
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
@@ -3797,12 +3807,8 @@ static int pre_svm_run(struct kvm_vcpu *vcpu)
!guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
svm->vmcb->control.next_rip = kvm_rip_read(vcpu);
- if (svm->soft_int_injected) {
- svm->soft_int_csbase = svm->vmcb->save.cs.base;
- svm->soft_int_old_rip = kvm_rip_read(vcpu);
- if (!guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
- svm->soft_int_next_rip = kvm_rip_read(vcpu);
- }
+ if (svm->soft_int_injected)
+ svm_set_nested_run_soft_int_state(vcpu);
}
return 0;
@@ -4250,6 +4256,9 @@ static void svm_complete_soft_interrupt(struct kvm_vcpu *vcpu, u8 vector,
bool is_soft = (type == SVM_EXITINTINFO_TYPE_SOFT);
struct vcpu_svm *svm = to_svm(vcpu);
+ if (is_guest_mode(vcpu) && svm->nested.nested_run_pending)
+ svm_set_nested_run_soft_int_state(vcpu);
+
/*
* If NRIPS is enabled, KVM must snapshot the pre-VMRUN next_rip that's
* associated with the original soft exception/interrupt. next_rip is