Re: [PATCH] KVM: Drop kvm_vcpu.ready to squash race where "ready" can get stuck "true"

From: Paolo Bonzini

Date: Tue Apr 14 2026 - 12:07:48 EST


On 4/9/26 23:33, Sean Christopherson wrote:
+static inline bool kvm_vcpu_is_runnable_and_scheduled_out(struct kvm_vcpu *vcpu)
+{
+ return READ_ONCE(vcpu->preempted) ||
+ (READ_ONCE(vcpu->scheduled_out) &&
+ READ_ONCE(vcpu->wants_to_run) &&

wants_to_run doesn't seem important here, because blocking will never be set outside KVM_RUN (unlike scheduled_out which can be set within any vcpu_load/vcpu_put pair, if you're unlucky enough).

+ READ_ONCE(vcpu->stat.generic.blocking) &&
+ !kvm_vcpu_is_blocking(vcpu));

If you get here you have done the finish_rcuwait() in kvm_vcpu_block(), meaning that you've already been scheduled in, haven't you? So, you would need something like this:

static inline bool kvm_vcpu_is_runnable_and_scheduled_out(struct kvm_vcpu *vcpu)
{
if (READ_ONCE(vcpu->preempted))
return true;

if (!READ_ONCE(vcpu->scheduled_out))
return false;
if (!READ_ONCE(vcpu->stat.generic.blocking))
return false;
return rcuwait_was_woken(kvm_arch_vcpu_get_wait(vcpu));
}

// in rcuwait.h
static inline bool rcuwait_was_woken(struct rcuwait *w)
{
guard(rcu)();
struct task_struct *t = rcu_access_pointer(w->task);
return t && !task_is_runnable(t);
}

Paolo

+}
+
#ifdef __KVM_HAVE_ARCH_INTC_INITIALIZED
/*
* returns true if the virtual interrupt controller is initialized and
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9faf70ccae7a..9f71e32daac5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -455,7 +455,6 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
kvm_vcpu_set_in_spin_loop(vcpu, false);
kvm_vcpu_set_dy_eligible(vcpu, false);
vcpu->preempted = false;
- vcpu->ready = false;
preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
vcpu->last_used_slot = NULL;
@@ -3803,7 +3802,6 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_halt);
bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
{
if (__kvm_vcpu_wake_up(vcpu)) {
- WRITE_ONCE(vcpu->ready, true);
++vcpu->stat.generic.halt_wakeup;
return true;
}
@@ -4008,7 +4006,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
continue;
vcpu = xa_load(&kvm->vcpu_array, idx);
- if (!READ_ONCE(vcpu->ready))
+ if (!kvm_vcpu_is_runnable_and_scheduled_out(vcpu))
continue;
if (kvm_vcpu_is_blocking(vcpu) && !vcpu_dy_runnable(vcpu))
continue;
@@ -6393,7 +6391,6 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
WRITE_ONCE(vcpu->preempted, false);
- WRITE_ONCE(vcpu->ready, false);
__this_cpu_write(kvm_running_vcpu, vcpu);
kvm_arch_vcpu_load(vcpu, cpu);
@@ -6408,10 +6405,9 @@ static void kvm_sched_out(struct preempt_notifier *pn,
WRITE_ONCE(vcpu->scheduled_out, true);
- if (task_is_runnable(current) && vcpu->wants_to_run) {
+ if (task_is_runnable(current) && vcpu->wants_to_run)
WRITE_ONCE(vcpu->preempted, true);
- WRITE_ONCE(vcpu->ready, true);
- }
+
kvm_arch_vcpu_put(vcpu);
__this_cpu_write(kvm_running_vcpu, NULL);
}

base-commit: b89df297a47e641581ee67793592e5c6ae0428f4