[PATCH 3/6] KVM: Grab vcpu->mutex across installing the vCPU's fd and bumping online_vcpus

From: Sean Christopherson
Date: Wed Oct 09 2024 - 11:13:11 EST


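During vCPU creation, acquire vcpu->mutex prior to exposing the vCPU to
userspace, and hold the mutex until online_vcpus is bumped, i.e. until the
vCPU is fully online from KVM's perspective.  Without this, userspace can
invoke a vCPU ioctl() on the newly installed fd before online_vcpus is
incremented, in which case KVM thinks the _current_ vCPU doesn't exist and
can be tricked into a NULL-pointer dereference.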

To ensure asynchronous vCPU ioctls also wait for the vCPU to come online,
explicitly check online_vcpus at the start of kvm_vcpu_ioctl(), and take
the vCPU's mutex to wait if necessary (having to wait for any ioctl should
be exceedingly rare, i.e. not worth optimizing).

Reported-by: Will Deacon <will@xxxxxxxxxx>
Reported-by: Michal Luczaj <mhal@xxxxxxx>
Link: https://lore.kernel.org/all/20240730155646.1687-1-will@xxxxxxxxxx
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
virt/kvm/kvm_main.c | 47 ++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 05cbb2548d99..fca9f74e9544 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4287,7 +4287,14 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
if (r)
goto unlock_vcpu_destroy;

- /* Now it's all set up, let userspace reach it */
+ /*
+ * Now it's all set up, let userspace reach it. Grab the vCPU's mutex
+ * so that userspace can't invoke vCPU ioctl()s until the vCPU is fully
+ * visible (per online_vcpus), e.g. so that KVM doesn't get tricked
+ * into a NULL-pointer dereference because KVM thinks the _current_
+ * vCPU doesn't exist.
+ */
+ mutex_lock(&vcpu->mutex);
kvm_get_kvm(kvm);
r = create_vcpu_fd(vcpu);
if (r < 0)
@@ -4304,6 +4311,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
*/
smp_wmb();
atomic_inc(&kvm->online_vcpus);
+ mutex_unlock(&vcpu->mutex);

mutex_unlock(&kvm->lock);
kvm_arch_vcpu_postcreate(vcpu);
@@ -4311,6 +4319,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
return r;

kvm_put_xa_release:
+ mutex_unlock(&vcpu->mutex);
kvm_put_kvm_no_destroy(kvm);
xa_release(&kvm->vcpu_array, vcpu->vcpu_idx);
unlock_vcpu_destroy:
@@ -4437,6 +4446,33 @@ static int kvm_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
}
#endif

+static int kvm_wait_for_vcpu_online(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ /*
+ * In practice, this happy path will always be taken, as a well-behaved
+ * VMM will never invoke a vCPU ioctl() before KVM_CREATE_VCPU returns.
+ */
+ if (likely(vcpu->vcpu_idx < atomic_read(&kvm->online_vcpus)))
+ return 0;
+
+ /*
+ * Acquire and release the vCPU's mutex to wait for vCPU creation to
+ * complete (kvm_vm_ioctl_create_vcpu() holds the mutex until the vCPU
+ * is fully online). Bail with -EINTR if a fatal signal arrives first.
+ */
+ if (mutex_lock_killable(&vcpu->mutex))
+ return -EINTR;
+
+ mutex_unlock(&vcpu->mutex);
+
+ if (WARN_ON_ONCE(!kvm_get_vcpu(kvm, vcpu->vcpu_idx)))
+ return -EIO;
+
+ return 0;
+}
+
static long kvm_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -4452,6 +4488,15 @@ static long kvm_vcpu_ioctl(struct file *filp,
if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
return -EINVAL;

+ /*
+ * Wait for the vCPU to be online before handling the ioctl(), as KVM
+ * assumes the vCPU is reachable via vcpu_array, i.e. may dereference
+ * a NULL pointer if userspace invokes an ioctl() before KVM is ready.
+ */
+ r = kvm_wait_for_vcpu_online(vcpu);
+ if (r)
+ return r;
+
/*
* Some architectures have vcpu ioctls that are asynchronous to vcpu
* execution; mutex_lock() would break them.
--
2.47.0.rc0.187.ge670bccf7e-goog