[PATCH 1/2] KVM: x86: fix usage of kvm_lock in set_nx_huge_pages()

From: Paolo Bonzini
Date: Fri Jan 24 2025 - 14:11:38 EST


Protect the whole function with kvm_lock so that all accesses to
nx_hugepage_mitigation_hard_disabled are under the lock; but drop it
when calling out to the MMU to avoid complex circular locking
situations such as the following, where each column is a separate
task and time flows downward:

__kvm_set_memory_region()
  lock(&kvm->slots_lock)
                        set_nx_huge_pages()
                        lock(kvm_lock)
                        lock(&kvm->slots_lock)
                                              __kvmclock_cpufreq_notifier()
                                              lock(cpu_hotplug_lock)
                                              lock(kvm_lock)
                                                                    KVM_RUN
                                                                    lock(&kvm->srcu)
                                                                    kvm_lapic_set_base()
                                                                    static_branch_inc()
                                                                    lock(cpu_hotplug_lock)
  sync(&kvm->srcu)

The deadlock is mostly theoretical, but it works as follows:
- __kvm_set_memory_region() waits for kvm->srcu with kvm->slots_lock taken
- set_nx_huge_pages() waits for kvm->slots_lock with kvm_lock taken
- __kvmclock_cpufreq_notifier() waits for kvm_lock with cpu_hotplug_lock taken
- KVM_RUN waits for cpu_hotplug_lock with kvm->srcu taken
- therefore __kvm_set_memory_region() never completes synchronize_srcu(&kvm->srcu).
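
The kvm_lock -> kvm->slots_lock link in the chain comes from the
pre-patch shape of the zapping loop (reconstructed here from the
removed lines in the diff below):

	mutex_lock(&kvm_lock);
	list_for_each_entry(kvm, &vm_list, vm_list) {
		/* kvm->slots_lock nests inside kvm_lock here */
		mutex_lock(&kvm->slots_lock);
		kvm_mmu_zap_all_fast(kvm);
		mutex_unlock(&kvm->slots_lock);

		vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread);
	}
	mutex_unlock(&kvm_lock);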

To break the deadlock, release kvm_lock before taking kvm->slots_lock
and reacquire it afterwards to continue walking vm_list; the same
scenario then runs to completion:

  lock(&kvm->slots_lock)
                        set_nx_huge_pages()
                        lock(kvm_lock)
                                              __kvmclock_cpufreq_notifier()
                                              lock(cpu_hotplug_lock)
                                              lock(kvm_lock)
                                                                    lock(&kvm->srcu)
                                                                    kvm_lapic_set_base()
                                                                    static_branch_inc()
                                                                    lock(cpu_hotplug_lock)
                        unlock(kvm_lock)
                                              unlock(kvm_lock)
                                              unlock(cpu_hotplug_lock)
                                                                    unlock(cpu_hotplug_lock)
                                                                    unlock(&kvm->srcu)
                        lock(&kvm->slots_lock)
  sync(&kvm->srcu)
  unlock(&kvm->slots_lock)
                        unlock(&kvm->slots_lock)
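
With the patch, each VM is pinned with a reference while kvm_lock is
dropped around the slots_lock critical section, and the guard(mutex)()
added at the top of the function (scope-based cleanup from
<linux/cleanup.h>) releases kvm_lock automatically on every return
path.  The loop then looks roughly like this:

	guard(mutex)(&kvm_lock);	/* held when the loop is entered */
	...
	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_get_kvm(kvm);	/* keep the VM alive across the unlock */
		mutex_unlock(&kvm_lock);

		mutex_lock(&kvm->slots_lock);	/* no longer nests inside kvm_lock */
		kvm_mmu_zap_all_fast(kvm);
		mutex_unlock(&kvm->slots_lock);

		vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread);

		mutex_lock(&kvm_lock);	/* retake before advancing to the next VM */
		kvm_put_kvm(kvm);
	}

Holding the reference keeps the VM from being destroyed, and hence
from being removed from vm_list, while kvm_lock is not held, so the
iteration can safely continue once the lock is retaken.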

Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
arch/x86/kvm/mmu/mmu.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 2401606db260..1d8b45e7bb94 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -7114,6 +7114,7 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
 	bool old_val = nx_huge_pages;
 	bool new_val;
 
+	guard(mutex)(&kvm_lock);
 	if (nx_hugepage_mitigation_hard_disabled)
 		return -EPERM;
 
@@ -7127,13 +7128,10 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
 	} else if (sysfs_streq(val, "never")) {
 		new_val = 0;
 
-		mutex_lock(&kvm_lock);
 		if (!list_empty(&vm_list)) {
-			mutex_unlock(&kvm_lock);
 			return -EBUSY;
 		}
 		nx_hugepage_mitigation_hard_disabled = true;
-		mutex_unlock(&kvm_lock);
 	} else if (kstrtobool(val, &new_val) < 0) {
 		return -EINVAL;
 	}
@@ -7143,16 +7141,19 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
 	if (new_val != old_val) {
 		struct kvm *kvm;
 
-		mutex_lock(&kvm_lock);
-
 		list_for_each_entry(kvm, &vm_list, vm_list) {
+			kvm_get_kvm(kvm);
+			mutex_unlock(&kvm_lock);
+
 			mutex_lock(&kvm->slots_lock);
 			kvm_mmu_zap_all_fast(kvm);
 			mutex_unlock(&kvm->slots_lock);
 
 			vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread);
+
+			mutex_lock(&kvm_lock);
+			kvm_put_kvm(kvm);
 		}
-		mutex_unlock(&kvm_lock);
 	}
 
 	return 0;
--
2.43.5