[PATCH v5 09/30] KVM: Drop kvm_count_lock and instead protect kvm_usage_count with kvm_lock

From: isaku . yamahata
Date: Thu Sep 22 2022 - 14:21:53 EST


From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>

Because kvm_count_lock unnecessarily complicates the KVM locking convention
Drop kvm_count_lock and instead protect kvm_usage_count with kvm_lock for
simplicity. kvm_arch_hardware_enable/disable() callbacks depend on
non-preemptiblity with the spin lock. Add preempt_disable/enable()
around hardware enable/disable callback to keep the assumption.

Because kvm_suspend() and kvm_resume() is called with interrupt disabled,
they don't need preempt_disable/enable() pair.

Opportunistically add some comments on locking.

Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx>
Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
---
Documentation/virt/kvm/locking.rst | 14 +++-----
virt/kvm/kvm_main.c | 56 +++++++++++++++++++++++-------
2 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
index 845a561629f1..55d6559ace2a 100644
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -216,15 +216,11 @@ time it will be set using the Dirty tracking mechanism described above.
:Type: mutex
:Arch: any
:Protects: - vm_list
-
-``kvm_count_lock``
-^^^^^^^^^^^^^^^^^^
-
-:Type: raw_spinlock_t
-:Arch: any
-:Protects: - hardware virtualization enable/disable
-:Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
- migration.
+ - kvm_usage_count
+ - hardware virtualization enable/disable
+:Comment: Use cpus_read_lock() for hardware virtualization enable/disable
+ because hardware enabling/disabling must be atomic /wrt
+ CPU hotplug. The lock order is cpus lock => kvm_lock.

``kvm->mn_invalidate_lock``
^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b1bf44af523c..c4b908553726 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -100,7 +100,6 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
*/

DEFINE_MUTEX(kvm_lock);
-static DEFINE_RAW_SPINLOCK(kvm_count_lock);
LIST_HEAD(vm_list);

static cpumask_var_t cpus_hardware_enabled;
@@ -4996,6 +4995,8 @@ static void hardware_enable_nolock(void *junk)
int cpu = raw_smp_processor_id();
int r;

+ WARN_ON_ONCE(preemptible());
+
if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
return;

@@ -5019,7 +5020,7 @@ static int kvm_online_cpu(unsigned int cpu)
if (ret)
return ret;

- raw_spin_lock(&kvm_count_lock);
+ mutex_lock(&kvm_lock);
/*
* Abort the CPU online process if hardware virtualization cannot
* be enabled. Otherwise running VMs would encounter unrecoverable
@@ -5028,13 +5029,20 @@ static int kvm_online_cpu(unsigned int cpu)
if (kvm_usage_count) {
WARN_ON_ONCE(atomic_read(&hardware_enable_failed));

+ /*
+ * arch callback kvm_arch_hardware_eanble() assumes that
+ * preemption is disabled for historical reason. Disable
+ * preemption until all arch callbacks are fixed.
+ */
+ preempt_disable();
hardware_enable_nolock(NULL);
+ preempt_enable();
if (atomic_read(&hardware_enable_failed)) {
atomic_set(&hardware_enable_failed, 0);
ret = -EIO;
}
}
- raw_spin_unlock(&kvm_count_lock);
+ mutex_unlock(&kvm_lock);
return ret;
}

@@ -5042,6 +5050,8 @@ static void hardware_disable_nolock(void *junk)
{
int cpu = raw_smp_processor_id();

+ WARN_ON_ONCE(preemptible());
+
if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
return;
cpumask_clear_cpu(cpu, cpus_hardware_enabled);
@@ -5050,10 +5060,18 @@ static void hardware_disable_nolock(void *junk)

static int kvm_offline_cpu(unsigned int cpu)
{
- raw_spin_lock(&kvm_count_lock);
- if (kvm_usage_count)
+ mutex_lock(&kvm_lock);
+ if (kvm_usage_count) {
+ /*
+ * arch callback kvm_arch_hardware_disable() assumes that
+ * preemption is disabled for historical reason. Disable
+ * preemption until all arch callbacks are fixed.
+ */
+ preempt_disable();
hardware_disable_nolock(NULL);
- raw_spin_unlock(&kvm_count_lock);
+ preempt_enable();
+ }
+ mutex_unlock(&kvm_lock);
return 0;
}

@@ -5068,9 +5086,11 @@ static void hardware_disable_all_nolock(void)

static void hardware_disable_all(void)
{
- raw_spin_lock(&kvm_count_lock);
+ cpus_read_lock();
+ mutex_lock(&kvm_lock);
hardware_disable_all_nolock();
- raw_spin_unlock(&kvm_count_lock);
+ mutex_unlock(&kvm_lock);
+ cpus_read_unlock();
}

static int hardware_enable_all(void)
@@ -5088,7 +5108,7 @@ static int hardware_enable_all(void)
* Disable CPU hotplug to prevent this case from happening.
*/
cpus_read_lock();
- raw_spin_lock(&kvm_count_lock);
+ mutex_lock(&kvm_lock);

kvm_usage_count++;
if (kvm_usage_count == 1) {
@@ -5101,7 +5121,7 @@ static int hardware_enable_all(void)
}
}

- raw_spin_unlock(&kvm_count_lock);
+ mutex_unlock(&kvm_lock);
cpus_read_unlock();

return r;
@@ -5708,15 +5728,27 @@ static void kvm_init_debug(void)

static int kvm_suspend(void)
{
- if (kvm_usage_count)
+ /*
+ * The caller ensures that CPU hotplug is disabled by
+ * cpu_hotplug_disable() and other CPUs are offlined. No need for
+ * locking.
+ */
+ lockdep_assert_not_held(&kvm_lock);
+
+ if (kvm_usage_count) {
+ /*
+ * Because kvm_suspend() is called with interrupt disabled, no
+ * need to disable preemption.
+ */
hardware_disable_nolock(NULL);
+ }
return 0;
}

static void kvm_resume(void)
{
if (kvm_usage_count) {
- lockdep_assert_not_held(&kvm_count_lock);
+ lockdep_assert_not_held(&kvm_lock);
hardware_enable_nolock(NULL);
}
}
--
2.25.1