[RFC PATCH 05/14] KVM: x86: Refactor hardware enable/disable operations into a new file

From: Anish Ghulati
Date: Tue Nov 07 2023 - 15:20:40 EST


Move KVM's hardware enabling to vac.{h,c} as a first step towards
building VAC and all of the system-wide virtualization support as a
separate module.

Defer moving arch code to future patches to keep the diff reasonable.

No functional change intended.

Signed-off-by: Anish Ghulati <aghulati@xxxxxxxxxx>
---
virt/kvm/kvm_main.c | 197 +-------------------------------------------
virt/kvm/vac.c | 177 +++++++++++++++++++++++++++++++++++++++
virt/kvm/vac.h | 26 ++++++
3 files changed, 204 insertions(+), 196 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f585a159b4f5..fb50deaad3fd 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -59,6 +59,7 @@
#include "coalesced_mmio.h"
#include "async_pf.h"
#include "kvm_mm.h"
+#include "vac.h"
#include "vfio.h"

#include <trace/events/ipi.h>
@@ -140,8 +141,6 @@ static int kvm_no_compat_open(struct inode *inode, struct file *file)
#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl, \
.open = kvm_no_compat_open
#endif
-static int hardware_enable_all(void);
-static void hardware_disable_all(void);

static void kvm_io_bus_destroy(struct kvm_io_bus *bus);

@@ -5167,200 +5166,6 @@ static struct miscdevice kvm_dev = {
&kvm_chardev_ops,
};

-#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
-__visible bool kvm_rebooting;
-EXPORT_SYMBOL_GPL(kvm_rebooting);
-
-static DEFINE_PER_CPU(bool, hardware_enabled);
-static int kvm_usage_count;
-
-static int __hardware_enable_nolock(void)
-{
- if (__this_cpu_read(hardware_enabled))
- return 0;
-
- if (kvm_arch_hardware_enable()) {
- pr_info("kvm: enabling virtualization on CPU%d failed\n",
- raw_smp_processor_id());
- return -EIO;
- }
-
- __this_cpu_write(hardware_enabled, true);
- return 0;
-}
-
-static void hardware_enable_nolock(void *failed)
-{
- if (__hardware_enable_nolock())
- atomic_inc(failed);
-}
-
-static int kvm_online_cpu(unsigned int cpu)
-{
- int ret = 0;
-
- /*
- * Abort the CPU online process if hardware virtualization cannot
- * be enabled. Otherwise running VMs would encounter unrecoverable
- * errors when scheduled to this CPU.
- */
- mutex_lock(&kvm_lock);
- if (kvm_usage_count)
- ret = __hardware_enable_nolock();
- mutex_unlock(&kvm_lock);
- return ret;
-}
-
-static void hardware_disable_nolock(void *junk)
-{
- /*
- * Note, hardware_disable_all_nolock() tells all online CPUs to disable
- * hardware, not just CPUs that successfully enabled hardware!
- */
- if (!__this_cpu_read(hardware_enabled))
- return;
-
- kvm_arch_hardware_disable();
-
- __this_cpu_write(hardware_enabled, false);
-}
-
-static int kvm_offline_cpu(unsigned int cpu)
-{
- mutex_lock(&kvm_lock);
- if (kvm_usage_count)
- hardware_disable_nolock(NULL);
- mutex_unlock(&kvm_lock);
- return 0;
-}
-
-static void hardware_disable_all_nolock(void)
-{
- BUG_ON(!kvm_usage_count);
-
- kvm_usage_count--;
- if (!kvm_usage_count)
- on_each_cpu(hardware_disable_nolock, NULL, 1);
-}
-
-static void hardware_disable_all(void)
-{
- cpus_read_lock();
- mutex_lock(&kvm_lock);
- hardware_disable_all_nolock();
- mutex_unlock(&kvm_lock);
- cpus_read_unlock();
-}
-
-static int hardware_enable_all(void)
-{
- atomic_t failed = ATOMIC_INIT(0);
- int r;
-
- /*
- * Do not enable hardware virtualization if the system is going down.
- * If userspace initiated a forced reboot, e.g. reboot -f, then it's
- * possible for an in-flight KVM_CREATE_VM to trigger hardware enabling
- * after kvm_reboot() is called. Note, this relies on system_state
- * being set _before_ kvm_reboot(), which is why KVM uses a syscore ops
- * hook instead of registering a dedicated reboot notifier (the latter
- * runs before system_state is updated).
- */
- if (system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF ||
- system_state == SYSTEM_RESTART)
- return -EBUSY;
-
- /*
- * When onlining a CPU, cpu_online_mask is set before kvm_online_cpu()
- * is called, and so on_each_cpu() between them includes the CPU that
- * is being onlined. As a result, hardware_enable_nolock() may get
- * invoked before kvm_online_cpu(), which also enables hardware if the
- * usage count is non-zero. Disable CPU hotplug to avoid attempting to
- * enable hardware multiple times.
- */
- cpus_read_lock();
- mutex_lock(&kvm_lock);
-
- r = 0;
-
- kvm_usage_count++;
- if (kvm_usage_count == 1) {
- on_each_cpu(hardware_enable_nolock, &failed, 1);
-
- if (atomic_read(&failed)) {
- hardware_disable_all_nolock();
- r = -EBUSY;
- }
- }
-
- mutex_unlock(&kvm_lock);
- cpus_read_unlock();
-
- return r;
-}
-
-static void kvm_shutdown(void)
-{
- /*
- * Disable hardware virtualization and set kvm_rebooting to indicate
- * that KVM has asynchronously disabled hardware virtualization, i.e.
- * that relevant errors and exceptions aren't entirely unexpected.
- * Some flavors of hardware virtualization need to be disabled before
- * transferring control to firmware (to perform shutdown/reboot), e.g.
- * on x86, virtualization can block INIT interrupts, which are used by
- * firmware to pull APs back under firmware control. Note, this path
- * is used for both shutdown and reboot scenarios, i.e. neither name is
- * 100% comprehensive.
- */
- pr_info("kvm: exiting hardware virtualization\n");
- kvm_rebooting = true;
- on_each_cpu(hardware_disable_nolock, NULL, 1);
-}
-
-static int kvm_suspend(void)
-{
- /*
- * Secondary CPUs and CPU hotplug are disabled across the suspend/resume
- * callbacks, i.e. no need to acquire kvm_lock to ensure the usage count
- * is stable. Assert that kvm_lock is not held to ensure the system
- * isn't suspended while KVM is enabling hardware. Hardware enabling
- * can be preempted, but the task cannot be frozen until it has dropped
- * all locks (userspace tasks are frozen via a fake signal).
- */
- lockdep_assert_not_held(&kvm_lock);
- lockdep_assert_irqs_disabled();
-
- if (kvm_usage_count)
- hardware_disable_nolock(NULL);
- return 0;
-}
-
-static void kvm_resume(void)
-{
- lockdep_assert_not_held(&kvm_lock);
- lockdep_assert_irqs_disabled();
-
- if (kvm_usage_count)
- WARN_ON_ONCE(__hardware_enable_nolock());
-}
-
-static struct syscore_ops kvm_syscore_ops = {
- .suspend = kvm_suspend,
- .resume = kvm_resume,
- .shutdown = kvm_shutdown,
-};
-#else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */
-static int hardware_enable_all(void)
-{
- return 0;
-}
-
-static void hardware_disable_all(void)
-{
-
-}
-#endif /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */
-
static void kvm_iodevice_destructor(struct kvm_io_device *dev)
{
if (dev->ops->destructor)
diff --git a/virt/kvm/vac.c b/virt/kvm/vac.c
index 18d2ae7d3e47..ff034a53af50 100644
--- a/virt/kvm/vac.c
+++ b/virt/kvm/vac.c
@@ -1,3 +1,180 @@
// SPDX-License-Identifier: GPL-2.0-only

#include "vac.h"
+
+#include <linux/cpu.h>
+#include <linux/percpu.h>
+#include <linux/mutex.h>
+
+#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
+DEFINE_MUTEX(vac_lock);
+
+__visible bool kvm_rebooting;
+EXPORT_SYMBOL_GPL(kvm_rebooting);
+
+static DEFINE_PER_CPU(bool, hardware_enabled);
+static int kvm_usage_count;
+
+static int __hardware_enable_nolock(void)
+{
+ if (__this_cpu_read(hardware_enabled))
+ return 0;
+
+ if (kvm_arch_hardware_enable()) {
+ pr_info("kvm: enabling virtualization on CPU%d failed\n",
+ raw_smp_processor_id());
+ return -EIO;
+ }
+
+ __this_cpu_write(hardware_enabled, true);
+ return 0;
+}
+
+static void hardware_enable_nolock(void *failed)
+{
+ if (__hardware_enable_nolock())
+ atomic_inc(failed);
+}
+
+int kvm_online_cpu(unsigned int cpu)
+{
+ int ret = 0;
+
+ /*
+ * Abort the CPU online process if hardware virtualization cannot
+ * be enabled. Otherwise running VMs would encounter unrecoverable
+ * errors when scheduled to this CPU.
+ */
+ mutex_lock(&vac_lock);
+ if (kvm_usage_count)
+ ret = __hardware_enable_nolock();
+ mutex_unlock(&vac_lock);
+ return ret;
+}
+
+static void hardware_disable_nolock(void *junk)
+{
+ /*
+ * Note, hardware_disable_all_nolock() tells all online CPUs to disable
+ * hardware, not just CPUs that successfully enabled hardware!
+ */
+ if (!__this_cpu_read(hardware_enabled))
+ return;
+
+ kvm_arch_hardware_disable();
+
+ __this_cpu_write(hardware_enabled, false);
+}
+
+int kvm_offline_cpu(unsigned int cpu)
+{
+ mutex_lock(&vac_lock);
+ if (kvm_usage_count)
+ hardware_disable_nolock(NULL);
+ mutex_unlock(&vac_lock);
+ return 0;
+}
+
+static void hardware_disable_all_nolock(void)
+{
+ BUG_ON(!kvm_usage_count);
+
+ kvm_usage_count--;
+ if (!kvm_usage_count)
+ on_each_cpu(hardware_disable_nolock, NULL, 1);
+}
+
+void hardware_disable_all(void)
+{
+ cpus_read_lock();
+ mutex_lock(&vac_lock);
+ hardware_disable_all_nolock();
+ mutex_unlock(&vac_lock);
+ cpus_read_unlock();
+}
+
+int hardware_enable_all(void)
+{
+ atomic_t failed = ATOMIC_INIT(0);
+ int r = 0;
+
+ /*
+ * When onlining a CPU, cpu_online_mask is set before kvm_online_cpu()
+ * is called, and so on_each_cpu() between them includes the CPU that
+ * is being onlined. As a result, hardware_enable_nolock() may get
+ * invoked before kvm_online_cpu(), which also enables hardware if the
+ * usage count is non-zero. Disable CPU hotplug to avoid attempting to
+ * enable hardware multiple times.
+ */
+ cpus_read_lock();
+ mutex_lock(&vac_lock);
+
+ kvm_usage_count++;
+ if (kvm_usage_count == 1) {
+ on_each_cpu(hardware_enable_nolock, &failed, 1);
+
+ if (atomic_read(&failed)) {
+ hardware_disable_all_nolock();
+ r = -EBUSY;
+ }
+ }
+
+ mutex_unlock(&vac_lock);
+ cpus_read_unlock();
+
+ return r;
+}
+
+static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
+ void *v)
+{
+ /*
+ * Some (well, at least mine) BIOSes hang on reboot if
+ * in vmx root mode.
+ *
+ * And Intel TXT required VMX off for all cpu when system shutdown.
+ */
+ pr_info("kvm: exiting hardware virtualization\n");
+ kvm_rebooting = true;
+ on_each_cpu(hardware_disable_nolock, NULL, 1);
+ return NOTIFY_OK;
+}
+
+static int kvm_suspend(void)
+{
+ /*
+ * Secondary CPUs and CPU hotplug are disabled across the suspend/resume
+ * callbacks, i.e. no need to acquire vac_lock to ensure the usage count
+ * is stable. Assert that vac_lock is not held to ensure the system
+ * isn't suspended while KVM is enabling hardware. Hardware enabling
+ * can be preempted, but the task cannot be frozen until it has dropped
+ * all locks (userspace tasks are frozen via a fake signal).
+ */
+ lockdep_assert_not_held(&vac_lock);
+ lockdep_assert_irqs_disabled();
+
+ if (kvm_usage_count)
+ hardware_disable_nolock(NULL);
+ return 0;
+}
+
+static void kvm_resume(void)
+{
+ lockdep_assert_not_held(&vac_lock);
+ lockdep_assert_irqs_disabled();
+
+ if (kvm_usage_count)
+ WARN_ON_ONCE(__hardware_enable_nolock());
+}
+
+struct notifier_block kvm_reboot_notifier = {
+ .notifier_call = kvm_reboot,
+ .priority = 0,
+};
+
+struct syscore_ops kvm_syscore_ops = {
+ .suspend = kvm_suspend,
+ .resume = kvm_resume,
+};
+
+#endif
diff --git a/virt/kvm/vac.h b/virt/kvm/vac.h
index 8f7123a916c5..aed178a16bdb 100644
--- a/virt/kvm/vac.h
+++ b/virt/kvm/vac.h
@@ -3,4 +3,30 @@
#ifndef __KVM_VAC_H__
#define __KVM_VAC_H__

+#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
+
+#include <linux/kvm_host.h>
+#include <linux/syscore_ops.h>
+
+int kvm_online_cpu(unsigned int cpu);
+int kvm_offline_cpu(unsigned int cpu);
+void hardware_disable_all(void);
+int hardware_enable_all(void);
+
+extern struct notifier_block kvm_reboot_notifier;
+
+extern struct syscore_ops kvm_syscore_ops;
+
+#else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */
+static inline int hardware_enable_all(void)
+{
+ return 0;
+}
+
+static inline void hardware_disable_all(void)
+{
+
+}
+#endif /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */
+
#endif
--
2.42.0.869.gea05f2083d-goog