[PATCH] KVM: fix OOPS on flush_work

From: Paolo Bonzini
Date: Wed Oct 26 2016 - 07:45:00 EST


The conversion done by commit 3706feacd007 ("KVM: Remove deprecated
create_singlethread_workqueue") is broken. It flushes a single work
item &irqfd->shutdown instead of all of them, and even worse if there
is no irqfd on the list then you get a NULL pointer dereference.
Revert the virt/kvm/eventfd.c part of that patch; to avoid the
deprecated function, just allocate our own workqueue---it does
not even have to be unbound---with alloc_workqueue.

Fixes: 3706feacd007
Cc: stable@xxxxxxxxxxxxxxx
Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
virt/kvm/eventfd.c | 22 +++++++++++++++++++---
virt/kvm/kvm_main.c | 6 ++++++
2 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index f397e9b20370..a29786dd9522 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -42,6 +42,7 @@

#ifdef CONFIG_HAVE_KVM_IRQFD

+static struct workqueue_struct *irqfd_cleanup_wq;

static void
irqfd_inject(struct work_struct *work)
@@ -167,7 +168,7 @@

list_del_init(&irqfd->list);

- schedule_work(&irqfd->shutdown);
+ queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}

int __attribute__((weak)) kvm_arch_set_irq_inatomic(
@@ -554,7 +555,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
* so that we guarantee there will not be any more interrupts on this
* gsi once this deassign function returns.
*/
- flush_work(&irqfd->shutdown);
+ flush_workqueue(irqfd_cleanup_wq);

return 0;
}
@@ -591,7 +592,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
* Block until we know all outstanding shutdown jobs have completed
* since we do not take a kvm* reference.
*/
- flush_work(&irqfd->shutdown);
+ flush_workqueue(irqfd_cleanup_wq);

}

@@ -621,8 +622,23 @@ void kvm_irq_routing_update(struct kvm *kvm)
spin_unlock_irq(&kvm->irqfds.lock);
}

+/*
+ * create a host-wide workqueue for issuing deferred shutdown requests
+ * aggregated from all vm* instances. We need our own isolated
+ * queue to ease flushing work items when a VM exits.
+ */
+int kvm_irqfd_init(void)
+{
+ irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0);
+ if (!irqfd_cleanup_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
void kvm_irqfd_exit(void)
{
+ destroy_workqueue(irqfd_cleanup_wq);
}
#endif

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 28510e72618a..d92c3d5b0fbe 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3846,7 +3846,12 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
* kvm_arch_init makes sure there's at most one caller
* for architectures that support multiple implementations,
* like intel and amd on x86.
+ * kvm_arch_init must be called before kvm_irqfd_init to avoid creating
+ * conflicts in case kvm is already setup for another implementation.
*/
+ r = kvm_irqfd_init();
+ if (r)
+ goto out_irqfd;

if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
r = -ENOMEM;
@@ -3928,6 +3933,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
free_cpumask_var(cpus_hardware_enabled);
out_free_0:
kvm_irqfd_exit();
+out_irqfd:
kvm_arch_exit();
out_fail:
return r;
--
1.8.3.1