[PATCH] KVM: Unlimit number of ioeventfd assignments for real

From: Peter Xu
Date: Fri Sep 27 2019 - 21:42:48 EST


Previously we've tried to unlimit ioeventfd creation (6ea34c9b78c1,
"kvm: exclude ioeventfd from counting kvm_io_range limit",
2013-06-04), because that can be easily done by fd limitations and
otherwise it can easily reach the current maximum of 1000 iodevices.
Meanwhile, we still use the counter to limit the maximum allowed kvm
io devices to be created besides ioeventfd.

6ea34c9b78c1 achieved that in most cases, however it'll still fali the
ioeventfd creation when non-ioeventfd io devices overflows to 1000.
Then the next ioeventfd creation will fail while logically it should
be the next non-ioeventfd iodevice creation to fail.

That's not really a big problem at all because when it happens it
probably means something has leaked in userspace (or even malicious
program) so it's a bug to fix there. However the error message like
"ioeventfd creation failed" with an -ENOSPACE is really confusing and
may let people think about the fact that it's the ioeventfd that is
leaked (while in most cases it's not!).

Let's use this patch to unlimit the creation of ioeventfd for real
this time, assuming this is also a bugfix of 6ea34c9b78c1. To me more
importantly, when with a bug in userspace this patch can probably give
us another more meaningful failure on what has overflowed/leaked
rather than "ioeventfd creation failure: -ENOSPC".

CC: Dr. David Alan Gilbert <dgilbert@xxxxxxxxxx>
Signed-off-by: Peter Xu <peterx@xxxxxxxxxx>
---
include/linux/kvm_host.h | 3 +++
virt/kvm/eventfd.c | 4 ++--
virt/kvm/kvm_main.c | 23 ++++++++++++++++++++---
3 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index fcb46b3374c6..d8530e7d85d4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -192,6 +192,9 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
int len, void *val);
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, struct kvm_io_device *dev);
+int kvm_io_bus_register_dev_ioeventfd(struct kvm *kvm, enum kvm_bus bus_idx,
+ gpa_t addr, int len,
+ struct kvm_io_device *dev);
void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
struct kvm_io_device *dev);
struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 67b6fc153e9c..3cb0e1c3279b 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -823,8 +823,8 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm,

kvm_iodevice_init(&p->dev, &ioeventfd_ops);

- ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
- &p->dev);
+ ret = kvm_io_bus_register_dev_ioeventfd(kvm, bus_idx, p->addr,
+ p->length, &p->dev);
if (ret < 0)
goto unlock_fail;

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c6a91b044d8d..242cfcaa9a56 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3809,8 +3809,10 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
}

/* Caller must hold slots_lock. */
-int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
- int len, struct kvm_io_device *dev)
+static int __kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+ gpa_t addr, int len,
+ struct kvm_io_device *dev,
+ bool check_limit)
{
int i;
struct kvm_io_bus *new_bus, *bus;
@@ -3821,7 +3823,8 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
return -ENOMEM;

/* exclude ioeventfd which is limited by maximum fd */
- if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
+ if (check_limit &&
+ (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1))
return -ENOSPC;

new_bus = kmalloc(struct_size(bus, range, bus->dev_count + 1),
@@ -3851,6 +3854,20 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
return 0;
}

+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+ int len, struct kvm_io_device *dev)
+{
+ return __kvm_io_bus_register_dev(kvm, bus_idx, addr, len, dev, true);
+}
+
+int kvm_io_bus_register_dev_ioeventfd(struct kvm *kvm, enum kvm_bus bus_idx,
+ gpa_t addr, int len,
+ struct kvm_io_device *dev)
+{
+ return __kvm_io_bus_register_dev(kvm, bus_idx, addr, len, dev, false);
+}
+
+
/* Caller must hold slots_lock. */
void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
struct kvm_io_device *dev)
--
2.21.0