[PATCH 01/12] KVM: Use a local struct to do the initial vfs_poll() on an irqfd

From: Sean Christopherson
Date: Tue Apr 01 2025 - 16:47:12 EST


Use a function-local struct for the poll_table passted to vfs_poll(), as
nothing in the vfs_poll() callchain grabs a long-term reference to the
structure, i.e. its lifetime doesn't need to be tied to the irqfd. Using
a local structure will also allow propagating failures out of the polling
callback without further polluting kvm_kernel_irqfd.

Opportunstically rename irqfd_ptable_queue_proc() to kvm_irqfd_register()
to capture what it actually does.

Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
include/linux/kvm_irqfd.h | 1 -
virt/kvm/eventfd.c | 26 +++++++++++++++++---------
2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
index 8ad43692e3bb..44fd2a20b09e 100644
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -55,7 +55,6 @@ struct kvm_kernel_irqfd {
/* Used for setup/shutdown */
struct eventfd_ctx *eventfd;
struct list_head list;
- poll_table pt;
struct work_struct shutdown;
struct irq_bypass_consumer consumer;
struct irq_bypass_producer *producer;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 249ba5b72e9b..01c6eb4dceb8 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -245,12 +245,17 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
return ret;
}

-static void
-irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
- poll_table *pt)
+struct kvm_irqfd_pt {
+ struct kvm_kernel_irqfd *irqfd;
+ poll_table pt;
+};
+
+static void kvm_irqfd_register(struct file *file, wait_queue_head_t *wqh,
+ poll_table *pt)
{
- struct kvm_kernel_irqfd *irqfd =
- container_of(pt, struct kvm_kernel_irqfd, pt);
+ struct kvm_irqfd_pt *p = container_of(pt, struct kvm_irqfd_pt, pt);
+ struct kvm_kernel_irqfd *irqfd = p->irqfd;
+
add_wait_queue_priority(wqh, &irqfd->wait);
}

@@ -305,6 +310,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
{
struct kvm_kernel_irqfd *irqfd, *tmp;
struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
+ struct kvm_irqfd_pt irqfd_pt;
int ret;
__poll_t events;
int idx;
@@ -394,7 +400,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
* a callback whenever someone signals the underlying eventfd
*/
init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
- init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

spin_lock_irq(&kvm->irqfds.lock);

@@ -416,11 +421,14 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
spin_unlock_irq(&kvm->irqfds.lock);

/*
- * Check if there was an event already pending on the eventfd
- * before we registered, and trigger it as if we didn't miss it.
+ * Register the irqfd with the eventfd by polling on the eventfd. If
+ * there was en event pending on the eventfd prior to registering,
+ * manually trigger IRQ injection.
*/
- events = vfs_poll(fd_file(f), &irqfd->pt);
+ irqfd_pt.irqfd = irqfd;
+ init_poll_funcptr(&irqfd_pt.pt, kvm_irqfd_register);

+ events = vfs_poll(fd_file(f), &irqfd_pt.pt);
if (events & EPOLLIN)
schedule_work(&irqfd->inject);

--
2.49.0.504.g3bcea36a83-goog