[RFC 09/12] Drivers: hv: vmbus: Use Linux IRQs to handle VMBus channel interrupts

From: mhkelley58
Date: Tue Jun 04 2024 - 01:13:31 EST


From: Michael Kelley <mhklinux@xxxxxxxxxxx>

Do the following:

1) Create an interrupt handler for VMBus channel interrupts by pulling
out portions of vmbus_chan_sched() into vmbus_chan_handler(). The
outer part of vmbus_chan_sched() that loops through the synic event
page bitmap remains unchanged. But when a pending VMBus channel
interrupt is found, call generic_handle_irq_desc() to invoke
handle_simple_irq() and then vmbus_chan_handler() for the channel's
IRQ. handle_simple_irq() does the IRQ stats for that channel's IRQ,
so that per-channel interrupt counts appear in /proc/interrupts. The
overall processing of VMBus channel interrupts is unchanged except
for the intervening handle_simple_irq() that does the stats. No acks
or EOIs are required for VMBus channel IRQs.

2) Update __vmbus_open() to call request_irq(), specifying the previously
set up channel IRQ name and vmbus_chan_handler() as the interrupt
handler. Set the IRQ affinity to the target_cpu assigned when the
channel was created.

3) Update vmbus_isr() to return "false" if it only handles VMBus
channel interrupts, which are passed to the per-channel IRQ handler.
If vmbus_isr() handles one or more control message interrupts, then
return "true". Update the related definitions to specify a boolean
return value.

4) The callers of vmbus_isr() increment IRQ stats for the top-level
IRQ only if "true" is returned. On x86, the caller is
sysvec_hyperv_callback(), which manages the stats directly. On
arm64, the caller is vmbus_percpu_isr(), which maps the boolean
return value to IRQ_NONE ("false") or IRQ_HANDLED ("true").
Then handle_percpu_demux_irq() conditionally updates the
stats based on the return value from vmbus_percpu_isr().

With these changes, interrupts from VMBus channels are now
processed as Linux IRQs that are demultiplexed from the main
VMBus interrupt.

Signed-off-by: Michael Kelley <mhklinux@xxxxxxxxxxx>
---
arch/x86/kernel/cpu/mshyperv.c | 9 ++--
drivers/hv/channel.c | 25 +++++++++-
drivers/hv/hv_common.c | 2 +-
drivers/hv/vmbus_drv.c | 84 +++++++++++++++++++---------------
include/asm-generic/mshyperv.h | 3 +-
5 files changed, 79 insertions(+), 44 deletions(-)

diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index e0fd57a8ba84..18bc282a99db 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -110,7 +110,7 @@ void hv_set_msr(unsigned int reg, u64 value)
}
EXPORT_SYMBOL_GPL(hv_set_msr);

-static void (*vmbus_handler)(void);
+static bool (*vmbus_handler)(void);
static void (*hv_stimer0_handler)(void);
static void (*hv_kexec_handler)(void);
static void (*hv_crash_handler)(struct pt_regs *regs);
@@ -119,9 +119,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
{
struct pt_regs *old_regs = set_irq_regs(regs);

- inc_irq_stat(irq_hv_callback_count);
- if (vmbus_handler)
- vmbus_handler();
+ if (vmbus_handler && vmbus_handler())
+ inc_irq_stat(irq_hv_callback_count);

if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
apic_eoi();
@@ -129,7 +128,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
set_irq_regs(old_regs);
}

-void hv_setup_vmbus_handler(void (*handler)(void))
+void hv_setup_vmbus_handler(bool (*handler)(void))
{
vmbus_handler = handler;
}
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index fb8cd8469328..1aa020b538f1 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -638,6 +638,7 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
struct vmbus_channel_open_channel *open_msg;
struct vmbus_channel_msginfo *open_info = NULL;
struct page *page = newchannel->ringbuffer_page;
+ u32 relid = newchannel->offermsg.child_relid;
u32 send_pages, recv_pages;
unsigned long flags;
int err;
@@ -685,13 +686,31 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
if (err)
goto error_free_gpadl;

+ /* Request the IRQ and assign to target_cpu */
+ err = request_irq(newchannel->irq, vmbus_chan_handler, 0,
+ newchannel->irq_name, newchannel);
+ if (err) {
+ pr_err("request_irq failed with %d for relid %d irq %d\n",
+ err, relid, newchannel->irq);
+ goto error_free_gpadl;
+ }
+ err = irq_set_affinity_and_hint(newchannel->irq,
+ cpumask_of(newchannel->target_cpu));
+ if (err) {
+ pr_err("irq_set_affinity_and_hint failed with %d for relid %d irq %d\n",
+ err, relid, newchannel->irq);
+ free_irq(newchannel->irq, newchannel);
+ goto error_free_gpadl;
+ }
+ newchannel->irq_requested = true;
+
/* Create and init the channel open message */
open_info = kzalloc(sizeof(*open_info) +
sizeof(struct vmbus_channel_open_channel),
GFP_KERNEL);
if (!open_info) {
err = -ENOMEM;
- goto error_free_gpadl;
+ goto error_free_irq;
}

init_completion(&open_info->waitevent);
@@ -759,6 +778,10 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
error_free_info:
kfree(open_info);
+error_free_irq:
+ irq_update_affinity_hint(newchannel->irq, NULL);
+ free_irq(newchannel->irq, newchannel);
+ newchannel->irq_requested = false;
error_free_gpadl:
vmbus_teardown_gpadl(newchannel, &newchannel->ringbuffer_gpadlhandle);
error_clean_ring:
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 9c452bfbd571..38a23add721c 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -610,7 +610,7 @@ bool __weak hv_isolation_type_tdx(void)
}
EXPORT_SYMBOL_GPL(hv_isolation_type_tdx);

-void __weak hv_setup_vmbus_handler(void (*handler)(void))
+void __weak hv_setup_vmbus_handler(bool (*handler)(void))
{
}
EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 8fd03d41e71a..b73be7c02d37 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1193,6 +1193,45 @@ static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
}
#endif /* CONFIG_PM_SLEEP */

+irqreturn_t vmbus_chan_handler(int irq, void *dev_id)
+{
+ void (*callback_fn)(void *context);
+ struct vmbus_channel *channel = dev_id;
+
+ /*
+ * Make sure that the ring buffer data structure doesn't get
+ * freed while we dereference the ring buffer pointer. Test
+ * for the channel's onchannel_callback being NULL within a
+ * sched_lock critical section. See also the inline comments
+ * in vmbus_reset_channel_cb().
+ */
+ spin_lock(&channel->sched_lock);
+
+ callback_fn = channel->onchannel_callback;
+ if (unlikely(callback_fn == NULL))
+ goto spin_unlock;
+
+ trace_vmbus_chan_sched(channel);
+
+ ++channel->interrupts;
+
+ switch (channel->callback_mode) {
+ case HV_CALL_ISR:
+ (*callback_fn)(channel->channel_callback_context);
+ break;
+
+ case HV_CALL_BATCHED:
+ hv_begin_read(&channel->inbound);
+ fallthrough;
+ case HV_CALL_DIRECT:
+ tasklet_schedule(&channel->callback_event);
+ }
+
+spin_unlock:
+ spin_unlock(&channel->sched_lock);
+ return IRQ_HANDLED;
+}
+
/*
* Schedule all channels with events pending
*/
@@ -1217,7 +1256,6 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
return;

for_each_set_bit(relid, recv_int_page, maxbits) {
- void (*callback_fn)(void *context);
struct vmbus_channel *channel;
struct irq_desc *desc;

@@ -1244,43 +1282,14 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
if (channel->rescind)
goto sched_unlock_rcu;

- /*
- * Make sure that the ring buffer data structure doesn't get
- * freed while we dereference the ring buffer pointer. Test
- * for the channel's onchannel_callback being NULL within a
- * sched_lock critical section. See also the inline comments
- * in vmbus_reset_channel_cb().
- */
- spin_lock(&channel->sched_lock);
-
- callback_fn = channel->onchannel_callback;
- if (unlikely(callback_fn == NULL))
- goto sched_unlock;
-
- trace_vmbus_chan_sched(channel);
-
- ++channel->interrupts;
-
- switch (channel->callback_mode) {
- case HV_CALL_ISR:
- (*callback_fn)(channel->channel_callback_context);
- break;
-
- case HV_CALL_BATCHED:
- hv_begin_read(&channel->inbound);
- fallthrough;
- case HV_CALL_DIRECT:
- tasklet_schedule(&channel->callback_event);
- }
+ generic_handle_irq_desc(desc);

-sched_unlock:
- spin_unlock(&channel->sched_lock);
sched_unlock_rcu:
rcu_read_unlock();
}
}

-static void vmbus_isr(void)
+static bool vmbus_isr(void)
{
struct hv_per_cpu_context *hv_cpu
= this_cpu_ptr(hv_context.cpu_context);
@@ -1299,15 +1308,18 @@ static void vmbus_isr(void)
vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
} else
tasklet_schedule(&hv_cpu->msg_dpc);
- }

- add_interrupt_randomness(vmbus_interrupt);
+ add_interrupt_randomness(vmbus_interrupt);
+ return true;
+ }
+ return false;
}

static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
{
- vmbus_isr();
- return IRQ_HANDLED;
+ if (vmbus_isr())
+ return IRQ_HANDLED;
+ return IRQ_NONE;
}

int vmbus_irq_set_affinity(struct irq_data *data,
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 0488ff8b511f..0a5559b9d5f7 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -178,7 +178,7 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)

int hv_get_hypervisor_version(union hv_hypervisor_version_info *info);

-void hv_setup_vmbus_handler(void (*handler)(void));
+void hv_setup_vmbus_handler(bool (*handler)(void));
void hv_remove_vmbus_handler(void);
void hv_setup_stimer0_handler(void (*handler)(void));
void hv_remove_stimer0_handler(void);
@@ -188,6 +188,7 @@ void hv_remove_kexec_handler(void);
void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs));
void hv_remove_crash_handler(void);

+extern irqreturn_t vmbus_chan_handler(int irq, void *dev_id);
extern void vmbus_irq_mask(struct irq_data *data);
extern void vmbus_irq_unmask(struct irq_data *data);
extern int vmbus_irq_set_affinity(struct irq_data *data,
--
2.25.1