RE: [PATCH v3] drivers: hv: vmbus: Use kthread for vmbus interrupts on PREEMPT_RT

From: Michael Kelley

Date: Tue Feb 17 2026 - 01:43:01 EST


From: Jan Kiszka <jan.kiszka@xxxxxxxxxxx> Sent: Monday, February 16, 2026 8:25 AM
>
> Resolves the following lockdep report when booting PREEMPT_RT on Hyper-V
> with related guest support enabled:
>
> [ 1.127941] hv_vmbus: registering driver hyperv_drm
>
> [ 1.132518] =============================
> [ 1.132519] [ BUG: Invalid wait context ]
> [ 1.132521] 6.19.0-rc8+ #9 Not tainted
> [ 1.132524] -----------------------------
> [ 1.132525] swapper/0/0 is trying to lock:
> [ 1.132526] ffff8b9381bb3c90 (&channel->sched_lock){....}-{3:3}, at: vmbus_chan_sched+0xc4/0x2b0
> [ 1.132543] other info that might help us debug this:
> [ 1.132544] context-{2:2}
> [ 1.132545] 1 lock held by swapper/0/0:
> [ 1.132547] #0: ffffffffa010c4c0 (rcu_read_lock){....}-{1:3}, at: vmbus_chan_sched+0x31/0x2b0
> [ 1.132557] stack backtrace:
> [ 1.132560] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.19.0-rc8+ #9 PREEMPT_{RT,(lazy)}
> [ 1.132565] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 09/25/2025
> [ 1.132567] Call Trace:
> [ 1.132570] <IRQ>
> [ 1.132573] dump_stack_lvl+0x6e/0xa0
> [ 1.132581] __lock_acquire+0xee0/0x21b0
> [ 1.132592] lock_acquire+0xd5/0x2d0
> [ 1.132598] ? vmbus_chan_sched+0xc4/0x2b0
> [ 1.132606] ? lock_acquire+0xd5/0x2d0
> [ 1.132613] ? vmbus_chan_sched+0x31/0x2b0
> [ 1.132619] rt_spin_lock+0x3f/0x1f0
> [ 1.132623] ? vmbus_chan_sched+0xc4/0x2b0
> [ 1.132629] ? vmbus_chan_sched+0x31/0x2b0
> [ 1.132634] vmbus_chan_sched+0xc4/0x2b0
> [ 1.132641] vmbus_isr+0x2c/0x150
> [ 1.132648] __sysvec_hyperv_callback+0x5f/0xa0
> [ 1.132654] sysvec_hyperv_callback+0x88/0xb0
> [ 1.132658] </IRQ>
> [ 1.132659] <TASK>
> [ 1.132660] asm_sysvec_hyperv_callback+0x1a/0x20
>
> As code paths that handle vmbus IRQs use sleeping locks under PREEMPT_RT,
> the vmbus_isr execution needs to be moved into thread context. Open-
> coding this allows us to skip the IPI that irq_work would additionally
> bring and which we do not need, as this is always an IRQ, never an NMI.
>
> This affects both x86 and arm64, therefore hook into the common driver
> logic.
>
> Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>

Tested this patch in combination with the related SCSI driver patch.
Tested three configurations with a recent linux-next kernel, either
20260128 or 20260205.

1) Normal Linux kernel
2) Normal Linux kernel plus CONFIG_PROVE_LOCKING
3) PREEMPT_RT kernel plus CONFIG_PROVE_LOCKING

Tested these three configurations in an x86/x64 VM on a local Hyper-V
and again in an ARM64 VM in the Azure public cloud. With all
combinations, ran the "stress-ng" command provided by Florian
Bezdeka for several minutes. Saw no issues related to these patches.
Presumably the normal kernel with CONFIG_PROVE_LOCKING produced
the lockdep report that Saurabh Sengar saw, and that also appears to be
fixed in this version of the patch due to adding lockdep_hardirq_threaded().

However, I noted one additional locking problem in the ARM64 Azure
VM, which has multiple PCI pass-thru devices -- one Mellanox NIC VF and
two NVMe controllers. The first PCI device to be brought online gets
this lockdep report, though Linux continues to run without problems:

[ 8.128629] hv_vmbus: registering driver hv_pci
[ 8.132276] hv_pci ad26ad39-fa5e-4d12-9825-fa62e9c88483: PCI VMBus probing: Using version 0x10004
[ 8.142956] hv_pci ad26ad39-fa5e-4d12-9825-fa62e9c88483: PCI host bridge to bus fa5e:00
[ 8.143231] pci_bus fa5e:00: root bus resource [mem 0xfc0000000-0xfc00fffff window]
[ 8.143272] pci_bus fa5e:00: No busn resource found for root bus, will use [bus 00-ff]
[ 8.154069] =============================
[ 8.156609] [ BUG: Invalid wait context ]
[ 8.159209] 6.19.0-rc7rt-next-20260128+ #9 Tainted: G E
[ 8.163582] -----------------------------
[ 8.166323] systemd-udevd/575 is trying to lock:
[ 8.169163] ffff00011fb62260 (&hbus->device_list_lock){+.+.}-{3:3}, at: get_pcichild_wslot+0x30/0xe0 [pci_hyperv]
[ 8.175792] other info that might help us debug this:
[ 8.179187] context-{5:5}
[ 8.180954] 3 locks held by systemd-udevd/575:
[ 8.183048] #0: ffff000116e50100 (&dev->mutex){....}-{4:4}, at: __device_driver_lock+0x4c/0xb0
[ 8.193285] #1: ffff00011fb62118 (&hbus->state_lock){+.+.}-{4:4}, at: hv_pci_probe+0x32c/0x590 [pci_hyperv]
[ 8.199565] #2: ffffa40f7caa61e0 (pci_lock){....}-{2:2}, at: pci_bus_read_config_dword+0x64/0xf8
[ 8.205112] stack backtrace:
[ 8.207037] CPU: 0 UID: 0 PID: 575 Comm: systemd-udevd Tainted: G E 6.19.0-rc7rt-next-20260128+ #9 PREEMPT_RT
[ 8.209134] Tainted: [E]=UNSIGNED_MODULE
[ 8.219505] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 06/10/2025
[ 8.226029] Call trace:
[ 8.227433] show_stack+0x20/0x38 (C)
[ 8.229541] dump_stack_lvl+0x9c/0x158
[ 8.231698] dump_stack+0x18/0x28
[ 8.233799] __lock_acquire+0x488/0x1e20
[ 8.236373] lock_acquire+0x11c/0x388
[ 8.238783] rt_spin_lock+0x54/0x230
[ 8.241138] get_pcichild_wslot+0x30/0xe0 [pci_hyperv]
[ 8.244550] hv_pcifront_read_config+0x3c/0x98 [pci_hyperv]
[ 8.248323] pci_bus_read_config_dword+0x88/0xf8
[ 8.250419] pci_bus_generic_read_dev_vendor_id+0x3c/0x1c0
[ 8.252517] pci_bus_read_dev_vendor_id+0x54/0x80
[ 8.263922] pci_scan_single_device+0x88/0x100
[ 8.266903] pci_scan_slot+0x74/0x1e0
[ 8.269208] pci_scan_child_bus_extend+0x50/0x328
[ 8.271978] pci_scan_root_bus_bridge+0xc4/0xf8
[ 8.274705] hv_pci_probe+0x390/0x590 [pci_hyperv]
[ 8.277584] vmbus_probe+0x4c/0xb0 [hv_vmbus]
[ 8.279688] really_probe+0xd4/0x3d8
[ 8.285954] __driver_probe_device+0x90/0x1a0
[ 8.288645] driver_probe_device+0x44/0x148
[ 8.291011] __driver_attach+0x154/0x290
[ 8.293201] bus_for_each_dev+0x80/0xf0
[ 8.295407] driver_attach+0x2c/0x40
[ 8.297478] bus_add_driver+0x128/0x270
[ 8.299607] driver_register+0x68/0x138
[ 8.302179] __vmbus_driver_register+0x98/0xc0 [hv_vmbus]
[ 8.305535] init_hv_pci_drv+0x198/0xff8 [pci_hyperv]
[ 8.308566] do_one_initcall+0x70/0x400
[ 8.310957] do_init_module+0x60/0x280
[ 8.313393] load_module+0x2308/0x2680
[ 8.315535] init_module_from_file+0xe0/0x110
[ 8.318432] idempotent_init_module+0x194/0x280
[ 8.321141] __arm64_sys_finit_module+0x74/0xf8
[ 8.323874] invoke_syscall+0x6c/0xf8
[ 8.326213] el0_svc_common.constprop.0+0xe0/0xf0
[ 8.329068] do_el0_svc+0x24/0x38
[ 8.331070] el0_svc+0x164/0x3c8
[ 8.333137] el0t_64_sync_handler+0xd0/0xe8
[ 8.335599] el0t_64_sync+0x1b0/0x1b8
[ 8.338598] pci fa5e:00:00.0: [1414:b111] type 00 class 0x010802 PCIe Endpoint
[ 8.340646] pci fa5e:00:00.0: BAR 0 [mem 0xfc0000000-0xfc00fffff 64bit]
[ 8.357759] pci_bus fa5e:00: busn_res: [bus 00-ff] end is updated to 00

The lockdep report would also be seen in an x86/x64 VM in Azure, though I
did not explicitly test that combination. I have not looked at what it would
take to fix this for PREEMPT_RT. But the fix would be a separate patch that
does not affect the validity of this patch.

So for this patch,
Reviewed-by: Michael Kelley <mhklinux@xxxxxxxxxxx>
Tested-by: Michael Kelley <mhklinux@xxxxxxxxxxx>

> ---
>
> Changes in v3:
> - move logic to generic vmbus driver, targeting arm64 as well
> - annotate non-RT path with lockdep_hardirq_threaded
> - only teardown if setup ran
>
> Changes in v2:
> - reorder vmbus_irq_pending clearing to fix a race condition
>
> drivers/hv/vmbus_drv.c | 66 +++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 65 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> index 6785ad63a9cb..749a2e68af05 100644
> --- a/drivers/hv/vmbus_drv.c
> +++ b/drivers/hv/vmbus_drv.c
> @@ -25,6 +25,7 @@
> #include <linux/cpu.h>
> #include <linux/sched/isolation.h>
> #include <linux/sched/task_stack.h>
> +#include <linux/smpboot.h>
>
> #include <linux/delay.h>
> #include <linux/panic_notifier.h>
> @@ -1350,7 +1351,7 @@ static void vmbus_message_sched(struct
> hv_per_cpu_context *hv_cpu, void *message
> }
> }
>
> -void vmbus_isr(void)
> +static void __vmbus_isr(void)
> {
> struct hv_per_cpu_context *hv_cpu
> = this_cpu_ptr(hv_context.cpu_context);
> @@ -1363,6 +1364,53 @@ void vmbus_isr(void)
>
> add_interrupt_randomness(vmbus_interrupt);
> }
> +
> +static DEFINE_PER_CPU(bool, vmbus_irq_pending);
> +static DEFINE_PER_CPU(struct task_struct *, vmbus_irqd);
> +
> +static void vmbus_irqd_wake(void)
> +{
> + struct task_struct *tsk = __this_cpu_read(vmbus_irqd);
> +
> + __this_cpu_write(vmbus_irq_pending, true);
> + wake_up_process(tsk);
> +}
> +
> +static void vmbus_irqd_setup(unsigned int cpu)
> +{
> + sched_set_fifo(current);
> +}
> +
> +static int vmbus_irqd_should_run(unsigned int cpu)
> +{
> + return __this_cpu_read(vmbus_irq_pending);
> +}
> +
> +static void run_vmbus_irqd(unsigned int cpu)
> +{
> + __this_cpu_write(vmbus_irq_pending, false);
> + __vmbus_isr();
> +}
> +
> +static bool vmbus_irq_initialized;
> +
> +static struct smp_hotplug_thread vmbus_irq_threads = {
> + .store = &vmbus_irqd,
> + .setup = vmbus_irqd_setup,
> + .thread_should_run = vmbus_irqd_should_run,
> + .thread_fn = run_vmbus_irqd,
> + .thread_comm = "vmbus_irq/%u",
> +};
> +
> +void vmbus_isr(void)
> +{
> + if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
> + vmbus_irqd_wake();
> + } else {
> + lockdep_hardirq_threaded();
> + __vmbus_isr();
> + }
> +}
> EXPORT_SYMBOL_FOR_MODULES(vmbus_isr, "mshv_vtl");
>
> static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
> @@ -1462,6 +1510,13 @@ static int vmbus_bus_init(void)
> * the VMbus interrupt handler.
> */
>
> + if (IS_ENABLED(CONFIG_PREEMPT_RT) && !vmbus_irq_initialized) {
> + ret = smpboot_register_percpu_thread(&vmbus_irq_threads);
> + if (ret)
> + goto err_kthread;
> + vmbus_irq_initialized = true;
> + }
> +
> if (vmbus_irq == -1) {
> hv_setup_vmbus_handler(vmbus_isr);
> } else {
> @@ -1507,6 +1562,11 @@ static int vmbus_bus_init(void)
> free_percpu(vmbus_evt);
> }
> err_setup:
> + if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) {
> + smpboot_unregister_percpu_thread(&vmbus_irq_threads);
> + vmbus_irq_initialized = false;
> + }
> +err_kthread:
> bus_unregister(&hv_bus);
> return ret;
> }
> @@ -2976,6 +3036,10 @@ static void __exit vmbus_exit(void)
> free_percpu_irq(vmbus_irq, vmbus_evt);
> free_percpu(vmbus_evt);
> }
> + if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) {
> + smpboot_unregister_percpu_thread(&vmbus_irq_threads);
> + vmbus_irq_initialized = false;
> + }
> for_each_online_cpu(cpu) {
> struct hv_per_cpu_context *hv_cpu
> = per_cpu_ptr(hv_context.cpu_context, cpu);
> --
> 2.47.3