[PATCH] KVM: x86/xen: Fix sleeping lock in hard IRQ context on PREEMPT_RT

From: Shaikh Kamal <shaikhkamal2012@xxxxxxxxx>

Date: Sun Mar 29 2026 - 09:17:40 EST


On PREEMPT_RT, kvm_xen_set_evtchn_fast() acquires a sleeping lock
(gpc->lock) from hard IRQ context (xen_timer_callback), triggering:

BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48
in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 0, name: swapper/5
preempt_count: 10100, expected: 0
RCU nest depth: 0, expected: 0
4 locks held by swapper/5/0:
INFO: lockdep is turned off.
irq event stamp: 1766
hardirqs last enabled at (1765): [<ffffffff81678fd4>] tick_nohz_idle_got_tick+0x84/0x90
hardirqs last disabled at (1766): [<ffffffff8b665051>] sysvec_apic_timer_interrupt+0x11/0xd0
softirqs last enabled at (0): [<ffffffff81289e76>] copy_process+0x1586/0x58b0
softirqs last disabled at (0): [<0000000000000000>] 0x0
Preempt disabled at:
[<ffffffff8b6650bc>] sysvec_apic_timer_interrupt+0x7c/0xd0
CPU: 5 UID: 0 PID: 0 Comm: swapper/5 Not tainted 6.13.0-rc1-syzkaller-00026-g2d5404caa8c7 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024
Call Trace:
<IRQ>
__dump_stack lib/dump_stack.c:94 [inline]
dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120
__might_resched+0x30d/0x8f0 kernel/sched/core.c:10318
rt_spin_lock+0x70/0x130 kernel/locking/spinlock_rt.c:48
kvm_xen_set_evtchn_fast+0x20b/0xa40 arch/x86/kvm/xen.c:1820
xen_timer_callback+0x91/0x1a0 arch/x86/kvm/xen.c:142
__run_hrtimer kernel/time/hrtimer.c:1739 [inline]
__hrtimer_run_queues+0x20b/0xa00 kernel/time/hrtimer.c:1803

The Xen timer uses HRTIMER_MODE_ABS_HARD for latency-sensitive event
delivery (see commit 77c9b9dea4fb ("KVM: x86/xen: Use fast path for Xen
timer delivery")). On PREEMPT_RT, hard IRQ hrtimers execute in hard IRQ
context where sleeping locks cannot be acquired.

Use irq_work to defer event injection to a context where sleeping locks
are permitted on PREEMPT_RT. The hrtimer itself still fires with hard IRQ
precision; only the event-channel delivery is deferred.

Note that in_hardirq() is also true for HRTIMER_MODE_ABS_HARD callbacks
on non-RT kernels, so as written the deferral takes effect there as well.
If the non-RT fast path (and its delivery latency) must be preserved, the
deferral should be guarded with IS_ENABLED(CONFIG_PREEMPT_RT) rather than
in_hardirq() alone.

The approach follows the existing pvclock_irq_work pattern in
arch/x86/kvm/x86.c.

Tested on a PREEMPT_RT kernel (CONFIG_PREEMPT_RT=y) with the syzbot C
reproducer — no crash observed after 30+ minutes of continuous execution.
Also tested on a non-RT kernel (CONFIG_PREEMPT_RT=n) to verify there is
no regression in the fast path.

Reported-by: syzbot+919877893c9d28162dc2@xxxxxxxxxxxxxxxxxxxxxxxxx
Closes: https://syzkaller.appspot.com/bug?extid=919877893c9d28162dc2
Fixes: 77c9b9dea4fb ("KVM: x86/xen: Use fast path for Xen timer delivery")
Signed-off-by: Shaikh Kamal <shaikhkamal2012@xxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/xen.c | 31 +++++++++++++++++++++++++++++++
2 files changed, 32 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5a3bfa293e8b..533b45289d53 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -746,6 +746,7 @@ struct kvm_vcpu_xen {
u64 timer_expires; /* In guest epoch */
atomic_t timer_pending;
struct hrtimer timer;
+ struct irq_work timer_inject_irqwork;
int poll_evtchn;
struct timer_list poll_timer;
struct kvm_hypervisor_cpuid cpuid;
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index d6b2a665b499..01fa7b165355 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -122,6 +122,24 @@ void kvm_xen_inject_timer_irqs(struct kvm_vcpu *vcpu)
}
}

+/*
+ * Deferred timer-event injection, queued from xen_timer_callback() when it
+ * runs in hard IRQ context. irq_work runs in a context where the sleeping
+ * gpc->lock may be taken on PREEMPT_RT.
+ */
+static void xen_timer_inject_irqwork(struct irq_work *work)
+{
+	struct kvm_vcpu_xen *xen = container_of(work, struct kvm_vcpu_xen,
+						timer_inject_irqwork);
+	struct kvm_vcpu *vcpu = container_of(xen, struct kvm_vcpu, arch.xen);
+	struct kvm_xen_evtchn e;
+	int rc;
+
+	e.vcpu_id = vcpu->vcpu_id;
+	e.vcpu_idx = vcpu->vcpu_idx;
+	e.port = vcpu->arch.xen.timer_virq;
+	e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+	rc = kvm_xen_set_evtchn_fast(&e, vcpu->kvm);
+	if (rc != -EWOULDBLOCK) {
+		vcpu->arch.xen.timer_expires = 0;
+		return;
+	}
+
+	/*
+	 * Mirror xen_timer_callback(): if the fast path cannot deliver the
+	 * event, fall back to injection from vcpu context instead of silently
+	 * dropping the timer interrupt.
+	 */
+	atomic_inc(&vcpu->arch.xen.timer_pending);
+	kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
+	kvm_vcpu_kick(vcpu);
+}
+
static enum hrtimer_restart xen_timer_callback(struct hrtimer *timer)
{
struct kvm_vcpu *vcpu = container_of(timer, struct kvm_vcpu,
@@ -132,6 +150,17 @@ static enum hrtimer_restart xen_timer_callback(struct hrtimer *timer)
if (atomic_read(&vcpu->arch.xen.timer_pending))
return HRTIMER_NORESTART;

+ /*
+ * On PREEMPT_RT, this callback runs in hard IRQ context where
+ * kvm_xen_set_evtchn_fast() cannot acquire sleeping locks
+ * (specifically gpc->lock). Defer to irq_work which runs in
+ * thread context on RT.
+ */
+ if (in_hardirq()) {
+ irq_work_queue(&vcpu->arch.xen.timer_inject_irqwork);
+ return HRTIMER_NORESTART;
+ }
+
e.vcpu_id = vcpu->vcpu_id;
e.vcpu_idx = vcpu->vcpu_idx;
e.port = vcpu->arch.xen.timer_virq;
@@ -2303,6 +2332,8 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
hrtimer_setup(&vcpu->arch.xen.timer, xen_timer_callback, CLOCK_MONOTONIC,
HRTIMER_MODE_ABS_HARD);
+ init_irq_work(&vcpu->arch.xen.timer_inject_irqwork,
+ xen_timer_inject_irqwork);

kvm_gpc_init(&vcpu->arch.xen.runstate_cache, vcpu->kvm);
kvm_gpc_init(&vcpu->arch.xen.runstate2_cache, vcpu->kvm);
--
2.43.0