[PATCH] KVM: arm64: vgic: Fix soft lockup during VM teardown

From: Shanker Donthineni
Date: Tue Jan 17 2023 - 21:24:08 EST


Getting intermittent CPU soft lockups during the virtual machines
teardown on a system with GICv4 features enabled. The function
__synchronize_hardirq() has been waiting for IRQD_IRQ_INPROGRESS
to be cleared forever as per the current implementation.

CPU stuck here for a long time leads to soft lockup:
while (irqd_irq_inprogress(&desc->irq_data))
cpu_relax();

Call trace from the lockup CPU:
[ 87.238866] watchdog: BUG: soft lockup - CPU#37 stuck for 23s!
[ 87.250025] CPU: 37 PID: 1031 Comm: qemu-system-aarch64
[ 87.358397] Call trace:
[ 87.360891] __synchronize_hardirq+0x48/0x140
[ 87.365343] free_irq+0x138/0x424
[ 87.368727] vgic_v4_teardown+0xa4/0xe0
[ 87.372649] __kvm_vgic_destroy+0x18c/0x194
[ 87.376922] kvm_vgic_destroy+0x28/0x3c
[ 87.380839] kvm_arch_destroy_vm+0x24/0x44
[ 87.385024] kvm_destroy_vm+0x158/0x2c4
[ 87.388943] kvm_vm_release+0x6c/0x98
[ 87.392681] __fput+0x70/0x220
[ 87.395800] ____fput+0x10/0x20
[ 87.399005] task_work_run+0xb4/0x23c
[ 87.402746] do_exit+0x2bc/0x8a4
[ 87.406042] do_group_exit+0x34/0xb0
[ 87.409693] get_signal+0x878/0x8a0
[ 87.413254] do_notify_resume+0x138/0x1530
[ 87.417440] el0_svc+0xdc/0xf0
[ 87.420559] el0t_64_sync_handler+0xf0/0x11c
[ 87.424919] el0t_64_sync+0x18c/0x190

The state of the IRQD_IRQ_INPROGRESS information is lost inside
irq_domain_activate_irq() which happens before calling free_irq().
Instrumented the code and confirmed, the IRQD state is changed from
0x10401400 to 0x10441600 instead of 0x10401600 causing problem.

Call trace from irqd_set_activated():
[ 78.983544] irqd_set_activated: lost IRQD_IRQ_INPROGRESS
old=0x10401400, new=0x10441600
[ 78.992093] CPU: 19 PID: 1511 Comm: qemu-system-aarch64
[ 79.008461] Call trace:
[ 79.010956] dump_backtrace.part.0+0xc8/0xe0
[ 79.015328] show_stack+0x18/0x54
[ 79.018713] dump_stack_lvl+0x64/0x7c
[ 79.022459] dump_stack+0x18/0x30
[ 79.025842] irq_domain_activate_irq+0x88/0x94
[ 79.030385] vgic_v3_save_pending_tables+0x260/0x29c
[ 79.035463] vgic_set_common_attr+0xac/0x23c
[ 79.039826] vgic_v3_set_attr+0x48/0x60
[ 79.043742] kvm_device_ioctl+0x120/0x19c
[ 79.047840] __arm64_sys_ioctl+0x42c/0xe00
[ 79.052027] invoke_syscall.constprop.0+0x50/0xe0
[ 79.056835] do_el0_svc+0x58/0x180
[ 79.060308] el0_svc+0x38/0xf0
[ 79.063425] el0t_64_sync_handler+0xf0/0x11c
[ 79.067785] el0t_64_sync+0x18c/0x190

irqreturn_t handle_irq_event(struct irq_desc *desc)
{
irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
raw_spin_unlock(&desc->lock);

ret = handle_irq_event_percpu(desc);

raw_spin_lock(&desc->lock);
irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
}

In this particular failed case and based on traces, the two functions
irqd_set_activated() and handle_irq_event() are concurrently modifying
IRQD state without both holding desc->lock. The irqd_set_activated()
execution path is reading memory 'state_use_accessors' in between set
& clear of IRQD_IRQ_INPROGRESS state change and writing the modified
data after executing 'irqd_clear(desc->irq_data, IRQD_IRQ_INPROGRESS)'.

To fix the lockup issue, hold desc->lock when calling functions
irq_domain_activate_irq() and irq_domain_deactivate_irq).

Signed-off-by: Shanker Donthineni <sdonthineni@xxxxxxxxxx>
---
arch/arm64/kvm/vgic/vgic-v3.c | 6 ++++++
arch/arm64/kvm/vgic/vgic-v4.c | 4 ++++
2 files changed, 10 insertions(+)

diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 2074521d4a8c..e6aa909fcbe2 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -353,22 +353,28 @@ int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
static void unmap_all_vpes(struct vgic_dist *dist)
{
struct irq_desc *desc;
+ unsigned long flags;
int i;

for (i = 0; i < dist->its_vm.nr_vpes; i++) {
desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
+ raw_spin_lock_irqsave(&desc->lock, flags);
irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
}
}

static void map_all_vpes(struct vgic_dist *dist)
{
struct irq_desc *desc;
+ unsigned long flags;
int i;

for (i = 0; i < dist->its_vm.nr_vpes; i++) {
desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
+ raw_spin_lock_irqsave(&desc->lock, flags);
irq_domain_activate_irq(irq_desc_get_irq_data(desc), false);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
}
}

diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index ad06ba6c9b00..a01b8313e82c 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -139,8 +139,10 @@ static void vgic_v4_enable_vsgis(struct kvm_vcpu *vcpu)
/* Transfer the full irq state to the vPE */
vgic_v4_sync_sgi_config(vpe, irq);
desc = irq_to_desc(irq->host_irq);
+ raw_spin_lock(&desc->lock);
ret = irq_domain_activate_irq(irq_desc_get_irq_data(desc),
false);
+ raw_spin_unlock(&desc->lock);
if (!WARN_ON(ret)) {
/* Transfer pending state */
ret = irq_set_irqchip_state(irq->host_irq,
@@ -177,7 +179,9 @@ static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu)
WARN_ON(ret);

desc = irq_to_desc(irq->host_irq);
+ raw_spin_lock(&desc->lock);
irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
+ raw_spin_unlock(&desc->lock);
unlock:
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
--
2.25.1