[PATCH v2] x86/irq: Add additional unhandled IRQ debug information

From: Prarit Bhargava
Date: Tue Dec 06 2016 - 14:59:32 EST


The current unhandled IRQ warning doesn't output enough information on x86 to
determine which device issued an interrupt. For example,

irq 16: nobody cared (try booting with the "irqpoll" option)
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.9.0-rc6+ #19
Hardware name: Hewlett-Packard HP Z820 Workstation/158B, BIOS J63 v03.90 06/01/2016
ffff88041f803eb0 ffffffff81337a95 ffff880418607200 ffff88041860729c
ffff88041f803ed8 ffffffff810db845 ffff880418607200 0000000000000000
0000000000000010 ffff88041f803f10 ffffffff810dbb9f ffff880418607200
Call Trace:
<IRQ> [<ffffffff81337a95>] dump_stack+0x63/0x8e
[<ffffffff810db845>] __report_bad_irq+0x35/0xd0
[<ffffffff810dbb9f>] note_interrupt+0x20f/0x260
[<ffffffff810d8f05>] handle_irq_event_percpu+0x45/0x60
[<ffffffff810d8f4c>] handle_irq_event+0x2c/0x50
[<ffffffff810dc8ca>] handle_fasteoi_irq+0x8a/0x150
[<ffffffff8102ee0b>] handle_irq+0xab/0x130
[<ffffffff8109de6a>] ? atomic_notifier_call_chain+0x1a/0x20
[<ffffffff8103631d>] ? __exit_idle+0x2d/0x30
[<ffffffff81715bad>] do_IRQ+0x4d/0xd0
[<ffffffff81713c02>] common_interrupt+0x82/0x82
<EOI> [<ffffffff815e3931>] ? cpuidle_enter_state+0xc1/0x280
[<ffffffff815e3924>] ? cpuidle_enter_state+0xb4/0x280
[<ffffffff815e3b27>] cpuidle_enter+0x17/0x20
[<ffffffff810c1813>] call_cpuidle+0x23/0x40
[<ffffffff810c1a79>] cpu_startup_entry+0x149/0x240
[<ffffffff81705fe7>] rest_init+0x77/0x80
[<ffffffff81d8a147>] start_kernel+0x495/0x4a2
[<ffffffff81d89aa0>] ? set_init_arg+0x55/0x55
[<ffffffff81d89120>] ? early_idt_handler_array+0x120/0x120
[<ffffffff81d895d6>] x86_64_start_reservations+0x2a/0x2c
[<ffffffff81d89715>] x86_64_start_kernel+0x13d/0x14c
handlers:
[<ffffffff8155b050>] usb_hcd_irq
Disabling IRQ #16

The unhandled interrupt is issued against IRQ 16, however, in many cases the
handlers listed do not have anything to do with the device that has issued the
interrupt.

This patch adds weak function arch_irq_debug(), and adds an x86 specific
definition. The x86 version dumps the following information: the status
of the IO APIC pin line associated with the IRQ, the state of the CPU's
LAPIC, and the PCI devices which have their interrupt set.

This information is output:

Additional x86 information on IRQ #16 ...
IRQ16 maps to APIC0 PIN16 : pin10, enabled , level, low , V(10), IRR(1), S(1), remapped, I(98013), Z(0)
This CPU APIC ID is 0, and cpu id is 0.
CPU 0 LAPIC IRR dump:
APIC IRR 7 (IRQs 255 to 224) = 0x20008000 [ 239 253 ]
APIC IRR 6 (IRQs 223 to 192) = 0x0
APIC IRR 5 (IRQs 191 to 160) = 0x0
APIC IRR 4 (IRQs 159 to 128) = 0x0
APIC IRR 3 (IRQs 127 to 96) = 0x0
APIC IRR 2 (IRQs 95 to 64) = 0x0
APIC IRR 1 (IRQs 63 to 32) = 0x0
APIC IRR 0 (IRQs 31 to 0) = 0x0
Disabling IRQ #16
PCI devices with INTERRUPT STATUS set (bit 3 of PCI_STATUS register):
possible unhandled irq source 0000:00:16.0: pin = 1
possible unhandled irq source 0000:05:00.0: pin = 1

[v2]: Fixed kbuild auto test warning for i386

Signed-off-by: Prarit Bhargava <prarit@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: x86@xxxxxxxxxx
Cc: Rui Wang <rui.y.wang@xxxxxxxxx>
Cc: Paul Gortmaker <paul.gortmaker@xxxxxxxxxxxxx>
Cc: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxx>
Cc: Aaron Lu <aaron.lu@xxxxxxxxx>
---
arch/x86/include/asm/io_apic.h | 2 ++
arch/x86/kernel/apic/io_apic.c | 70 ++++++++++++++++++++++++--------------
arch/x86/kernel/irq.c | 72 ++++++++++++++++++++++++++++++++++++++++
kernel/irq/spurious.c | 9 +++++
4 files changed, 129 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 6cbf2cfb3f8a..88b9819e6198 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -195,6 +195,7 @@ static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
extern void setup_ioapic_dest(void);
extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin);
extern void print_IO_APICs(void);
+extern void print_IRQ_to_pin_mapping(unsigned int irq);
#else /* !CONFIG_X86_IO_APIC */

#define IO_APIC_IRQ(x) 0
@@ -203,6 +204,7 @@ static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
static inline void ioapic_insert_resources(void) { }
static inline int arch_early_ioapic_init(void) { return 0; }
static inline void print_IO_APICs(void) {}
+static inline void print_IRQ_to_pin_mapping(unsigned int irq) {}
#define gsi_top (NR_IRQS_LEGACY)
static inline int mp_find_ioapic(u32 gsi) { return 0; }
static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 48e6d84f173e..ff86bd140d18 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1247,34 +1247,30 @@ void ioapic_zap_locks(void)
raw_spin_lock_init(&ioapic_lock);
}

-static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
+static void io_apic_print_entry(unsigned int apic, int entry_no)
{
- int i;
char buf[256];
struct IO_APIC_route_entry entry;
struct IR_IO_APIC_route_entry *ir_entry = (void *)&entry;

- printk(KERN_DEBUG "IOAPIC %d:\n", apic);
- for (i = 0; i <= nr_entries; i++) {
- entry = ioapic_read_entry(apic, i);
- snprintf(buf, sizeof(buf),
- " pin%02x, %s, %s, %s, V(%02X), IRR(%1d), S(%1d)",
- i,
- entry.mask == IOAPIC_MASKED ? "disabled" : "enabled ",
- entry.trigger == IOAPIC_LEVEL ? "level" : "edge ",
- entry.polarity == IOAPIC_POL_LOW ? "low " : "high",
- entry.vector, entry.irr, entry.delivery_status);
- if (ir_entry->format)
- printk(KERN_DEBUG "%s, remapped, I(%04X), Z(%X)\n",
- buf, (ir_entry->index << 15) | ir_entry->index,
- ir_entry->zero);
- else
- printk(KERN_DEBUG "%s, %s, D(%02X), M(%1d)\n",
- buf,
- entry.dest_mode == IOAPIC_DEST_MODE_LOGICAL ?
- "logical " : "physical",
- entry.dest, entry.delivery_mode);
- }
+ entry = ioapic_read_entry(apic, entry_no);
+ snprintf(buf, sizeof(buf),
+ " pin%02x, %s, %s, %s, V(%02X), IRR(%1d), S(%1d)",
+ entry_no,
+ entry.mask == IOAPIC_MASKED ? "disabled" : "enabled ",
+ entry.trigger == IOAPIC_LEVEL ? "level" : "edge ",
+ entry.polarity == IOAPIC_POL_LOW ? "low " : "high",
+ entry.vector, entry.irr, entry.delivery_status);
+ if (ir_entry->format)
+ pr_debug("%s, remapped, I(%04X), Z(%X)\n",
+ buf, (ir_entry->index << 15) | ir_entry->index,
+ ir_entry->zero);
+ else
+ pr_debug("%s, %s, D(%02X), M(%1d)\n",
+ buf,
+ entry.dest_mode == IOAPIC_DEST_MODE_LOGICAL ?
+ "logical " : "physical",
+ entry.dest, entry.delivery_mode);
}

static void __init print_IO_APIC(int ioapic_idx)
@@ -1284,6 +1280,7 @@ static void __init print_IO_APIC(int ioapic_idx)
union IO_APIC_reg_02 reg_02;
union IO_APIC_reg_03 reg_03;
unsigned long flags;
+ int i;

raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(ioapic_idx, 0);
@@ -1330,7 +1327,32 @@ static void __init print_IO_APIC(int ioapic_idx)
}

printk(KERN_DEBUG ".... IRQ redirection table:\n");
- io_apic_print_entries(ioapic_idx, reg_01.bits.entries);
+ pr_debug("IOAPIC %d:\n", ioapic_idx);
+ for (i = 0; i <= reg_01.bits.entries; i++)
+ io_apic_print_entry(ioapic_idx, i);
+}
+
+void print_IRQ_to_pin_mapping(unsigned int irq)
+{
+ struct irq_pin_list *entry;
+ struct irq_chip *chip;
+ struct mp_chip_data *data;
+
+ chip = irq_get_chip(irq);
+ if (chip != &ioapic_chip && chip != &ioapic_ir_chip)
+ return;
+ data = irq_get_chip_data(irq);
+ if (!data)
+ return;
+ if (list_empty(&data->irq_2_pin))
+ return;
+
+ pr_emerg("IRQ%d ", irq);
+ for_each_irq_pin(entry, data->irq_2_pin) {
+ pr_cont("maps to APIC%d PIN%d :", entry->apic, entry->pin);
+ io_apic_print_entry(entry->apic, entry->pin);
+ }
+ pr_cont("\n");
}

void __init print_IO_APICs(void)
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 9f669fdd2010..e692f1b4ecab 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -10,6 +10,7 @@
#include <linux/ftrace.h>
#include <linux/delay.h>
#include <linux/export.h>
+#include <linux/pci.h>

#include <asm/apic.h>
#include <asm/io_apic.h>
@@ -547,3 +548,74 @@ void fixup_irqs(void)
}
}
#endif
+
+/* Unhandled Interrupt Debug */
+
+static struct work_struct x86_unhandled_irq_work;
+static void x86_unhandled_irq_work_fn(struct work_struct *work)
+{
+#ifdef CONFIG_PCI
+ struct pci_dev *dev = NULL;
+ u32 reg;
+ int irq_pending;
+
+ pr_emerg("PCI devices with INTERRUPT STATUS set (bit 3 of PCI_STATUS register):\n");
+ for_each_pci_dev(dev) {
+ if (!dev || !dev->pin)
+ continue;
+
+ pci_read_config_dword(dev, PCI_COMMAND, &reg);
+ irq_pending = !!((reg >> 16) & PCI_STATUS_INTERRUPT);
+ if (irq_pending)
+ pr_emerg("possible unhandled irq source %s: pin = %d\n",
+ pci_name(dev), dev->pin);
+ }
+#endif
+}
+
+static void lapic_dump_irr(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+ int irr, bit;
+ u32 reg;
+
+ pr_emerg("This CPU APIC ID is %d, and cpu id is %d.\n",
+ hard_smp_processor_id(), smp_processor_id());
+ pr_emerg("CPU %d LAPIC IRR dump:\n", smp_processor_id());
+ /*
+ * Since this CPU is handling the interrupt, dump its LAPIC IRR to see
+ * which line is asserted.
+ */
+ for (irr = APIC_ISR_NR - 1; irr >= 0; irr--) {
+ reg = apic_read(APIC_IRR + irr*0x10);
+ pr_emerg("APIC IRR %d (IRQs %d to %d) = 0x%0x ", irr,
+ (irr + 1) * 32 - 1, irr * 32, reg);
+ if (reg) {
+ pr_cont(" [ ");
+ for (bit = 0; bit < 32; bit++) {
+ if ((reg >> bit) & 0x01)
+ pr_cont("%d ", bit + (irr * 32));
+ }
+ pr_cont("]");
+ }
+ pr_cont("\n");
+ }
+#endif
+}
+
+void arch_irq_debug(unsigned int irq)
+{
+ pr_emerg("Additional x86 information on IRQ #%d ...\n", irq);
+
+ /* PCI information cannot be dumped in interrupt context */
+ INIT_WORK(&x86_unhandled_irq_work, x86_unhandled_irq_work_fn);
+ schedule_work_on(smp_processor_id(), &x86_unhandled_irq_work);
+
+#ifdef CONFIG_X86_IO_APIC
+ /* Dump the IOAPIC irq mapping */
+ print_IRQ_to_pin_mapping(irq);
+#endif
+
+ /* Dump this CPU's IRR to determine which bits are set */
+ lapic_dump_irr();
+}
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 5707f97a3e6a..fac67b799974 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -180,6 +180,14 @@ static inline int bad_action_ret(irqreturn_t action_ret)
return 1;
}

+void __weak arch_irq_debug(unsigned int irq)
+{
+ /*
+ * This weak function provides a method to dump arch-specific
+ * data to further debug unhandled IRQs or IRQ storms.
+ */
+}
+
/*
* If 99,900 of the previous 100,000 interrupts have not been handled
* then assume that the IRQ is stuck in some manner. Drop a diagnostic
@@ -218,6 +226,7 @@ static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
action->thread_fn, action->thread_fn);
printk(KERN_CONT "\n");
}
+ arch_irq_debug(irq);
raw_spin_unlock_irqrestore(&desc->lock, flags);
}

--
1.7.9.3