Re: smp_call_function_single lockups

From: Chris J Arges
Date: Tue Mar 31 2015 - 18:24:14 EST


On Tue, Mar 31, 2015 at 08:08:40AM -0700, Linus Torvalds wrote:
> On Mon, Mar 30, 2015 at 8:15 PM, Chris J Arges
> <chris.j.arges@xxxxxxxxxxxxx> wrote:
> >
> > I modified the posted patch with the following:
>
> Actually, in addition to Ingo's patches (and the irq printout), which
> you should try first, if none of that really gives any different
> behavior, can modify that ack_APIC_irq() debugging code a bit more:
>
> > diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
> > index bf32309..dc3e192 100644
> > --- a/arch/x86/include/asm/apic.h
> > +++ b/arch/x86/include/asm/apic.h
> > @@ -441,7 +441,7 @@ static inline void ack_APIC_irq(int vector)
> > if (vector >= 16) {
> > unsigned v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
> > v >>= vector & 0x1f;
> > - WARN_ON_ONCE(!(v & 1));
> > + WARN(!(v & 1), "ack_APIC_irq: vector = %0x\n", vector);
> > }
> > /*
> > * ack_APIC_irq() actually gets compiled as a single instruction
>
> So what I'd suggest doing is:
>
> - change the test of "vector >= 16" to just "vector >= 0".
>
> We still have "-1" as the "unknown vector" thing, but I think only
> the ack_bad_irq() thing calls it, and that should print out its own
> message if it ever triggers, so it isn't an issue.
>
> The reason for the ">= 16" was kind of bogus - the first 16 vectors
> are system vectors, but we definitely shouldn't ack the apic for such
> vectors anyway, so giving a warning for them is very much appropriate.
> In particular, vector 2 is NMI, and maybe we do ACk it incorrectly.
>
> - add a "return" if the warning triggers, and simply don't do the
> actual ACK cycle at all if the ISR bit is clear.
>
> IOW, make it do "if (WARN(..)) return;"
>
> Now, we might get the vector number wrong for some reason, and in that
> case not ACK'ing at all might cause problems too, but it would be
> interesting to see if it changes behavior wrt the lockup.
>
> I don't have any other ideas at the moment, but hopefully the
> suggested changes by me and Ingo will give some more data to go on and
> clarify what might be going on.
>
> Linus
>

Linus,

I had a few runs with your patch plus modifications, and got the following
results (modified patch inlined below):

[ 14.423916] ack_APIC_irq: vector = d1, irq = ffffffff
[ 176.060005] NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [qemu-system-x86:1630]

[ 17.995298] ack_APIC_irq: vector = d1, irq = ffffffff
[ 182.993828] ack_APIC_irq: vector = e1, irq = ffffffff
[ 202.919691] ack_APIC_irq: vector = 22, irq = ffffffff
[ 484.132006] NMI watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [qemu-system-x86:1586]

[ 15.592032] ack_APIC_irq: vector = d1, irq = ffffffff
[ 304.993490] ack_APIC_irq: vector = e1, irq = ffffffff
[ 315.174755] ack_APIC_irq: vector = 22, irq = ffffffff
[ 360.108007] NMI watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [ksmd:26]

[ 15.026077] ack_APIC_irq: vector = b1, irq = ffffffff
[ 374.828531] ack_APIC_irq: vector = c1, irq = ffffffff
[ 402.965942] ack_APIC_irq: vector = d1, irq = ffffffff
[ 434.540814] ack_APIC_irq: vector = e1, irq = ffffffff
[ 461.820768] ack_APIC_irq: vector = 22, irq = ffffffff
[ 536.120027] NMI watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [qemu-system-x86:4243]

[ 17.889334] ack_APIC_irq: vector = d1, irq = ffffffff
[ 291.888784] ack_APIC_irq: vector = e1, irq = ffffffff
[ 297.824627] ack_APIC_irq: vector = 22, irq = ffffffff
[ 336.960594] ack_APIC_irq: vector = 42, irq = ffffffff
[ 367.012706] ack_APIC_irq: vector = 52, irq = ffffffff
[ 377.025090] ack_APIC_irq: vector = 62, irq = ffffffff
[ 417.088773] ack_APIC_irq: vector = 72, irq = ffffffff
[ 447.136788] ack_APIC_irq: vector = 82, irq = ffffffff
-- stopped it since it wasn't reproducing / I was impatient --

So I'm seeing irq == VECTOR_UNDEFINED in all of these cases. Making
(vector >= 0) didn't seem to expose any additional vectors.

This was only instrumented at the L1 level, I'm also planning on instrumenting
the L0 kernel and seeing if we get these as well.

--chris j arges

--

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index efc3b22..88b3933 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -395,7 +395,7 @@ static inline void apic_write(u32 reg, u32 val)
apic->write(reg, val);
}

-static inline void apic_eoi(void)
+static inline void apic_eoi(int vector)
{
apic->eoi_write(APIC_EOI, APIC_EOI_ACK);
}
@@ -426,7 +426,7 @@ extern void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v));

static inline u32 apic_read(u32 reg) { return 0; }
static inline void apic_write(u32 reg, u32 val) { }
-static inline void apic_eoi(void) { }
+static inline void apic_eoi(int vector) { }
static inline u64 apic_icr_read(void) { return 0; }
static inline void apic_icr_write(u32 low, u32 high) { }
static inline void apic_wait_icr_idle(void) { }
@@ -435,13 +435,26 @@ static inline void apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) {}

#endif /* CONFIG_X86_LOCAL_APIC */

-static inline void ack_APIC_irq(void)
+#include <asm/irq_vectors.h>
+typedef int vector_irq_t[NR_VECTORS];
+DECLARE_PER_CPU(vector_irq_t, vector_irq);
+
+static inline void ack_APIC_irq(int vector)
{
+ /* Is the ISR bit actually set for this vector? */
+ if (vector >= 0) {
+ unsigned v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
+ int irq;
+ v >>= vector & 0x1f;
+ irq = __this_cpu_read(vector_irq[vector]);
+
+ WARN(!(v & 1), "ack_APIC_irq: vector = %0x, irq = %0x\n", vector, irq);
+ }
/*
* ack_APIC_irq() actually gets compiled as a single instruction
* ... yummie.
*/
- apic_eoi();
+ apic_eoi(vector);
}

static inline unsigned default_get_apic_id(unsigned long x)
@@ -639,9 +652,9 @@ static inline void entering_irq(void)
exit_idle();
}

-static inline void entering_ack_irq(void)
+static inline void entering_ack_irq(int vector)
{
- ack_APIC_irq();
+ ack_APIC_irq(vector);
entering_irq();
}

@@ -650,11 +663,11 @@ static inline void exiting_irq(void)
irq_exit();
}

-static inline void exiting_ack_irq(void)
+static inline void exiting_ack_irq(int vector)
{
irq_exit();
/* Ack only at the end to avoid potential reentry */
- ack_APIC_irq();
+ ack_APIC_irq(vector);
}

extern void ioapic_zap_locks(void);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ad3639a..20ef123 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -910,7 +910,7 @@ __visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
* Besides, if we don't timer interrupts ignore the global
* interrupt lock, which is the WrongThing (tm) to do.
*/
- entering_ack_irq();
+ entering_ack_irq(LOCAL_TIMER_VECTOR);
local_apic_timer_interrupt();
exiting_irq();

@@ -929,7 +929,7 @@ __visible void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs)
* Besides, if we don't timer interrupts ignore the global
* interrupt lock, which is the WrongThing (tm) to do.
*/
- entering_ack_irq();
+ entering_ack_irq(LOCAL_TIMER_VECTOR);
trace_local_timer_entry(LOCAL_TIMER_VECTOR);
local_apic_timer_interrupt();
trace_local_timer_exit(LOCAL_TIMER_VECTOR);
@@ -1342,7 +1342,7 @@ void setup_local_APIC(void)
value = apic_read(APIC_ISR + i*0x10);
for (j = 31; j >= 0; j--) {
if (value & (1<<j)) {
- ack_APIC_irq();
+ ack_APIC_irq(-1);
acked++;
}
}
@@ -1868,7 +1868,7 @@ static inline void __smp_spurious_interrupt(u8 vector)
*/
v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
if (v & (1 << (vector & 0x1f)))
- ack_APIC_irq();
+ ack_APIC_irq(vector);

inc_irq_stat(irq_spurious_count);

@@ -1917,7 +1917,7 @@ static inline void __smp_error_interrupt(struct pt_regs *regs)
if (lapic_get_maxlvt() > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
v = apic_read(APIC_ESR);
- ack_APIC_irq();
+ ack_APIC_irq(ERROR_APIC_VECTOR);
atomic_inc(&irq_err_count);

apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x",
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index f4dc246..b74db5a 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1993,7 +1993,7 @@ static void ack_ioapic_level(struct irq_data *data)
* We must acknowledge the irq before we move it or the acknowledge will
* not propagate properly.
*/
- ack_APIC_irq();
+ ack_APIC_irq(i);

/*
* Tail end of clearing remote IRR bit (either by delivering the EOI
@@ -2067,7 +2067,8 @@ static void unmask_lapic_irq(struct irq_data *data)

static void ack_lapic_irq(struct irq_data *data)
{
- ack_APIC_irq();
+ struct irq_cfg *cfg = irqd_cfg(data);
+ ack_APIC_irq(cfg->vector);
}

static struct irq_chip lapic_chip __read_mostly = {
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 6cedd79..30643ff 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -335,9 +335,11 @@ int apic_retrigger_irq(struct irq_data *data)

void apic_ack_edge(struct irq_data *data)
{
- irq_complete_move(irqd_cfg(data));
+ struct irq_cfg *cfg = irqd_cfg(data);
+
+ irq_complete_move(cfg);
irq_move_irq(data);
- ack_APIC_irq();
+ ack_APIC_irq(cfg->vector);
}

/*
@@ -397,7 +399,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
{
unsigned vector, me;

- ack_APIC_irq();
+ ack_APIC_irq(IRQ_MOVE_CLEANUP_VECTOR);
irq_enter();
exit_idle();

diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 1af51b1..89bac1f 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -433,7 +433,7 @@ asmlinkage __visible void smp_thermal_interrupt(struct pt_regs *regs)
{
entering_irq();
__smp_thermal_interrupt();
- exiting_ack_irq();
+ exiting_ack_irq(THERMAL_APIC_VECTOR);
}

asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs)
@@ -442,7 +442,7 @@ asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs)
trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
__smp_thermal_interrupt();
trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
- exiting_ack_irq();
+ exiting_ack_irq(THERMAL_APIC_VECTOR);
}

/* Thermal monitoring depends on APIC, ACPI and clock modulation */
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index 7245980..03068d1 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -28,7 +28,7 @@ asmlinkage __visible void smp_threshold_interrupt(void)
{
entering_irq();
__smp_threshold_interrupt();
- exiting_ack_irq();
+ exiting_ack_irq(THRESHOLD_APIC_VECTOR);
}

asmlinkage __visible void smp_trace_threshold_interrupt(void)
@@ -37,5 +37,5 @@ asmlinkage __visible void smp_trace_threshold_interrupt(void)
trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
__smp_threshold_interrupt();
trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
- exiting_ack_irq();
+ exiting_ack_irq(THRESHOLD_APIC_VECTOR);
}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 67b1cbe..fc2b9a0 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -45,7 +45,8 @@ void ack_bad_irq(unsigned int irq)
* completely.
* But only ack when the APIC is enabled -AK
*/
- ack_APIC_irq();
+ /* This doesn't know the vector. It needs the irq descriptor */
+ ack_APIC_irq(-1);
}

#define irq_stats(x) (&per_cpu(irq_stat, x))
@@ -198,7 +199,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
irq = __this_cpu_read(vector_irq[vector]);

if (!handle_irq(irq, regs)) {
- ack_APIC_irq();
+ ack_APIC_irq(vector);

if (irq != VECTOR_RETRIGGERED) {
pr_emerg_ratelimited("%s: %d.%d No irq handler for vector (irq %d)\n",
@@ -230,7 +231,7 @@ __visible void smp_x86_platform_ipi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);

- entering_ack_irq();
+ entering_ack_irq(X86_PLATFORM_IPI_VECTOR);
__smp_x86_platform_ipi();
exiting_irq();
set_irq_regs(old_regs);
@@ -244,7 +245,7 @@ __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);

- ack_APIC_irq();
+ ack_APIC_irq(POSTED_INTR_VECTOR);

irq_enter();

@@ -262,7 +263,7 @@ __visible void smp_trace_x86_platform_ipi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);

- entering_ack_irq();
+ entering_ack_irq(X86_PLATFORM_IPI_VECTOR);
trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
__smp_x86_platform_ipi();
trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index 15d741d..dfe9263 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -10,10 +10,10 @@
#include <asm/apic.h>
#include <asm/trace/irq_vectors.h>

-static inline void irq_work_entering_irq(void)
+static inline void irq_work_entering_irq(int vector)
{
irq_enter();
- ack_APIC_irq();
+ ack_APIC_irq(vector);
}

static inline void __smp_irq_work_interrupt(void)
@@ -24,14 +24,14 @@ static inline void __smp_irq_work_interrupt(void)

__visible void smp_irq_work_interrupt(struct pt_regs *regs)
{
- irq_work_entering_irq();
+ irq_work_entering_irq(IRQ_WORK_VECTOR);
__smp_irq_work_interrupt();
exiting_irq();
}

__visible void smp_trace_irq_work_interrupt(struct pt_regs *regs)
{
- irq_work_entering_irq();
+ irq_work_entering_irq(IRQ_WORK_VECTOR);
trace_irq_work_entry(IRQ_WORK_VECTOR);
__smp_irq_work_interrupt();
trace_irq_work_exit(IRQ_WORK_VECTOR);
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index be8e1bd..44b74b2 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -170,7 +170,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)

asmlinkage __visible void smp_reboot_interrupt(void)
{
- ack_APIC_irq();
+ ack_APIC_irq(REBOOT_VECTOR);
irq_enter();
stop_this_cpu(NULL);
irq_exit();
@@ -258,16 +258,16 @@ static inline void __smp_reschedule_interrupt(void)

__visible void smp_reschedule_interrupt(struct pt_regs *regs)
{
- ack_APIC_irq();
+ ack_APIC_irq(RESCHEDULE_VECTOR);
__smp_reschedule_interrupt();
/*
* KVM uses this interrupt to force a cpu out of guest mode
*/
}

-static inline void smp_entering_irq(void)
+static inline void smp_entering_irq(int vector)
{
- ack_APIC_irq();
+ ack_APIC_irq(vector);
irq_enter();
}

@@ -279,7 +279,7 @@ __visible void smp_trace_reschedule_interrupt(struct pt_regs *regs)
* scheduler_ipi(). This is OK, since those functions are allowed
* to nest.
*/
- smp_entering_irq();
+ smp_entering_irq(RESCHEDULE_VECTOR);
trace_reschedule_entry(RESCHEDULE_VECTOR);
__smp_reschedule_interrupt();
trace_reschedule_exit(RESCHEDULE_VECTOR);
@@ -297,14 +297,14 @@ static inline void __smp_call_function_interrupt(void)

__visible void smp_call_function_interrupt(struct pt_regs *regs)
{
- smp_entering_irq();
+ smp_entering_irq(CALL_FUNCTION_VECTOR);
__smp_call_function_interrupt();
exiting_irq();
}

__visible void smp_trace_call_function_interrupt(struct pt_regs *regs)
{
- smp_entering_irq();
+ smp_entering_irq(CALL_FUNCTION_VECTOR);
trace_call_function_entry(CALL_FUNCTION_VECTOR);
__smp_call_function_interrupt();
trace_call_function_exit(CALL_FUNCTION_VECTOR);
@@ -319,14 +319,14 @@ static inline void __smp_call_function_single_interrupt(void)

__visible void smp_call_function_single_interrupt(struct pt_regs *regs)
{
- smp_entering_irq();
+ smp_entering_irq(CALL_FUNCTION_SINGLE_VECTOR);
__smp_call_function_single_interrupt();
exiting_irq();
}

__visible void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
{
- smp_entering_irq();
+ smp_entering_irq(CALL_FUNCTION_SINGLE_VECTOR);
trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
__smp_call_function_single_interrupt();
trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 390079e..896c4b3 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -334,12 +334,14 @@ void panic_if_irq_remap(const char *msg)

static void ir_ack_apic_edge(struct irq_data *data)
{
- ack_APIC_irq();
+ struct irq_cfg *cfg = irqd_cfg(data);
+ ack_APIC_irq(cfg->vector);
}

static void ir_ack_apic_level(struct irq_data *data)
{
- ack_APIC_irq();
+ struct irq_cfg *cfg = irqd_cfg(data);
+ ack_APIC_irq(cfg->vector);
eoi_ioapic_irq(data->irq, irqd_cfg(data));
}


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/