[patch] io-apic-2.1.90-B

MOLNAR Ingo (mingo@chiara.csoma.elte.hu)
Wed, 11 Mar 1998 18:49:50 +0100 (CET)


there was a subtle bug in the IO-APIC code (in enable_ioapic_irq()), which
led to 'lost networking interface' bugs every few million IRQs or so.
The patch also includes a cleaner solution (an extra IPI to simulate
hardware interrupts), but it's #if 0-ed out currently because it still
caused lost IRQs on my system. I'm wondering whether this is an APIC bug;
could anyone with a newer SMP system change that #if 0 to #if 1 and watch
whether there are any problems?
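
in short, the #if 0 path in enable_ioapic_irq() boils down to this (a
simplified sketch using the patch's irq_events[], ipi_pending[],
send_IPI() and IO_APIC_VECTOR() helpers; the real hunk is in the patch
below):

	/*
	 * If the IRQ still has pending, unhandled events when it gets
	 * re-enabled, don't replay them by hand: mark an IPI as in flight,
	 * consume one pending event and send the IRQ's own vector back to
	 * this CPU, so the normal interrupt entry path handles it just
	 * like a real hardware interrupt.
	 */
	if (!disabled_irq[irq] && irq_events[irq] && !ipi_pending[irq]) {
		ipi_pending[irq] = 1;
		--irq_events[irq];
		send_IPI(cpu, IO_APIC_VECTOR(irq));
	}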

the patch was tested+booted on vanilla pre1-2.1.90. It also cleans up some
code in io_apic.c and smp.c.

-- mingo

--- linux/arch/i386/kernel/irq.c.orig Sat Mar 14 01:32:47 1998
+++ linux/arch/i386/kernel/irq.c Tue Mar 17 05:48:44 1998
@@ -70,6 +70,7 @@

static unsigned int irq_events [NR_IRQS] = { -1, };
static int disabled_irq [NR_IRQS] = { 0, };
+static int ipi_pending [NR_IRQS] = { 0, };

/*
* Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
@@ -489,12 +490,8 @@
*/
void synchronize_bh(void)
{
- if (atomic_read(&global_bh_count)) {
- int cpu = smp_processor_id();
- if (!local_irq_count[cpu] && !local_bh_count[cpu]) {
+ if (atomic_read(&global_bh_count) && !in_interrupt())
wait_on_bh();
- }
- }
}

/*
@@ -672,8 +669,8 @@
#ifdef __SMP__
void enable_ioapic_irq (unsigned int irq)
{
- unsigned long flags;
- int cpu = smp_processor_id(), should_handle_irq;
+ unsigned long flags, should_handle_irq;
+ int cpu = smp_processor_id();

spin_lock_irqsave(&irq_controller_lock, flags);
if (disabled_irq[irq])
@@ -682,18 +679,32 @@
spin_unlock_irqrestore(&irq_controller_lock, flags);
return;
}
+#if 0
/*
* In the SMP+IOAPIC case it might happen that there are an unspecified
- * number of pending IRQ events unhandled. We protect against multiple
- * enable_irq()'s executing them via disable_irq[irq]++
+ * number of pending IRQ events unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way as thus we dont have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
*/
if (!disabled_irq[irq] && irq_events[irq]) {
+ if (!ipi_pending[irq]) {
+ ipi_pending[irq] = 1;
+ --irq_events[irq];
+ send_IPI(cpu,IO_APIC_VECTOR(irq));
+ }
+ }
+ spin_unlock_irqrestore(&irq_controller_lock, flags);
+#else
+ if (!disabled_irq[irq] && irq_events[irq]) {
struct pt_regs regs; /* FIXME: these are fake currently */

disabled_irq[irq]++;
+ hardirq_enter(cpu);
spin_unlock(&irq_controller_lock);
+
release_irqlock(cpu);
- irq_enter(cpu, irq);
+ while (test_bit(0,&global_irq_lock)) mb();
again:
handle_IRQ_event(irq, &regs);

@@ -713,6 +724,7 @@
__restore_flags(flags);
} else
spin_unlock_irqrestore(&irq_controller_lock, flags);
+#endif
}
#endif

@@ -775,15 +787,16 @@
ack_APIC_irq();

spin_lock(&irq_controller_lock);
+ if (ipi_pending[irq])
+ ipi_pending[irq] = 0;

if (!irq_events[irq]++ && !disabled_irq[irq])
should_handle_irq = 1;
-
+ hardirq_enter(cpu);
spin_unlock(&irq_controller_lock);

- irq_enter(cpu, irq);
-
if (should_handle_irq) {
+ while (test_bit(0,&global_irq_lock)) mb();
again:
handle_IRQ_event(irq, regs);

@@ -797,7 +810,8 @@
goto again;
}

- irq_exit(cpu, irq);
+ hardirq_exit(cpu);
+ release_irqlock(cpu);
}
#endif

@@ -1034,7 +1048,7 @@
* 0x80, because int 0x80 is hm, kindof importantish ;)
*/
for (i = 0; i < NR_IRQS ; i++)
- if (IO_APIC_GATE_OFFSET+(i<<3) <= 0xfe) /* HACK */ {
+ if (IO_APIC_VECTOR(i) <= 0xfe) /* HACK */ {
if (IO_APIC_IRQ(i)) {
irq_handles[i] = &ioapic_irq_type;
/*
@@ -1071,8 +1085,8 @@
#ifdef __SMP__

for (i = 0; i < NR_IRQS ; i++)
- if (IO_APIC_GATE_OFFSET+(i<<3) <= 0xfe) /* hack -- mingo */
- set_intr_gate(IO_APIC_GATE_OFFSET+(i<<3),interrupt[i]);
+ if (IO_APIC_VECTOR(i) <= 0xfe) /* hack -- mingo */
+ set_intr_gate(IO_APIC_VECTOR(i),interrupt[i]);

/*
* The reschedule interrupt slowly changes it's functionality,
--- linux/arch/i386/kernel/smp.c.orig Wed Mar 4 15:16:04 1998
+++ linux/arch/i386/kernel/smp.c Tue Mar 17 05:48:44 1998
@@ -1108,23 +1108,59 @@
setup_IO_APIC();
}

+
+void send_IPI (int dest, int vector)
+{
+ unsigned long cfg;
+ unsigned long flags;
+
+ __save_flags(flags);
+ __cli();
+
+ /*
+ * prepare target chip field
+ */
+
+ cfg = apic_read(APIC_ICR2) & 0x00FFFFFF;
+ apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(dest));
+
+ cfg = apic_read(APIC_ICR);
+ cfg &= ~0xFDFFF;
+ cfg |= APIC_DEST_FIELD|APIC_DEST_DM_FIXED|vector;
+ cfg |= dest;
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+
+ apic_write(APIC_ICR, cfg);
+ __restore_flags(flags);
+}
+
+void funny (void)
+{
+ send_IPI(APIC_DEST_ALLBUT,0x30 /*IO_APIC_VECTOR(11)*/);
+ for(;;)__cli();
+}
+
/*
- * A non wait message cannot pass data or cpu source info. This current setup
- * is only safe because the kernel lock owner is the only person who can send a message.
+ * A non wait message cannot pass data or cpu source info. This current setup
+ * is only safe because the kernel lock owner is the only person who can send
+ * a message.
*
- * Wrapping this whole block in a spinlock is not the safe answer either. A processor may
- * get stuck with irq's off waiting to send a message and thus not replying to the person
- * spinning for a reply....
+ * Wrapping this whole block in a spinlock is not the safe answer either. A
+ * processor may get stuck with irq's off waiting to send a message and thus
+ * not replying to the person spinning for a reply....
*
- * In the end flush tlb ought to be the NMI and a very very short function (to avoid the old
- * IDE disk problems), and other messages sent with IRQ's enabled in a civilised fashion. That
- * will also boost performance.
+ * In the end flush tlb ought to be the NMI and a very very short function
+ * (to avoid the old IDE disk problems), and other messages sent with IRQ's
+ * enabled in a civilised fashion. That will also boost performance.
*/

void smp_message_pass(int target, int msg, unsigned long data, int wait)
{
- unsigned long flags;
unsigned long cfg;
+ unsigned long dest = 0;
unsigned long target_map;
int p=smp_processor_id();
int irq;
@@ -1166,11 +1202,11 @@
}

/*
- * Sanity check we don't re-enter this across CPU's. Only the kernel
- * lock holder may send messages. For a STOP_CPU we are bringing the
- * entire box to the fastest halt we can.. A reschedule carries
- * no data and can occur during a flush.. guess what panic
- * I got to notice this bug...
+ * Sanity check we don't re-enter this across CPU's. Only the kernel
+ * lock holder may send messages. For a STOP_CPU we are bringing the
+ * entire box to the fastest halt we can.. A reschedule carries
+ * no data and can occur during a flush.. guess what panic
+ * I got to notice this bug...
*/

/*
@@ -1183,11 +1219,11 @@
p, msg, target);*/

/*
- * Wait for the APIC to become ready - this should never occur. Its
- * a debugging check really.
+ * Wait for the APIC to become ready - this should never occur. Its
+ * a debugging check really.
*/

- while(ct<1000)
+ while (ct<1000)
{
cfg=apic_read(APIC_ICR);
if(!(cfg&(1<<12)))
@@ -1204,49 +1240,32 @@
printk("CPU #%d: previous IPI still not cleared after 10mS\n", p);

/*
- * Program the APIC to deliver the IPI
- */
-
- __save_flags(flags);
- __cli();
- cfg=apic_read(APIC_ICR2);
- cfg&=0x00FFFFFF;
- apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(target)); /* Target chip */
- cfg=apic_read(APIC_ICR);
- cfg&=~0xFDFFF; /* Clear bits */
- cfg|=APIC_DEST_FIELD|APIC_DEST_DM_FIXED|irq; /* Send an IRQ 13 */
-
- /*
* Set the target requirement
*/

if(target==MSG_ALL_BUT_SELF)
{
- cfg|=APIC_DEST_ALLBUT;
+ dest=APIC_DEST_ALLBUT;
target_map=cpu_present_map;
cpu_callin_map[0]=(1<<p);
}
else if(target==MSG_ALL)
{
- cfg|=APIC_DEST_ALLINC;
+ dest=APIC_DEST_ALLINC;
target_map=cpu_present_map;
cpu_callin_map[0]=0;
}
else
- {
- target_map=(1<<target);
- cpu_callin_map[0]=0;
- }
+ panic("huh?");

/*
- * Send the IPI. The write to APIC_ICR fires this off.
+ * Program the APIC to deliver the IPI
*/
-
- apic_write(APIC_ICR, cfg);
- __restore_flags(flags);
+
+ send_IPI(dest,irq);

/*
- * Spin waiting for completion
+ * Spin waiting for completion
*/

switch(wait)
@@ -1443,6 +1462,7 @@
int cpu = smp_processor_id();

ack_APIC_irq();
+ for (;;) __cli();
/*
* This looks silly, but we actually do need to wait
* for the global interrupt lock.
@@ -1694,7 +1714,9 @@
/*
* We ACK the APIC, just in case there is something pending.
*/
+
ack_APIC_irq ();
+

restore_flags(flags);
}
--- linux/arch/i386/kernel/irq.h.orig Sat Mar 14 01:32:47 1998
+++ linux/arch/i386/kernel/irq.h Tue Mar 17 05:48:44 1998
@@ -1,14 +1,14 @@
#ifndef __irq_h
#define __irq_h

+#include <linux/config.h>
+
/*
* Various low-level irq details needed by irq.c and smp.c
*
* Interrupt entry/exit code at both C and assembly level
*/

-#define IO_APIC_GATE_OFFSET 0x51
-
void mask_irq(unsigned int irq);
void unmask_irq(unsigned int irq);
void enable_IO_APIC_irq (unsigned int irq);
@@ -19,9 +19,15 @@
void init_IO_APIC_traps(void);
int IO_APIC_get_PCI_irq_vector (int bus, int slot, int fn);
void make_8259A_irq (unsigned int irq);
+void send_IPI (int dest, int vector);

extern unsigned int io_apic_irqs;

+extern inline int IO_APIC_VECTOR (int irq)
+{
+ return (0x51+(irq<<3));
+}
+
#define MAX_IRQ_SOURCES 128
#define MAX_MP_BUSSES 32
enum mp_bustype {
@@ -37,6 +43,7 @@
* Protects both the 8259 and the
* IO-APIC
*/
+

#ifdef __SMP__

--- linux/arch/i386/kernel/io_apic.c.orig Tue Mar 17 05:48:57 1998
+++ linux/arch/i386/kernel/io_apic.c Tue Mar 17 05:49:21 1998
@@ -271,7 +271,7 @@
if (!IO_APIC_IRQ(irq))
continue;

- entry.vector = IO_APIC_GATE_OFFSET + (irq<<3);
+ entry.vector = IO_APIC_VECTOR(irq);

/*
* Determine IRQ line polarity (high active or low active):
@@ -383,7 +383,7 @@
entry.mask = 1; /* unmask IRQ now */
entry.dest.logical.logical_dest = 0xff; /* all CPUs */

- entry.vector = IO_APIC_GATE_OFFSET + (irq<<3);
+ entry.vector = IO_APIC_VECTOR(irq);

entry.polarity=0;
entry.trigger=0;
