Re: [patch] SMP race fix [was Re: SMP lockup & 3c509 on 2.2.x [aka.

Andrea Arcangeli (andrea@e-mind.com)
Fri, 7 May 1999 01:01:39 +0200 (CEST)


On Fri, 7 May 1999, Taneli Vähäkangas wrote:

>I hand-applied most parts of it, didn't touch ioapic-level functions or
>visws file, and also didn't apply the part about locking against when
>called from the interrupt handler. I had to add disable_irq_nosync to

Which `patch' are you using??

andrea@laser:/usr/src/linux$ patch -v
patch 2.5
Copyright 1988 Larry Wall
Copyright 1997 Free Software Foundation, Inc.

This program comes with NO WARRANTY, to the extent permitted by law.
You may redistribute copies of this program
under the terms of the GNU General Public License.
For more information about these matters, see the file named COPYING.

written by Larry Wall with lots o' patches by Paul Eggert

The one above works fine with my patches.

>the list of exported functions in i386_ksyms.c, since my 3c509 only

Damn I forgot to export to modules disable_irq_nosync()!

Here a new global _complete_ patch against 2.2.7:

------------------------------------------------------------------------
Index: drivers/net/3c509.c
===================================================================
RCS file: /var/cvs/linux/drivers/net/3c509.c,v
retrieving revision 1.1.1.2
retrieving revision 1.1.2.3
diff -u -r1.1.1.2 -r1.1.2.3
--- 3c509.c 1999/03/24 00:45:40 1.1.1.2
+++ linux/drivers/net/3c509.c 1999/05/05 19:54:48 1.1.2.3
@@ -568,7 +568,7 @@
*/

#ifdef __SMP__
- disable_irq(dev->irq);
+ disable_irq_nosync(dev->irq);
spin_lock(&lp->lock);
#endif

Index: drivers/net/8390.c
===================================================================
RCS file: /var/cvs/linux/drivers/net/8390.c,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 8390.c
--- 8390.c 1999/03/09 00:52:48 1.1.1.2
+++ linux/drivers/net/8390.c 1999/05/05 16:19:33
@@ -257,8 +257,7 @@

/* Ugly but a reset can be slow, yet must be protected */

- disable_irq(dev->irq);
- synchronize_irq();
+ disable_irq_nosync(dev->irq);
spin_lock(&ei_local->page_lock);

/* Try to restart the card. Perhaps the user has fixed something. */
@@ -286,8 +285,7 @@
* Slow phase with lock held.
*/

- disable_irq(dev->irq);
- synchronize_irq();
+ disable_irq_nosync(dev->irq);

spin_lock(&ei_local->page_lock);

Index: include/asm/irq.h
===================================================================
RCS file: /var/cvs/linux/include/asm-i386/irq.h,v
retrieving revision 1.1.1.2
retrieving revision 1.1.2.3
diff -u -r1.1.1.2 -r1.1.2.3
--- irq.h 1999/02/20 15:41:37 1.1.1.2
+++ linux/include/asm-i386/irq.h 1999/05/05 12:31:37 1.1.2.3
@@ -29,6 +29,7 @@
}

extern void disable_irq(unsigned int);
+extern void disable_irq_nosync(unsigned int);
extern void enable_irq(unsigned int);

#endif /* _ASM_IRQ_H */
Index: arch/i386/kernel/io_apic.c
===================================================================
RCS file: /var/cvs/linux/arch/i386/kernel/io_apic.c,v
retrieving revision 1.1.1.5
retrieving revision 1.1.2.9
diff -u -r1.1.1.5 -r1.1.2.9
--- io_apic.c 1999/04/17 14:21:26 1.1.1.5
+++ linux/arch/i386/kernel/io_apic.c 1999/05/05 12:20:24 1.1.2.9
@@ -1060,8 +1060,9 @@
if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
action = desc->action;
status &= ~IRQ_PENDING;
+ status |= IRQ_INPROGRESS;
}
- desc->status = status | IRQ_INPROGRESS;
+ desc->status = status;
spin_unlock(&irq_controller_lock);

/*
@@ -1112,8 +1113,9 @@
action = NULL;
if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
action = desc->action;
+ status |= IRQ_INPROGRESS;
}
- desc->status = status | IRQ_INPROGRESS;
+ desc->status = status;

ack_APIC_irq();
spin_unlock(&irq_controller_lock);
Index: arch/i386/kernel/irq.c
===================================================================
RCS file: /var/cvs/linux/arch/i386/kernel/irq.c,v
retrieving revision 1.1.1.4
diff -u -r1.1.1.4 irq.c
--- irq.c 1999/04/17 14:21:26 1.1.1.4
+++ linux/arch/i386/kernel/irq.c 1999/05/06 12:42:55
@@ -242,8 +242,11 @@
status = desc->status & ~IRQ_REPLAY;
action = NULL;
if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))
+ {
action = desc->action;
- desc->status = status | IRQ_INPROGRESS;
+ status |= IRQ_INPROGRESS;
+ }
+ desc->status = status;
}
spin_unlock(&irq_controller_lock);

@@ -747,7 +759,7 @@
* hardware disable after having gotten the irq
* controller lock.
*/
-void disable_irq(unsigned int irq)
+static inline void __disable_irq(unsigned int irq)
{
unsigned long flags;

@@ -757,11 +769,28 @@
irq_desc[irq].handler->disable(irq);
}
spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+
+void disable_irq(unsigned int irq)
+{
+ __disable_irq(irq);
+
+ if (local_irq_count[smp_processor_id()])
+ goto from_irq;

- if (irq_desc[irq].status & IRQ_INPROGRESS)
- synchronize_irq();
+ while (irq_desc[irq].status & IRQ_INPROGRESS)
+ barrier();
+ return;
+
+ from_irq:
+ printk(KERN_ERR "disable_irq: can't synchronize irq %d!\n", irq);
}

+void disable_irq_nosync(unsigned int irq)
+{
+ __disable_irq(irq);
+}
+
void enable_irq(unsigned int irq)
{
unsigned long flags;
@@ -769,7 +798,7 @@
spin_lock_irqsave(&irq_controller_lock, flags);
switch (irq_desc[irq].depth) {
case 1:
- irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS);
+ irq_desc[irq].status &= ~IRQ_DISABLED;
irq_desc[irq].handler->enable(irq);
/* fall throught */
default:
@@ -951,7 +980,7 @@
for (i = NR_IRQS-1; i > 0; i--) {
if (!irq_desc[i].action) {
unsigned int status = irq_desc[i].status | IRQ_AUTODETECT;
- irq_desc[i].status = status & ~IRQ_INPROGRESS;
+ irq_desc[i].status = status & ~(IRQ_INPROGRESS|IRQ_DISABLED);
irq_desc[i].handler->startup(i);
}
}
@@ -976,6 +1005,7 @@
/* It triggered already - consider it spurious. */
if (status & IRQ_INPROGRESS) {
irq_desc[i].status = status & ~IRQ_AUTODETECT;
+ irq_desc[i].status |= IRQ_DISABLED;
irq_desc[i].handler->shutdown(i);
}
}
@@ -1006,6 +1036,7 @@
nr_irqs++;
}
irq_desc[i].status = status & ~IRQ_AUTODETECT;
+ irq_desc[i].status |= IRQ_DISABLED;
irq_desc[i].handler->shutdown(i);
}
spin_unlock_irq(&irq_controller_lock);
Index: arch/i386/kernel/visws_apic.c
===================================================================
RCS file: /var/cvs/linux/arch/i386/kernel/visws_apic.c,v
retrieving revision 1.1.1.1
retrieving revision 1.1.2.2
diff -u -r1.1.1.1 -r1.1.2.2
--- visws_apic.c 1999/01/23 16:25:26 1.1.1.1
+++ linux/arch/i386/kernel/visws_apic.c 1999/05/05 12:20:24 1.1.2.2
@@ -204,8 +204,11 @@
status = desc->status & ~IRQ_REPLAY;
action = NULL;
if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))
+ {
action = desc->action;
- desc->status = status | IRQ_INPROGRESS;
+ status |= IRQ_INPROGRESS;
+ }
+ desc->status = status;
}
spin_unlock(&irq_controller_lock);

Index: arch/i386/kernel//i386_ksyms.c
===================================================================
RCS file: /var/cvs/linux/arch/i386/kernel/i386_ksyms.c,v
retrieving revision 1.1.2.4
diff -u -r1.1.2.4 i386_ksyms.c
--- i386_ksyms.c 1999/03/24 01:24:01 1.1.2.4
+++ linux/arch/i386/kernel/i386_ksyms.c 1999/05/06 22:54:23
@@ -39,6 +39,7 @@
EXPORT_SYMBOL(local_irq_count);
EXPORT_SYMBOL(enable_irq);
EXPORT_SYMBOL(disable_irq);
+EXPORT_SYMBOL(disable_irq_nosync);
EXPORT_SYMBOL(kernel_thread);

EXPORT_SYMBOL_NOVERS(__down_failed);
-------------------------------------------------------------------

Even if it's not against clean 2.2.7 it apply cleanly here with my
`patch`:

andrea@laser:/tmp$ patch -p0 <~/kernel/SMP-irq-2.2.7-B
patching file `linux/drivers/net/3c509.c'
patching file `linux/drivers/net/8390.c'
patching file `linux/include/asm-i386/irq.h'
patching file `linux/arch/i386/kernel/io_apic.c'
patching file `linux/arch/i386/kernel/irq.c'
patching file `linux/arch/i386/kernel/visws_apic.c'
patching file `linux/arch/i386/kernel/i386_ksyms.c'

no rejects.

>works as a modules. After that, it worked very well. No problems
>flood pinging (previously it would have locked within five seconds).

Cool! ;))

>Functionality-wise the patch works miracles. Have you considered Linus'

;)

>suggestion about implementing IRQ_BUSY flag? Would that be a cleaner
>approach?

It's not needed. My IRQ_INPROGRESS design is the cleaner one (to my eyes
at least ;)

Andrea Arcangeli

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/