[RFC patch 5/5] genirq: make irq threading robust

From: Thomas Gleixner
Date: Wed Oct 01 2008 - 19:04:54 EST


To make sure that a crashed irq thread does not cause more trouble
when the irq code tries to wake up a thread that is gone, or when
device code calls free_irq() and tries to kthread_stop() the dead
thread, we plug a pointer to irqaction into task_struct, which is
evaluated in do_exit(). When the thread crashes, the do_exit() code
marks the thread as DIED in irqaction->flags to prevent further
wakeups from the interrupt handler code.

On thread creation we get a reference to task_struct so it stays
around until the free_irq code releases it again.

The handling of a crashed irq handler thread is slightly racy, but
we do not want to have additional locking in the hard interrupt code
path. The worst things which can happen are a warning that we tried to
wake up a dead task and a hung kthread_stop() in free_irq(). I'm not
worried about that at all, as removing a module which had a crashed
interrupt handler is critical anyway.

The main purpose of this is to keep the system alive w/o the affected
device working.

Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Reviewed-by: Ingo Molnar <mingo@xxxxxxx>
---
include/linux/interrupt.h | 3 +++
include/linux/sched.h | 1 +
kernel/exit.c | 2 ++
kernel/irq/handle.c | 13 +++++++++++--
kernel/irq/manage.c | 44 +++++++++++++++++++++++++++++++++++++++++---
5 files changed, 58 insertions(+), 5 deletions(-)

Index: linux-2.6-tip/include/linux/interrupt.h
===================================================================
--- linux-2.6-tip.orig/include/linux/interrupt.h
+++ linux-2.6-tip/include/linux/interrupt.h
@@ -61,6 +61,7 @@
#define IRQF_THREADED 0x00002000
#define IRQF_RUNTHREAD 0x00004000
#define IRQF_WARNED_THREADED 0x00008000
+#define IRQF_THREAD_DIED 0x00010000

typedef irqreturn_t (*irq_handler_t)(int, void *);

@@ -114,6 +115,8 @@ static inline int irq_thread_should_run(
return test_and_clear_bit(IRQF_RUNTHREAD, &action->flags);
}

+extern void exit_irq_thread(struct task_struct *tsk);
+
/*
* On lockdep we dont want to enable hardirqs in hardirq
* context. Use local_irq_enable_in_hardirq() to annotate
Index: linux-2.6-tip/include/linux/sched.h
===================================================================
--- linux-2.6-tip.orig/include/linux/sched.h
+++ linux-2.6-tip/include/linux/sched.h
@@ -1301,6 +1301,7 @@ struct task_struct {
int latency_record_count;
struct latency_record latency_record[LT_SAVECOUNT];
#endif
+ struct irqaction *irqaction;
};

/*
Index: linux-2.6-tip/kernel/exit.c
===================================================================
--- linux-2.6-tip.orig/kernel/exit.c
+++ linux-2.6-tip/kernel/exit.c
@@ -1030,6 +1030,8 @@ NORET_TYPE void do_exit(long code)
schedule();
}

+ exit_irq_thread(tsk);
+
exit_signals(tsk); /* sets PF_EXITING */
/*
* tsk->flags are checked in the futex code to protect against
Index: linux-2.6-tip/kernel/irq/handle.c
===================================================================
--- linux-2.6-tip.orig/kernel/irq/handle.c
+++ linux-2.6-tip/kernel/irq/handle.c
@@ -161,8 +161,17 @@ irqreturn_t handle_IRQ_event(unsigned in
set_bit(IRQF_WARNED_THREADED, &action->flags);

case IRQ_WAKE_THREAD:
- set_bit(IRQF_RUNTHREAD, &action->flags);
- wake_up_process(action->thread);
+ /*
+ * In case the thread crashed and was killed
+ * we just pretend that we handled the
+ * interrupt. The quick check handler has
+ * disabled the device interrupt, so no irq
+ * storm is lurking.
+ */
+ if (likely(!(action->flags & IRQF_THREAD_DIED))) {
+ set_bit(IRQF_RUNTHREAD, &action->flags);
+ wake_up_process(action->thread);
+ }
/*
* Set it to handled so the spurious check
* does not trigger.
Index: linux-2.6-tip/kernel/irq/manage.c
===================================================================
--- linux-2.6-tip.orig/kernel/irq/manage.c
+++ linux-2.6-tip/kernel/irq/manage.c
@@ -338,6 +338,8 @@ static int irq_thread(void *data)
{
struct irqaction *action = data;

+ current->irqaction = action;
+
set_current_state(TASK_INTERRUPTIBLE);

while (!kthread_should_stop()) {
@@ -351,11 +353,36 @@ static int irq_thread(void *data)
action->handler(action->irq, action->dev_id);
set_current_state(TASK_INTERRUPTIBLE);
}
+ /*
+ * Clear irqaction. Otherwise exit_irq_thread() would make a
+ * fuss about an active irq thread going into nirvana.
+ */
+ current->irqaction = NULL;
__set_current_state(TASK_RUNNING);
return 0;
}

/*
+ * Called from do_exit()
+ */
+void exit_irq_thread(struct task_struct *tsk)
+{
+ if (!tsk->irqaction)
+ return;
+
+ printk(KERN_ERR
+ "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
+ tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq);
+
+ /*
+ * Set the THREAD DIED flag to prevent further wakeups of the
+ * soon to be gone threaded handler.
+ */
+ set_bit(IRQF_THREAD_DIED, &tsk->irqaction->flags);
+ tsk->irqaction = NULL;
+}
+
+/*
* Internal function to register an irqaction - typically used to
* allocate special interrupts that are part of the architecture.
*/
@@ -439,7 +466,12 @@ int setup_irq(unsigned int irq, struct i
new->name);
if (IS_ERR(t))
return PTR_ERR(t);
-
+ /*
+ * We keep the reference to the task struct even if
+ * the thread dies, so that the interrupt code never
+ * references an already freed task_struct.
+ */
+ get_task_struct(t);
new->thread = t;
}

@@ -565,8 +597,14 @@ void free_irq(unsigned int irq, void *de
if (desc->chip->release)
desc->chip->release(irq, dev_id);
#endif
- if (action->thread)
- kthread_stop(action->thread);
+ if (action->thread) {
+ struct task_struct *t = action->thread;
+
+ action->thread = NULL;
+ if (likely(!(action->flags & IRQF_THREAD_DIED)))
+ kthread_stop(t);
+ put_task_struct(t);
+ }

if (!desc->action) {
desc->status |= IRQ_DISABLED;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/