[PATCH] powerpc/irq: Run softirqs off the top of the irq stack

From: Benjamin Herrenschmidt
Date: Mon Sep 23 2013 - 00:37:37 EST


Nowadays, irq_exit() calls __do_softirq() pretty much directly
instead of calling do_softirq() which switches to the decicated
softirq stack.

This has lead to observed stack overflows on powerpc since we call
irq_enter() and irq_exit() outside of the scope that switches to
the irq stack.

This fixes it by moving the stack switching up a level, making
irq_enter() and irq_exit() run off the irq stack.

Signed-off-by: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx>
---

This is the "band aid" discussed so far for the stack overflow
problem for powerpc. I assume sparc and i386 at least need
something similar (I had a quick look at ARM and it doesn't
seem to have irq stacks at all).

Unless objection in the next day or so, I intend to send that to
Linus and to -stable ASAP.

arch/powerpc/include/asm/irq.h | 4 +-
arch/powerpc/kernel/irq.c | 99 ++++++++++++++++++++++--------------------
arch/powerpc/kernel/misc_32.S | 9 ++--
arch/powerpc/kernel/misc_64.S | 10 ++---
4 files changed, 62 insertions(+), 60 deletions(-)

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 0e40843..41f13ce 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -69,9 +69,9 @@ extern struct thread_info *softirq_ctx[NR_CPUS];

extern void irq_ctx_init(void);
extern void call_do_softirq(struct thread_info *tp);
-extern int call_handle_irq(int irq, void *p1,
- struct thread_info *tp, void *func);
+extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp);
extern void do_IRQ(struct pt_regs *regs);
+extern void __do_irq(struct pt_regs *regs);

int irq_choose_cpu(const struct cpumask *mask);

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index c69440c..0c9646f 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -443,46 +443,7 @@ void migrate_irqs(void)

static inline void handle_one_irq(unsigned int irq)
{
- struct thread_info *curtp, *irqtp;
- unsigned long saved_sp_limit;
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
- if (!desc)
- return;
-
- /* Switch to the irq stack to handle this */
- curtp = current_thread_info();
- irqtp = hardirq_ctx[smp_processor_id()];
-
- if (curtp == irqtp) {
- /* We're already on the irq stack, just handle it */
- desc->handle_irq(irq, desc);
- return;
- }
-
- saved_sp_limit = current->thread.ksp_limit;
-
- irqtp->task = curtp->task;
- irqtp->flags = 0;
-
- /* Copy the softirq bits in preempt_count so that the
- * softirq checks work in the hardirq context. */
- irqtp->preempt_count = (irqtp->preempt_count & ~SOFTIRQ_MASK) |
- (curtp->preempt_count & SOFTIRQ_MASK);

- current->thread.ksp_limit = (unsigned long)irqtp +
- _ALIGN_UP(sizeof(struct thread_info), 16);
-
- call_handle_irq(irq, desc, irqtp, desc->handle_irq);
- current->thread.ksp_limit = saved_sp_limit;
- irqtp->task = NULL;
-
- /* Set any flag that may have been set on the
- * alternate stack
- */
- if (irqtp->flags)
- set_bits(irqtp->flags, &curtp->flags);
}

static inline void check_stack_overflow(void)
@@ -501,9 +462,9 @@ static inline void check_stack_overflow(void)
#endif
}

-void do_IRQ(struct pt_regs *regs)
+void __do_irq(struct pt_regs *regs)
{
- struct pt_regs *old_regs = set_irq_regs(regs);
+ struct irq_desc *desc;
unsigned int irq;

irq_enter();
@@ -519,18 +480,64 @@ void do_IRQ(struct pt_regs *regs)
*/
irq = ppc_md.get_irq();

- /* We can hard enable interrupts now */
+ /* We can hard enable interrupts now to allow perf interrupts */
may_hard_irq_enable();

/* And finally process it */
- if (irq != NO_IRQ)
- handle_one_irq(irq);
- else
+ if (unlikely(irq == NO_IRQ))
__get_cpu_var(irq_stat).spurious_irqs++;
+ else {
+ desc = irq_to_desc(irq);
+ if (likely(desc))
+ desc->handle_irq(irq, desc);
+ }

trace_irq_exit(regs);

irq_exit();
+}
+
+void do_IRQ(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ struct thread_info *curtp, *irqtp;
+ unsigned long saved_sp_limit;
+
+ /* Switch to the irq stack to handle this */
+ curtp = current_thread_info();
+ irqtp = hardirq_ctx[raw_smp_processor_id()];
+
+ /* Already there ? */
+ if (unlikely(curtp == irqtp)) {
+ __do_irq(regs);
+ set_irq_regs(old_regs);
+ return;
+ }
+
+ /* Adjust the stack limit */
+ saved_sp_limit = current->thread.ksp_limit;
+ current->thread.ksp_limit = (unsigned long)irqtp +
+ _ALIGN_UP(sizeof(struct thread_info), 16);
+
+
+ /* Prepare the thread_info in the irq stack */
+ irqtp->task = curtp->task;
+ irqtp->flags = 0;
+
+ /* Copy the preempt_count so that the [soft]irq checks work. */
+ irqtp->preempt_count = curtp->preempt_count;
+
+ /* Switch stack and call */
+ call_do_irq(regs, irqtp);
+
+ /* Restore stack limit */
+ current->thread.ksp_limit = saved_sp_limit;
+ irqtp->task = NULL;
+
+ /* Copy back updates to the thread_info */
+ if (irqtp->flags)
+ set_bits(irqtp->flags, &curtp->flags);
+
set_irq_regs(old_regs);
}

@@ -592,12 +599,10 @@ void irq_ctx_init(void)
memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
tp = softirq_ctx[i];
tp->cpu = i;
- tp->preempt_count = 0;

memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
tp = hardirq_ctx[i];
tp->cpu = i;
- tp->preempt_count = HARDIRQ_OFFSET;
}
}

diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 777d999..7da3882 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -47,13 +47,12 @@ _GLOBAL(call_do_softirq)
mtlr r0
blr

-_GLOBAL(call_handle_irq)
+_GLOBAL(call_do_irq)
mflr r0
stw r0,4(r1)
- mtctr r6
- stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5)
- mr r1,r5
- bctrl
+ stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
+ mr r1,r4
+ bl __do_irq
lwz r1,0(r1)
lwz r0,4(r1)
mtlr r0
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 971d7e7..e59caf8 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -40,14 +40,12 @@ _GLOBAL(call_do_softirq)
mtlr r0
blr

-_GLOBAL(call_handle_irq)
- ld r8,0(r6)
+_GLOBAL(call_do_irq)
mflr r0
std r0,16(r1)
- mtctr r8
- stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5)
- mr r1,r5
- bctrl
+ stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
+ mr r1,r4
+ bl .__do_irq
ld r1,0(r1)
ld r0,16(r1)
mtlr r0


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/