Re: [PATCH v3] riscv: add irq stack support

From: Palmer Dabbelt
Date: Thu Jun 02 2022 - 17:44:53 EST


On Sat, 14 May 2022 22:03:36 PDT (-0700), jszhang@xxxxxxxxxx wrote:
Currently, IRQs are still handled on the kernel stack of the current
task on riscv platforms. If the task has a deep call stack at the time
of interrupt, and handling the interrupt also requires a deep stack,
it's possible to see stack overflow.

Before this patch, the stack_max_size of a v5.17-rc1 kernel running on
a lichee RV board gave:
~ # cat /sys/kernel/debug/tracing/stack_max_size
3736

After this patch,
~ # cat /sys/kernel/debug/tracing/stack_max_size
3176

We reduce the max kernel stack usage by 560 bytes!

Signed-off-by: Jisheng Zhang <jszhang@xxxxxxxxxx>
---
since v2:
- rebase on v5.18-rcN
- update commit msg, I.E remove the "it's possible to reduce the
THREAD_SIZE to 8KB for RV64 platforms..."

since v1:
- add __ro_after_init to the irq_stack[] array.

arch/riscv/include/asm/thread_info.h | 1 +
arch/riscv/kernel/asm-offsets.c | 2 ++
arch/riscv/kernel/entry.S | 33 +++++++++++++++++++++++++---
arch/riscv/kernel/irq.c | 16 ++++++++++++++
4 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 74d888c8d631..98ea73721a0b 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -25,6 +25,7 @@
#endif
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)

+#define IRQ_STACK_SIZE THREAD_SIZE
/*
* By aligning VMAP'd stacks to 2 * THREAD_SIZE, we can detect overflow by
* checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index df9444397908..9e32748af0e8 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -37,6 +37,8 @@ void asm_offsets(void)
OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp);
OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp);
+ OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu);
+ OFFSET(TASK_STACK, task_struct, stack);

OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]);
OFFSET(TASK_THREAD_F1, task_struct, thread.fstate.f[1]);
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index c8b9ce274b9a..e91cae183ef4 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -126,12 +126,39 @@ skip_context_tracking:
*/
bge s4, zero, 1f

- la ra, ret_from_exception
+ /* preserve the sp */
+ move s0, sp

- /* Handle interrupts */
move a0, sp /* pt_regs */
+
+ /*
+ * Compare sp with the base of the task stack.
+ * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack,
+ * and should switch to the irq stack.
+ */
+ REG_L t0, TASK_STACK(tp)

This fails to build on some configurations, as TASK_STACK doesn't fit within the load immediate. IIRC we added a macro for this at some point (to select the short/long addressing sequence), but there's a trivial fix

diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 99bc411d12f4..095fef82e910 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -136,7 +136,9 @@ skip_context_tracking:
* If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack,
* and should switch to the irq stack.
*/
- REG_L t0, TASK_STACK(tp)
+ li t0, TASK_STACK
+ add t0, t0, tp
+ REG_L t0, 0(t0)
xor t0, t0, s0
li t1, ~(THREAD_SIZE - 1)
and t0, t0, t1

Unfortunatly even with that fix this still blows up early in boot on some of my test configs. Looks like pretty much anything with kasan is failing. Nothing is jumping out at me and it's pretty late so I won't have time to debug it myself, sorry.

+ xor t0, t0, s0
+ li t1, ~(THREAD_SIZE - 1)
+ and t0, t0, t1
+ bnez t0, 2f
+
+ la t1, irq_stack
+ REG_L t2, TASK_TI_CPU(tp)
+ slli t2, t2, RISCV_LGPTR
+ add t1, t1, t2
+ REG_L t2, 0(t1)
+ li t1, IRQ_STACK_SIZE
+ /* switch to the irq stack */
+ add sp, t2, t1
+
+2:
+ /* Handle interrupts */
la a1, generic_handle_arch_irq
- jr a1
+ jalr a1
+
+ /* Restore sp */
+ move sp, s0
+ j ret_from_exception
1:
/*
* Exceptions run with interrupts enabled or disabled depending on the
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index 7207fa08d78f..f20cbfd42e82 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -10,6 +10,8 @@
#include <linux/seq_file.h>
#include <asm/smp.h>

+void *irq_stack[NR_CPUS] __ro_after_init;
+
int arch_show_interrupts(struct seq_file *p, int prec)
{
show_ipi_stats(p, prec);
@@ -18,7 +20,21 @@ int arch_show_interrupts(struct seq_file *p, int prec)

void __init init_IRQ(void)
{
+ int cpu;
+
irqchip_init();
if (!handle_arch_irq)
panic("No interrupt controller found.");
+
+ for_each_possible_cpu(cpu) {
+#ifdef CONFIG_VMAP_STACK
+ void *s = __vmalloc_node(IRQ_STACK_SIZE, THREAD_ALIGN,
+ THREADINFO_GFP, cpu_to_node(cpu),
+ __builtin_return_address(0));
+#else
+ void *s = (void *)__get_free_pages(GFP_KERNEL, get_order(IRQ_STACK_SIZE));
+#endif
+
+ irq_stack[cpu] = s;
+ }
}