Re: [PATCH] riscv: VMAP_STACK overflow detection thread-safe

From: Deepak Gupta
Date: Thu Nov 24 2022 - 02:25:46 EST


On Thu, Nov 24, 2022 at 06:30:35AM +0000, Conor Dooley wrote:
Hey Deepak,
Guo Ren s issues aside, there's some process bits here.

On 24 November 2022 00:50:06 GMT, Deepak Gupta <debug@xxxxxxxxxxxx> wrote:
commit 31da94c25aea835ceac00575a9fd206c5a833fed added support for

Please check the documentation for how to format commit references in patch text.
As a reader, a hash requires access to a git tree to do anything with.
Id show you the format but I am on my phone ;)


Thanks I'll make that correction.

CONFIG_VMAP_STACK. If overflow is detected, CPU switches to `shadow_stack`
temporarily before switching finally to per-cpu `overflow_stack`.

If two CPUs/harts are racing and end up in over flowing kernel stack, one
or both will end up corrupting each other state because `shadow_stack` is
not per-cpu.

Following are the changes in this patch

- Defines an asm macro to obtain per-cpu symbols in destination
register.
- Computing per-cpu symbol requires a temporary register. When stack is
out of question, a place is needed to spill a register. `thread_info`
is good location to have spill register.
- In entry.S when overflow is detected x31 is spilled into thread_info.
x31 is used as temp reg for asm macro to locate per-cpu overflow stack

Other relevant disccussion on this
https://lore.kernel.org/linux-riscv/Y347B0x4VUNOd6V7@xhacker/T/#t

Link: foo


Tested by `echo EXHAUST_STACK > /sys/kernel/debug/provoke-crash/DIRECT`

[ 286.223273] Insufficient stack space to handle exception!/debug/provoke-crash/DIRECT
[ 286.223878] Task stack: [0xff20000010a98000..0xff20000010a9c000]
[ 286.224411] Overflow stack: [0xff600001f7d98370..0xff600001f7d99370]
[ 286.226057] CPU: 1 PID: 205 Comm: bash Not tainted 6.1.0-rc2-00001-g328a1f96f7b9 #34
[ 286.227139] Hardware name: riscv-virtio,qemu (DT)
[ 286.228000] epc : __memset+0x60/0xfc
[ 286.229299] ra : recursive_loop+0x48/0xc6 [lkdtm]
[ 286.231457] epc : ffffffff808de0e4 ra : ffffffff0163a752 sp : ff20000010a97e80
[ 286.232207] gp : ffffffff815c0330 tp : ff600000820ea280 t0 : ff20000010a97e88
[ 286.233584] t1 : 000000000000002e t2 : 3233206874706564 s0 : ff20000010a982b0
[ 286.234293] s1 : 0000000000000012 a0 : ff20000010a97e88 a1 : 0000000000000000
[ 286.234998] a2 : 0000000000000400 a3 : ff20000010a98288 a4 : 0000000000000000
[ 286.235697] a5 : 0000000000000000 a6 : fffffffffffe43f0 a7 : 00007fffffffffff
[ 286.236384] s2 : ff20000010a97e88 s3 : ffffffff01644680 s4 : ff20000010a9be90
[ 286.237743] s5 : ff600000842ba6c0 s6 : 00aaaaaac29e42b0 s7 : 00fffffff0aa3684
[ 286.238691] s8 : 00aaaaaac2978040 s9 : 0000000000000065 s10: 00ffffff8a7cad10
[ 286.239591] s11: 00ffffff8a76a4e0 t3 : ffffffff815dbaf4 t4 : ffffffff815dbaf4
[ 286.240537] t5 : ffffffff815dbab8 t6 : ff20000010a9bb48
[ 286.241540] status: 0000000200000120 badaddr: ff20000010a97e88 cause: 000000000000000f
[ 286.242979] Kernel panic - not syncing: Kernel stack overflow
[ 286.244106] CPU: 1 PID: 205 Comm: bash Not tainted 6.1.0-rc2-00001-g328a1f96f7b9 #34
[ 286.245276] Hardware name: riscv-virtio,qemu (DT)
[ 286.245929] Call Trace:
[ 286.246954] [<ffffffff80006754>] dump_backtrace+0x30/0x38
[ 286.247813] [<ffffffff808de798>] show_stack+0x40/0x4c
[ 286.248429] [<ffffffff808ea2a8>] dump_stack_lvl+0x44/0x5c
[ 286.249439] [<ffffffff808ea2d8>] dump_stack+0x18/0x20
[ 286.250056] [<ffffffff808dec06>] panic+0x126/0x2fe
[ 286.250642] [<ffffffff800065ea>] walk_stackframe+0x0/0xf0
[ 286.251357] [<ffffffff0163a752>] recursive_loop+0x48/0xc6 [lkdtm]
[ 286.253321] SMP: stopping secondary CPUs
[ 286.256724] ---[ end Kernel panic - not syncing: Kernel stack overflow ]---

For stack traces, especially long ones, please cut out the timestamps.

I followed formatting of stack traces in commit 31da94c25ae. Assumed since that's merged, that's how test and stack traces should be included in commit msg.

I'll follow your suggestion and remove timestamps.



Fixes: 31da94c25aea835ceac00575a9fd206c5a833fed

https://gist.githubusercontent.com/conor-pwbot/8882fc7ef7cd4fe833774574fbf509f5/raw/d360ec7aebfaa6c2d043e9bd87f59745ad6a4c85/desc

Please use the correct format for fixes tags.

Cc: Guo Ren <guoren@xxxxxxxxxx>
Cc: Jisheng Zhang <jszhang@xxxxxxxxxx>

Signed-off-by: Deepak Gupta <debug@xxxxxxxxxxxx>

All of the tags need to be in one block, so:

Link:
Fixes:
SoB:

Noted.


Thanks,
Conor.

---
arch/riscv/include/asm/asm.h | 11 ++++++
arch/riscv/include/asm/thread_info.h | 3 ++
arch/riscv/kernel/asm-offsets.c | 4 +++
arch/riscv/kernel/entry.S | 54 ++++------------------------
arch/riscv/kernel/traps.c | 12 +------
5 files changed, 26 insertions(+), 58 deletions(-)

diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
index 1b471ff73178..373eba843331 100644
--- a/arch/riscv/include/asm/asm.h
+++ b/arch/riscv/include/asm/asm.h
@@ -69,6 +69,7 @@

#ifdef __ASSEMBLY__

+#include <asm/asm-offsets.h>
/* Common assembly source macros */

/*
@@ -80,6 +81,16 @@
.endr
.endm

+.macro asm_per_cpu dst sym tmp
+ REG_L \tmp, TASK_TI_CPU_NUM(tp)
+ slli \tmp, \tmp, 0x3
+ la \dst, __per_cpu_offset
+ add \dst, \dst, \tmp
+ REG_L \tmp, 0(\dst)
+ la \dst, \sym
+ add \dst, \dst, \tmp
+.endm
+
#endif /* __ASSEMBLY__ */

#endif /* _ASM_RISCV_ASM_H */
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 67322f878e0d..7e17dc07cf11 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -65,6 +65,9 @@ struct thread_info {
*/
long kernel_sp; /* Kernel stack pointer */
long user_sp; /* User stack pointer */
+#ifdef CONFIG_VMAP_STACK
+ long spill_reg; /* per cpu scratch space to spill a single register */
+#endif
int cpu;
};

diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index df9444397908..bed3c83bfb8f 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -38,6 +38,10 @@ void asm_offsets(void)
OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp);
OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp);

+ OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu);
+#ifdef CONFIG_VMAP_STACK
+ OFFSET(TASK_TI_SPILL_REG, task_struct, thread_info.spill_reg);
+#endif
OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]);
OFFSET(TASK_THREAD_F1, task_struct, thread.fstate.f[1]);
OFFSET(TASK_THREAD_F2, task_struct, thread.fstate.f[2]);
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index b9eda3fcbd6d..12f285cec136 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -10,9 +10,11 @@
#include <asm/asm.h>
#include <asm/csr.h>
#include <asm/unistd.h>
+#include <asm/page.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/errata_list.h>
+#include <linux/sizes.h>

#if !IS_ENABLED(CONFIG_PREEMPTION)
.set resume_kernel, restore_all
@@ -404,54 +406,12 @@ handle_syscall_trace_exit:

#ifdef CONFIG_VMAP_STACK
handle_kernel_stack_overflow:
- la sp, shadow_stack
- addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE
+ REG_S x31, TASK_TI_SPILL_REG(tp)
+ asm_per_cpu sp, overflow_stack, x31
+ li x31, OVERFLOW_STACK_SIZE
+ add sp, sp, x31
+ REG_L x31, TASK_TI_SPILL_REG(tp)

- //save caller register to shadow stack
- addi sp, sp, -(PT_SIZE_ON_STACK)
- REG_S x1, PT_RA(sp)
- REG_S x5, PT_T0(sp)
- REG_S x6, PT_T1(sp)
- REG_S x7, PT_T2(sp)
- REG_S x10, PT_A0(sp)
- REG_S x11, PT_A1(sp)
- REG_S x12, PT_A2(sp)
- REG_S x13, PT_A3(sp)
- REG_S x14, PT_A4(sp)
- REG_S x15, PT_A5(sp)
- REG_S x16, PT_A6(sp)
- REG_S x17, PT_A7(sp)
- REG_S x28, PT_T3(sp)
- REG_S x29, PT_T4(sp)
- REG_S x30, PT_T5(sp)
- REG_S x31, PT_T6(sp)
-
- la ra, restore_caller_reg
- tail get_overflow_stack
-
-restore_caller_reg:
- //save per-cpu overflow stack
- REG_S a0, -8(sp)
- //restore caller register from shadow_stack
- REG_L x1, PT_RA(sp)
- REG_L x5, PT_T0(sp)
- REG_L x6, PT_T1(sp)
- REG_L x7, PT_T2(sp)
- REG_L x10, PT_A0(sp)
- REG_L x11, PT_A1(sp)
- REG_L x12, PT_A2(sp)
- REG_L x13, PT_A3(sp)
- REG_L x14, PT_A4(sp)
- REG_L x15, PT_A5(sp)
- REG_L x16, PT_A6(sp)
- REG_L x17, PT_A7(sp)
- REG_L x28, PT_T3(sp)
- REG_L x29, PT_T4(sp)
- REG_L x30, PT_T5(sp)
- REG_L x31, PT_T6(sp)
-
- //load per-cpu overflow stack
- REG_L sp, -8(sp)
addi sp, sp, -(PT_SIZE_ON_STACK)

//save context to overflow stack
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index f3e96d60a2ff..eef3a87514c7 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -208,18 +208,8 @@ int is_valid_bugaddr(unsigned long pc)
#endif /* CONFIG_GENERIC_BUG */

#ifdef CONFIG_VMAP_STACK
-static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)],
+DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)],
overflow_stack)__aligned(16);
-/*
- * shadow stack, handled_ kernel_ stack_ overflow(in kernel/entry.S) is used
- * to get per-cpu overflow stack(get_overflow_stack).
- */
-long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)];
-asmlinkage unsigned long get_overflow_stack(void)
-{
- return (unsigned long)this_cpu_ptr(overflow_stack) +
- OVERFLOW_STACK_SIZE;
-}

asmlinkage void handle_bad_stack(struct pt_regs *regs)
{