[PATCH] arm64: Implement stack trace termination record

From: Madhavan T. Venkataraman
Date: Tue Apr 20 2021 - 14:44:47 EST


Reliable stacktracing requires that we identify when a stacktrace is
terminated early. We can do this by ensuring all tasks have a final
frame record at a known location on their task stack, and checking
that this is the final frame record in the chain.

We'd like to use task_pt_regs(task)->stackframe as the final frame
record, as this is already setup upon exception entry from EL0. For
kernel tasks we need to consistently reserve the pt_regs and point x29
at this, which we can do with small changes to __primary_switched,
__secondary_switched, and copy_process().

Since the final frame record must be at a specific location, we must
create the final frame record in __primary_switched and
__secondary_switched rather than leaving this to start_kernel and
secondary_start_kernel. Thus, __primary_switched and
__secondary_switched will now show up in stacktraces for the idle tasks.

Since the final frame record is now identified by its location rather
than by its contents, we identify it at the start of unwind_frame(),
before we read any values from it.

External debuggers may terminate the stack trace when FP == 0. In the
pt_regs->stackframe, the PC is 0 as well. So, stack traces taken in the
debugger may print an extra record 0x0 at the end. While this is not
pretty, this does not do any harm. This is a small price to pay for
having reliable stack trace termination in the kernel. That said, gdb
does not show the extra record probably because it uses DWARF and not
frame pointers for stack traces.

Signed-off-by: Madhavan T. Venkataraman <madvenka@xxxxxxxxxxxxxxxxxxx>
Reviewed-by: Mark Brown <broonie@xxxxxxxxxx>
[Mark: rebase, use ASM_BUG(), update comments, update commit message]
Signed-off-by: Mark Rutland <mark.rutland@xxxxxxx>
---
arch/arm64/kernel/entry.S | 2 +-
arch/arm64/kernel/head.S | 25 +++++++++++++++++++------
arch/arm64/kernel/process.c | 5 +++++
arch/arm64/kernel/stacktrace.c | 16 +++++++---------
4 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 806a39635482..a5bb8c9c8ace 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -283,7 +283,7 @@ alternative_else_nop_endif
stp lr, x21, [sp, #S_LR]

/*
- * For exceptions from EL0, create a terminal frame record.
+ * For exceptions from EL0, create a final frame record.
* For exceptions from EL1, create a synthetic frame record so the
* interrupted code shows up in the backtrace.
*/
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 96873dfa67fd..cc2d45d54838 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -16,6 +16,7 @@
#include <asm/asm_pointer_auth.h>
#include <asm/assembler.h>
#include <asm/boot.h>
+#include <asm/bug.h>
#include <asm/ptrace.h>
#include <asm/asm-offsets.h>
#include <asm/cache.h>
@@ -393,6 +394,18 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
ret x28
SYM_FUNC_END(__create_page_tables)

+ /*
+ * Create a final frame record at task_pt_regs(current)->stackframe, so
+ * that the unwinder can identify the final frame record of any task by
+ * its location in the task stack. We reserve the entire pt_regs space
+ * for consistency with user tasks and kthreads.
+ */
+ .macro setup_final_frame
+ sub sp, sp, #PT_REGS_SIZE
+ stp xzr, xzr, [sp, #S_STACKFRAME]
+ add x29, sp, #S_STACKFRAME
+ .endm
+
/*
* The following fragment of code is executed with the MMU enabled.
*
@@ -447,9 +460,9 @@ SYM_FUNC_START_LOCAL(__primary_switched)
#endif
bl switch_to_vhe // Prefer VHE if possible
add sp, sp, #16
- mov x29, #0
- mov x30, #0
- b start_kernel
+ setup_final_frame
+ bl start_kernel
+ ASM_BUG()
SYM_FUNC_END(__primary_switched)

.pushsection ".rodata", "a"
@@ -639,14 +652,14 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
cbz x2, __secondary_too_slow
msr sp_el0, x2
scs_load x2, x3
- mov x29, #0
- mov x30, #0
+ setup_final_frame

#ifdef CONFIG_ARM64_PTR_AUTH
ptrauth_keys_init_cpu x2, x3, x4, x5
#endif

- b secondary_start_kernel
+ bl secondary_start_kernel
+ ASM_BUG()
SYM_FUNC_END(__secondary_switched)

SYM_FUNC_START_LOCAL(__secondary_too_slow)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 0da0fd4ed1d0..31d98fe2e303 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -433,6 +433,11 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
}
p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
p->thread.cpu_context.sp = (unsigned long)childregs;
+ /*
+ * For the benefit of the unwinder, set up childregs->stackframe
+ * as the final frame for the new task.
+ */
+ p->thread.cpu_context.fp = (unsigned long)childregs->stackframe;

ptrace_hw_copy_thread(p);

diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index c22706aa32a1..1e4d44751492 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -68,12 +68,16 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
unsigned long fp = frame->fp;
struct stack_info info;

- if (fp & 0xf)
- return -EINVAL;
-
if (!tsk)
tsk = current;

+ /* Final frame; nothing to unwind */
+ if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
+ return -ENOENT;
+
+ if (fp & 0xf)
+ return -EINVAL;
+
if (!on_accessible_stack(tsk, fp, &info))
return -EINVAL;

@@ -128,12 +132,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)

frame->pc = ptrauth_strip_insn_pac(frame->pc);

- /*
- * This is a terminal record, so we have finished unwinding.
- */
- if (!frame->fp && !frame->pc)
- return -ENOENT;
-
return 0;
}
NOKPROBE_SYMBOL(unwind_frame);
--
2.11.0