Re: [PATCH 1/2] arm64: Implement arch_stack_walk_reliable

From: Josh Poimboeuf
Date: Tue Mar 18 2025 - 21:07:02 EST


On Tue, Mar 18, 2025 at 04:38:20PM -0700, Song Liu wrote:
> On Tue, Mar 18, 2025 at 4:00 PM Josh Poimboeuf <jpoimboe@xxxxxxxxxx> wrote:
> > - even in the -ENOENT case the unreliable bit has already been set
> > right before the call to kunwind_next_frame_record_meta().
>
> For this one, do you mean we set state->common.unreliable, but
> failed to propagate it to data.unreliable?

Hm, I hadn't noticed that. That code is quite the maze.

It's unfortunate there are two separate 'unreliable' variables. It
looks like consume_state() is the only way they get synced?

How does that work if kunwind_next() returns an error and skips
consume_state()? Or if kunwind_recover_return_address() returns an
error to kunwind_next()?
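
FWIW, here's roughly how I'm reading the sync point. The consumer
function and the data struct exist in current mainline; the
'unreliable' plumbing is my guess at what the patch adds:

static __always_inline bool
arch_kunwind_consume_entry(const struct kunwind_state *state, void *cookie)
{
	struct kunwind_consume_entry_data *data = cookie;

	/* presumably the only place the two flags get synced? */
	if (state->common.unreliable)
		data->unreliable = true;

	return data->consume_entry(data->cookie, state->common.pc);
}

If kunwind_next() fails, the loop breaks before consume_state() runs
again, so whatever happened to state->common.unreliable on that step
never lands in data.unreliable.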

What I actually meant was the following:

  do_kunwind()
    kunwind_next()
      kunwind_next_frame_record()
        state->common.unreliable = true;
        kunwind_next_frame_record_meta()
          return -ENOENT;

Notice that even in the success case (-ENOENT), 'unreliable' has already
been set, so a trace that terminates cleanly at the final frame record
would still be reported as unreliable.

Actually I think it would be much simpler to just propagate -ENOENT down
the call chain. Then no 'unreliable' bits are needed.

Like so (instead of the original patch):

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c9fe3e7566a6..5713fad567c5 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -276,6 +276,7 @@ config ARM64
 	select HAVE_SOFTIRQ_ON_OWN_STACK
 	select USER_STACKTRACE_SUPPORT
 	select VDSO_GETRANDOM
+	select HAVE_RELIABLE_STACKTRACE
 	help
 	  ARM 64-bit (AArch64) Linux support.

@@ -2509,4 +2510,3 @@ endmenu # "CPU Power Management"
source "drivers/acpi/Kconfig"

source "arch/arm64/kvm/Kconfig"
-
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 1d9d51d7627f..e227da842bc3 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -277,22 +277,28 @@ kunwind_next(struct kunwind_state *state)

 typedef bool (*kunwind_consume_fn)(const struct kunwind_state *state, void *cookie);
 
-static __always_inline void
+static __always_inline int
 do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state,
 	   void *cookie)
 {
-	if (kunwind_recover_return_address(state))
-		return;
+	int ret;
+
+	ret = kunwind_recover_return_address(state);
+	if (ret)
+		return ret;
 
 	while (1) {
 		int ret;
 
 		if (!consume_state(state, cookie))
-			break;
+			return -EINVAL;
+
 		ret = kunwind_next(state);
-		if (ret < 0)
-			break;
+		if (ret)
+			return ret;
 	}
+
+	return -EINVAL;
 }
 
 /*
@@ -324,7 +330,7 @@ do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state,
 			: stackinfo_get_unknown();		\
 	})
 
-static __always_inline void
+static __always_inline int
 kunwind_stack_walk(kunwind_consume_fn consume_state,
 		   void *cookie, struct task_struct *task,
 		   struct pt_regs *regs)
@@ -352,7 +358,7 @@ kunwind_stack_walk(kunwind_consume_fn consume_state,

 	if (regs) {
 		if (task != current)
-			return;
+			return -EINVAL;
 		kunwind_init_from_regs(&state, regs);
 	} else if (task == current) {
 		kunwind_init_from_caller(&state);
@@ -360,7 +366,7 @@ kunwind_stack_walk(kunwind_consume_fn consume_state,
 		kunwind_init_from_task(&state, task);
 	}
 
-	do_kunwind(&state, consume_state, cookie);
+	return do_kunwind(&state, consume_state, cookie);
 }
 
 struct kunwind_consume_entry_data {
@@ -387,6 +393,25 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
 	kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs);
 }
 
+noinline noinstr int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+					      void *cookie, struct task_struct *task)
+{
+	int ret;
+	struct kunwind_consume_entry_data data = {
+		.consume_entry = consume_entry,
+		.cookie = cookie,
+	};
+
+	ret = kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, NULL);
+	if (ret) {
+		if (ret == -ENOENT)
+			return 0;
+		return ret;
+	}
+
+	return -EINVAL;
+}
+
 struct bpf_unwind_consume_entry_data {
 	bool (*consume_entry)(void *cookie, u64 ip, u64 sp, u64 fp);
 	void *cookie;