[for-next][PATCH 05/27] function_graph: Handle tail calls for stack unwinding

From: Steven Rostedt
Date: Tue Jun 04 2024 - 10:45:03 EST


From: "Masami Hiramatsu (Google)" <mhiramat@xxxxxxxxxx>

For the tail-call, there would be 2 or more ftrace_ret_stacks on the
ret_stack, which records "return_to_handler" as the return address except
for the last one. But on the real stack, there should be 1 entry because
tail-call reuses the return address on the stack and jump to the next
function.

In ftrace_graph_ret_addr() that is used for stack unwinding, skip tail
calls as a real stack unwinder would do.

Link: https://lore.kernel.org/linux-trace-kernel/171509096221.162236.8806372072523195752.stgit@devnote2
Link: https://lore.kernel.org/linux-trace-kernel/20240603190821.717065217@xxxxxxxxxxx

Cc: Mark Rutland <mark.rutland@xxxxxxx>
Cc: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Alexei Starovoitov <alexei.starovoitov@xxxxxxxxx>
Cc: Florent Revest <revest@xxxxxxxxxxxx>
Cc: Martin KaFai Lau <martin.lau@xxxxxxxxx>
Cc: bpf <bpf@xxxxxxxxxxxxxxx>
Cc: Sven Schnelle <svens@xxxxxxxxxxxxx>
Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Daniel Borkmann <daniel@xxxxxxxxxxxxx>
Cc: Alan Maguire <alan.maguire@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Guo Ren <guoren@xxxxxxxxxx>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@xxxxxxxxxx>
Signed-off-by: Steven Rostedt (Google) <rostedt@xxxxxxxxxxx>
---
kernel/trace/fgraph.c | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index aae51f746828..8de2a2662281 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -594,16 +594,26 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
unsigned long ret, unsigned long *retp)
{
struct ftrace_ret_stack *ret_stack;
+ unsigned long return_handler = (unsigned long)dereference_kernel_function_descriptor(return_to_handler);
int i = task->curr_ret_stack;

- if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler))
+ if (ret != return_handler)
return ret;

while (i > 0) {
ret_stack = get_ret_stack(current, i, &i);
if (!ret_stack)
break;
- if (ret_stack->retp == retp)
+ /*
+ * For the tail-call, there would be 2 or more ftrace_ret_stacks on
+ * the ret_stack, which records "return_to_handler" as the return
+ * address except for the last one.
+ * But on the real stack, there should be 1 entry because tail-call
+ * reuses the return address on the stack and jump to the next function.
+ * Thus we will continue to find real return address.
+ */
+ if (ret_stack->retp == retp &&
+ ret_stack->ret != return_handler)
return ret_stack->ret;
}

@@ -614,10 +624,11 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
unsigned long ret, unsigned long *retp)
{
struct ftrace_ret_stack *ret_stack;
+ unsigned long return_handler = (unsigned long)dereference_kernel_function_descriptor(return_to_handler);
int offset = task->curr_ret_stack;
int i;

- if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler))
+ if (ret != return_handler)
return ret;

if (!idx)
@@ -626,6 +637,8 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
i = *idx;
do {
ret_stack = get_ret_stack(task, offset, &offset);
+ if (ret_stack && ret_stack->ret == return_handler)
+ continue;
i--;
} while (i >= 0 && ret_stack);

--
2.43.0