Re: [PATCH 05/19] x86/dumpstack: fix function graph tracing stack dump reliability issues

From: Steven Rostedt
Date: Fri Jul 29 2016 - 18:55:43 EST


On Thu, 21 Jul 2016 16:21:42 -0500
Josh Poimboeuf <jpoimboe@xxxxxxxxxx> wrote:

> When function graph tracing is enabled for a function, its return
> address on the stack is replaced with the address of an ftrace handler
> (return_to_handler). When dumping the stack of a task with graph
> tracing enabled, there are some subtle bugs:
>
> - The fake return_to_handler() address can be reported as reliable.
> Instead, because it's not the real caller, it should be considered
> unreliable.

I have some mixed emotions about this. First, it's not "fake", the
function *is* going to return to it, but you are right, that's not the
function that was called.

I do like to see these in the trace, because sometimes these functions
are an issue. But I guess I can live with them being marked as
"unreliable".


>
> - In print_context_stack(), the real caller's return address is always
> reported as reliable, even if the return_to_handler() address wasn't
> referred to by a frame pointer.

Hmm, if CONFIG_FRAME_POINTER is enabled, perhaps we should only call
the look up of ftrace_graph_ret_addr(). Hmm, playing with this, yeah,
we definitely should. It can report the wrong reliability.

Without doing the reliability check we can get out of sync with the
ret_stack. I have a patch to go on top of this patch below (hmm, it may
not apply fully, because I was using a different base tree than you).

>
> In addition to fixing these bugs, convert print_ftrace_graph_addr() to a
> more generic function which can be used outside of dump_trace()
> callbacks.
>
> Signed-off-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
> ---
> arch/x86/include/asm/stacktrace.h | 13 ++++++++++
> arch/x86/kernel/dumpstack.c | 50 +++++++++++++++++----------------------
> 2 files changed, 35 insertions(+), 28 deletions(-)
>
> diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
> index 6f65995..5d3d258 100644
> --- a/arch/x86/include/asm/stacktrace.h
> +++ b/arch/x86/include/asm/stacktrace.h
> @@ -14,6 +14,19 @@ extern int kstack_depth_to_print;
> struct thread_info;
> struct stacktrace_ops;
>
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +
> +unsigned long
> +ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long addr);
> +
> +#else
> +static inline unsigned long
> +ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long addr)
> +{
> + return addr;
> +}
> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> +
> typedef unsigned long (*walk_stack_t)(struct task_struct *task,
> unsigned long *stack,
> unsigned long bp,
> diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
> index 692eecae..0a8694b 100644
> --- a/arch/x86/kernel/dumpstack.c
> +++ b/arch/x86/kernel/dumpstack.c
> @@ -40,36 +40,25 @@ void printk_address(unsigned long address)
> }
>
> #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> -static void
> -print_ftrace_graph_addr(unsigned long addr, void *data,
> - const struct stacktrace_ops *ops,
> - struct task_struct *task, int *graph)
> +unsigned long
> +ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long addr)
> {
> - unsigned long ret_addr;
> - int index;
> + int task_idx;
>
> if (addr != (unsigned long)return_to_handler)
> - return;
> + return addr;
>
> - index = task->curr_ret_stack;
> + task_idx = task->curr_ret_stack;
>
> - if (!task->ret_stack || index < *graph)
> - return;
> + if (!task->ret_stack || task_idx < *idx)
> + return addr;
>
> - index -= *graph;
> - ret_addr = task->ret_stack[index].ret;
> + task_idx -= *idx;
> + (*idx)++;
>
> - ops->address(data, ret_addr, 1);
> -
> - (*graph)++;
> + return task->ret_stack[task_idx].ret;
> }
> -#else
> -static inline void
> -print_ftrace_graph_addr(unsigned long addr, void *data,
> - const struct stacktrace_ops *ops,
> - struct task_struct *task, int *graph)
> -{ }
> -#endif
> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
>
> /*
> * x86-64 can have up to three kernel stacks:
> @@ -108,18 +97,23 @@ print_context_stack(struct task_struct *task,
> stack = (unsigned long *)task_stack_page(task);
>
> while (valid_stack_ptr(task, stack, sizeof(*stack), end)) {
> - unsigned long addr;
> + unsigned long addr = *stack;
>
> addr = *stack;
> if (__kernel_text_address(addr)) {
> + int reliable = 0;
> + unsigned long real_addr;
> +
> if ((unsigned long) stack == bp + sizeof(long)) {
> - ops->address(data, addr, 1);
> + reliable = 1;
> frame = frame->next_frame;
> bp = (unsigned long) frame;
> - } else {
> - ops->address(data, addr, 0);
> }
> - print_ftrace_graph_addr(addr, data, ops, task, graph);
> +
> + real_addr = ftrace_graph_ret_addr(task, graph, addr);
> + if (addr != real_addr)
> + ops->address(data, addr, 0);

Note this changes behavior, as the original code had the ret_to_handler
first. This makes it second. (I fixed this below).

And that we should add a reliability check if CONFIG_FRAME_POINTER is
enabled.

> + ops->address(data, real_addr, reliable);
> }
> stack++;
> }
> @@ -142,11 +136,11 @@ print_context_stack_bp(struct task_struct *task,
> if (!__kernel_text_address(addr))
> break;
>
> + addr = ftrace_graph_ret_addr(task, graph, addr);
> if (ops->address(data, addr, 1))
> break;
> frame = frame->next_frame;
> ret_addr = &frame->return_address;
> - print_ftrace_graph_addr(addr, data, ops, task, graph);

This also changes the current code to print the return address as well.

> }
>
> return (unsigned long)frame;

Here's my patch that should be applied on top.

Maybe add a Signed-off-by: Steven Rostedt <rostedt@xxxxxxxxxxx> along
with your SOB. But you should remain Author.

-- Steve

---
arch/x86/kernel/dumpstack.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)

Index: linux-trace.git/arch/x86/kernel/dumpstack.c
===================================================================
--- linux-trace.git.orig/arch/x86/kernel/dumpstack.c 2016-07-29 17:17:10.995002677 -0400
+++ linux-trace.git/arch/x86/kernel/dumpstack.c 2016-07-29 18:50:53.497633797 -0400
@@ -90,10 +90,9 @@ print_context_stack(struct task_struct *
while (valid_stack_ptr(task, stack, sizeof(*stack), end)) {
unsigned long addr = *stack;

- addr = *stack;
if (__kernel_text_address(addr)) {
+ unsigned long real_addr = addr;
int reliable = 0;
- unsigned long real_addr;

if ((unsigned long) stack == bp + sizeof(long)) {
reliable = 1;
@@ -101,10 +100,12 @@ print_context_stack(struct task_struct *
bp = (unsigned long) frame;
}

- real_addr = ftrace_graph_ret_addr(task, graph, addr);
+ if (!IS_ENABLED(CONFIG_FRAME_POINTER) || reliable)
+ real_addr = ftrace_graph_ret_addr(task, graph, addr);
+
+ ops->address(data, real_addr, reliable);
if (addr != real_addr)
ops->address(data, addr, 0);
- ops->address(data, real_addr, reliable);
}
stack++;
}
@@ -123,13 +124,16 @@ print_context_stack_bp(struct task_struc

while (valid_stack_ptr(task, ret_addr, sizeof(*ret_addr), end)) {
unsigned long addr = *ret_addr;
+ unsigned long real_addr;

if (!__kernel_text_address(addr))
break;

- addr = ftrace_graph_ret_addr(task, graph, addr);
- if (ops->address(data, addr, 1))
+ real_addr = ftrace_graph_ret_addr(task, graph, addr);
+ if (ops->address(data, real_addr, 1))
break;
+ if (real_addr != addr)
+ ops->address(data, addr, 0);
frame = frame->next_frame;
ret_addr = &frame->return_address;
}