[PATCH] tracing/fgraph: support recording function return values

From: Changbin Du
Date: Sat Jul 13 2019 - 08:11:00 EST


This patch adds a new trace option 'funcgraph-retval' and is disabled by
default. When this option is enabled, fgraph tracer will show the return
value of each function. This is useful to find/analyze a original error
source in a call graph.

One limitation is that the kernel doesn't know the prototype of functions.
So fgraph assumes all functions have a retvalue of type int. You must ignore
the value of *void* function. And if the retvalue looks like an error code
then both hexadecimal and decimal number are displayed.

In this patch, only x86 and ARM platforms are supported.

Here is example showing the error is caused by vmx_create_vcpu() and the
error code is -5 (-EIO).

Here is an example:
with echo 1 > /sys/kernel/debug/tracing/options/funcgraph-retval

3) | kvm_vm_ioctl() {
3) | mutex_lock() {
3) | _cond_resched() {
3) 0.234 us | rcu_all_qs(); /* ret=0x80000000 */
3) 0.704 us | } /* ret=0x0 */
3) 1.226 us | } /* ret=0x0 */
3) 0.247 us | mutex_unlock(); /* ret=0xffff8880738ed040 */
3) | kvm_arch_vcpu_create() {
3) | vmx_create_vcpu() {
3) + 17.969 us | kmem_cache_alloc(); /* ret=0xffff88813a980040 */
3) + 15.948 us | kmem_cache_alloc(); /* ret=0xffff88813aa99200 */
3) 0.653 us | allocate_vpid.part.88(); /* ret=0x1 */
3) 6.964 us | kvm_vcpu_init(); /* ret=0xfffffffb */
3) 0.323 us | free_vpid.part.89(); /* ret=0x1 */
3) 9.985 us | kmem_cache_free(); /* ret=0x80000000 */
3) 9.491 us | kmem_cache_free(); /* ret=0x80000000 */
3) + 69.858 us | } /* ret=0xfffffffffffffffb/-5 */
3) + 70.631 us | } /* ret=0xfffffffffffffffb/-5 */
3) | mutex_lock() {
3) | _cond_resched() {
3) 0.199 us | rcu_all_qs(); /* ret=0x80000000 */
3) 0.594 us | } /* ret=0x0 */
3) 1.067 us | } /* ret=0x0 */
3) 0.337 us | mutex_unlock(); /* ret=0xffff8880738ed040 */
3) + 92.730 us | } /* ret=0xfffffffffffffffb/-5 */

Checking above fgraph output, we can easily know the original error is
raised from function vmx_create_vcpu().

Signed-off-by: Changbin Du <changbin.du@xxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
---
Documentation/trace/ftrace.rst | 5 ++++
arch/arm/kernel/entry-ftrace.S | 1 +
arch/arm64/kernel/entry-ftrace.S | 1 +
arch/x86/kernel/ftrace_32.S | 1 +
arch/x86/kernel/ftrace_64.S | 1 +
include/linux/ftrace.h | 1 +
kernel/trace/Kconfig | 4 +++
kernel/trace/fgraph.c | 17 +++++++++++-
kernel/trace/trace.h | 1 +
kernel/trace/trace_entries.h | 1 +
kernel/trace/trace_functions_graph.c | 39 ++++++++++++++++++++++++----
11 files changed, 66 insertions(+), 6 deletions(-)

diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index f60079259669..50dfb0378213 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -1264,6 +1264,11 @@ Options for function_graph tracer:
only a closing curly bracket "}" is displayed for
the return of a function.

+ funcgraph-retval - At the end of each function (the return) the
+ return value the function (though the function may not
+ really have it) is displayed in hex and negative number
+ if it looks like a error code.
+
sleep-time
When running function graph tracer, to include
the time a task schedules out in its function.
diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S
index a74289ebc803..94e8209f1a6b 100644
--- a/arch/arm/kernel/entry-ftrace.S
+++ b/arch/arm/kernel/entry-ftrace.S
@@ -259,6 +259,7 @@ ENDPROC(ftrace_graph_regs_caller)
.globl return_to_handler
return_to_handler:
stmdb sp!, {r0-r3}
+ mov r1, r0 @ return value
mov r0, fp @ frame pointer
bl ftrace_return_to_handler
mov lr, r0 @ r0 has real ret addr
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index 33d003d80121..b7ce416969d0 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -199,6 +199,7 @@ ENTRY(return_to_handler)
stp x4, x5, [sp, #32]
stp x6, x7, [sp, #48]

+ mov x1, x0 // return value
mov x0, x29 // parent's fp
bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp);
mov x30, x0 // restore the original return address
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index 073aab525d80..539e80577a83 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -185,6 +185,7 @@ END(ftrace_graph_caller)
return_to_handler:
pushl %eax
pushl %edx
+ movl %eax, %edx
movl $0, %eax
call ftrace_return_to_handler
movl %eax, %ecx
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 809d54397dba..3b31a1959025 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -304,6 +304,7 @@ ENTRY(return_to_handler)
movq %rax, (%rsp)
movq %rdx, 8(%rsp)
movq %rbp, %rdi
+ movq %rax, %rsi

call ftrace_return_to_handler

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 25e2995d4a4c..83881bc47b50 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -734,6 +734,7 @@ struct ftrace_graph_ret {
unsigned long overrun;
unsigned long long calltime;
unsigned long long rettime;
+ unsigned long retval;
int depth;
} __packed;

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 564e5fdb025f..7d189ea461e8 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -25,6 +25,9 @@ config HAVE_FUNCTION_GRAPH_TRACER
help
See Documentation/trace/ftrace-design.rst

+config HAVE_FTRACE_RETVAL
+ bool
+
config HAVE_DYNAMIC_FTRACE
bool
help
@@ -161,6 +164,7 @@ config FUNCTION_GRAPH_TRACER
depends on HAVE_FUNCTION_GRAPH_TRACER
depends on FUNCTION_TRACER
depends on !X86_32 || !CC_OPTIMIZE_FOR_SIZE
+ select HAVE_FTRACE_RETVAL if (X86 || ARM)
default y
help
Enable the kernel to trace a function at both its return
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 8dfd5021b933..df27fe3a35f9 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -206,13 +206,15 @@ static struct notifier_block ftrace_suspend_notifier = {
* Send the trace to the ring-buffer.
* @return the original return address.
*/
-unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
+static unsigned long _ftrace_return_to_handler(unsigned long frame_pointer,
+ unsigned long retval)
{
struct ftrace_graph_ret trace;
unsigned long ret;

ftrace_pop_return_trace(&trace, &ret, frame_pointer);
trace.rettime = trace_clock_local();
+ trace.retval = retval;
ftrace_graph_return(&trace);
/*
* The ftrace_graph_return() may still access the current
@@ -232,6 +234,19 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
return ret;
}

+#if defined(CONFIG_HAVE_FTRACE_RETVAL)
+unsigned long ftrace_return_to_handler(unsigned long frame_pointer,
+ unsigned long retval)
+{
+ return _ftrace_return_to_handler(frame_pointer, retval);
+}
+#else
+unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
+{
+ return _ftrace_return_to_handler(frame_pointer, 0);
+}
+#endif
+
/**
* ftrace_graph_get_ret_stack - return the entry of the shadow stack
* @task: The task to read the shadow stack from
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 005f08629b8b..483eecf0e9c3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -905,6 +905,7 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash)
#define TRACE_GRAPH_PRINT_TAIL 0x100
#define TRACE_GRAPH_SLEEP_TIME 0x200
#define TRACE_GRAPH_GRAPH_TIME 0x400
+#define TRACE_GRAPH_PRINT_RETVAL 0x800
#define TRACE_GRAPH_PRINT_FILL_SHIFT 28
#define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)

diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index fc8e97328e54..4b5312cfed66 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -99,6 +99,7 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
__field_desc( unsigned long, ret, func )
__field_desc( unsigned long long, ret, calltime)
__field_desc( unsigned long long, ret, rettime )
+ __field_desc( unsigned long, ret, retval )
__field_desc( unsigned long, ret, overrun )
__field_desc( int, ret, depth )
),
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 69ebf3c2f1b5..399382cf175b 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -66,6 +66,11 @@ static struct tracer_opt trace_opts[] = {
{ TRACER_OPT(graph-time, TRACE_GRAPH_GRAPH_TIME) },
#endif

+#ifdef CONFIG_HAVE_FTRACE_RETVAL
+ /* Display return value of function */
+ { TRACER_OPT(funcgraph-retval, TRACE_GRAPH_PRINT_RETVAL) },
+#endif
+
{ } /* Empty entry */
};

@@ -624,6 +629,18 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration,
trace_seq_puts(s, "| ");
}

+static void print_graph_retval(struct trace_seq *s, unsigned long val, bool comment)
+{
+ if (comment)
+ trace_seq_printf(s, " /* ");
+ if (IS_ERR_VALUE(val))
+ trace_seq_printf(s, "ret=0x%lx/%ld", val, val);
+ else
+ trace_seq_printf(s, "ret=0x%lx", val);
+ if (comment)
+ trace_seq_printf(s, " */");
+}
+
/* Case of a leaf function on its call entry */
static enum print_line_t
print_graph_entry_leaf(struct trace_iterator *iter,
@@ -668,7 +685,10 @@ print_graph_entry_leaf(struct trace_iterator *iter,
for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++)
trace_seq_putc(s, ' ');

- trace_seq_printf(s, "%ps();\n", (void *)call->func);
+ trace_seq_printf(s, "%ps();", (void *)call->func);
+ if (flags & TRACE_GRAPH_PRINT_RETVAL)
+ print_graph_retval(s, graph_ret->retval, true);
+ trace_seq_puts(s, "\n");

print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET,
cpu, iter->ent->pid, flags);
@@ -953,10 +973,19 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
* belongs to, write out the function name. Always do
* that if the funcgraph-tail option is enabled.
*/
- if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL))
- trace_seq_puts(s, "}\n");
- else
- trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func);
+ if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL)) {
+ trace_seq_puts(s, "}");
+ if (flags & TRACE_GRAPH_PRINT_RETVAL)
+ print_graph_retval(s, trace->retval, true);
+ trace_seq_puts(s, "\n");
+ } else {
+ trace_seq_printf(s, "} /* %ps", (void *)trace->func);
+ if (flags & TRACE_GRAPH_PRINT_RETVAL) {
+ trace_seq_puts(s, ", ");
+ print_graph_retval(s, trace->retval, false);
+ }
+ trace_seq_puts(s, " */\n");
+ }

/* Overrun */
if (flags & TRACE_GRAPH_PRINT_OVERRUN)
--
2.20.1