[PATCH 2/2] x86/dumpstack: Add save_stack_trace_norm()

From: Byungchul Park
Date: Mon Jul 04 2016 - 06:29:50 EST


In the non-oops case, it is usually not necessary to check every word of
the stack area to extract a backtrace. Instead, we can obtain it by
following the frame pointer. So make it possible to save a stack trace
more cheaply in the normal case.

I measured its overhead by printing the difference of sched_clock()
values on my QEMU x86 machine. The latency improved by over 80% when
trace->max_entries = 5.

Before this patch:

[ 2.780735] save_stack_trace takes 19902 (sched_lock)
[ 2.780718] save_stack_trace takes 20240 (sched_lock)
[ 2.781692] save_stack_trace takes 45215 (sched_lock)
[ 2.781477] save_stack_trace takes 20191 (sched_lock)
[ 2.781694] save_stack_trace takes 20044 (sched_lock)
[ 2.782589] save_stack_trace takes 20292 (sched_lock)
[ 2.782706] save_stack_trace takes 20024 (sched_lock)
[ 2.782706] save_stack_trace takes 19881 (sched_lock)
[ 2.782881] save_stack_trace takes 24577 (sched_lock)
[ 2.782706] save_stack_trace takes 19901 (sched_lock)
[ 2.783621] save_stack_trace takes 24381 (sched_lock)
[ 2.783621] save_stack_trace takes 20205 (sched_lock)
[ 2.783760] save_stack_trace takes 19956 (sched_lock)
[ 2.783718] save_stack_trace takes 20280 (sched_lock)
[ 2.784179] save_stack_trace takes 20099 (sched_lock)
[ 2.784835] save_stack_trace takes 20055 (sched_lock)
[ 2.785922] save_stack_trace takes 20157 (sched_lock)
[ 2.785922] save_stack_trace takes 20140 (sched_lock)
[ 2.786178] save_stack_trace takes 20040 (sched_lock)
[ 2.786877] save_stack_trace takes 20102 (sched_lock)
[ 2.795000] save_stack_trace takes 21147 (sched_lock)
[ 2.795397] save_stack_trace takes 20230 (sched_lock)
[ 2.795397] save_stack_trace takes 31274 (sched_lock)
[ 2.795739] save_stack_trace takes 19706 (sched_lock)
[ 2.796484] save_stack_trace takes 20266 (sched_lock)
[ 2.796484] save_stack_trace takes 20902 (sched_lock)
[ 2.797000] save_stack_trace takes 38110 (sched_lock)
[ 2.797510] save_stack_trace takes 20224 (sched_lock)
[ 2.798181] save_stack_trace takes 20172 (sched_lock)
[ 2.798837] save_stack_trace takes 20824 (sched_lock)

After this patch:

[ 3.100520] save_stack_trace takes 3817 (sched_lock)
[ 3.100680] save_stack_trace takes 3812 (sched_lock)
[ 3.101740] save_stack_trace takes 5012 (sched_lock)
[ 3.101809] save_stack_trace takes 3868 (sched_lock)
[ 3.103264] save_stack_trace takes 3657 (sched_lock)
[ 3.103264] save_stack_trace takes 3821 (sched_lock)
[ 3.129442] save_stack_trace takes 3923 (sched_lock)
[ 3.129983] save_stack_trace takes 3405 (sched_lock)
[ 3.130000] save_stack_trace takes 3603 (sched_lock)
[ 3.130246] save_stack_trace takes 3362 (sched_lock)
[ 3.130598] save_stack_trace takes 3674 (sched_lock)
[ 3.130968] save_stack_trace takes 3609 (sched_lock)
[ 3.131379] save_stack_trace takes 6376 (sched_lock)
[ 3.131847] save_stack_trace takes 3221 (sched_lock)
[ 3.132000] save_stack_trace takes 3597 (sched_lock)
[ 3.132043] save_stack_trace takes 3400 (sched_lock)
[ 3.132572] save_stack_trace takes 3283 (sched_lock)
[ 3.132714] save_stack_trace takes 3335 (sched_lock)
[ 3.133039] save_stack_trace takes 3358 (sched_lock)
[ 3.133476] save_stack_trace takes 3160 (sched_lock)
[ 3.133807] save_stack_trace takes 3297 (sched_lock)
[ 3.133954] save_stack_trace takes 3330 (sched_lock)
[ 3.134235] save_stack_trace takes 3517 (sched_lock)
[ 3.134711] save_stack_trace takes 3773 (sched_lock)
[ 3.135000] save_stack_trace takes 3685 (sched_lock)
[ 3.135541] save_stack_trace takes 4757 (sched_lock)
[ 3.135865] save_stack_trace takes 3420 (sched_lock)
[ 3.136000] save_stack_trace takes 3329 (sched_lock)
[ 3.137000] save_stack_trace takes 4058 (sched_lock)
[ 3.137000] save_stack_trace takes 3499 (sched_lock)

Signed-off-by: Byungchul Park <byungchul.park@xxxxxxx>
---
arch/x86/kernel/stacktrace.c | 25 +++++++++++++++++++++++++
include/linux/stacktrace.h | 2 ++
2 files changed, 27 insertions(+)

diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 9545719..f1ca767 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -49,6 +49,10 @@ static int save_stack_end(void *data)
return trace->nr_entries >= trace->max_entries;
}

+/*
+ * This operation should be used in the oops case where
+ * stack might be broken.
+ */
static const struct stacktrace_ops save_stack_ops = {
.stack = save_stack_stack,
.address = save_stack_address,
@@ -56,6 +60,13 @@ static const struct stacktrace_ops save_stack_ops = {
.end_walk = save_stack_end,
};

+static const struct stacktrace_ops save_stack_ops_norm = {
+ .stack = save_stack_stack,
+ .address = save_stack_address,
+ .walk_stack = print_context_stack_bp,
+ .end_walk = save_stack_end,
+};
+
static const struct stacktrace_ops save_stack_ops_nosched = {
.stack = save_stack_stack,
.address = save_stack_address_nosched,
@@ -64,6 +75,7 @@ static const struct stacktrace_ops save_stack_ops_nosched = {

/*
* Save stack-backtrace addresses into a stack_trace buffer.
+ * It works even in oops.
*/
void save_stack_trace(struct stack_trace *trace)
{
@@ -73,6 +85,19 @@ void save_stack_trace(struct stack_trace *trace)
}
EXPORT_SYMBOL_GPL(save_stack_trace);

+/*
+ * Save stack-backtrace addresses into a stack_trace buffer.
+ * This is preferred in the normal case where we expect the stack to be
+ * reliable.
+ */
+void save_stack_trace_norm(struct stack_trace *trace)
+{
+ dump_trace(current, NULL, NULL, 0, &save_stack_ops_norm, trace);
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_norm);
+
void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
{
dump_trace(current, regs, NULL, 0, &save_stack_ops, trace);
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 0a34489..58d5176 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -14,6 +14,7 @@ struct stack_trace {
};

extern void save_stack_trace(struct stack_trace *trace);
+extern void save_stack_trace_norm(struct stack_trace *trace);
extern void save_stack_trace_regs(struct pt_regs *regs,
struct stack_trace *trace);
extern void save_stack_trace_tsk(struct task_struct *tsk,
@@ -31,6 +32,7 @@ extern void save_stack_trace_user(struct stack_trace *trace);

#else
# define save_stack_trace(trace) do { } while (0)
+# define save_stack_trace_norm(trace) do { } while (0)
# define save_stack_trace_tsk(tsk, trace) do { } while (0)
# define save_stack_trace_user(trace) do { } while (0)
# define print_stack_trace(trace, spaces) do { } while (0)
--
1.9.1