[PATCH 1/2] x86/dumpstack: Fix partial register dumps

From: Josh Poimboeuf
Date: Sun Dec 31 2017 - 11:18:53 EST


The show_regs_safe() logic is wrong. When there's an iret stack frame,
it prints the entire pt_regs -- most of which is random stack data --
instead of just the five registers at the end.

show_regs_safe() is also poorly named: the on_stack() checks aren't for
safety. Rename the function to show_regs_if_on_stack() and add a
comment to explain why the checks are needed.

These issues were introduced with the "partial register dump" feature of
the following commit:

b02fcf9ba121 ("x86/unwinder: Handle stack overflows more gracefully")

That patch had gone through a few iterations of development, and the
above issues were artifacts from a previous iteration of the patch where
'regs' pointed directly to the iret frame rather than to the (partially
empty) pt_regs.

Tested-by: Alexander Tsoy <alexander@xxxxxxx>
Fixes: b02fcf9ba121 ("x86/unwinder: Handle stack overflows more gracefully")
Cc: stable@xxxxxxxxxxxxxxx
Signed-off-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
---
arch/x86/include/asm/unwind.h | 17 +++++++++++++----
arch/x86/kernel/dumpstack.c | 28 ++++++++++++++++++++--------
arch/x86/kernel/stacktrace.c | 2 +-
3 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index c1688c2d0a12..1f86e1b0a5cd 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -56,18 +56,27 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,

#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
/*
- * WARNING: The entire pt_regs may not be safe to dereference. In some cases,
- * only the iret frame registers are accessible. Use with caution!
+ * If 'partial' returns true, only the iret frame registers are valid.
*/
-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
+ bool *partial)
{
if (unwind_done(state))
return NULL;

+ if (partial) {
+#ifdef CONFIG_UNWINDER_ORC
+ *partial = !state->full_regs;
+#else
+ *partial = false;
+#endif
+ }
+
return state->regs;
}
#else
-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
+ bool *partial)
{
return NULL;
}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 36b17e0febe8..e4c2c73bd99e 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -76,12 +76,23 @@ void show_iret_regs(struct pt_regs *regs)
regs->sp, regs->flags);
}

-static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
+static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
+ bool partial)
{
- if (on_stack(info, regs, sizeof(*regs)))
+ /*
+ * These on_stack() checks aren't strictly necessary: the unwind code
+ * has already validated the 'regs' pointer. The checks are done for
+ * ordering reasons: if the registers are on the next stack, we don't
+ * want to print them out yet. Otherwise they'll be shown as part of
+ * the wrong stack. Later, when show_trace_log_lvl() switches to the
+ * next stack, this function will be called again with the same regs so
+ * they can be printed in the right context.
+ */
+ if (!partial && on_stack(info, regs, sizeof(*regs))) {
__show_regs(regs, 0);
- else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
- IRET_FRAME_SIZE)) {
+
+ } else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
+ IRET_FRAME_SIZE)) {
/*
* When an interrupt or exception occurs in entry code, the
* full pt_regs might not have been saved yet. In that case
@@ -98,6 +109,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
struct stack_info stack_info = {0};
unsigned long visit_mask = 0;
int graph_idx = 0;
+ bool partial;

printk("%sCall Trace:\n", log_lvl);

@@ -140,7 +152,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
printk("%s <%s>\n", log_lvl, stack_name);

if (regs)
- show_regs_safe(&stack_info, regs);
+ show_regs_if_on_stack(&stack_info, regs, partial);

/*
* Scan the stack, printing any text addresses we find. At the
@@ -164,7 +176,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,

/*
* Don't print regs->ip again if it was already printed
- * by show_regs_safe() below.
+ * by show_regs_if_on_stack().
*/
if (regs && stack == &regs->ip)
goto next;
@@ -199,9 +211,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unwind_next_frame(&state);

/* if the frame has entry regs, print them */
- regs = unwind_get_entry_regs(&state);
+ regs = unwind_get_entry_regs(&state, &partial);
if (regs)
- show_regs_safe(&stack_info, regs);
+ show_regs_if_on_stack(&stack_info, regs, partial);
}

if (stack_name)
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 77835bc021c7..7dd0d2a0d142 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -102,7 +102,7 @@ __save_stack_trace_reliable(struct stack_trace *trace,
for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state);
unwind_next_frame(&state)) {

- regs = unwind_get_entry_regs(&state);
+ regs = unwind_get_entry_regs(&state, NULL);
if (regs) {
/*
* Kernel mode registers on the stack indicate an
--
2.13.6