[tip:x86/mm] x86/dumpstack: When OOPSing, rewind the stack before do_exit()

From: tip-bot for Andy Lutomirski
Date: Fri Jul 15 2016 - 08:03:13 EST


Commit-ID: 2deb4be28077638591fe5fc593b7f8aabc140f42
Gitweb: http://git.kernel.org/tip/2deb4be28077638591fe5fc593b7f8aabc140f42
Author: Andy Lutomirski <luto@xxxxxxxxxx>
AuthorDate: Thu, 14 Jul 2016 13:22:55 -0700
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Fri, 15 Jul 2016 10:26:28 +0200

x86/dumpstack: When OOPSing, rewind the stack before do_exit()

If we call do_exit() with a clean stack, we greatly reduce the risk of
recursive oopses due to stack overflow in do_exit, and we allow
do_exit to work even if we OOPS from an IST stack. The latter gives
us a much better chance of surviving long enough after we detect a
stack overflow to write out our logs.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
Reviewed-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Brian Gerst <brgerst@xxxxxxxxx>
Cc: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
Cc: H. Peter Anvin <hpa@xxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Link: http://lkml.kernel.org/r/32f73ceb372ec61889598da5e5b145889b9f2e19.1468527351.git.luto@xxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
arch/x86/entry/entry_32.S | 11 +++++++++++
arch/x86/entry/entry_64.S | 11 +++++++++++
arch/x86/kernel/dumpstack.c | 10 +++++++++-
3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 983e5d3..0b56666 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1153,3 +1153,14 @@ ENTRY(async_page_fault)
jmp error_code
END(async_page_fault)
#endif
+
+ENTRY(rewind_stack_do_exit) /* Declared in dumpstack.c as void __noreturn rewind_stack_do_exit(int signr): repoint %esp, then call do_exit. */
+ /* Prevent any naive code from trying to unwind to our caller. */
+ xorl %ebp, %ebp
+
+ movl PER_CPU_VAR(cpu_current_top_of_stack), %esi /* %esi = per-CPU cpu_current_top_of_stack; NOTE(review): %esi presumably chosen so the register holding signr is not clobbered - confirm against the 32-bit calling convention */
+ leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp /* %esp = that value minus the padding and PTREGS_SIZE; everything previously on the stack is abandoned */
+
+ call do_exit /* no argument is set up here: signr is passed through exactly as the caller supplied it */
+1: jmp 1b /* spin in place if do_exit ever returns, since there is no frame left to return to */
+END(rewind_stack_do_exit)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9ee0da1..b846875 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1423,3 +1423,14 @@ ENTRY(ignore_sysret)
mov $-ENOSYS, %eax
sysret
END(ignore_sysret)
+
+ENTRY(rewind_stack_do_exit) /* Declared in dumpstack.c as void __noreturn rewind_stack_do_exit(int signr): repoint %rsp, then call do_exit. */
+ /* Prevent any naive code from trying to unwind to our caller. */
+ xorl %ebp, %ebp /* a 32-bit register write zero-extends in 64-bit mode, so this clears all of %rbp */
+
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rax /* %rax = per-CPU cpu_current_top_of_stack; NOTE(review): %rax presumably chosen so the register holding signr is not clobbered - confirm against the 64-bit calling convention */
+ leaq -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp /* %rsp = that value minus the padding and PTREGS_SIZE; everything previously on the stack is abandoned */
+
+ call do_exit /* no argument is set up here: signr is passed through exactly as the caller supplied it */
+1: jmp 1b /* spin in place if do_exit ever returns, since there is no frame left to return to */
+END(rewind_stack_do_exit)
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index cc88e25..de8242d 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -234,6 +234,8 @@ unsigned long oops_begin(void)
EXPORT_SYMBOL_GPL(oops_begin);
NOKPROBE_SYMBOL(oops_begin);

+void __noreturn rewind_stack_do_exit(int signr);
+
void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
{
if (regs && kexec_should_crash(current))
@@ -255,7 +257,13 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
panic("Fatal exception in interrupt");
if (panic_on_oops)
panic("Fatal exception");
- do_exit(signr);
+
+ /*
+ * We're not going to return, but we might be on an IST stack or
+ * have very little stack space left. Rewind the stack and kill
+ * the task.
+ */
+ rewind_stack_do_exit(signr);
}
NOKPROBE_SYMBOL(oops_end);