[tip:x86/urgent] x86/ptrace: Stop setting TS_COMPAT in ptrace code

From: tip-bot for Andy Lutomirski
Date: Wed Jul 27 2016 - 06:45:17 EST


Commit-ID: 609c19a385c8744c41f944e2a2d8afe8e8fb860e
Gitweb: http://git.kernel.org/tip/609c19a385c8744c41f944e2a2d8afe8e8fb860e
Author: Andy Lutomirski <luto@xxxxxxxxxx>
AuthorDate: Tue, 26 Jul 2016 23:12:22 -0700
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Wed, 27 Jul 2016 11:09:43 +0200

x86/ptrace: Stop setting TS_COMPAT in ptrace code

Setting TS_COMPAT in ptrace is wrong: if we happen to do it during
syscall entry, then we'll confuse seccomp and audit. (The former
isn't a security problem: seccomp is currently entirely insecure if a
malicious ptracer is attached.) As a minimal fix, this patch adds a
new flag TS_I386_REGS_POKED that handles the ptrace special case.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
Acked-by: Oleg Nesterov <oleg@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Brian Gerst <brgerst@xxxxxxxxx>
Cc: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
Cc: H. Peter Anvin <hpa@xxxxxxxxx>
Cc: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
Cc: Kees Cook <keescook@xxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Pedro Alves <palves@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Link: http://lkml.kernel.org/r/5383ebed38b39fa37462139e337aff7f2314d1ca.1469599803.git.luto@xxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
arch/x86/entry/common.c | 6 +++++-
arch/x86/include/asm/syscall.h | 5 +----
arch/x86/include/asm/thread_info.h | 3 +++
arch/x86/kernel/ptrace.c | 15 +++++++++------
arch/x86/kernel/signal.c | 26 ++++++++++++++++++++++++--
5 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 9e1e27d..be8c403 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -270,8 +270,12 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
* handling, because syscall restart has a fixup for compat
* syscalls. The fixup is exercised by the ptrace_syscall_32
* selftest.
+ *
+ * We also need to clear TS_REGS_POKED_I386: the 32-bit tracer
+ * special case only applies after poking regs and before the
+ * very next return to user mode.
*/
- ti->status &= ~TS_COMPAT;
+ ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
#endif

user_enter_irqoff();
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 999b7cd..4e23dd1 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task,
* TS_COMPAT is set for 32-bit syscall entries and then
* remains set until we return to user mode.
*/
- if (task_thread_info(task)->status & TS_COMPAT)
+ if (task_thread_info(task)->status & (TS_COMPAT|TS_I386_REGS_POKED))
/*
* Sign-extend the value so (int)-EFOO becomes (long)-EFOO
* and will match correctly in comparisons.
@@ -239,9 +239,6 @@ static inline int syscall_get_arch(void)
* TS_COMPAT is set for 32-bit syscall entry and then
* remains set until we return to user mode.
*
- * TIF_IA32 tasks should always have TS_COMPAT set at
- * system call time.
- *
* x32 tasks should be considered AUDIT_ARCH_X86_64.
*/
if (task_thread_info(current)->status & TS_COMPAT)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 89bff04..f856c59 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -219,6 +219,9 @@ static inline unsigned long current_stack_pointer(void)
* have to worry about atomic accesses.
*/
#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
+#ifdef CONFIG_COMPAT
+#define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */
+#endif
#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */

#ifndef __ASSEMBLY__
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 600edd2..f79576a 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -923,15 +923,18 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)

case offsetof(struct user32, regs.orig_eax):
/*
- * A 32-bit debugger setting orig_eax means to restore
- * the state of the task restarting a 32-bit syscall.
- * Make sure we interpret the -ERESTART* codes correctly
- * in case the task is not actually still sitting at the
- * exit from a 32-bit syscall with TS_COMPAT still set.
+ * Warning: bizarre corner case fixup here. A 32-bit
+ * debugger setting orig_eax to -1 wants to disable
+ * syscall restart. Make sure that the syscall
+ * restart code sign-extends orig_ax. Also make sure
+ * we interpret the -ERESTART* codes correctly if
+ * loaded into regs->ax in case the task is not
+ * actually still sitting at the exit from a 32-bit
+ * syscall with TS_COMPAT still set.
*/
regs->orig_ax = value;
if (syscall_get_nr(child, regs) >= 0)
- task_thread_info(child)->status |= TS_COMPAT;
+ task_thread_info(child)->status |= TS_I386_REGS_POKED;
break;

case offsetof(struct user32, regs.eflags):
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 22cc2f9..9747a63 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -760,8 +760,30 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)

static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
{
-#ifdef CONFIG_X86_64
- if (in_ia32_syscall())
+ /*
+ * This function is fundamentally broken as currently
+ * implemented.
+ *
+ * The idea is that we want to trigger a call to the
+ * restart_block() syscall and that we want in_ia32_syscall(),
+ * in_x32_syscall(), etc. to match whatever they were in the
+ * syscall being restarted. We assume that the syscall
+ * instruction at (regs->ip - 2) matches whatever syscall
+ * instruction we used to enter in the first place.
+ *
+ * The problem is that we can get here when ptrace pokes
+ * syscall-like values into regs even if we're not in a syscall
+ * at all.
+ *
+ * For now, we maintain historical behavior and guess based on
+ * stored state. We could do better by saving the actual
+ * syscall arch in restart_block or (with caveats on x32) by
+ * checking if regs->ip points to 'int $0x80'. The current
+ * behavior is incorrect if a tracer has a different bitness
+ * than the tracee.
+ */
+#ifdef CONFIG_IA32_EMULATION
+ if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
return __NR_ia32_restart_syscall;
#endif
#ifdef CONFIG_X86_X32_ABI