[PATCH v2 04/10] x86-64: Replace vsyscall gettimeofday fallback with int 0xcc

From: Andy Lutomirski
Date: Sun May 29 2011 - 23:49:41 EST


Now the only way to issue a syscall with side effects through the
vsyscall page is to call a misaligned instruction (i.e. to jump into
the middle of an existing instruction). I haven't checked for that.

Signed-off-by: Andy Lutomirski <luto@xxxxxxx>
---
arch/x86/include/asm/irq_vectors.h | 6 ++-
arch/x86/include/asm/traps.h | 4 ++
arch/x86/include/asm/vsyscall.h | 6 +++
arch/x86/kernel/entry_64.S | 2 +
arch/x86/kernel/traps.c | 4 ++
arch/x86/kernel/vsyscall_64.c | 83 +++++++++++++++++++++++++++++++++---
6 files changed, 97 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 6e976ee..a563c50 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -17,7 +17,8 @@
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
* Vectors 32 ... 127 : device interrupts
* Vector 128 : legacy int80 syscall interface
- * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 : device interrupts
+ * Vector 204 : legacy x86_64 vsyscall emulation
+ * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts
* Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
*
* 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
@@ -50,6 +51,9 @@
#ifdef CONFIG_X86_32
# define SYSCALL_VECTOR 0x80
#endif
+#ifdef CONFIG_X86_64
+# define VSYSCALL_EMU_VECTOR 0xcc
+#endif

/*
* Vectors 0x30-0x3f are used for ISA interrupts.
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 0310da6..2bae0a5 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_TRAPS_H
#define _ASM_X86_TRAPS_H

+#include <linux/kprobes.h>
+
#include <asm/debugreg.h>
#include <asm/siginfo.h> /* TRAP_TRACE, ... */

@@ -38,6 +40,7 @@ asmlinkage void alignment_check(void);
asmlinkage void machine_check(void);
#endif /* CONFIG_X86_MCE */
asmlinkage void simd_coprocessor_error(void);
+asmlinkage void emulate_vsyscall(void);

dotraplinkage void do_divide_error(struct pt_regs *, long);
dotraplinkage void do_debug(struct pt_regs *, long);
@@ -64,6 +67,7 @@ dotraplinkage void do_alignment_check(struct pt_regs *, long);
dotraplinkage void do_machine_check(struct pt_regs *, long);
#endif
dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long);
+dotraplinkage void do_emulate_vsyscall(struct pt_regs *, long);
#ifdef CONFIG_X86_32
dotraplinkage void do_iret_error(struct pt_regs *, long);
#endif
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index d555973..293ae08 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -31,6 +31,12 @@ extern struct timezone sys_tz;

extern void map_vsyscall(void);

+/* Emulation */
+static inline bool in_vsyscall_page(unsigned long addr)
+{
+ return (addr & ~(PAGE_SIZE - 1)) == VSYSCALL_START;
+}
+
#endif /* __KERNEL__ */

#endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 8a445a0..bee7e81 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1121,6 +1121,8 @@ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
zeroentry coprocessor_error do_coprocessor_error
errorentry alignment_check do_alignment_check
zeroentry simd_coprocessor_error do_simd_coprocessor_error
+zeroentry emulate_vsyscall do_emulate_vsyscall
+

/* Reload gs selector with exception handling */
/* edi: new selector */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b9b6716..72f0f6a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -872,6 +872,10 @@ void __init trap_init(void)
set_bit(SYSCALL_VECTOR, used_vectors);
#endif

+ BUG_ON(test_bit(VSYSCALL_EMU_VECTOR, used_vectors));
+ set_system_intr_gate(VSYSCALL_EMU_VECTOR, &emulate_vsyscall);
+ set_bit(VSYSCALL_EMU_VECTOR, used_vectors);
+
/*
* Should be a barrier for any external CPU state:
*/
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 3e8dac7..53d2237 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -32,6 +32,8 @@
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/notifier.h>
+#include <linux/syscalls.h>
+#include <linux/ratelimit.h>

#include <asm/vsyscall.h>
#include <asm/pgtable.h>
@@ -44,10 +46,10 @@
#include <asm/desc.h>
#include <asm/topology.h>
#include <asm/vgtod.h>
+#include <asm/traps.h>

#define __vsyscall(nr) \
__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
-#define __syscall_clobber "r11","cx","memory"

DEFINE_VVAR(int, vgetcpu_mode);
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
@@ -84,6 +86,26 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}

+static void warn_bad_vsyscall(struct pt_regs *regs, bool is_warning,
+ const char *message)
+{
+ struct task_struct *tsk;
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+ if (!show_unhandled_signals || !__ratelimit(&rs))
+ return;
+
+ tsk = current;
+ printk("%s%s[%d] %s ip:%lx sp:%lx ax:%lx si:%lx di:%lx",
+ is_warning ? KERN_WARNING : KERN_INFO,
+ tsk->comm, task_pid_nr(tsk),
+ message,
+ regs->ip - 2, regs->sp, regs->ax, regs->si, regs->di);
+ if (!in_vsyscall_page(regs->ip - 2))
+ print_vma_addr(" in ", regs->ip - 2);
+ printk("\n");
+}
+
/* RED-PEN may want to readd seq locking, but then the variable should be
* write-once.
*/
@@ -92,13 +114,14 @@ static __always_inline void do_get_tz(struct timezone * tz)
*tz = VVAR(vsyscall_gtod_data).sys_tz;
}

-static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
+static __always_inline int fallback_gettimeofday(struct timeval *tv)
{
int ret;
- asm volatile("syscall"
- : "=a" (ret)
- : "0" (__NR_gettimeofday),"D" (tv),"S" (tz)
- : __syscall_clobber );
+ /* Trap to do_emulate_vsyscall; it stores into *tv, hence "memory". */
+ asm volatile("movb $0xce, %%al;\n\t" "int %[vec]"
+ : "=a" (ret)
+ : "D" (tv), [vec] "i" (VSYSCALL_EMU_VECTOR)
+ : "memory");
return ret;
}

@@ -113,7 +136,7 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)

vread = VVAR(vsyscall_gtod_data).clock.vread;
if (unlikely(!vread)) {
- gettimeofday(tv,NULL);
+ fallback_gettimeofday(tv);
return;
}

@@ -214,6 +237,52 @@ static long __vsyscall(3) venosys_1(void)
return -ENOSYS;
}

+void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
+{
+ long ret;
+
+ /* Kernel code must never get here. */
+ BUG_ON(!user_mode(regs));
+
+ local_irq_enable();
+
+ if ((regs->ax & 0xFF) != 0xce) {
+ warn_bad_vsyscall(regs, false, "illegal int 0xcc "
+ "(exploit attempt?)");
+ force_sig(SIGSEGV, current);
+ goto out;
+ }
+
+ if (!in_vsyscall_page(regs->ip)) {
+ /*
+ * We allow the call because tools like ThreadSpotter
+ * might copy the int 0xcc instruction to user memory.
+ * We make it annoying, though, to try to persuade
+ * the authors to stop doing that...
+ */
+ warn_bad_vsyscall(regs, true, "int 0xcc in user code (exploit"
+ " attempt? legacy instrumented code?)");
+ }
+
+ if (current->seccomp.mode) {
+ do_exit(SIGKILL);
+ goto out;
+ }
+
+ ret = sys_gettimeofday((struct timeval __user *)regs->di, NULL);
+ if (ret == -EFAULT) {
+ warn_bad_vsyscall(regs, true, "int 0xcc faulted (exploit "
+ "attempt?)");
+ force_sig(SIGSEGV, current);
+ goto out;
+ }
+
+ regs->ax = ret;
+
+out:
+ local_irq_disable();
+}
+
/* Assume __initcall executes before all user space. Hopefully kmod
doesn't violate that. We'll find out if it does. */
static void __cpuinit vsyscall_set_cpu(int cpu)
--
1.7.5.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/