[RFC] x86-64: Add vsyscall=emulate|native|none option

From: Andy Lutomirski
Date: Tue Aug 09 2011 - 12:47:27 EST


vsyscall=native makes vsyscalls as fast as syscalls and makes pin
and DynamoRIO work. vsyscall=emulate (default) preserves current
behavior, and vsyscall=none is good for paranoid people who don't
need their boxes to work reliably.

Signed-off-by: Andy Lutomirski <luto@xxxxxxx>
---

This is an alternate fix. It applies on top of the patch that
wires up getcpu on x86_64.

Documentation/kernel-parameters.txt | 21 +++++++++++++++++++
arch/x86/kernel/vsyscall_64.c | 35 +++++++++++++++++++++++++++++++-
arch/x86/kernel/vsyscall_emu_64.S | 37 ++++++++++++++++++++++++++++++++++-
3 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e279b72..78926aa 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2680,6 +2680,27 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
vmpoff= [KNL,S390] Perform z/VM CP command after power off.
Format: <command>

+ vsyscall= [X86-64]
+ Controls the behavior of vsyscalls (i.e. calls to
+ fixed addresses of 0xffffffffff600x00 from legacy
+ code). Most statically-linked binaries and older
+ versions of glibc use these calls. Because these
+ functions are at fixed addresses, they make nice
+ targets for exploits that can control RIP.
+
+ emulate [default] Vsyscalls turn into traps and are
+ emulated reasonably safely.
+
+ native Vsyscalls are native syscall instructions.
+ This is a little bit faster than trapping
+ and makes a few dynamic recompilers work
+ better than they would in emulation mode.
+ It also makes exploits much easier to write.
+
+ none Vsyscalls don't work at all. This makes
+ them quite hard to use for exploits but
+ might break your system.
+
vt.cur_default= [VT] Default cursor shape.
Format: 0xCCBBAA, where AA, BB, and CC are the same as
the parameters of the <Esc>[?A;B;Cc escape sequence;
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index bf8e9ff..e06a200 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -56,6 +56,27 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
.lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
};

+static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
+
+/* Parse "vsyscall=<mode>": 0 for emulate/native/none, -EINVAL otherwise. */
+static int __init vsyscall_setup(char *str)
+{
+	if (!str)
+		return -EINVAL;
+
+	if (strcmp(str, "emulate") == 0)
+		vsyscall_mode = EMULATE;
+	else if (strcmp(str, "native") == 0)
+		vsyscall_mode = NATIVE;
+	else if (strcmp(str, "none") == 0)
+		vsyscall_mode = NONE;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+early_param("vsyscall", vsyscall_setup);
+
void update_vsyscall_tz(void)
{
unsigned long flags;
@@ -151,7 +172,13 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)

if (vsyscall_nr < 0) {
warn_bad_vsyscall(KERN_WARNING, regs,
- "illegal int 0xcc (exploit attempt?)");
+ "illegal int 0xcc (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround");
+ goto sigsegv;
+ }
+
+ if (vsyscall_mode == NONE) {
+ warn_bad_vsyscall(KERN_INFO, regs,
+ "vsyscall attempted with vsyscall=none -- sending SIGSEGV");
goto sigsegv;
}

@@ -260,8 +287,12 @@ void __init map_vsyscall(void)
extern char __vvar_page;
unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);

- /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
+ extern char __native_vsyscall_page;
+ if (vsyscall_mode == NATIVE)
+ physaddr_page0 = __pa_symbol(&__native_vsyscall_page);
+
__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
+
__set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR);
BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != (unsigned long)VVAR_ADDRESS);
}
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S
index ffa845e..97bb09d 100644
--- a/arch/x86/kernel/vsyscall_emu_64.S
+++ b/arch/x86/kernel/vsyscall_emu_64.S
@@ -7,9 +7,17 @@
*/

#include <linux/linkage.h>
+
#include <asm/irq_vectors.h>
+#include <asm/page_types.h>
+#include <asm/unistd_64.h>
+
+/*
+ * There are two versions of the vsyscall code. The unused parts of the
+ * pages are filled with 0xcc by the linker script.
+ */

-/* The unused parts of the page are filled with 0xcc by the linker script. */
+/* Mostly safe version used for vsyscall=emulate and vsyscall=none */

.section .vsyscall_0, "a"
ENTRY(vsyscall_0)
@@ -25,3 +33,30 @@ END(vsyscall_1)
ENTRY(vsyscall_2)
int $VSYSCALL_EMU_VECTOR
END(vsyscall_2)
+
+
+/* Much less safe version used for vsyscall=native */
+
+__PAGE_ALIGNED_DATA
+	.globl __native_vsyscall_page
+	.balign PAGE_SIZE, 0xcc
+	.type __native_vsyscall_page, @object
+__native_vsyscall_page:
+	/* Entry 0 (page offset 0): gettimeofday */
+	mov $__NR_gettimeofday, %rax
+	syscall
+	ret
+	/* Entry 1 (page offset 1024): time */
+	.balign 1024, 0xcc
+	mov $__NR_time, %rax
+	syscall
+	ret
+	/* Entry 2 (page offset 2048): getcpu */
+	.balign 1024, 0xcc
+	mov $__NR_getcpu, %rax
+	syscall
+	ret
+	/* Pad the remainder of the page with 0xcc */
+	.balign PAGE_SIZE, 0xcc
+
+	.size __native_vsyscall_page, PAGE_SIZE
\ No newline at end of file
--
1.7.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/