Re: [PATCH v3] x86/asm/entry/32, selftests: Add test_syscall_vdso test
From: Shuah Khan
Date: Thu Jun 18 2015 - 10:13:39 EST
On 06/15/2015 07:30 AM, Denys Vlasenko wrote:
> The test checks that all registers are preserved across
> 32-bit syscalls. It tests syscalls through VDSO
> (if available) and through INT 0x80, normally
> and under ptrace.
>
> If kernel is a 64-bit one, high registers (r8..r15) are poisoned
> before call and checked afterwards. They must be either preserved,
> or cleared to zero (but r11 is special); r12..15 must be preserved
> for INT 0x80.
>
> EFLAGS is checked for changes too, but change there is not considered
> to be a bug (paravirt kernels do not preserve arithmetic flags).
>
> Run-tested on 64-bit kernel:
The test looks good to me. However, it looks like it didn't get added
to the x86 Makefile for running and installing it. Other than that,
the test itself is fine.
As is, this patch doesn't add the test to the run-tests target.
Is it the intent that this test be left out of the selftest
run-tests target?
Please add it to the x86 Makefile and make sure the kselftest
target and kselftest install don't break, if you would
like this test to be part of the run-tests target.
thanks,
-- Shuah
>
> $ ./test_syscall_vdso_32
> [RUN] Executing 6-argument 32-bit syscall via VDSO
> [OK] Arguments are preserved across syscall
> [NOTE] R11 has changed:0000000000200ed7 - assuming clobbered by SYSRET insn
> [OK] R8..R15 did not leak kernel data
> [RUN] Executing 6-argument 32-bit syscall via INT 80
> [OK] Arguments are preserved across syscall
> [OK] R8..R15 did not leak kernel data
> [RUN] Running tests under ptrace
> [RUN] Executing 6-argument 32-bit syscall via VDSO
> [OK] Arguments are preserved across syscall
> [OK] R8..R15 did not leak kernel data
> [RUN] Executing 6-argument 32-bit syscall via INT 80
> [OK] Arguments are preserved across syscall
> [OK] R8..R15 did not leak kernel data
>
> On 32-bit paravirt kernel:
>
> $ ./test_syscall_vdso_32
> [NOTE] Not a 64-bit kernel, won't test R8..R15 leaks
> [RUN] Executing 6-argument 32-bit syscall via VDSO
> [WARN] Flags before=0000000000200ed7 id 0 00 o d i s z 0 a 0 p 1 c
> [WARN] Flags after=0000000000200246 id 0 00 i z 0 0 p 1
> [WARN] Flags change=0000000000000c91 0 00 o d s 0 a 0 0 c
> [OK] Arguments are preserved across syscall
> [RUN] Executing 6-argument 32-bit syscall via INT 80
> [OK] Arguments are preserved across syscall
> [RUN] Running tests under ptrace
> [RUN] Executing 6-argument 32-bit syscall via VDSO
> [OK] Arguments are preserved across syscall
> [RUN] Executing 6-argument 32-bit syscall via INT 80
> [OK] Arguments are preserved across syscall
>
> Signed-off-by: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
> CC: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
> CC: Steven Rostedt <rostedt@xxxxxxxxxxx>
> CC: Ingo Molnar <mingo@xxxxxxxxxx>
> CC: Borislav Petkov <bp@xxxxxxxxx>
> CC: "H. Peter Anvin" <hpa@xxxxxxxxx>
> CC: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
> CC: Oleg Nesterov <oleg@xxxxxxxxxx>
> CC: Frederic Weisbecker <fweisbec@xxxxxxxxx>
> CC: Alexei Starovoitov <ast@xxxxxxxxxxxx>
> CC: Will Drewry <wad@xxxxxxxxxxxx>
> CC: Kees Cook <keescook@xxxxxxxxxxxx>
> CC: x86@xxxxxxxxxx
> CC: linux-kernel@xxxxxxxxxxxxxxx
> ---
>
> Changes in v2:
> does not fail if VDSO can't be found;
> tests INT 80 syscall method;
> tests syscalls under ptrace;
> switched to /* */ comments
>
> Changes in v3:
> added checking for r8..r15 info leaks
>
> tools/testing/selftests/x86/test_syscall_vdso.c | 401 ++++++++++++++++++++++++
> tools/testing/selftests/x86/thunks_32.S | 55 ++++
> 2 files changed, 456 insertions(+)
> create mode 100644 tools/testing/selftests/x86/test_syscall_vdso.c
> create mode 100644 tools/testing/selftests/x86/thunks_32.S
>
> diff --git a/tools/testing/selftests/x86/test_syscall_vdso.c b/tools/testing/selftests/x86/test_syscall_vdso.c
> new file mode 100644
> index 0000000..0792aef
> --- /dev/null
> +++ b/tools/testing/selftests/x86/test_syscall_vdso.c
> @@ -0,0 +1,401 @@
> +/*
> + * 32-bit syscall ABI conformance test.
> + *
> + * Copyright (c) 2015 Denys Vlasenko
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * General Public License for more details.
> + */
> +/*
> + * Can be built statically:
> + * gcc -Os -Wall -static -m32 test_syscall_vdso.c thunks_32.S
> + */
> +#undef _GNU_SOURCE
> +#define _GNU_SOURCE 1
> +#undef __USE_GNU
> +#define __USE_GNU 1
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <stdio.h>
> +#include <signal.h>
> +#include <sys/types.h>
> +#include <sys/select.h>
> +#include <sys/time.h>
> +#include <elf.h>
> +#include <sys/ptrace.h>
> +#include <sys/wait.h>
> +
> +#if !defined(__i386__)
> +int main(int argc, char **argv, char **envp)
> +{
> + printf("[SKIP]\tNot a 32-bit x86 userspace\n");
> + return 0;
> +}
> +#else
> +
> +long syscall_addr;
> +long get_syscall(char **envp)
> +{
> + Elf32_auxv_t *auxv;
> + while (*envp++ != NULL)
> + continue;
> + for (auxv = (void *)envp; auxv->a_type != AT_NULL; auxv++)
> + if (auxv->a_type == AT_SYSINFO)
> + return auxv->a_un.a_val;
> + printf("[WARN]\tAT_SYSINFO not supplied\n");
> + return 0;
> +}
> +
> +asm (
> + " .pushsection .text\n"
> + " .global int80\n"
> + "int80:\n"
> + " int $0x80\n"
> + " ret\n"
> + " .popsection\n"
> +);
> +extern char int80;
> +
> +struct regs64 {
> + uint64_t rax, rbx, rcx, rdx;
> + uint64_t rsi, rdi, rbp, rsp;
> + uint64_t r8, r9, r10, r11;
> + uint64_t r12, r13, r14, r15;
> +};
> +struct regs64 regs64;
> +int kernel_is_64bit;
> +
> +asm (
> + " .pushsection .text\n"
> + " .code64\n"
> + "get_regs64:\n"
> + " push %rax\n"
> + " mov $regs64, %eax\n"
> + " pop 0*8(%rax)\n"
> + " movq %rbx, 1*8(%rax)\n"
> + " movq %rcx, 2*8(%rax)\n"
> + " movq %rdx, 3*8(%rax)\n"
> + " movq %rsi, 4*8(%rax)\n"
> + " movq %rdi, 5*8(%rax)\n"
> + " movq %rbp, 6*8(%rax)\n"
> + " movq %rsp, 7*8(%rax)\n"
> + " movq %r8, 8*8(%rax)\n"
> + " movq %r9, 9*8(%rax)\n"
> + " movq %r10, 10*8(%rax)\n"
> + " movq %r11, 11*8(%rax)\n"
> + " movq %r12, 12*8(%rax)\n"
> + " movq %r13, 13*8(%rax)\n"
> + " movq %r14, 14*8(%rax)\n"
> + " movq %r15, 15*8(%rax)\n"
> + " ret\n"
> + "poison_regs64:\n"
> + " movq $0x7f7f7f7f, %r8\n"
> + " shl $32, %r8\n"
> + " orq $0x7f7f7f7f, %r8\n"
> + " movq %r8, %r9\n"
> + " movq %r8, %r10\n"
> + " movq %r8, %r11\n"
> + " movq %r8, %r12\n"
> + " movq %r8, %r13\n"
> + " movq %r8, %r14\n"
> + " movq %r8, %r15\n"
> + " ret\n"
> + " .code32\n"
> + " .popsection\n"
> +);
> +extern void get_regs64(void);
> +extern void poison_regs64(void);
> +extern unsigned long call64_from_32(void (*function)(void));
> +void print_regs64(void)
> +{
> + if (!kernel_is_64bit)
> + return;
> + printf("ax:%016llx bx:%016llx cx:%016llx dx:%016llx\n", regs64.rax, regs64.rbx, regs64.rcx, regs64.rdx);
> + printf("si:%016llx di:%016llx bp:%016llx sp:%016llx\n", regs64.rsi, regs64.rdi, regs64.rbp, regs64.rsp);
> + printf(" 8:%016llx 9:%016llx 10:%016llx 11:%016llx\n", regs64.r8 , regs64.r9 , regs64.r10, regs64.r11);
> + printf("12:%016llx 13:%016llx 14:%016llx 15:%016llx\n", regs64.r12, regs64.r13, regs64.r14, regs64.r15);
> +}
> +
> +int check_regs64(void)
> +{
> + int err = 0;
> + int num = 8;
> + uint64_t *r64 = &regs64.r8;
> +
> + if (!kernel_is_64bit)
> + return 0;
> +
> + do {
> + if (*r64 == 0x7f7f7f7f7f7f7f7fULL)
> + continue; /* register did not change */
> + if (syscall_addr != (long)&int80) {
> + /*
> + * Non-INT80 syscall entrypoints are allowed to clobber R8+ regs:
> + * either clear them to 0, or for R11, load EFLAGS.
> + */
> + if (*r64 == 0)
> + continue;
> + if (num == 11) {
> + printf("[NOTE]\tR11 has changed:%016llx - assuming clobbered by SYSRET insn\n", *r64);
> + continue;
> + }
> + } else {
> + /* INT80 syscall entrypoint can be used by
> + * 64-bit programs too, unlike SYSCALL/SYSENTER.
> + * Therefore it must preserve R12+
> + * (they are callee-saved registers in 64-bit C ABI).
> + *
> + * This was probably historically not intended,
> + * but R8..11 are clobbered (cleared to 0).
> + * IOW: they are the only registers which aren't
> + * preserved across INT80 syscall.
> + */
> + if (*r64 == 0 && num <= 11)
> + continue;
> + }
> + printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64);
> + err++;
> + } while (r64++, ++num < 16);
> +
> + if (!err)
> + printf("[OK]\tR8..R15 did not leak kernel data\n");
> + return err;
> +}
> +
> +int nfds;
> +fd_set rfds;
> +fd_set wfds;
> +fd_set efds;
> +struct timespec timeout;
> +sigset_t sigmask;
> +struct {
> + sigset_t *sp;
> + int sz;
> +} sigmask_desc;
> +
> +void prep_args()
> +{
> + nfds = 42;
> + FD_ZERO(&rfds);
> + FD_ZERO(&wfds);
> + FD_ZERO(&efds);
> + FD_SET(0, &rfds);
> + FD_SET(1, &wfds);
> + FD_SET(2, &efds);
> + timeout.tv_sec = 0;
> + timeout.tv_nsec = 123;
> + sigemptyset(&sigmask);
> + sigaddset(&sigmask, SIGINT);
> + sigaddset(&sigmask, SIGUSR2);
> + sigaddset(&sigmask, SIGRTMAX);
> + sigmask_desc.sp = &sigmask;
> + sigmask_desc.sz = 8; /* bytes */
> +}
> +
> +static void print_flags(const char *name, unsigned long r)
> +{
> + static const char *bitarray[] = {
> + "\n" ,"c\n" ,/* Carry Flag */
> + "0 " ,"1 " ,/* Bit 1 - always on */
> + "" ,"p " ,/* Parity Flag */
> + "0 " ,"3? " ,
> + "" ,"a " ,/* Auxiliary carry Flag */
> + "0 " ,"5? " ,
> + "" ,"z " ,/* Zero Flag */
> + "" ,"s " ,/* Sign Flag */
> + "" ,"t " ,/* Trap Flag */
> + "" ,"i " ,/* Interrupt Flag */
> + "" ,"d " ,/* Direction Flag */
> + "" ,"o " ,/* Overflow Flag */
> + "0 " ,"1 " ,/* I/O Privilege Level (2 bits) */
> + "0" ,"1" ,/* I/O Privilege Level (2 bits) */
> + "" ,"n " ,/* Nested Task */
> + "0 " ,"15? ",
> + "" ,"r " ,/* Resume Flag */
> + "" ,"v " ,/* Virtual Mode */
> + "" ,"ac " ,/* Alignment Check/Access Control */
> + "" ,"vif ",/* Virtual Interrupt Flag */
> + "" ,"vip ",/* Virtual Interrupt Pending */
> + "" ,"id " ,/* CPUID detection */
> + NULL
> + };
> + const char **bitstr;
> + int bit;
> +
> + printf("%s=%016lx ", name, r);
> + bitstr = bitarray + 42;
> + bit = 21;
> + if ((r >> 22) != 0)
> + printf("(extra bits are set) ");
> + do {
> + if (bitstr[(r >> bit) & 1][0])
> + printf(bitstr[(r >> bit) & 1]);
> + bitstr -= 2;
> + bit--;
> + } while (bit >= 0);
> +}
> +
> +int run_syscall(void)
> +{
> + long flags, bad_arg;
> +
> + prep_args();
> +
> + if (kernel_is_64bit)
> + call64_from_32(poison_regs64);
> + /*print_regs64();*/
> +
> + asm("\n"
> + /* Try 6-arg syscall: pselect. It should return quickly */
> + " push %%ebp\n"
> + " mov $308, %%eax\n" /* PSELECT */
> + " mov nfds, %%ebx\n" /* ebx arg1 */
> + " mov $rfds, %%ecx\n" /* ecx arg2 */
> + " mov $wfds, %%edx\n" /* edx arg3 */
> + " mov $efds, %%esi\n" /* esi arg4 */
> + " mov $timeout, %%edi\n" /* edi arg5 */
> + " mov $sigmask_desc, %%ebp\n" /* %ebp arg6 */
> + " push $0x200ed7\n" /* set almost all flags */
> + " popf\n" /* except TF, IOPL, NT, RF, VM, AC, VIF, VIP */
> + " call *syscall_addr\n"
> + /* Check that registers are not clobbered */
> + " pushf\n"
> + " pop %%eax\n"
> + " cld\n"
> + " cmp nfds, %%ebx\n" /* ebx arg1 */
> + " mov $1, %%ebx\n"
> + " jne 1f\n"
> + " cmp $rfds, %%ecx\n" /* ecx arg2 */
> + " mov $2, %%ebx\n"
> + " jne 1f\n"
> + " cmp $wfds, %%edx\n" /* edx arg3 */
> + " mov $3, %%ebx\n"
> + " jne 1f\n"
> + " cmp $efds, %%esi\n" /* esi arg4 */
> + " mov $4, %%ebx\n"
> + " jne 1f\n"
> + " cmp $timeout, %%edi\n" /* edi arg5 */
> + " mov $5, %%ebx\n"
> + " jne 1f\n"
> + " cmpl $sigmask_desc, %%ebp\n" /* %ebp arg6 */
> + " mov $6, %%ebx\n"
> + " jne 1f\n"
> + " mov $0, %%ebx\n"
> + "1:\n"
> + " pop %%ebp\n"
> + : "=a" (flags), "=b" (bad_arg)
> + :
> + : "cx", "dx", "si", "di"
> + );
> +
> + if (kernel_is_64bit) {
> + memset(&regs64, 0x77, sizeof(regs64));
> + call64_from_32(get_regs64);
> + /*print_regs64();*/
> + }
> +
> + /*
> + * On paravirt kernels, flags are not preserved across syscalls.
> + * Thus, we do not consider it a bug if some are changed.
> + * We just show ones which do.
> + */
> + if ((0x200ed7 ^ flags) != 0) {
> + print_flags("[WARN]\tFlags before", 0x200ed7);
> + print_flags("[WARN]\tFlags after", flags);
> + print_flags("[WARN]\tFlags change", (0x200ed7 ^ flags));
> + }
> +
> + if (bad_arg) {
> + printf("[FAIL]\targ#%ld clobbered\n", bad_arg);
> + return 1;
> + }
> + printf("[OK]\tArguments are preserved across syscall\n");
> +
> + return check_regs64();
> +}
> +
> +int run_syscall_twice()
> +{
> + int exitcode = 0;
> + long sv;
> +
> + if (syscall_addr) {
> + printf("[RUN]\tExecuting 6-argument 32-bit syscall via VDSO\n");
> + exitcode = run_syscall();
> + }
> + sv = syscall_addr;
> + syscall_addr = (long)&int80;
> + printf("[RUN]\tExecuting 6-argument 32-bit syscall via INT 80\n");
> + exitcode += run_syscall();
> + syscall_addr = sv;
> + return exitcode;
> +}
> +
> +void ptrace_me()
> +{
> + pid_t pid;
> +
> + fflush(NULL);
> + pid = fork();
> + if (pid < 0)
> + exit(1);
> + if (pid == 0) {
> + /* child */
> + if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) != 0)
> + exit(0);
> + raise(SIGSTOP);
> + return;
> + }
> + /* parent */
> + printf("[RUN]\tRunning tests under ptrace\n");
> + while (1) {
> + int status;
> + pid = waitpid(-1, &status, __WALL);
> + if (WIFEXITED(status))
> + exit(WEXITSTATUS(status));
> + if (WIFSIGNALED(status))
> + exit(WTERMSIG(status));
> + if (pid <= 0 || !WIFSTOPPED(status)) /* paranoia */
> + exit(255);
> + /*
> + * Note: we do not inject sig = WSTOPSIG(status).
> + * We probably should, but careful: do not inject SIGTRAP
> + * generated by syscall entry/exit stops.
> + * That kills the child.
> + */
> + ptrace(PTRACE_SYSCALL, pid, 0L, 0L /*sig*/);
> + }
> +}
> +
> +int main(int argc, char **argv, char **envp)
> +{
> + int exitcode = 0;
> + int cs;
> +
> + asm("\n"
> + " movl %%cs, %%eax\n"
> + : "=a" (cs)
> + );
> + kernel_is_64bit = (cs == 0x23);
> + if (!kernel_is_64bit)
> + printf("[NOTE]\tNot a 64-bit kernel, won't test R8..R15 leaks\n");
> +
> + /* This only works for non-static builds:
> + * syscall_addr = dlsym(dlopen("linux-gate.so.1", RTLD_NOW), "__kernel_vsyscall");
> + */
> + syscall_addr = get_syscall(envp);
> +
> + exitcode += run_syscall_twice();
> + ptrace_me();
> + exitcode += run_syscall_twice();
> +
> + return exitcode;
> +}
> +#endif
> diff --git a/tools/testing/selftests/x86/thunks_32.S b/tools/testing/selftests/x86/thunks_32.S
> new file mode 100644
> index 0000000..77e9159
> --- /dev/null
> +++ b/tools/testing/selftests/x86/thunks_32.S
> @@ -0,0 +1,55 @@
> +/*
> + * thunks_32.S - assembly helpers for mixed-bitness code
> + * Copyright (c) 2015 Denys Vlasenko
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * General Public License for more details.
> + *
> + * These are little helpers that make it easier to switch bitness on
> + * the fly.
> + */
> +
> + .text
> + .code32
> +
> + .global call64_from_32
> + .type call32_from_64, @function
> +
> + // 4(%esp): function to call
> +call64_from_32:
> + // Fetch function address
> + mov 4(%esp), %eax
> +
> + // Save registers which are callee-clobbered by 64-bit ABI
> + push %ecx
> + push %edx
> + push %esi
> + push %edi
> +
> + // Switch to long mode
> + jmp $0x33,$1f
> +1: .code64
> +
> + // Call the function
> + call *%rax
> +
> + // Switch to compatibility mode
> + push $0x23 /* USER32_CS */
> + .code32; push $1f; .code64 /* hack: can't have X86_64_32S relocation in 32-bit ELF */
> + lretq
> +1: .code32
> +
> + pop %edi
> + pop %esi
> + pop %edx
> + pop %ecx
> +
> + ret
> +
> +.size call64_from_32, .-call64_from_32
>
--
Shuah Khan
Sr. Linux Kernel Developer
Open Source Innovation Group
Samsung Research America (Silicon Valley)
shuahkh@xxxxxxxxxxxxxxx | (970) 217-8978
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/