Re: [PATCH 0/5] Collected vdso/vsyscall fixes for 3.1

From: Konrad Rzeszutek Wilk
Date: Wed Jul 27 2011 - 11:43:52 EST

On Wed, Jul 27, 2011 at 11:34:21AM -0400, Andrew Lutomirski wrote:
> On Wed, Jul 27, 2011 at 11:30 AM, Konrad Rzeszutek Wilk
> <konrad.wilk@xxxxxxxxxx> wrote:
> >> > Anyhow, removed the benchmark code and ran it on 64-bit:
> >> >
> >> > sh-4.1# /test_vsyscall  test
> >> > Testing gettimeofday...
> >> > [  109.552261] test_vsyscall[2462] trap invalid opcode ip:400c8d sp:7fff84fab470 error:0 in test_vsyscall[400000+2000]
> >> > Illegal instruction
> >> > sh-4.1# /test_vsyscall  intcc
> >> > About to execute int 0xcc from RIP = 400959
> >> > [  114.137150] test_vsyscall[2463] illegal int 0xcc (exploit attempt?) ip:400959 cs:e033 sp:7fff8b328310 ax:2c si:0 di:7fff8b3280f0
> >> > Caught SIGSEGV: Segmentation fault (Signal sent by the kernel [(nil)])RIP = 400959
> >> >
> >> > [This is on git:// #testing, which
> >> > has today's linus/master and your patchset]
> >> >
> >>
> >> I'll set up Xen.  Something's clearly still buggy.
> >
> > You sure? This is what I get when I boot baremetal:
> >
> > sh-4.1#
> > sh-4.1# xen-detect
> > Not running on Xen.
> > sh-4.1# /test_vsyscall test
> > Testing gettimeo[   84.442819] test_vsyscall[3175] trap invalid opcode ip:400c8d sp:7fffa8a72dc0 error:0fday...
> >  in test_vsyscall[400000+2000]
> $ test_vsyscall test
> Testing gettimeofday...
> vDSO offset = 0.000001s
> vsyscall offset = 0.000001s
> Testing time...
> vDSO offset = 0
> vsyscall offset = 0
> Testing getcpu...
> ok! cpu=6 node=0
> Can you send me your test_vsyscall binary so I can disassemble it?

Here it is (also including source since I uncommented parts of it).

One extra thing - I've been using AMD machines for this - I hadn't
tried this on an Intel box.

Attachment: test_vsyscall
Description: Binary data


#include <stdio.h>
#include <sys/time.h>
#include <time.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <dlfcn.h>
#include <string.h>
#include <inttypes.h>
#include <signal.h>
#include <sys/ucontext.h>
#include <asm/ldt.h>
#include <errno.h>

static inline int modify_ldt(int mode, void *ptr, unsigned long size)
int ret = syscall(__NR_modify_ldt, mode, ptr, size);
if (ret != 0)
errno = -ret;
return (ret == 0 ? 0 : -1);

/*
 * vsyscalls and vDSO
 *
 * Each of gettimeofday, time and getcpu is reachable through two handles:
 *   - v<name>:     a constant function pointer to a fixed address;
 *   - vdso_<name>: a function pointer that starts out null and is assigned
 *                  from dlsym() by init_vdso().
 */

/* Function pointer types shared by both flavours. */
typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
typedef long (*time_func_t)(time_t *t);
typedef long (*getcpu_t)(unsigned *cpu, unsigned *node,
			 struct getcpu_cache *cache);

/* Fixed-address entry points. */
const gtod_t vgtod = (gtod_t)0xffffffffff600000;
const time_func_t vtime = (time_func_t)0xffffffffff600400;
const getcpu_t vgetcpu = (getcpu_t)0xffffffffff600800;

/* Looked up at run time; null until (and unless) the lookup succeeds. */
gtod_t vdso_gtod;
time_func_t vdso_time;
getcpu_t vdso_getcpu;

void init_vdso()
void *vdso = dlopen("", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
if (!vdso) {
printf("Warning: failed to find vDSO\n");

vdso_gtod = (gtod_t)dlsym(vdso, "gettimeofday");
if (!vdso_gtod)
printf("Warning: failed to find gettimeofday in vDSO\n");

vdso_time = (time_func_t)dlsym(vdso, "time");
if (!vdso_time)
printf("Warning: failed to find time in vDSO\n");

vdso_getcpu = (getcpu_t)dlsym(vdso, "getcpu");
if (!vdso_getcpu)
printf("Warning: failed to find getcpu in vDSO\n");

/* syscalls */
/*
 * Issue the gettimeofday system call directly through syscall().
 *
 * tv and tz are passed to the kernel unchanged.  The return value is that of
 * syscall().
 */
static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
{
	return syscall(__NR_gettimeofday, tv, tz);
}

/*
 * Issue the time system call directly through syscall().
 *
 * t is passed to the kernel unchanged.  The return value is that of
 * syscall().
 */
static inline long sys_time(time_t *t)
{
	return syscall(__NR_time, t);
}

/* There is no sys_getcpu. */

/*
 * SIGSEGV handler (main() installs it with SA_SIGINFO).
 *
 * Prints a description of the signal via psiginfo() and the RIP recorded in
 * the signal context, then terminates the process with exit status 1.
 *
 * sig      - signal number (unused).
 * info     - signal details, forwarded to psiginfo().
 * ctx_void - the ucontext_t of the interrupted code.
 */
static void segv(int sig, siginfo_t *info, void *ctx_void)
{
	(void)sig;
	ucontext_t *ctx = (ucontext_t *)ctx_void;

	psiginfo(info, "Caught SIGSEGV");
	printf("RIP = %llx\n",
	       (unsigned long long)ctx->uc_mcontext.gregs[REG_RIP]);

	/*
	 * Do not return: the saved context still points at the faulting
	 * instruction, so returning would run it again and re-enter this
	 * handler.  stdout is flushed by hand because _exit() does not
	 * flush stdio buffers.
	 */
	fflush(stdout);
	_exit(1);
}


#if 0
/*
 * benchmark helper (compiled out together with its callers in bench()).
 *
 * Calls f repeatedly in batches of 1000 until at least 0.5 s of
 * CLOCK_MONOTONIC time has passed since the start, then prints the number of
 * calls, the elapsed time and the average time per call.
 *
 * desc - label printed in front of the result.
 * f    - callable invoked with no arguments.
 *
 * Exits the process if clock_gettime() fails.
 *
 * NOTE(review): the batch loop, the f() call and the error paths were
 * reconstructed from the surviving statements; confirm against the original.
 */
template<typename Func> void benchmark(const char *desc, Func f)
{
	struct timespec start, end;
	long loops = 0;

	printf("Benchmarking %s ... ", desc);
	fflush(stdout);	/* no newline yet: make the label visible now */

	if (clock_gettime(CLOCK_MONOTONIC, &start)) {
		perror("clock_gettime");
		exit(1);
	}

	while (true) {
		long loops_now = 1000;
		for (int i = 0; i < loops_now; i++)
			f();
		loops += loops_now;

		if (clock_gettime(CLOCK_MONOTONIC, &end)) {
			perror("clock_gettime");
			exit(1);
		}

		/* Elapsed time in nanoseconds. */
		unsigned long long duration = (end.tv_nsec - start.tv_nsec) +
			1000000000ULL * (end.tv_sec - start.tv_sec);

		/* Keep going until half a second has been measured. */
		if (duration < 500000000ULL)
			continue;

		printf("%9ld loops in %.5fs = %7.2f nsec / loop\n",
		       loops, float(duration) * 1e-9,
		       float(duration) / loops);
		break;
	}
}
#endif
/*
 * Return a - b in seconds.
 *
 * The seconds and microseconds fields are subtracted separately; the
 * microsecond fields are converted to int first, and that difference is
 * scaled by 1e-6 before being added to the seconds difference.
 */
static double tv_diff(const struct timeval &a, const struct timeval &b)
{
	double seconds = double(a.tv_sec - b.tv_sec);
	double micros = double((int)a.tv_usec - (int)b.tv_usec);

	return seconds + micros * 1e-6;
}

int test(int argc, char **argv)
printf("Testing gettimeofday...\n");
struct timeval tv_sys, tv_vdso, tv_vsys;
struct timezone tz_sys, tz_vdso, tz_vsys;
int ret_sys = sys_gtod(&tv_sys, &tz_sys);
int ret_vdso = -1;
if (vdso_gtod)
ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
int ret_vsys = vgtod(&tv_vsys, &tz_vsys);

if (ret_sys) {
printf(" syscall failed\n");
} else {
if (ret_vdso == 0) {
if (tz_sys.tz_minuteswest != tz_vdso.tz_minuteswest || tz_sys.tz_dsttime != tz_vdso.tz_dsttime)
printf(" vDSO tz mismatch\n");
printf(" vDSO offset = %.6fs\n", tv_diff(tv_vdso, tv_sys));
} else if (vdso_gtod) {
printf(" vDSO failed\n");
if (ret_vsys == 0) {
if (tz_sys.tz_minuteswest != tz_vsys.tz_minuteswest || tz_sys.tz_dsttime != tz_vsys.tz_dsttime)
printf(" vsyscall tz mismatch\n");
printf(" vsyscall offset = %.6fs\n", tv_diff(tv_vsys, tv_sys));

printf("\nTesting time...\n");
long t_sys, t_vdso = 0, t_vsys;
long t2_sys = -1, t2_vdso = -1, t2_vsys = -1;
t_sys = sys_time(&t2_sys);
if (vdso_time)
t_vdso = vdso_time(&t2_vdso);
t_vsys = vtime(&t2_vsys);
if (t_sys < 0 || t_sys != t2_sys) {
printf(" syscall failed (ret:%ld output:%ld)\n", t_sys, t2_sys);
} else {
if (vdso_time) {
if (t_vdso < 0 || t_vdso != t2_vdso)
printf(" vDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
printf(" vDSO offset = %ld\n", t_vdso - t_sys);

if (t_vsys < 0 || t_vsys != t2_vsys)
printf(" vsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
printf(" vsyscall offset = %ld\n", t_vsys - t_sys);

printf("Testing getcpu...\n");
unsigned cpu_vdso, cpu_vsys, node_vdso, node_vsys;
ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
if (ret_vdso)
printf(" vDSO failed (ret:%ld)\n", (unsigned long)ret_vdso);
if (ret_vsys)
printf(" vsyscall failed (ret:%ld)\n", (unsigned long)ret_vdso);
if (ret_vdso == 0 && ret_vsys == 0) {
if (cpu_vdso != cpu_vsys)
printf(" cpu mismatch (vdso:%u vsyscall:%u)!\n", cpu_vdso, cpu_vsys);
else if (node_vdso != node_vsys)
printf(" node mismatch (vdso:%u vsyscall:%u)!\n", node_vdso, node_vsys);
printf(" ok! cpu=%u node=%u\n", cpu_vdso, node_vdso);

return 0;

/*
 * "bench" command: time the syscall, vDSO and vsyscall entry points with
 * benchmark().
 *
 * The measurements are compiled out ("#if 0"), like the benchmark() helper
 * they rely on, so the command currently does nothing.
 *
 * argc/argv are unused.  Always returns 0.
 */
int bench(int argc, char **argv)
{
	(void)argc;
	(void)argv;

#if 0
	struct timeval tv;
	struct timezone tz;

	benchmark(" syscall gettimeofday", [&]{sys_gtod(&tv, &tz);});
	if (vdso_gtod)
		benchmark(" vdso gettimeofday", [&]{vdso_gtod(&tv, &tz);});
	benchmark("vsyscall gettimeofday", [&]{vgtod(&tv, &tz);});

	time_t t;
	benchmark(" syscall time ", [&]{sys_time(&t);});
	if (vdso_time)
		benchmark(" vdso time ", [&]{vdso_time(&t);});
	benchmark("vsyscall time ", [&]{vtime(&t);});

	unsigned cpu, node;
	if (vdso_getcpu)
		benchmark(" vdso getcpu ", [&]{vdso_getcpu(&cpu, &node, 0);});
	benchmark("vsyscall getcpu ", [&]{vgetcpu(&cpu, &node, 0);});

	/* Baseline: cost of entering the kernel with a bogus syscall number. */
	benchmark("dummy syscall ", [&]{syscall(0xffffffff);});
#endif

	return 0;
}

/*
 * Parse one numeric command-line argument for call().
 *
 * Uses strtoull() with base 0, so decimal, octal (leading 0) and hexadecimal
 * (leading 0x) are accepted.  Returns 0 and stores the value in *out on
 * success; returns -1 if the text is empty, has trailing characters or is
 * out of range.
 */
static int call_parse_arg(const char *text, unsigned long *out)
{
	char *end;

	errno = 0;
	unsigned long long value = strtoull(text, &end, 0);
	if (end == text || *end != '\0' || errno == ERANGE)
		return -1;

	*out = (unsigned long)value;
	return 0;
}

/*
 * "call" command: call an arbitrary address with chosen register contents.
 *
 * argv[0..4] are <addr> <rax> <arg1> <arg2> <arg3>.  The code at addr is
 * called with rax in %rax and arg1..arg3 in %rdi, %rsi and %rdx; whatever it
 * leaves in %rax is printed as the return value.
 *
 * Returns 0 after a completed call, 1 on a usage or parse error.
 */
int call(int argc, char **argv)
{
	if (argc != 5) {
		printf("Usage: call <addr> <rax> <arg1> <arg2> <arg3>\n");
		return 1;
	}

	/* Order: addr, rax, arg1, arg2, arg3. */
	unsigned long value[5];
	for (int i = 0; i < 5; i++) {
		if (call_parse_arg(argv[i], &value[i]) != 0) {
			printf("Bad arg\n");
			return 1;
		}
	}

	unsigned long addr = value[0];
	unsigned long rax = value[1];
	unsigned long arg1 = value[2];
	unsigned long arg2 = value[3];
	unsigned long arg3 = value[4];
	unsigned long ret;

	/*
	 * The callee is ordinary code, so it may overwrite every caller-saved
	 * register and memory.  The argument registers are in/out operands
	 * ("+") because a register that is an input cannot also be listed as
	 * a clobber; the remaining caller-saved registers are clobbers.
	 * NOTE(review): the call pushes a return address just below %rsp -
	 * confirm the compiler keeps no live data there (red zone) in this
	 * function.
	 */
	__asm__ volatile("call *%[addr]"
			 : "=a" (ret), "+D" (arg1), "+S" (arg2), "+d" (arg3)
			 : [addr] "rm" (addr), "a" (rax)
			 : "rcx", "r8", "r9", "r10", "r11",
			   "xmm0", "xmm1", "xmm2", "xmm3",
			   "xmm4", "xmm5", "xmm6", "xmm7",
			   "xmm8", "xmm9", "xmm10", "xmm11",
			   "xmm12", "xmm13", "xmm14", "xmm15",
			   "memory", "cc");
	printf("Return value = %ld\n", ret);

	return 0;
}

/*
 * "intcc" command: execute "int $0xcc" and report where it is executed from.
 *
 * Takes no arguments (argc must be 0).  Returns 1 on a usage error, and 0 if
 * execution continues past the instruction.
 *
 * noinline: the asm statement defines the label intcc_addr, so the function
 * body must be emitted exactly once or the label would be defined twice.
 */
__attribute__((noinline)) int intcc(int argc, char **argv)
{
	(void)argv;

	if (argc != 0) {
		printf("Usage: intcc\n");
		return 1;
	}

	/* Defined by the label inside the asm statement below. */
	extern char intcc_addr;
	printf("About to execute int 0xcc from RIP = %lX\n",
	       (unsigned long)&intcc_addr);
	/* Make sure the message is out before the instruction can fault. */
	fflush(stdout);

	__asm__ volatile ("intcc_addr: int $0xcc");
	return 0;
}

/*
 * Far pointer: a 32-bit offset immediately followed by a 16-bit segment
 * selector, packed so that no padding is inserted.  intcc32() uses objects of
 * this type as the memory operand of its ljmpl instructions.
 */
struct __attribute__((packed)) farptr {
	uint32_t offset;
	uint16_t sel;
};

/*
 * Fill *out with selector sel and the address held in offset.
 *
 * Returns true if the address fits in the 32-bit offset field, false if it
 * was truncated (out is written in both cases).
 */
static bool to_farptr(struct farptr *out, uint16_t sel, void *offset)
{
	uintptr_t address = (uintptr_t)offset;

	out->sel = sel;
	out->offset = (uint32_t)address;

	/* Equal only when no high bits were lost in the narrowing above. */
	return out->offset == address;
}

// "intcc32" command: execute "int $0xcc" between two far jumps.
//
// Visible steps: (1) fill in a user_desc for entry 0 and pass it to
// modify_ldt(1, ...), (2) read the current CS, (3) build two far pointers
// with to_farptr(), (4) run the asm sequence below, (5) print a success
// message.  Takes no arguments (argc must be 0).  Returns 0 after the asm
// sequence completes and 1 on a usage error, a modify_ldt() failure or a
// far-pointer overflow.
//
// NOTE(review): this listing looks incomplete - the closing braces are
// absent, landing_32/landing_64 are declared but no definition of them is
// visible, and the asm statement has only one ':' before the "a"/"c"
// operands although they are evidently meant as inputs.  Compare with the
// original source before relying on it.
int intcc32(int argc, char **argv)
if (argc != 0) {
printf("Usage: intcc32\n");
return 1;

// Install a 32-bit code descriptor
// (all fields not assigned below are left zero by the memset).
struct user_desc desc;
memset(&desc, 0, sizeof(desc));
desc.entry_number = 0;
desc.base_addr = 0;
desc.limit = 0xFFFFF;
desc.seg_32bit = 1;
desc.limit_in_pages = 1;

// Give up silently (exit status 1) if the descriptor cannot be installed.
if (modify_ldt(1, &desc, sizeof(desc)) != 0) {
return 1;

/* Load the initial CS. */
// It is used as the selector for the jump back (ptr64).
uint16_t initial_cs;
asm ("mov %%cs,%[initial_cs]" : [initial_cs] "=rm" (initial_cs));
printf("Initial CS = 0x%04X (entry %d)\n",
(unsigned)initial_cs, (int)(initial_cs >> 3));

// Jump targets for the two far jumps.
extern char landing_32, landing_64;

/* Set up the pointers. */
// to_farptr() returns false when an address does not fit in 32 bits.
// NOTE(review): presumably selector 0x4 refers to the descriptor installed
// in entry 0 above - confirm.
static farptr ptr32, ptr64;
if (!to_farptr(&ptr32, 0x4, &landing_32) || !to_farptr(&ptr64, initial_cs, &landing_64)) {
printf("Something's mapped too high\n");
return 1;

/* Go for it! */
// rax holds &ptr32 and rcx holds &ptr64; rsp is kept in rsi for the
// duration of the sequence and restored at the end.
asm volatile (
"mov %%rsp,%%rsi\n" // Save rsp (avoids truncation).
"ljmpl *(%%eax)\n" // Switch to 32-bit mode.

// 32-bit mode!
// (Well, sort of. DS and ES are 0, so we can't use them.)
"\tint $0xcc\n" // Try int 0xcc.
"\tljmpl *%%cs:(%%ecx)\n" // Switch back.

// 64-bit mode again!
"\tmov %%rsi,%%rsp"
: "a" (&ptr32), "c" (&ptr64)
: "rsi", "cc");

printf("Holy cow! We survived!\n");

return 0;

int main(int argc, char **argv)
struct sigaction sa_segv;
memset(&sa_segv, 0, sizeof(sa_segv));
sa_segv.sa_sigaction = segv;
sa_segv.sa_flags = SA_SIGINFO;
if (sigaction(SIGSEGV, &sa_segv, 0))

if (argc < 2) {
printf("Usage: test_vsyscall <command> ...\n"
"command := { test, bench, intcc, call }\n");
return 1;

if (!strcmp(argv[1], "test"))
return test(argc - 2, argv + 2);
if (!strcmp(argv[1], "bench"))
return bench(argc - 2, argv + 2);
if (!strcmp(argv[1], "intcc"))
return intcc(argc - 2, argv + 2);
if (!strcmp(argv[1], "intcc32"))
return intcc32(argc - 2, argv + 2);
if (!strcmp(argv[1], "call"))
return call(argc - 2, argv + 2);

printf("Unknown command\n");
return 1;