Re: execve-under-ptrace API bug (was Re: Ptrace documentation, draft #3)
From: Denys Vlasenko
Date: Mon May 30 2011 - 14:11:53 EST
> Ok, let's take a deeper look at API needs. What we need to report, and when?
>
> We have three kinds of threads at execve:
> 1. execve'ing thread,
> 2. leader, two cases: (2a) leader is still alive, (2b) leader has exited by now.
> 3. other threads.
>
> (3) is the most simple: API should report death of these threads.
> There is no need to ensure these death notifications are reported
> before execve syscall exit is reported. They can be consumed
> by tracer later.
>
> (1) execve'ing thread is obviously alive. current kernel already
> reports its execve success. The only thing we need to add is
> a way to retrieve its former pid, so that tracer can drop
> former pid's data, and also to cater for the "two execve's" case.
> PTRACE_EVENT_EXEC seems to be a good place to do it.
> Say, using GETEVENTMSG?
>
> (2) is the most problematic. If leader is still alive, should
> we report its death? This makes sense since if we do,
> and if we ensure its death is always reported before
> PTRACE_EVENT_EXEC, then the rule is pretty simple:
> at PTRACE_EVENT_EXEC, leader is always reported dead.
>
> However, I don't see why we _must_ do it this way.
> The life of tracer is not that much worse if at
> PTRACE_EVENT_EXEC leader which is still alive
> is simply "supplanted" by the execve'ed process.
>
> We definitely must ensure, though, that if leader races with
> execve'ing thread and enters exit(2), its death is never reported
> *after* PTRACE_EVENT_EXEC - that'd confuse the tracer for sure!
> Process which has exited but is still alive?! Not good!
FWIW, here is the current behavior (2.6.38.6-27.fc15.i686.PAE).
Test program creates two threads and execve's from last thread.
PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXIT | PTRACE_O_TRACEEXEC
is requested by tracer.
Compiled attached program with gcc -Wall threaded-execve.c,
ran it and I see this:
6797: thread leader
6797: status:0003057f WIFSTOPPED sig:5 (TRAP) event:CLONE
6798: status:0000137f WIFSTOPPED sig:19 (STOP) event:(null)
6797: status:0003057f WIFSTOPPED sig:5 (TRAP) event:CLONE
6799: status:0000137f WIFSTOPPED sig:19 (STOP) event:(null)
6798: status:0006057f WIFSTOPPED sig:5 (TRAP) event:EXIT
6797: status:0006057f WIFSTOPPED sig:5 (TRAP) event:EXIT
6798: status:00000000 WIFEXITED exitcode:0
6797: status:0004057f WIFSTOPPED sig:5 (TRAP) event:EXEC
6797: status:0003057f WIFSTOPPED sig:5 (TRAP) event:CLONE
6800: status:0000137f WIFSTOPPED sig:19 (STOP) event:(null)
6797: status:0003057f WIFSTOPPED sig:5 (TRAP) event:CLONE
6801: status:0000137f WIFSTOPPED sig:19 (STOP) event:(null)
6800: status:0006057f WIFSTOPPED sig:5 (TRAP) event:EXIT
6797: status:0006057f WIFSTOPPED sig:5 (TRAP) event:EXIT
6800: status:00000000 WIFEXITED exitcode:0
6797: status:0004057f WIFSTOPPED sig:5 (TRAP) event:EXEC
...
...
...
In short, it doesn't look too bad: we do get EXIT events for both
destroyed threads, and even get WIFEXITED for the non-leader.
(IOW: maybe PTRACE_O_TRACEEXIT is not even needed!)
EXEC event is reported last (also good!)
Oleg, does it look like it works as intended, or am I just lucky?
I guess I need to test a larger number of threads, and throw in some races...
--
vda
/* ...DESCRIPTION...
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely. */
#define _GNU_SOURCE 1
#include <assert.h>
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <sched.h>
#include <signal.h>
#include <dirent.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/syscall.h>
/* #include <pthread.h> */
/* Dance around ptrace.h + user.h incompatibility */
#ifdef __ia64__
# define ia64_fpreg ia64_fpreg_DISABLE
# define pt_all_user_regs pt_all_user_regs_DISABLE
#endif
#include <sys/ptrace.h>
#include <linux/ptrace.h>
#ifdef __ia64__
# undef ia64_fpreg
# undef pt_all_user_regs
#endif
#include <sys/user.h>
#if defined __i386__ || defined __x86_64__
# include <sys/debugreg.h>
#endif
/* Define clone2 for all arches.
 *
 * clone2(func, stack_base, size, flags, arg...) takes the lowest address
 * of the stack buffer plus its size.  On ia64 this maps directly onto
 * __clone2(); elsewhere it is translated into a clone() call that is
 * handed the address just past the end of the buffer.
 */
#ifdef __ia64__
extern int __clone2(int (*fn) (void *), void *child_stack_base,
		size_t stack_size, int flags, void *arg, ...);
#define clone2 __clone2
#else
/* The cast to char * keeps the pointer arithmetic valid ISO C even when
 * stack_base is a void * (arithmetic on void * is a GNU extension).
 */
#define clone2(func, stack_base, size, flags, ...) \
	clone((func), (char *)(stack_base) + (size), (flags), __VA_ARGS__)
#endif
/* Non-zero enables the diagnostic output produced by VERBOSE().
 * main() sets it to (argc - 1).
 */
static int verbose;
/* printf-style logging that does nothing unless `verbose` is non-zero.
 * stdout is flushed after every message.
 */
#define VERBOSE(...) do { \
if (verbose) { \
printf(__VA_ARGS__); fflush(stdout); \
} \
} while (0)
/* Pid returned by fork() in reproduce(); reset to 0 by sigkill() when
 * cleanup() runs, so the same process is not signalled twice.
 */
static pid_t child;
/*static pid_t grandchild;*/
/* Send SIGKILL to the process whose pid is stored in *pp and clear the
 * slot.  Non-positive values are only cleared, never signalled.
 */
static void
sigkill(pid_t *pp)
{
	const pid_t victim = *pp;

	/* Forget the pid first so that a later call finds nothing to kill. */
	*pp = 0;
	if (victim <= 0)
		return;
	kill(victim, SIGKILL);
}
/* Kill the recorded child (if any) and then reap every remaining
 * child/thread until waitpid() reports there is nothing left.
 * Registered with atexit() and also called from handler_fail().
 */
static void
cleanup(void)
{
	/*sigkill(&grandchild);*/
	sigkill(&child);
	for (;;) {
		/* __WALL: wait for children regardless of their type. */
		if (waitpid(-1, NULL, __WALL) <= 0)
			break;
	}
}
/* Fatal-signal handler installed by main() for SIGINT, SIGABRT and SIGALRM.
 *
 * Reports which signal arrived (when verbose), restores default handling
 * for SIGABRT/SIGALRM, tears down the tracee via cleanup() and finally
 * aborts through assert(0).
 */
static void
handler_fail(int signo)
{
	sigset_t set;

	/* The handler is shared by three signals; only SIGALRM is a timeout. */
	if (signo == SIGALRM) {
		VERBOSE("alarm timed out\n");
	} else {
		VERBOSE("caught signal %d\n", signo);
	}
	/* From here on SIGABRT (from assert) and SIGALRM must terminate the
	 * process instead of re-entering this handler.
	 */
	signal(SIGABRT, SIG_DFL);
	signal(SIGALRM, SIG_DFL);
	/* SIGALRM may be blocked in sighandler, need to unblock */
	sigfillset(&set);
	sigprocmask(SIG_UNBLOCK, &set, NULL);
	/* Due to kernel bugs, waitpid may block. Need to have a timeout */
	alarm(1);
	cleanup();
	assert(0);
}
/* Map a signal number (as returned by WSTOPSIG/WTERMSIG) to a short
 * printable name.
 *
 * Only the handful of signals this test expects are named; every other
 * value -- including numbers that fall into an unpopulated slot of the
 * table -- yields "SIG????".  The result is never NULL, so it is always
 * safe to pass to printf("%s").
 */
static const char *sig_name(unsigned sig)
{
	static const char *const sigs[] = {
		[SIGSTOP] = "STOP", [SIGTRAP] = "TRAP", [SIGKILL] = "KILL",
		[SIGTERM] = "TERM", [SIGINT ] = "INT ", [0 ] = "0 ",
		/* Syscall stop as reported with PTRACE_O_TRACESYSGOOD. */
		[SIGTRAP|0x80] = "TRAP|80",
	};
	static const unsigned num_sigs = sizeof(sigs) / sizeof(sigs[0]);

	/* Designated initializers leave the gaps NULL; treat them as unknown. */
	if (sig < num_sigs && sigs[sig] != NULL)
		return sigs[sig];
	return "SIG????";
}
/* Extract the ptrace event code from a waitpid() status (bits 16 and up)
 * and return its printable name.
 *
 * Returns "(none)" when the status carries no event (code 0) and "EV???"
 * for codes this table does not know.  The result is never NULL, so it is
 * always safe to pass to printf("%s").
 */
static const char *event_name(int status)
{
	static const char *const events[] = {
		[PTRACE_EVENT_FORK ] = "FORK",
		[PTRACE_EVENT_VFORK ] = "VFORK",
		[PTRACE_EVENT_CLONE ] = "CLONE",
		[PTRACE_EVENT_EXEC ] = "EXEC",
		[PTRACE_EVENT_VFORK_DONE] = "VFORK_DONE",
		[PTRACE_EVENT_EXIT ] = "EXIT",
	};
	static const unsigned num_events = sizeof(events) / sizeof(events[0]);
	const unsigned event = (unsigned)status >> 16;

	/* Plain signal stops have no event code; slot 0 of the table is NULL. */
	if (event == 0)
		return "(none)";
	if (event < num_events && events[event] != NULL)
		return events[event];
	return "EV???";
}
/****************** Standard scaffolding ends here ****************/
/*
* Extended commentary of the entire test.
*
* What kernels / patches exhibit it? When it was fixed?
* Is it CPU vendor/model dependent? SMP dependent?
* Is it deterministic?
* How easy/hard is it to reproduce
* (always? a dozen loops? a second? minute? etc)
*/
/* If the test is not deterministic:
* Amount of seconds needed to almost 100% catch it */
//#define DEFAULT_TESTTIME 5
/* or (if reproducible in a few loops only) */
//#define DEFAULT_LOOPS 100
/* Body of the first cloned thread: it does nothing but sleep in pause()
 * forever, so it is still around when another thread calls execve.
 */
static int
thread1(void *unused)
{
	(void)unused;
	while (1)
		pause();
	return 0;
}
/* Body of the second cloned thread: re-executes the running binary from a
 * non-leader thread.  argv[0] is set to "exe", which main() recognises and
 * answers by calling thread_leader() again.
 *
 * If execl() fails the thread simply parks itself in pause().
 */
static int
thread2(void *unused)
{
	(void)unused;
	/* The terminator of a variadic exec call must be a char * null
	 * pointer; a bare NULL may be passed with the wrong type or width.
	 */
	execl("/proc/self/exe", "exe", (char *)NULL);
	for (;;)
		pause();
	return 0;
}
/* Size in bytes of the stack given to each cloned thread. */
enum { THREAD_STACK_SIZE = 16 * 1024 };

/* Start fn(NULL) as a new thread of the calling process via clone2().
 * Asserts on allocation or clone failure.  The stack is never freed:
 * the threads started here are not expected to return.
 */
static void
spawn_thread(int (*fn)(void *))
{
	/* malloc gives sufficiently aligned buffer.
	 * long buf[] does not! (on ia64).
	 */
	void *stack = malloc(THREAD_STACK_SIZE);
	int tid;

	assert(stack != NULL);
	/* As seen in pthread_create(): */
	tid = clone2(fn, stack, THREAD_STACK_SIZE, 0
		| CLONE_VM
		| CLONE_FS
		| CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM
		// | CLONE_PTRACE
		| 0 /* no signal to send on death */
		, NULL);
	assert(tid != -1);
	(void)tid;	/* silence "unused" when NDEBUG removes the assert */
}

/* Body of the thread-group leader: starts thread1 (which idles), waits
 * 50 ms, starts thread2 (which calls execl), then idles in pause() itself.
 * Never returns.
 */
static int
thread_leader(void *unused)
{
	(void)unused;
	spawn_thread(thread1);
	/* Presumably gives thread1 time to get going before thread2 execs. */
	usleep(50*1000);
	spawn_thread(thread2);
	for (;;)
		pause();
	return 0;
}
/* If nothing strange happens, just returns.
* Notable events (which are not bugs) print some sort of marker
* if verbose is on, but still continue and return normally.
* Known bugs also print a message if verbose, but they exit(1).
* New bugs are likely to trip asserts or cause hang/kernel crash :)
*/
/* Run one iteration of the test.
 *
 * Forks a child that requests tracing, stops itself and then becomes the
 * thread leader (see thread_leader()).  The parent attaches the
 * CLONE/EXIT/EXEC/SYSGOOD options, resumes the child and then prints every
 * wait status it receives, resuming each stopped tracee without delivering
 * a signal.  The loop ends only when waitpid() returns <= 0; after that the
 * child is cleaned up.  A one-second alarm bounds the whole iteration.
 *
 * ptrace()/raise() failures are detected through their return values so a
 * stale errno left behind by an unrelated call cannot trip assert_perror().
 */
static void
reproduce(void)
{
	int status;
	pid_t pid;

	VERBOSE(".");
	alarm(1);
	/* Typical scenario starts like this. */
	child = fork();
	assert(child != -1);
	if (child == 0) {
		/* child */
		if (ptrace(PTRACE_TRACEME, 0, (void *) 0, (void *) 0) == -1)
			assert_perror(errno);
		/* Stop so the tracer can set its options before we go on. */
		if (raise(SIGSTOP) != 0)
			assert_perror(errno);
		printf("%d: thread leader\n", getpid());
		/* Never returns: the leader idles in pause() forever. */
		thread_leader(NULL);
	}
	/* We are parent tracer */
	assert(child > 0);
	/* Child has stopped itself, checking */
	pid = waitpid(child, &status, 0);
	assert(pid == child);
	assert(WIFSTOPPED (status));
	assert(WSTOPSIG (status) == SIGSTOP);
	if (ptrace(PTRACE_SETOPTIONS, child, NULL,
			PTRACE_O_TRACESYSGOOD
			| PTRACE_O_TRACECLONE
			| PTRACE_O_TRACEEXIT
			| PTRACE_O_TRACEEXEC) == -1)
		assert_perror(errno);
	if (ptrace(PTRACE_CONT, child, NULL, (void *) 0) == -1)
		assert_perror(errno);
	/* Let's just look on the resulting sequence of events */
	for (;;) {
		pid = waitpid(-1, &status, __WALL);
		if (pid <= 0) {
			printf("waitpid returned %d\n", pid);
			/* Leave the loop so the cleanup below actually runs. */
			break;
		}
		if (WIFSTOPPED(status)) {
			printf("%d: status:%08x WIFSTOPPED sig:%d (%s) event:%s\n",
				pid, status,
				WSTOPSIG(status), sig_name(WSTOPSIG(status)),
				event_name(status)
			);
			/* Resume the tracee; data == 0 means no signal is injected. */
			if (ptrace(PTRACE_CONT, pid, NULL, (void *)0) == -1)
				assert_perror(errno);
		} else if (WIFEXITED(status)) {
			printf("%d: status:%08x WIFEXITED exitcode:%d\n",
				pid, status, WEXITSTATUS(status));
		} else if (WIFSIGNALED(status)) {
			printf("%d: status:%08x WIFSIGNALED sig:%d (%s)\n",
				pid, status, WTERMSIG(status), sig_name(WTERMSIG(status)));
		} else {
			printf("%d: status:%08x - ???\n",
				pid, status);
		}
	}
	cleanup();
}
/* Program entry point.
 *
 * When started with argv[0] == "exe" (which is how thread2() re-executes
 * this binary) it goes straight to thread_leader() and never returns.
 * Otherwise it acts as the tracer: installs the cleanup hook and the
 * failure handlers, enables verbose output if any argument was given, and
 * runs reproduce() once -- or repeatedly when DEFAULT_TESTTIME or
 * DEFAULT_LOOPS is defined, scaled by the TESTTIME environment variable.
 */
int
main(int argc, char **argv)
{
	setbuf(stdout, NULL);
	/* argv[0] may legally be missing (argc == 0); do not hand NULL to strcmp. */
	if (argc > 0 && argv[0] != NULL && strcmp(argv[0], "exe") == 0)
		thread_leader(NULL);
#if defined DEFAULT_TESTTIME || defined DEFAULT_LOOPS
	int i;
	char *env_testtime = getenv("TESTTIME"); /* misnomer */
	int testtime = (env_testtime ? atoi(env_testtime) : 1);
#endif
	atexit(cleanup);
	signal(SIGINT, handler_fail);
	signal(SIGABRT, handler_fail);
	signal(SIGALRM, handler_fail);
	verbose = (argc - 1);
#if defined DEFAULT_TESTTIME
	/* NOTE(review): time() is declared in <time.h>, which is not among the
	 * headers this file includes directly -- confirm before enabling. */
	testtime *= DEFAULT_TESTTIME;
	for(i = 0; i < testtime; i++) {
		/* Keep reproducing until the wall-clock second changes. */
		time_t t = time(NULL);
		while (t == time(NULL))
			reproduce();
	}
	VERBOSE("\n");
#elif defined DEFAULT_LOOPS
	testtime *= DEFAULT_LOOPS;
	for(i = 0; i < testtime; i++)
		reproduce();
	VERBOSE("\n");
#else
	reproduce();
#endif
	return 0;
}