Re: seccomp for 2.6.11-rc1-bk8

From: Ingo Molnar
Date: Fri Jan 21 2005 - 07:51:25 EST



* Ingo Molnar <mingo@xxxxxxx> wrote:

> > This is the seccomp patch ported to 2.6.11-rc1-bk8, which I need for
> > Cpushare (until trusted computing hits the hardware market).
> > [...]
>
> why do you need any kernel code for this? This seems to be a limited
> ptrace implementation: restricting untrusted userspace code to only be
> able to exec read/write/sigreturn.
>
> So this patch, unless i'm missing something, duplicates in essence what
> ptrace can do [...]

there's one thing ptrace won't do: if the ptrace parent dies unexpectedly
while the child is 'running' (there is a small window where the child
might not be stopped and where this may happen) then the child can run
away unsupervised. While i think this is theoretical (UML doesn't suffer
from this problem), it is simple to fix - find below a proof-of-concept
patch that introduces PTRACE_ATTACH_JAIL - children attached this way can
never escape out of such a jail. (barely tested - but you get the idea.)

Ingo

Signed-off-by: Ingo Molnar <mingo@xxxxxxx>

--- kernel/ptrace.c.orig
+++ kernel/ptrace.c
@@ -49,10 +49,20 @@ void ptrace_untrace(task_t *child)
{
spin_lock(&child->sighand->siglock);
if (child->state == TASK_TRACED) {
- if (child->signal->flags & SIGNAL_STOP_STOPPED) {
+ /*
+ * Child must be killed if parent dies unexpectedly:
+ */
+ if (child->signal->flags & SIGNAL_PTRACE_ONLY) {
child->state = TASK_STOPPED;
- } else {
+ spin_unlock(&child->sighand->siglock);
+ force_sig_specific(SIGKILL, child);
signal_wake_up(child, 1);
+ } else {
+ if (child->signal->flags & SIGNAL_STOP_STOPPED) {
+ child->state = TASK_STOPPED;
+ } else {
+ signal_wake_up(child, 1);
+ }
}
}
spin_unlock(&child->sighand->siglock);
@@ -117,7 +127,7 @@ int ptrace_check_attach(struct task_stru
return ret;
}

-int ptrace_attach(struct task_struct *task)
+static int __ptrace_attach(struct task_struct *task, int jail)
{
int retval;
task_lock(task);
@@ -154,8 +164,12 @@ int ptrace_attach(struct task_struct *ta

write_lock_irq(&tasklist_lock);
__ptrace_link(task, current);
+ if (jail) {
+ spin_lock(&task->sighand->siglock);
+ task->signal->flags |= SIGNAL_PTRACE_ONLY;
+ spin_unlock(&task->sighand->siglock);
+ }
write_unlock_irq(&tasklist_lock);
-
force_sig_specific(SIGSTOP, task);
return 0;

@@ -164,6 +178,16 @@ bad:
return retval;
}

+int ptrace_attach(struct task_struct *task)
+{
+ return __ptrace_attach(task, 0);
+}
+
+int ptrace_attach_jail(struct task_struct *task)
+{
+ return __ptrace_attach(task, 1);
+}
+
int ptrace_detach(struct task_struct *child, unsigned int data)
{
if ((unsigned long) data > _NSIG)
--- arch/i386/kernel/ptrace.c.orig
+++ arch/i386/kernel/ptrace.c
@@ -388,6 +388,10 @@ asmlinkage int sys_ptrace(long request,
ret = ptrace_attach(child);
goto out_tsk;
}
+ if (request == PTRACE_ATTACH_JAIL) {
+ ret = ptrace_attach_jail(child);
+ goto out_tsk;
+ }

ret = ptrace_check_attach(child, request == PTRACE_KILL);
if (ret < 0)
--- include/linux/ptrace.h.orig
+++ include/linux/ptrace.h
@@ -18,6 +18,7 @@

#define PTRACE_ATTACH 0x10
#define PTRACE_DETACH 0x11
+#define PTRACE_ATTACH_JAIL 0x12

#define PTRACE_SYSCALL 24

@@ -79,6 +80,7 @@
extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
extern int ptrace_attach(struct task_struct *tsk);
+extern int ptrace_attach_jail(struct task_struct *tsk);
extern int ptrace_detach(struct task_struct *, unsigned int);
extern void ptrace_disable(struct task_struct *);
extern int ptrace_check_attach(struct task_struct *task, int kill);
--- include/linux/sched.h.orig
+++ include/linux/sched.h
@@ -338,6 +338,7 @@ struct signal_struct {
#define SIGNAL_STOP_DEQUEUED 0x00000002 /* stop signal dequeued */
#define SIGNAL_STOP_CONTINUED 0x00000004 /* SIGCONT since WCONTINUED reap */
#define SIGNAL_GROUP_EXIT 0x00000008 /* group exit in progress */
+#define SIGNAL_PTRACE_ONLY 0x00000010 /* kill on ptrace parent death */


/*
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/