Re: security problem with seccomp-filter
From: Felix von Leitner
Date: Sun Apr 12 2015 - 17:33:22 EST
> What you're describing should work correctly (it's part of the
> regression test suite we use). So, given that, I'd love to get to the
> bottom of what you're seeing. Do you have a URL to your code? What
> architecture are you running on?
Well, I must be doing something wrong then.
I extracted a test case from my program.
I put it on http://ptrace.fefe.de/seccompfail.c
It installs three seccomp filters, the last one containing this:
DISALLOW_SYSCALL(prctl),
with
#define DISALLOW_SYSCALL(name) \
BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
It is my understanding that that should then kill the process if the
prctl syscall is called again.
I test this by attempting to install the very same seccomp filter again,
which calls prctl, but the process is not killed.
What am I doing wrong?
Thanks,
Felix
#include <stddef.h>
#include <features.h>
#include <inttypes.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip_icmp.h>
#include <arpa/inet.h>
#include <sys/poll.h>
#include <unistd.h>
#include <time.h>
#include <netdb.h>
#include <alloca.h>
#include <signal.h>
#include <errno.h>
#include <sys/prctl.h>
#include <linux/unistd.h>
#include <linux/audit.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
/* Fallback definitions, compiled only when the headers included above did
 * not already provide SECCOMP_MODE_FILTER. They supply the mode number,
 * the filter return actions and the layout of the record that the BPF
 * programs below inspect. */
#ifndef SECCOMP_MODE_FILTER
# define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */
# define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */
# define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
# define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
/* The data a seccomp filter operates on in place of a network packet. */
struct seccomp_data {
/* syscall number; the filters in this file load it and compare it
 * against the __NR_* constants */
int nr;
/* architecture identifier; compared against ARCH_NR by the base filter */
__u32 arch;
/* not read by any filter in this file */
__u64 instruction_pointer;
/* syscall arguments; args[0] is inspected for write() in the base filter */
__u64 args[6];
};
#endif
/* Fallback for SYS_SECCOMP when the headers do not define it.
 * It is not referenced by the code visible in this file. */
#ifndef SYS_SECCOMP
# define SYS_SECCOMP 1
#endif
/* Byte offset of the syscall number inside struct seccomp_data.
 * The filters in this file spell out the offsetof() expression directly
 * rather than using this macro. */
#define syscall_nr (offsetof(struct seccomp_data, nr))
/* Per-architecture selection. ARCH_NR is the value the BPF architecture
 * check compares seccomp_data.arch against; REG_SYSCALL names a register
 * constant and is not referenced by the code visible in this file.
 * Only i386 and x86_64 are supported; anything else is a build error. */
#if defined(__i386__)
# define REG_SYSCALL REG_EAX
# define ARCH_NR AUDIT_ARCH_I386
#elif defined(__x86_64__)
# define REG_SYSCALL REG_RAX
# define ARCH_NR AUDIT_ARCH_X86_64
#else
# error "Platform does not support seccomp filter yet"
#endif
/* Expands to two BPF instructions for use inside a sock_filter array:
 *   1. compare the accumulator with __NR_<sc>; on a match fall through,
 *      otherwise skip the next instruction;
 *   2. return SECCOMP_RET_ALLOW.
 * The accumulator must already hold the syscall number. */
#define ALLOW_SYSCALL(sc) \
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_##sc, 0, 1), \
	BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW)
/*
 * Install the base seccomp allowlist for the calling process.
 *
 * Sets no_new_privs and then loads a BPF program that allows a fixed set
 * of syscalls (plus write() to fd 1 only) and kills the process on
 * anything else.
 *
 * Returns 0 on success, -1 if either prctl() call fails.
 */
static int install_syscall_filter(void) {
	/* Linux allows a process to restrict itself (and potential children)
	 * in what syscalls can be issued. The mechanism is called
	 * seccomp-filter or "seccomp mode 2". It works by reusing the
	 * Berkeley Packet Filter, which is meant for PCAP-style packet
	 * filtering expressions like "only TCP packets, please". But it is
	 * really a bytecode that has to be passed inside an array, and each
	 * instruction is constructed using scary looking macros. The basics
	 * are not so bad, however. We have two registers, one accumulator
	 * and one index register (which is not used in this part of the
	 * code), and instead of a network packet we are operating on a
	 * certain struct with the syscall info, which is called seccomp_data
	 * (reproduced above). */
	struct sock_filter filter[] = {
		/* validate the architecture first, so the syscall numbers below
		 * are only ever compared against calls made through the ABI
		 * this program was compiled for */
		/* BPF_LD = load, BPF_W = word, BPF_ABS = absolute offset */
		BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, arch)),
		/* BPF_JMP+BPF_JEQ+BPF_K = compare accumulator to constant (in our
		 * case, ARCH_NR), and skip the next instruction if equal */
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ARCH_NR, 1, 0),
		/* "return SECCOMP_RET_KILL", tell seccomp to kill the process */
		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),

		/* load the syscall number */
		BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr)),

		/* and now a list of allowed syscalls */
		ALLOW_SYSCALL(rt_sigreturn),
#ifdef __NR_sigreturn
		ALLOW_SYSCALL(sigreturn),
#endif
		ALLOW_SYSCALL(exit_group),
		ALLOW_SYSCALL(exit),
#ifdef __NR_socketcall
		ALLOW_SYSCALL(socketcall),
#else
		ALLOW_SYSCALL(socket),
		ALLOW_SYSCALL(sendto),
		ALLOW_SYSCALL(recvfrom),
#endif
		ALLOW_SYSCALL(poll),
		/* so we can further restrict allowed syscalls */
		ALLOW_SYSCALL(prctl),
		/* so gethostbyname can open /etc/resolv.conf */
		ALLOW_SYSCALL(open),
		ALLOW_SYSCALL(read),
		ALLOW_SYSCALL(mmap),
		/* mmap2 and _llseek only exist on 32-bit ABIs; guard them the
		 * same way as sigreturn/socketcall so x86_64 builds compile */
#ifdef __NR_mmap2
		ALLOW_SYSCALL(mmap2),
#endif
		ALLOW_SYSCALL(munmap),
		ALLOW_SYSCALL(lseek),
#ifdef __NR__llseek
		ALLOW_SYSCALL(_llseek),
#endif
		ALLOW_SYSCALL(close),
		/* for our time keeping */
		ALLOW_SYSCALL(gettimeofday), /* x86_64 uses a vsyscall for this, so this filter will never trigger */

		/* for when buffer writes the output; since we only write to
		 * stdout, filter for fd==1. If the syscall is not write, jump
		 * over the next four instructions to the final kill. */
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_write, 0, 4),
		/* it's write(2). Load first argument into accumulator */
		BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, args[0])),
		/* if it's 1 (stdout), skip 1 instruction */
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 1, 1, 0),
		/* "return SECCOMP_RET_KILL", tell seccomp to kill the process */
		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),
		/* "return SECCOMP_RET_ALLOW", tell seccomp to allow the syscall */
		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),

		/* if none of these syscalls matched, kill the process */
		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
	};
	struct sock_fprog prog = {
		.len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
		.filter = filter
	};

	/* see linux/Documentation/prctl/no_new_privs.txt */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		/* if this fails, we are running on an ancient kernel without
		 * seccomp support; nothing we can do about it, really. */
		return -1;
	}
	/* see linux/Documentation/prctl/seccomp_filter.txt */
	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
		/* if this happens, we are running on a kernel without seccomp
		 * filters support; nothing we can do about it, really. */
		return -1;
	}
	return 0;
}
/* Expands to two BPF instructions for use inside a sock_filter array:
 *   1. compare the accumulator with __NR_<sc>; on a match fall through,
 *      otherwise skip the next instruction;
 *   2. return SECCOMP_RET_KILL.
 * The comparison is against the accumulator, so the syscall number must
 * have been loaded into it before the first use of this macro. */
#define DISALLOW_SYSCALL(sc) \
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_##sc, 0, 1), \
	BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL)
static int seccomp_denyfile() {
struct sock_filter filter[] = {
DISALLOW_SYSCALL(open),
DISALLOW_SYSCALL(mmap),
DISALLOW_SYSCALL(mmap2),
DISALLOW_SYSCALL(munmap),
DISALLOW_SYSCALL(lseek),
DISALLOW_SYSCALL(_llseek),
DISALLOW_SYSCALL(close),
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog = {
.len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
.filter = filter
};
return prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
}
static int seccomp_denysocket() {
struct sock_filter filter[] = {
#ifndef __NR_socketcall
DISALLOW_SYSCALL(setsockopt),
DISALLOW_SYSCALL(socket),
#endif
DISALLOW_SYSCALL(prctl),
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog = {
.len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
.filter = filter
};
return prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
}
/*
 * Test driver: install the base allowlist, then the two deny filters,
 * then try to install the socket/prctl deny filter a second time.
 * The second attempt issues prctl() again, which the filter installed by
 * the first attempt is intended to answer by killing the process.
 */
int main(int argc,char* argv[]) {
	/* command-line arguments are not used */
	(void)argc;
	(void)argv;

	/* Results are deliberately ignored: a failure means the kernel has
	 * no seccomp filter support, and we simply carry on. */
	(void)install_syscall_filter();
	(void)seccomp_denyfile();
	(void)seccomp_denysocket();

	/* same filter again; this prctl() is the call under test */
	(void)seccomp_denysocket();

	return 0;
}