Re: KASAN: use-after-free Read in posix_lock_inode

From: Jeff Layton
Date: Wed Jan 02 2019 - 16:42:32 EST


On Wed, 2019-01-02 at 19:54 +0100, Dmitry Vyukov wrote:
> On Wed, Jan 2, 2019 at 7:51 PM Dmitry Vyukov <dvyukov@xxxxxxxxxx> wrote:
> > On Wed, Jan 2, 2019 at 7:20 PM Jeff Layton <jlayton@xxxxxxxxxx> wrote:
> > > On Wed, 2019-01-02 at 02:31 -0800, syzbot wrote:
> > > > Hello,
> > > >
> > > > syzbot found the following crash on:
> > > >
> > > > HEAD commit: e1ef035d272e Merge tag 'armsoc-defconfig' of git://git.ker..
> > > > git tree: upstream
> > > > console output: https://syzkaller.appspot.com/x/log.txt?x=16bb4c4b400000
> > > > kernel config: https://syzkaller.appspot.com/x/.config?x=9c6a26e22579190b
> > > > dashboard link: https://syzkaller.appspot.com/bug?extid=239d99847eb49ecb3899
> > > > compiler: gcc (GCC) 9.0.0 20181231 (experimental)
> > > > syz repro: https://syzkaller.appspot.com/x/repro.syz?x=128aa377400000
> > > >
> > > > IMPORTANT: if you fix the bug, please add the following tag to the commit:
> > > > Reported-by: syzbot+239d99847eb49ecb3899@xxxxxxxxxxxxxxxxxxxxxxxxx
> > > >
> > > > IPv6: ADDRCONF(NETDEV_UP): vxcan1: link is not ready
> > > > IPv6: ADDRCONF(NETDEV_UP): vxcan1: link is not ready
> > > > 8021q: adding VLAN 0 to HW filter on device batadv0
> > > > 8021q: adding VLAN 0 to HW filter on device batadv0
> > > > ==================================================================
> > > > BUG: KASAN: use-after-free in what_owner_is_waiting_for fs/locks.c:1000
> > > > [inline]
> > > > BUG: KASAN: use-after-free in posix_locks_deadlock fs/locks.c:1023 [inline]
> > > > BUG: KASAN: use-after-free in posix_lock_inode+0x1f9e/0x2750 fs/locks.c:1163
> > > > Read of size 8 at addr ffff88808791b000 by task syz-executor2/10100
> > > >
> > > > CPU: 1 PID: 10100 Comm: syz-executor2 Not tainted 4.20.0+ #3
> > > > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> > > > Google 01/01/2011
> > > > Call Trace:
> > > > __dump_stack lib/dump_stack.c:77 [inline]
> > > > dump_stack+0x1db/0x2d0 lib/dump_stack.c:113
> > > > print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187
> > > > kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317
> > > > __asan_report_load8_noabort+0x14/0x20 mm/kasan/generic_report.c:135
> > > > what_owner_is_waiting_for fs/locks.c:1000 [inline]
> > > > posix_locks_deadlock fs/locks.c:1023 [inline]
> > > > posix_lock_inode+0x1f9e/0x2750 fs/locks.c:1163
> > > > posix_lock_file fs/locks.c:1346 [inline]
> > > > vfs_lock_file fs/locks.c:2314 [inline]
> > > > vfs_lock_file+0xc7/0xf0 fs/locks.c:2309
> > > > do_lock_file_wait.part.0+0xe5/0x260 fs/locks.c:2328
> > > > do_lock_file_wait fs/locks.c:2324 [inline]
> > > > fcntl_setlk+0x2f1/0xfe0 fs/locks.c:2413
> > > > do_fcntl+0x843/0x12b0 fs/fcntl.c:370
> > > > __do_sys_fcntl fs/fcntl.c:463 [inline]
> > > > __se_sys_fcntl fs/fcntl.c:448 [inline]
> > > > __x64_sys_fcntl+0x16d/0x1e0 fs/fcntl.c:448
> > > > do_syscall_64+0x1a3/0x800 arch/x86/entry/common.c:290
> > > > entry_SYSCALL_64_after_hwframe+0x49/0xbe
> > > > RIP: 0033:0x457ec9
> > > > Code: 6d b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7
> > > > 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff
> > > > ff 0f 83 3b b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00
> > > > RSP: 002b:00007f58bbb50c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000048
> > > > RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457ec9
> > > > RDX: 0000000020000140 RSI: 0000000000000007 RDI: 0000000000000003
> > > > RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000
> > > > R10: 0000000000000000 R11: 0000000000000246 R12: 00007f58bbb516d4
> > > > R13: 00000000004be5f0 R14: 00000000004ceab0 R15: 00000000ffffffff
> > > >
> > > > Allocated by task 10100:
> > > > save_stack+0x45/0xd0 mm/kasan/common.c:73
> > > > set_track mm/kasan/common.c:85 [inline]
> > > > kasan_kmalloc mm/kasan/common.c:482 [inline]
> > > > kasan_kmalloc+0xcf/0xe0 mm/kasan/common.c:455
> > > > kasan_slab_alloc+0xf/0x20 mm/kasan/common.c:397
> > > > kmem_cache_alloc+0x12d/0x710 mm/slab.c:3541
> > > > kmem_cache_zalloc include/linux/slab.h:730 [inline]
> > > > locks_alloc_lock+0x8e/0x2f0 fs/locks.c:344
> > > > fcntl_setlk+0xa9/0xfe0 fs/locks.c:2362
> > > > do_fcntl+0x843/0x12b0 fs/fcntl.c:370
> > > > __do_sys_fcntl fs/fcntl.c:463 [inline]
> > > > __se_sys_fcntl fs/fcntl.c:448 [inline]
> > > > __x64_sys_fcntl+0x16d/0x1e0 fs/fcntl.c:448
> > > > do_syscall_64+0x1a3/0x800 arch/x86/entry/common.c:290
> > > > entry_SYSCALL_64_after_hwframe+0x49/0xbe
> > > >
> > > > Freed by task 10100:
> > > > save_stack+0x45/0xd0 mm/kasan/common.c:73
> > > > set_track mm/kasan/common.c:85 [inline]
> > > > __kasan_slab_free+0x102/0x150 mm/kasan/common.c:444
> > > > kasan_slab_free+0xe/0x10 mm/kasan/common.c:452
> > > > __cache_free mm/slab.c:3485 [inline]
> > > > kmem_cache_free+0x86/0x260 mm/slab.c:3747
> > > > locks_free_lock+0x27a/0x3f0 fs/locks.c:381
> > > > fcntl_setlk+0x7b5/0xfe0 fs/locks.c:2439
> > > > do_fcntl+0x843/0x12b0 fs/fcntl.c:370
> > > > __do_sys_fcntl fs/fcntl.c:463 [inline]
> > > > __se_sys_fcntl fs/fcntl.c:448 [inline]
> > > > __x64_sys_fcntl+0x16d/0x1e0 fs/fcntl.c:448
> > > > do_syscall_64+0x1a3/0x800 arch/x86/entry/common.c:290
> > > > entry_SYSCALL_64_after_hwframe+0x49/0xbe
> > > >
> > > > The buggy address belongs to the object at ffff88808791b000
> > > > which belongs to the cache file_lock_cache of size 264
> > > > The buggy address is located 0 bytes inside of
> > > > 264-byte region [ffff88808791b000, ffff88808791b108)
> > > > The buggy address belongs to the page:
> > > > page:ffffea00021e46c0 count:1 mapcount:0 mapping:ffff8880aa16a1c0 index:0x0
> > > > flags: 0x1fffc0000000200(slab)
> > > > raw: 01fffc0000000200 ffffea0002333508 ffffea00021d76c8 ffff8880aa16a1c0
> > > > raw: 0000000000000000 ffff88808791b000 000000010000000c 0000000000000000
> > > > page dumped because: kasan: bad access detected
> > > >
> > > > Memory state around the buggy address:
> > > > ffff88808791af00: fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc
> > > > ffff88808791af80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> > > > > ffff88808791b000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> > > > ^
> > > > ffff88808791b080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> > > > ffff88808791b100: fb fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb
> > > > ==================================================================
> > > >
> > > >
> > >
> > > I've given this a harder look and I really don't quite grok what
> > > this output is telling me:
> > >
> > > All 3 stack traces say they come from PID 10100, but the use-after-free
> > > seems to occur well before the free could have occurred in the context
> > > of the current fcntl call.
> >
> > Interestingly it is the case in all crashes for this bug. It may be
> > something inherent, or maybe just the particular program that
> > triggered this is such that these accesses happen in the same thread.
> >
> > > So, I guess that leaves the possibility that we freed a lock request
> > > from an earlier fcntl call without removing it properly from the tree,
> > > but (a) I don't see how that could happen, _and_ (b) why didn't that
> > > trip the BUG_ONs in locks_free_lock?
> > >
> > > I'll keep looking at this, but I'm a bit stumped at the moment.
> >
> > The simplest repro for this is:
> >
> > # See https://goo.gl/kgGztJ for information about syzkaller reproducers.
> > #{"threaded":true,"collide":true,"repeat":true,"procs":6,"sandbox":"none","fault_call":-1,"tun":true,"tmpdir":true,"cgroups":true,"netdev":true,"resetnet":true,"segv":true}
> > r0 = epoll_create1(0x0)
> > fcntl$lock(r0, 0x7, &(0x7f0000000080))
> > fcntl$lock(r0, 0x7, &(0x7f0000000140)={0x1000000000001, 0x0, 0x1000000})
> >
> > "collide":true means that the 2 fcntl's were executed in parallel. But
> > still the alloc/free/access always happened in the same thread, so the
> > thread interaction seems to be somewhat unusual.
>
> Looking at frequency of this crash, repro properties and some other
> signals my money are on a race/atomicity violation with a narrow
> inconsistency window. E.g. unlock something and then expect things
> have not changed after re-lock, or remove from list and reset a
> pointer few instructions later.


Thanks Dmitry,

The good news is that it's quite reproducible. I used syz-prog2c on the
reproducer and got the attached program (build with -lpthread).

I didn't have KASAN enabled on my throwaway VM, but I got a GPF and
stack trace much like the one above when I ran this on a kernel with the
thundering herd set. A kernel based on a commit from earlier in the
merge window didn't show the problem.

I tested commenting out some lockless shortcuts, but they didn't fix it.
I'll keep playing with it.

Cheers,
--
Jeff Layton <jlayton@xxxxxxxxxx>
// autogenerated by syzkaller (https://github.com/google/syzkaller)

#define _GNU_SOURCE

#include <arpa/inet.h>
#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <netinet/in.h>
#include <pthread.h>
#include <sched.h>
#include <setjmp.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#include <linux/capability.h>
#include <linux/futex.h>
#include <linux/if_addr.h>
#include <linux/if_ether.h>
#include <linux/if_link.h>
#include <linux/if_tun.h>
#include <linux/in6.h>
#include <linux/ip.h>
#include <linux/neighbour.h>
#include <linux/net.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/tcp.h>
#include <linux/veth.h>

unsigned long long procid;

static __thread int skip_segv;
static __thread jmp_buf segv_env;

static void segv_handler(int sig, siginfo_t* info, void* ctx)
{
uintptr_t addr = (uintptr_t)info->si_addr;
const uintptr_t prog_start = 1 << 20;
const uintptr_t prog_end = 100 << 20;
if (__atomic_load_n(&skip_segv, __ATOMIC_RELAXED) &&
(addr < prog_start || addr > prog_end)) {
_longjmp(segv_env, 1);
}
exit(sig);
}

static void install_segv_handler(void)
{
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
sa.sa_handler = SIG_IGN;
syscall(SYS_rt_sigaction, 0x20, &sa, NULL, 8);
syscall(SYS_rt_sigaction, 0x21, &sa, NULL, 8);
memset(&sa, 0, sizeof(sa));
sa.sa_sigaction = segv_handler;
sa.sa_flags = SA_NODEFER | SA_SIGINFO;
sigaction(SIGSEGV, &sa, NULL);
sigaction(SIGBUS, &sa, NULL);
}

#define NONFAILING(...) \
{ \
__atomic_fetch_add(&skip_segv, 1, __ATOMIC_SEQ_CST); \
if (_setjmp(segv_env) == 0) { \
__VA_ARGS__; \
} \
__atomic_fetch_sub(&skip_segv, 1, __ATOMIC_SEQ_CST); \
}

static void sleep_ms(uint64_t ms)
{
usleep(ms * 1000);
}

static uint64_t current_time_ms(void)
{
struct timespec ts;
if (clock_gettime(CLOCK_MONOTONIC, &ts))
exit(1);
return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
}

static void use_temporary_dir(void)
{
char tmpdir_template[] = "./syzkaller.XXXXXX";
char* tmpdir = mkdtemp(tmpdir_template);
if (!tmpdir)
exit(1);
if (chmod(tmpdir, 0777))
exit(1);
if (chdir(tmpdir))
exit(1);
}

static void thread_start(void* (*fn)(void*), void* arg)
{
pthread_t th;
pthread_attr_t attr;
pthread_attr_init(&attr);
pthread_attr_setstacksize(&attr, 128 << 10);
if (pthread_create(&th, &attr, fn, arg))
exit(1);
pthread_attr_destroy(&attr);
}

typedef struct {
int state;
} event_t;

static void event_init(event_t* ev)
{
ev->state = 0;
}

static void event_reset(event_t* ev)
{
ev->state = 0;
}

static void event_set(event_t* ev)
{
if (ev->state)
exit(1);
__atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG);
}

static void event_wait(event_t* ev)
{
while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
}

static int event_isset(event_t* ev)
{
return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
}

static int event_timedwait(event_t* ev, uint64_t timeout)
{
uint64_t start = current_time_ms();
uint64_t now = start;
for (;;) {
uint64_t remain = timeout - (now - start);
struct timespec ts;
ts.tv_sec = remain / 1000;
ts.tv_nsec = (remain % 1000) * 1000 * 1000;
syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
if (__atomic_load_n(&ev->state, __ATOMIC_RELAXED))
return 1;
now = current_time_ms();
if (now - start > timeout)
return 0;
}
}

static bool write_file(const char* file, const char* what, ...)
{
char buf[1024];
va_list args;
va_start(args, what);
vsnprintf(buf, sizeof(buf), what, args);
va_end(args);
buf[sizeof(buf) - 1] = 0;
int len = strlen(buf);
int fd = open(file, O_WRONLY | O_CLOEXEC);
if (fd == -1)
return false;
if (write(fd, buf, len) != len) {
int err = errno;
close(fd);
errno = err;
return false;
}
close(fd);
return true;
}

static struct {
char* pos;
int nesting;
struct nlattr* nested[8];
char buf[1024];
} nlmsg;

static void netlink_init(int typ, int flags, const void* data, int size)
{
memset(&nlmsg, 0, sizeof(nlmsg));
struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg.buf;
hdr->nlmsg_type = typ;
hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
memcpy(hdr + 1, data, size);
nlmsg.pos = (char*)(hdr + 1) + NLMSG_ALIGN(size);
}

static void netlink_attr(int typ, const void* data, int size)
{
struct nlattr* attr = (struct nlattr*)nlmsg.pos;
attr->nla_len = sizeof(*attr) + size;
attr->nla_type = typ;
memcpy(attr + 1, data, size);
nlmsg.pos += NLMSG_ALIGN(attr->nla_len);
}

static void netlink_nest(int typ)
{
struct nlattr* attr = (struct nlattr*)nlmsg.pos;
attr->nla_type = typ;
nlmsg.pos += sizeof(*attr);
nlmsg.nested[nlmsg.nesting++] = attr;
}

static void netlink_done(void)
{
struct nlattr* attr = nlmsg.nested[--nlmsg.nesting];
attr->nla_len = nlmsg.pos - (char*)attr;
}

static int netlink_send(int sock)
{
if (nlmsg.pos > nlmsg.buf + sizeof(nlmsg.buf) || nlmsg.nesting)
exit(1);
struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg.buf;
hdr->nlmsg_len = nlmsg.pos - nlmsg.buf;
struct sockaddr_nl addr;
memset(&addr, 0, sizeof(addr));
addr.nl_family = AF_NETLINK;
unsigned n = sendto(sock, nlmsg.buf, hdr->nlmsg_len, 0,
(struct sockaddr*)&addr, sizeof(addr));
if (n != hdr->nlmsg_len)
exit(1);
n = recv(sock, nlmsg.buf, sizeof(nlmsg.buf), 0);
if (n < sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr))
exit(1);
if (hdr->nlmsg_type != NLMSG_ERROR)
exit(1);
return -((struct nlmsgerr*)(hdr + 1))->error;
}

static void netlink_add_device_impl(const char* type, const char* name)
{
struct ifinfomsg hdr;
memset(&hdr, 0, sizeof(hdr));
netlink_init(RTM_NEWLINK, NLM_F_EXCL | NLM_F_CREATE, &hdr, sizeof(hdr));
if (name)
netlink_attr(IFLA_IFNAME, name, strlen(name));
netlink_nest(IFLA_LINKINFO);
netlink_attr(IFLA_INFO_KIND, type, strlen(type));
}

static void netlink_add_device(int sock, const char* type, const char* name)
{
netlink_add_device_impl(type, name);
netlink_done();
int err = netlink_send(sock);
(void)err;
}

static void netlink_add_veth(int sock, const char* name, const char* peer)
{
netlink_add_device_impl("veth", name);
netlink_nest(IFLA_INFO_DATA);
netlink_nest(VETH_INFO_PEER);
nlmsg.pos += sizeof(struct ifinfomsg);
netlink_attr(IFLA_IFNAME, peer, strlen(peer));
netlink_done();
netlink_done();
netlink_done();
int err = netlink_send(sock);
(void)err;
}

static void netlink_add_hsr(int sock, const char* name, const char* slave1,
const char* slave2)
{
netlink_add_device_impl("hsr", name);
netlink_nest(IFLA_INFO_DATA);
int ifindex1 = if_nametoindex(slave1);
netlink_attr(IFLA_HSR_SLAVE1, &ifindex1, sizeof(ifindex1));
int ifindex2 = if_nametoindex(slave2);
netlink_attr(IFLA_HSR_SLAVE2, &ifindex2, sizeof(ifindex2));
netlink_done();
netlink_done();
int err = netlink_send(sock);
(void)err;
}

static void netlink_device_change(int sock, const char* name, bool up,
const char* master, const void* mac,
int macsize)
{
struct ifinfomsg hdr;
memset(&hdr, 0, sizeof(hdr));
if (up)
hdr.ifi_flags = hdr.ifi_change = IFF_UP;
netlink_init(RTM_NEWLINK, 0, &hdr, sizeof(hdr));
netlink_attr(IFLA_IFNAME, name, strlen(name));
if (master) {
int ifindex = if_nametoindex(master);
netlink_attr(IFLA_MASTER, &ifindex, sizeof(ifindex));
}
if (macsize)
netlink_attr(IFLA_ADDRESS, mac, macsize);
int err = netlink_send(sock);
(void)err;
}

static int netlink_add_addr(int sock, const char* dev, const void* addr,
int addrsize)
{
struct ifaddrmsg hdr;
memset(&hdr, 0, sizeof(hdr));
hdr.ifa_family = addrsize == 4 ? AF_INET : AF_INET6;
hdr.ifa_prefixlen = addrsize == 4 ? 24 : 120;
hdr.ifa_scope = RT_SCOPE_UNIVERSE;
hdr.ifa_index = if_nametoindex(dev);
netlink_init(RTM_NEWADDR, NLM_F_CREATE | NLM_F_REPLACE, &hdr, sizeof(hdr));
netlink_attr(IFA_LOCAL, addr, addrsize);
netlink_attr(IFA_ADDRESS, addr, addrsize);
return netlink_send(sock);
}

static void netlink_add_addr4(int sock, const char* dev, const char* addr)
{
struct in_addr in_addr;
inet_pton(AF_INET, addr, &in_addr);
int err = netlink_add_addr(sock, dev, &in_addr, sizeof(in_addr));
(void)err;
}

static void netlink_add_addr6(int sock, const char* dev, const char* addr)
{
struct in6_addr in6_addr;
inet_pton(AF_INET6, addr, &in6_addr);
int err = netlink_add_addr(sock, dev, &in6_addr, sizeof(in6_addr));
(void)err;
}

#define DEV_IPV4 "172.20.20.%d"
#define DEV_IPV6 "fe80::%02hx"
#define DEV_MAC 0x00aaaaaaaaaa
static void initialize_netdevices(void)
{
char netdevsim[16];
sprintf(netdevsim, "netdevsim%d", (int)procid);
struct {
const char* type;
const char* dev;
} devtypes[] = {
{"ip6gretap", "ip6gretap0"}, {"bridge", "bridge0"},
{"vcan", "vcan0"}, {"bond", "bond0"},
{"team", "team0"}, {"dummy", "dummy0"},
{"nlmon", "nlmon0"}, {"caif", "caif0"},
{"batadv", "batadv0"}, {"vxcan", "vxcan1"},
{"netdevsim", netdevsim}, {"veth", 0},
};
const char* devmasters[] = {"bridge", "bond", "team"};
struct {
const char* name;
int macsize;
bool noipv6;
} devices[] = {
{"lo", ETH_ALEN},
{"sit0", 0},
{"bridge0", ETH_ALEN},
{"vcan0", 0, true},
{"tunl0", 0},
{"gre0", 0},
{"gretap0", ETH_ALEN},
{"ip_vti0", 0},
{"ip6_vti0", 0},
{"ip6tnl0", 0},
{"ip6gre0", 0},
{"ip6gretap0", ETH_ALEN},
{"erspan0", ETH_ALEN},
{"bond0", ETH_ALEN},
{"veth0", ETH_ALEN},
{"veth1", ETH_ALEN},
{"team0", ETH_ALEN},
{"veth0_to_bridge", ETH_ALEN},
{"veth1_to_bridge", ETH_ALEN},
{"veth0_to_bond", ETH_ALEN},
{"veth1_to_bond", ETH_ALEN},
{"veth0_to_team", ETH_ALEN},
{"veth1_to_team", ETH_ALEN},
{"veth0_to_hsr", ETH_ALEN},
{"veth1_to_hsr", ETH_ALEN},
{"hsr0", 0},
{"dummy0", ETH_ALEN},
{"nlmon0", 0},
{"vxcan1", 0, true},
{"caif0", ETH_ALEN},
{"batadv0", ETH_ALEN},
{netdevsim, ETH_ALEN},
};
int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (sock == -1)
exit(1);
unsigned i;
for (i = 0; i < sizeof(devtypes) / sizeof(devtypes[0]); i++)
netlink_add_device(sock, devtypes[i].type, devtypes[i].dev);
for (i = 0; i < sizeof(devmasters) / (sizeof(devmasters[0])); i++) {
char master[32], slave0[32], veth0[32], slave1[32], veth1[32];
sprintf(slave0, "%s_slave_0", devmasters[i]);
sprintf(veth0, "veth0_to_%s", devmasters[i]);
netlink_add_veth(sock, slave0, veth0);
sprintf(slave1, "%s_slave_1", devmasters[i]);
sprintf(veth1, "veth1_to_%s", devmasters[i]);
netlink_add_veth(sock, slave1, veth1);
sprintf(master, "%s0", devmasters[i]);
netlink_device_change(sock, slave0, false, master, 0, 0);
netlink_device_change(sock, slave1, false, master, 0, 0);
}
netlink_device_change(sock, "bridge_slave_0", true, 0, 0, 0);
netlink_device_change(sock, "bridge_slave_1", true, 0, 0, 0);
netlink_add_veth(sock, "hsr_slave_0", "veth0_to_hsr");
netlink_add_veth(sock, "hsr_slave_1", "veth1_to_hsr");
netlink_add_hsr(sock, "hsr0", "hsr_slave_0", "hsr_slave_1");
netlink_device_change(sock, "hsr_slave_0", true, 0, 0, 0);
netlink_device_change(sock, "hsr_slave_1", true, 0, 0, 0);
for (i = 0; i < sizeof(devices) / (sizeof(devices[0])); i++) {
char addr[32];
sprintf(addr, DEV_IPV4, i + 10);
netlink_add_addr4(sock, devices[i].name, addr);
if (!devices[i].noipv6) {
sprintf(addr, DEV_IPV6, i + 10);
netlink_add_addr6(sock, devices[i].name, addr);
}
uint64_t macaddr = DEV_MAC + ((i + 10ull) << 40);
netlink_device_change(sock, devices[i].name, true, 0, &macaddr,
devices[i].macsize);
}
close(sock);
}
static void initialize_netdevices_init(void)
{
int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (sock == -1)
exit(1);
struct {
const char* type;
int macsize;
bool noipv6;
bool noup;
} devtypes[] = {
{"nr", 7, true},
{"rose", 5, true, true},
};
unsigned i;
for (i = 0; i < sizeof(devtypes) / sizeof(devtypes[0]); i++) {
char dev[32], addr[32];
sprintf(dev, "%s%d", devtypes[i].type, (int)procid);
sprintf(addr, "172.30.%d.%d", i, (int)procid + 1);
netlink_add_addr4(sock, dev, addr);
if (!devtypes[i].noipv6) {
sprintf(addr, "fe88::%02hx:%02hx", i, (int)procid + 1);
netlink_add_addr6(sock, dev, addr);
}
int macsize = devtypes[i].macsize;
uint64_t macaddr = 0xbbbbbb +
((unsigned long long)i << (8 * (macsize - 2))) +
(procid << (8 * (macsize - 1)));
netlink_device_change(sock, dev, !devtypes[i].noup, 0, &macaddr, macsize);
}
close(sock);
}

#define XT_TABLE_SIZE 1536
#define XT_MAX_ENTRIES 10

struct xt_counters {
uint64_t pcnt, bcnt;
};

struct ipt_getinfo {
char name[32];
unsigned int valid_hooks;
unsigned int hook_entry[5];
unsigned int underflow[5];
unsigned int num_entries;
unsigned int size;
};

struct ipt_get_entries {
char name[32];
unsigned int size;
void* entrytable[XT_TABLE_SIZE / sizeof(void*)];
};

struct ipt_replace {
char name[32];
unsigned int valid_hooks;
unsigned int num_entries;
unsigned int size;
unsigned int hook_entry[5];
unsigned int underflow[5];
unsigned int num_counters;
struct xt_counters* counters;
char entrytable[XT_TABLE_SIZE];
};

struct ipt_table_desc {
const char* name;
struct ipt_getinfo info;
struct ipt_replace replace;
};

static struct ipt_table_desc ipv4_tables[] = {
{.name = "filter"}, {.name = "nat"}, {.name = "mangle"},
{.name = "raw"}, {.name = "security"},
};

static struct ipt_table_desc ipv6_tables[] = {
{.name = "filter"}, {.name = "nat"}, {.name = "mangle"},
{.name = "raw"}, {.name = "security"},
};

#define IPT_BASE_CTL 64
#define IPT_SO_SET_REPLACE (IPT_BASE_CTL)
#define IPT_SO_GET_INFO (IPT_BASE_CTL)
#define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1)

struct arpt_getinfo {
char name[32];
unsigned int valid_hooks;
unsigned int hook_entry[3];
unsigned int underflow[3];
unsigned int num_entries;
unsigned int size;
};

struct arpt_get_entries {
char name[32];
unsigned int size;
void* entrytable[XT_TABLE_SIZE / sizeof(void*)];
};

struct arpt_replace {
char name[32];
unsigned int valid_hooks;
unsigned int num_entries;
unsigned int size;
unsigned int hook_entry[3];
unsigned int underflow[3];
unsigned int num_counters;
struct xt_counters* counters;
char entrytable[XT_TABLE_SIZE];
};

struct arpt_table_desc {
const char* name;
struct arpt_getinfo info;
struct arpt_replace replace;
};

static struct arpt_table_desc arpt_tables[] = {
{.name = "filter"},
};

#define ARPT_BASE_CTL 96
#define ARPT_SO_SET_REPLACE (ARPT_BASE_CTL)
#define ARPT_SO_GET_INFO (ARPT_BASE_CTL)
#define ARPT_SO_GET_ENTRIES (ARPT_BASE_CTL + 1)

static void checkpoint_iptables(struct ipt_table_desc* tables, int num_tables,
int family, int level)
{
struct ipt_get_entries entries;
socklen_t optlen;
int fd, i;
fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
if (fd == -1) {
switch (errno) {
case EAFNOSUPPORT:
case ENOPROTOOPT:
return;
}
exit(1);
}
for (i = 0; i < num_tables; i++) {
struct ipt_table_desc* table = &tables[i];
strcpy(table->info.name, table->name);
strcpy(table->replace.name, table->name);
optlen = sizeof(table->info);
if (getsockopt(fd, level, IPT_SO_GET_INFO, &table->info, &optlen)) {
switch (errno) {
case EPERM:
case ENOENT:
case ENOPROTOOPT:
continue;
}
exit(1);
}
if (table->info.size > sizeof(table->replace.entrytable))
exit(1);
if (table->info.num_entries > XT_MAX_ENTRIES)
exit(1);
memset(&entries, 0, sizeof(entries));
strcpy(entries.name, table->name);
entries.size = table->info.size;
optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size;
if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
exit(1);
table->replace.valid_hooks = table->info.valid_hooks;
table->replace.num_entries = table->info.num_entries;
table->replace.size = table->info.size;
memcpy(table->replace.hook_entry, table->info.hook_entry,
sizeof(table->replace.hook_entry));
memcpy(table->replace.underflow, table->info.underflow,
sizeof(table->replace.underflow));
memcpy(table->replace.entrytable, entries.entrytable, table->info.size);
}
close(fd);
}

static void reset_iptables(struct ipt_table_desc* tables, int num_tables,
int family, int level)
{
struct xt_counters counters[XT_MAX_ENTRIES];
struct ipt_get_entries entries;
struct ipt_getinfo info;
socklen_t optlen;
int fd, i;
fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
if (fd == -1) {
switch (errno) {
case EAFNOSUPPORT:
case ENOPROTOOPT:
return;
}
exit(1);
}
for (i = 0; i < num_tables; i++) {
struct ipt_table_desc* table = &tables[i];
if (table->info.valid_hooks == 0)
continue;
memset(&info, 0, sizeof(info));
strcpy(info.name, table->name);
optlen = sizeof(info);
if (getsockopt(fd, level, IPT_SO_GET_INFO, &info, &optlen))
exit(1);
if (memcmp(&table->info, &info, sizeof(table->info)) == 0) {
memset(&entries, 0, sizeof(entries));
strcpy(entries.name, table->name);
entries.size = table->info.size;
optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size;
if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
exit(1);
if (memcmp(table->replace.entrytable, entries.entrytable,
table->info.size) == 0)
continue;
}
table->replace.num_counters = info.num_entries;
table->replace.counters = counters;
optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) +
table->replace.size;
if (setsockopt(fd, level, IPT_SO_SET_REPLACE, &table->replace, optlen))
exit(1);
}
close(fd);
}

static void checkpoint_arptables(void)
{
struct arpt_get_entries entries;
socklen_t optlen;
unsigned i;
int fd;
fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (fd == -1) {
switch (errno) {
case EAFNOSUPPORT:
case ENOPROTOOPT:
return;
}
exit(1);
}
for (i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) {
struct arpt_table_desc* table = &arpt_tables[i];
strcpy(table->info.name, table->name);
strcpy(table->replace.name, table->name);
optlen = sizeof(table->info);
if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &table->info, &optlen)) {
switch (errno) {
case EPERM:
case ENOENT:
case ENOPROTOOPT:
continue;
}
exit(1);
}
if (table->info.size > sizeof(table->replace.entrytable))
exit(1);
if (table->info.num_entries > XT_MAX_ENTRIES)
exit(1);
memset(&entries, 0, sizeof(entries));
strcpy(entries.name, table->name);
entries.size = table->info.size;
optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size;
if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen))
exit(1);
table->replace.valid_hooks = table->info.valid_hooks;
table->replace.num_entries = table->info.num_entries;
table->replace.size = table->info.size;
memcpy(table->replace.hook_entry, table->info.hook_entry,
sizeof(table->replace.hook_entry));
memcpy(table->replace.underflow, table->info.underflow,
sizeof(table->replace.underflow));
memcpy(table->replace.entrytable, entries.entrytable, table->info.size);
}
close(fd);
}

static void reset_arptables()
{
struct xt_counters counters[XT_MAX_ENTRIES];
struct arpt_get_entries entries;
struct arpt_getinfo info;
socklen_t optlen;
unsigned i;
int fd;
fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (fd == -1) {
switch (errno) {
case EAFNOSUPPORT:
case ENOPROTOOPT:
return;
}
exit(1);
}
for (i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) {
struct arpt_table_desc* table = &arpt_tables[i];
if (table->info.valid_hooks == 0)
continue;
memset(&info, 0, sizeof(info));
strcpy(info.name, table->name);
optlen = sizeof(info);
if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &info, &optlen))
exit(1);
if (memcmp(&table->info, &info, sizeof(table->info)) == 0) {
memset(&entries, 0, sizeof(entries));
strcpy(entries.name, table->name);
entries.size = table->info.size;
optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size;
if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen))
exit(1);
if (memcmp(table->replace.entrytable, entries.entrytable,
table->info.size) == 0)
continue;
} else {
}
table->replace.num_counters = info.num_entries;
table->replace.counters = counters;
optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) +
table->replace.size;
if (setsockopt(fd, SOL_IP, ARPT_SO_SET_REPLACE, &table->replace, optlen))
exit(1);
}
close(fd);
}

#define NF_BR_NUMHOOKS 6
#define EBT_TABLE_MAXNAMELEN 32
#define EBT_CHAIN_MAXNAMELEN 32
#define EBT_BASE_CTL 128
#define EBT_SO_SET_ENTRIES (EBT_BASE_CTL)
#define EBT_SO_GET_INFO (EBT_BASE_CTL)
#define EBT_SO_GET_ENTRIES (EBT_SO_GET_INFO + 1)
#define EBT_SO_GET_INIT_INFO (EBT_SO_GET_ENTRIES + 1)
#define EBT_SO_GET_INIT_ENTRIES (EBT_SO_GET_INIT_INFO + 1)

struct ebt_replace {
char name[EBT_TABLE_MAXNAMELEN];
unsigned int valid_hooks;
unsigned int nentries;
unsigned int entries_size;
struct ebt_entries* hook_entry[NF_BR_NUMHOOKS];
unsigned int num_counters;
struct ebt_counter* counters;
char* entries;
};

struct ebt_entries {
unsigned int distinguisher;
char name[EBT_CHAIN_MAXNAMELEN];
unsigned int counter_offset;
int policy;
unsigned int nentries;
char data[0] __attribute__((aligned(__alignof__(struct ebt_replace))));
};

struct ebt_table_desc {
const char* name;
struct ebt_replace replace;
char entrytable[XT_TABLE_SIZE];
};

static struct ebt_table_desc ebt_tables[] = {
{.name = "filter"},
{.name = "nat"},
{.name = "broute"},
};

static void checkpoint_ebtables(void)
{
socklen_t optlen;
unsigned i;
int fd;
fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (fd == -1) {
switch (errno) {
case EAFNOSUPPORT:
case ENOPROTOOPT:
return;
}
exit(1);
}
for (i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) {
struct ebt_table_desc* table = &ebt_tables[i];
strcpy(table->replace.name, table->name);
optlen = sizeof(table->replace);
if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_INFO, &table->replace,
&optlen)) {
switch (errno) {
case EPERM:
case ENOENT:
case ENOPROTOOPT:
continue;
}
exit(1);
}
if (table->replace.entries_size > sizeof(table->entrytable))
exit(1);
table->replace.num_counters = 0;
table->replace.entries = table->entrytable;
optlen = sizeof(table->replace) + table->replace.entries_size;
if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_ENTRIES, &table->replace,
&optlen))
exit(1);
}
close(fd);
}

static void reset_ebtables()
{
struct ebt_replace replace;
char entrytable[XT_TABLE_SIZE];
socklen_t optlen;
unsigned i, j, h;
int fd;
fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (fd == -1) {
switch (errno) {
case EAFNOSUPPORT:
case ENOPROTOOPT:
return;
}
exit(1);
}
for (i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) {
struct ebt_table_desc* table = &ebt_tables[i];
if (table->replace.valid_hooks == 0)
continue;
memset(&replace, 0, sizeof(replace));
strcpy(replace.name, table->name);
optlen = sizeof(replace);
if (getsockopt(fd, SOL_IP, EBT_SO_GET_INFO, &replace, &optlen))
exit(1);
replace.num_counters = 0;
table->replace.entries = 0;
for (h = 0; h < NF_BR_NUMHOOKS; h++)
table->replace.hook_entry[h] = 0;
if (memcmp(&table->replace, &replace, sizeof(table->replace)) == 0) {
memset(&entrytable, 0, sizeof(entrytable));
replace.entries = entrytable;
optlen = sizeof(replace) + replace.entries_size;
if (getsockopt(fd, SOL_IP, EBT_SO_GET_ENTRIES, &replace, &optlen))
exit(1);
if (memcmp(table->entrytable, entrytable, replace.entries_size) == 0)
continue;
}
for (j = 0, h = 0; h < NF_BR_NUMHOOKS; h++) {
if (table->replace.valid_hooks & (1 << h)) {
table->replace.hook_entry[h] =
(struct ebt_entries*)table->entrytable + j;
j++;
}
}
table->replace.entries = table->entrytable;
optlen = sizeof(table->replace) + table->replace.entries_size;
if (setsockopt(fd, SOL_IP, EBT_SO_SET_ENTRIES, &table->replace, optlen))
exit(1);
}
close(fd);
}

static void checkpoint_net_namespace(void)
{
checkpoint_ebtables();
checkpoint_arptables();
checkpoint_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]),
AF_INET, SOL_IP);
checkpoint_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]),
AF_INET6, SOL_IPV6);
}

static void reset_net_namespace(void)
{
reset_ebtables();
reset_arptables();
reset_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]),
AF_INET, SOL_IP);
reset_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]),
AF_INET6, SOL_IPV6);
}

static void setup_cgroups()
{
if (mkdir("/syzcgroup", 0777)) {
}
if (mkdir("/syzcgroup/unified", 0777)) {
}
if (mount("none", "/syzcgroup/unified", "cgroup2", 0, NULL)) {
}
if (chmod("/syzcgroup/unified", 0777)) {
}
write_file("/syzcgroup/unified/cgroup.subtree_control",
"+cpu +memory +io +pids +rdma");
if (mkdir("/syzcgroup/cpu", 0777)) {
}
if (mount("none", "/syzcgroup/cpu", "cgroup", 0,
"cpuset,cpuacct,perf_event,hugetlb")) {
}
write_file("/syzcgroup/cpu/cgroup.clone_children", "1");
if (chmod("/syzcgroup/cpu", 0777)) {
}
if (mkdir("/syzcgroup/net", 0777)) {
}
if (mount("none", "/syzcgroup/net", "cgroup", 0,
"net_cls,net_prio,devices,freezer")) {
}
if (chmod("/syzcgroup/net", 0777)) {
}
}
static void setup_binfmt_misc()
{
if (mount(0, "/proc/sys/fs/binfmt_misc", "binfmt_misc", 0, 0)) {
}
write_file("/proc/sys/fs/binfmt_misc/register", ":syz0:M:0:\x01::./file0:");
write_file("/proc/sys/fs/binfmt_misc/register",
":syz1:M:1:\x02::./file0:POC");
}

static void setup_common()
{
if (mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0)) {
}
setup_cgroups();
setup_binfmt_misc();
}

static void loop();

static void sandbox_common()
{
prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
setpgrp();
setsid();
struct rlimit rlim;
rlim.rlim_cur = rlim.rlim_max = 200 << 20;
setrlimit(RLIMIT_AS, &rlim);
rlim.rlim_cur = rlim.rlim_max = 32 << 20;
setrlimit(RLIMIT_MEMLOCK, &rlim);
rlim.rlim_cur = rlim.rlim_max = 136 << 20;
setrlimit(RLIMIT_FSIZE, &rlim);
rlim.rlim_cur = rlim.rlim_max = 1 << 20;
setrlimit(RLIMIT_STACK, &rlim);
rlim.rlim_cur = rlim.rlim_max = 0;
setrlimit(RLIMIT_CORE, &rlim);
rlim.rlim_cur = rlim.rlim_max = 256;
setrlimit(RLIMIT_NOFILE, &rlim);
if (unshare(CLONE_NEWNS)) {
}
if (unshare(CLONE_NEWIPC)) {
}
if (unshare(0x02000000)) {
}
if (unshare(CLONE_NEWUTS)) {
}
if (unshare(CLONE_SYSVSEM)) {
}
typedef struct {
const char* name;
const char* value;
} sysctl_t;
static const sysctl_t sysctls[] = {
{"/proc/sys/kernel/shmmax", "16777216"},
{"/proc/sys/kernel/shmall", "536870912"},
{"/proc/sys/kernel/shmmni", "1024"},
{"/proc/sys/kernel/msgmax", "8192"},
{"/proc/sys/kernel/msgmni", "1024"},
{"/proc/sys/kernel/msgmnb", "1024"},
{"/proc/sys/kernel/sem", "1024 1048576 500 1024"},
};
unsigned i;
for (i = 0; i < sizeof(sysctls) / sizeof(sysctls[0]); i++)
write_file(sysctls[i].name, sysctls[i].value);
}

int wait_for_loop(int pid)
{
if (pid < 0)
exit(1);
int status = 0;
while (waitpid(-1, &status, __WALL) != pid) {
}
return WEXITSTATUS(status);
}

static int real_uid;
static int real_gid;
__attribute__((aligned(64 << 10))) static char sandbox_stack[1 << 20];

static int namespace_sandbox_proc(void* arg)
{
sandbox_common();
write_file("/proc/self/setgroups", "deny");
if (!write_file("/proc/self/uid_map", "0 %d 1\n", real_uid))
exit(1);
if (!write_file("/proc/self/gid_map", "0 %d 1\n", real_gid))
exit(1);
initialize_netdevices_init();
if (unshare(CLONE_NEWNET))
exit(1);
initialize_netdevices();
if (mkdir("./syz-tmp", 0777))
exit(1);
if (mount("", "./syz-tmp", "tmpfs", 0, NULL))
exit(1);
if (mkdir("./syz-tmp/newroot", 0777))
exit(1);
if (mkdir("./syz-tmp/newroot/dev", 0700))
exit(1);
unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE;
if (mount("/dev", "./syz-tmp/newroot/dev", NULL, bind_mount_flags, NULL))
exit(1);
if (mkdir("./syz-tmp/newroot/proc", 0700))
exit(1);
if (mount(NULL, "./syz-tmp/newroot/proc", "proc", 0, NULL))
exit(1);
if (mkdir("./syz-tmp/newroot/selinux", 0700))
exit(1);
const char* selinux_path = "./syz-tmp/newroot/selinux";
if (mount("/selinux", selinux_path, NULL, bind_mount_flags, NULL)) {
if (errno != ENOENT)
exit(1);
if (mount("/sys/fs/selinux", selinux_path, NULL, bind_mount_flags, NULL) &&
errno != ENOENT)
exit(1);
}
if (mkdir("./syz-tmp/newroot/sys", 0700))
exit(1);
if (mount("/sys", "./syz-tmp/newroot/sys", 0, bind_mount_flags, NULL))
exit(1);
if (mkdir("./syz-tmp/newroot/syzcgroup", 0700))
exit(1);
if (mkdir("./syz-tmp/newroot/syzcgroup/unified", 0700))
exit(1);
if (mkdir("./syz-tmp/newroot/syzcgroup/cpu", 0700))
exit(1);
if (mkdir("./syz-tmp/newroot/syzcgroup/net", 0700))
exit(1);
if (mount("/syzcgroup/unified", "./syz-tmp/newroot/syzcgroup/unified", NULL,
bind_mount_flags, NULL)) {
}
if (mount("/syzcgroup/cpu", "./syz-tmp/newroot/syzcgroup/cpu", NULL,
bind_mount_flags, NULL)) {
}
if (mount("/syzcgroup/net", "./syz-tmp/newroot/syzcgroup/net", NULL,
bind_mount_flags, NULL)) {
}
if (mkdir("./syz-tmp/pivot", 0777))
exit(1);
if (syscall(SYS_pivot_root, "./syz-tmp", "./syz-tmp/pivot")) {
if (chdir("./syz-tmp"))
exit(1);
} else {
if (chdir("/"))
exit(1);
if (umount2("./pivot", MNT_DETACH))
exit(1);
}
if (chroot("./newroot"))
exit(1);
if (chdir("/"))
exit(1);
struct __user_cap_header_struct cap_hdr = {};
struct __user_cap_data_struct cap_data[2] = {};
cap_hdr.version = _LINUX_CAPABILITY_VERSION_3;
cap_hdr.pid = getpid();
if (syscall(SYS_capget, &cap_hdr, &cap_data))
exit(1);
cap_data[0].effective &= ~(1 << CAP_SYS_PTRACE);
cap_data[0].permitted &= ~(1 << CAP_SYS_PTRACE);
cap_data[0].inheritable &= ~(1 << CAP_SYS_PTRACE);
if (syscall(SYS_capset, &cap_hdr, &cap_data))
exit(1);
loop();
exit(1);
}

#define SYZ_HAVE_SANDBOX_NAMESPACE 1
static int do_sandbox_namespace(void)
{
int pid;
setup_common();
real_uid = getuid();
real_gid = getgid();
mprotect(sandbox_stack, 4096, PROT_NONE);
pid =
clone(namespace_sandbox_proc, &sandbox_stack[sizeof(sandbox_stack) - 64],
CLONE_NEWUSER | CLONE_NEWPID, 0);
return wait_for_loop(pid);
}

#define FS_IOC_SETFLAGS _IOW('f', 2, long)
static void remove_dir(const char* dir)
{
DIR* dp;
struct dirent* ep;
int iter = 0;
retry:
while (umount2(dir, MNT_DETACH) == 0) {
}
dp = opendir(dir);
if (dp == NULL) {
if (errno == EMFILE) {
exit(1);
}
exit(1);
}
while ((ep = readdir(dp))) {
if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0)
continue;
char filename[FILENAME_MAX];
snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name);
while (umount2(filename, MNT_DETACH) == 0) {
}
struct stat st;
if (lstat(filename, &st))
exit(1);
if (S_ISDIR(st.st_mode)) {
remove_dir(filename);
continue;
}
int i;
for (i = 0;; i++) {
if (unlink(filename) == 0)
break;
if (errno == EPERM) {
int fd = open(filename, O_RDONLY);
if (fd != -1) {
long flags = 0;
if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0)
close(fd);
continue;
}
}
if (errno == EROFS) {
break;
}
if (errno != EBUSY || i > 100)
exit(1);
if (umount2(filename, MNT_DETACH))
exit(1);
}
}
closedir(dp);
int i;
for (i = 0;; i++) {
if (rmdir(dir) == 0)
break;
if (i < 100) {
if (errno == EPERM) {
int fd = open(dir, O_RDONLY);
if (fd != -1) {
long flags = 0;
if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0)
close(fd);
continue;
}
}
if (errno == EROFS) {
break;
}
if (errno == EBUSY) {
if (umount2(dir, MNT_DETACH))
exit(1);
continue;
}
if (errno == ENOTEMPTY) {
if (iter < 100) {
iter++;
goto retry;
}
}
}
exit(1);
}
}

static void kill_and_wait(int pid, int* status)
{
kill(-pid, SIGKILL);
kill(pid, SIGKILL);
int i;
for (i = 0; i < 100; i++) {
if (waitpid(-1, status, WNOHANG | __WALL) == pid)
return;
usleep(1000);
}
DIR* dir = opendir("/sys/fs/fuse/connections");
if (dir) {
for (;;) {
struct dirent* ent = readdir(dir);
if (!ent)
break;
if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
continue;
char abort[300];
snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort",
ent->d_name);
int fd = open(abort, O_WRONLY);
if (fd == -1) {
continue;
}
if (write(fd, abort, 1) < 0) {
}
close(fd);
}
closedir(dir);
} else {
}
while (waitpid(-1, status, __WALL) != pid) {
}
}

#define SYZ_HAVE_SETUP_LOOP 1
static void setup_loop()
{
int pid = getpid();
char cgroupdir[64];
char file[128];
snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid);
if (mkdir(cgroupdir, 0777)) {
}
snprintf(file, sizeof(file), "%s/pids.max", cgroupdir);
write_file(file, "32");
snprintf(file, sizeof(file), "%s/memory.low", cgroupdir);
write_file(file, "%d", 298 << 20);
snprintf(file, sizeof(file), "%s/memory.high", cgroupdir);
write_file(file, "%d", 299 << 20);
snprintf(file, sizeof(file), "%s/memory.max", cgroupdir);
write_file(file, "%d", 300 << 20);
snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
write_file(file, "%d", pid);
snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid);
if (mkdir(cgroupdir, 0777)) {
}
snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
write_file(file, "%d", pid);
snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid);
if (mkdir(cgroupdir, 0777)) {
}
snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
write_file(file, "%d", pid);
checkpoint_net_namespace();
}

#define SYZ_HAVE_RESET_LOOP 1
static void reset_loop()
{
reset_net_namespace();
}

#define SYZ_HAVE_SETUP_TEST 1
static void setup_test()
{
prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
setpgrp();
char cgroupdir[64];
snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid);
if (symlink(cgroupdir, "./cgroup")) {
}
snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid);
if (symlink(cgroupdir, "./cgroup.cpu")) {
}
snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid);
if (symlink(cgroupdir, "./cgroup.net")) {
}
write_file("/proc/self/oom_score_adj", "1000");
}

#define SYZ_HAVE_RESET_TEST 1
static void reset_test()
{
int fd;
for (fd = 3; fd < 30; fd++)
close(fd);
}

struct thread_t {
int created, call;
event_t ready, done;
};

static struct thread_t threads[16];
static void execute_call(int call);
static int running;

static void* thr(void* arg)
{
struct thread_t* th = (struct thread_t*)arg;
for (;;) {
event_wait(&th->ready);
event_reset(&th->ready);
execute_call(th->call);
__atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED);
event_set(&th->done);
}
return 0;
}

static void execute_one(void)
{
int i, call, thread;
for (call = 0; call < 6; call++) {
for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0]));
thread++) {
struct thread_t* th = &threads[thread];
if (!th->created) {
th->created = 1;
event_init(&th->ready);
event_init(&th->done);
event_set(&th->done);
thread_start(thr, th);
}
if (!event_isset(&th->done))
continue;
event_reset(&th->done);
th->call = call;
__atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
event_set(&th->ready);
event_timedwait(&th->done, 45);
break;
}
}
for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++)
sleep_ms(1);
}

static void execute_one(void);

#define WAIT_FLAGS __WALL

static void loop(void)
{
setup_loop();
int iter;
for (iter = 0;; iter++) {
char cwdbuf[32];
sprintf(cwdbuf, "./%d", iter);
if (mkdir(cwdbuf, 0777))
exit(1);
reset_loop();
int pid = fork();
if (pid < 0)
exit(1);
if (pid == 0) {
if (chdir(cwdbuf))
exit(1);
setup_test();
execute_one();
reset_test();
exit(0);
}
int status = 0;
uint64_t start = current_time_ms();
for (;;) {
if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid)
break;
sleep_ms(1);
if (current_time_ms() - start < 5 * 1000)
continue;
kill_and_wait(pid, &status);
break;
}
remove_dir(cwdbuf);
}
}

uint64_t r[2] = {0xffffffffffffffff, 0xffffffffffffffff};

void execute_call(int call)
{
long res;
switch (call) {
case 0:
res = syscall(__NR_epoll_create1, 0);
if (res != -1)
r[0] = res;
break;
case 1:
NONFAILING(*(uint16_t*)0x20000080 = 0);
NONFAILING(*(uint16_t*)0x20000082 = 0);
NONFAILING(*(uint64_t*)0x20000088 = 0);
NONFAILING(*(uint64_t*)0x20000090 = 0);
NONFAILING(*(uint32_t*)0x20000098 = 0);
syscall(__NR_fcntl, r[0], 7, 0x20000080);
break;
case 2:
NONFAILING(*(uint16_t*)0x20000140 = 1);
NONFAILING(*(uint16_t*)0x20000142 = 0);
NONFAILING(*(uint64_t*)0x20000148 = 0x1000000);
NONFAILING(*(uint64_t*)0x20000150 = 0);
NONFAILING(*(uint32_t*)0x20000158 = 0);
syscall(__NR_fcntl, r[0], 7, 0x20000140);
break;
case 3:
syscall(__NR_bind, -1, 0, 0);
break;
case 4:
res = syscall(__NR_socket, 0xa, 1, 0x84);
if (res != -1)
r[1] = res;
break;
case 5:
syscall(__NR_sendto, r[1], 0, 0, 0, 0, 0);
break;
}
}
int main(void)
{
syscall(__NR_mmap, 0x20000000, 0x1000000, 3, 0x32, -1, 0);
install_segv_handler();
for (procid = 0; procid < 6; procid++) {
if (fork() == 0) {
use_temporary_dir();
do_sandbox_namespace();
}
}
sleep(1000000);
return 0;
}