Re: net/ipv4: deadlock in ip_ra_control

From: Andrey Konovalov
Date: Wed Apr 12 2017 - 08:06:11 EST


On Mon, Mar 6, 2017 at 3:04 AM, Cong Wang <xiyou.wangcong@xxxxxxxxx> wrote:
> On Fri, Mar 3, 2017 at 10:43 AM, Dmitry Vyukov <dvyukov@xxxxxxxxxx> wrote:
>> On Thu, Mar 2, 2017 at 10:40 AM, Dmitry Vyukov <dvyukov@xxxxxxxxxx> wrote:
>>> On Wed, Mar 1, 2017 at 6:18 PM, Cong Wang <xiyou.wangcong@xxxxxxxxx> wrote:
>>>> On Wed, Mar 1, 2017 at 2:44 AM, Dmitry Vyukov <dvyukov@xxxxxxxxxx> wrote:
>>>>> Hello,
>>>>>
>>>>> I've got the following deadlock report while running syzkaller fuzzer
>>>>> on linux-next/51788aebe7cae79cb334ad50641347465fc188fd:
>>>>>
>>>>> ======================================================
>>>>> [ INFO: possible circular locking dependency detected ]
>>>>> 4.10.0-next-20170301+ #1 Not tainted
>>>>> -------------------------------------------------------
>>>>> syz-executor1/3394 is trying to acquire lock:
>>>>> (sk_lock-AF_INET){+.+.+.}, at: [<ffffffff838864cc>] lock_sock
>>>>> include/net/sock.h:1460 [inline]
>>>>> (sk_lock-AF_INET){+.+.+.}, at: [<ffffffff838864cc>]
>>>>> do_ip_setsockopt.isra.12+0x21c/0x3540 net/ipv4/ip_sockglue.c:652
>>>>>
>>>>> but task is already holding lock:
>>>>> (rtnl_mutex){+.+.+.}, at: [<ffffffff836fbd97>] rtnl_lock+0x17/0x20
>>>>> net/core/rtnetlink.c:70
>>>>>
>>>>> which lock already depends on the new lock.
>>>>>
>>>>>
>>>>> the existing dependency chain (in reverse order) is:
>>>>>
>>>>> -> #1 (rtnl_mutex){+.+.+.}:
>>>>> validate_chain kernel/locking/lockdep.c:2265 [inline]
>>>>> __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3338
>>>>> lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3753
>>>>> __mutex_lock_common kernel/locking/mutex.c:754 [inline]
>>>>> __mutex_lock+0x172/0x1730 kernel/locking/mutex.c:891
>>>>> mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:906
>>>>> rtnl_lock+0x17/0x20 net/core/rtnetlink.c:70
>>>>> mrtsock_destruct+0x86/0x2c0 net/ipv4/ipmr.c:1281
>>>>> ip_ra_control+0x459/0x600 net/ipv4/ip_sockglue.c:372
>>>>> do_ip_setsockopt.isra.12+0x1064/0x3540 net/ipv4/ip_sockglue.c:1161
>>>>> ip_setsockopt+0x3a/0xb0 net/ipv4/ip_sockglue.c:1264
>>>>> raw_setsockopt+0xb7/0xd0 net/ipv4/raw.c:839
>>>>> sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2725
>>>>> SYSC_setsockopt net/socket.c:1786 [inline]
>>>>> SyS_setsockopt+0x25c/0x390 net/socket.c:1765
>>>>> entry_SYSCALL_64_fastpath+0x1f/0xc2
>>>>>
>>>>> -> #0 (sk_lock-AF_INET){+.+.+.}:
>>>>> check_prev_add kernel/locking/lockdep.c:1828 [inline]
>>>>> check_prevs_add+0xa8f/0x19f0 kernel/locking/lockdep.c:1938
>>>>> validate_chain kernel/locking/lockdep.c:2265 [inline]
>>>>> __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3338
>>>>> lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3753
>>>>> lock_sock_nested+0xcb/0x120 net/core/sock.c:2530
>>>>> lock_sock include/net/sock.h:1460 [inline]
>>>>> do_ip_setsockopt.isra.12+0x21c/0x3540 net/ipv4/ip_sockglue.c:652
>>>>> ip_setsockopt+0x3a/0xb0 net/ipv4/ip_sockglue.c:1264
>>>>> tcp_setsockopt+0x82/0xd0 net/ipv4/tcp.c:2721
>>>>> sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2725
>>>>> SYSC_setsockopt net/socket.c:1786 [inline]
>>>>> SyS_setsockopt+0x25c/0x390 net/socket.c:1765
>>>>> entry_SYSCALL_64_fastpath+0x1f/0xc2
>>>>>
>>>>
>>>> Please try the attached patch (compile only).
>>>
>>>
>>> Pushed the patch to the bots.
>>> Thanks
>>
>>
>> This patch triggers:
>
> Ah, updated the patch to fix this.

Hi Cong,

I now have a reproducer for this bug (attached) and your patch fixes it.

Could you send it?

Thanks!

>
> --
> You received this message because you are subscribed to the Google Groups "syzkaller" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to syzkaller+unsubscribe@xxxxxxxxxxxxxxxxx
> For more options, visit https://groups.google.com/d/optout.
// autogenerated by syzkaller (http://github.com/google/syzkaller)

/*
 * Must be defined before the first libc header is included so that the
 * GNU/Linux extensions used below (syscall(), unshare(), ...) are declared.
 */
#define _GNU_SOURCE

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>

#include <linux/capability.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <linux/kvm.h>
#include <linux/sched.h>
#include <net/if_arp.h>

#include <assert.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <grp.h>
#include <pthread.h>
#include <setjmp.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Fallback syscall numbers (x86-64 values), used only when the system
 * headers above did not provide them. These guards have to come AFTER
 * <sys/syscall.h>: evaluated before any include they can never see the
 * header definitions and would unconditionally hard-code the x86-64
 * numbers, even on other architectures.
 */
#ifndef __NR_mmap
#define __NR_mmap 9
#endif
#ifndef __NR_socket
#define __NR_socket 41
#endif
#ifndef __NR_setsockopt
#define __NR_setsockopt 54
#endif

/* Process exit statuses passed to doexit() by the error helpers below. */

/* Used by fail() when errno is neither ENOMEM nor EAGAIN. */
const int kFailStatus = 67;
/* Not referenced anywhere else in this file. */
const int kErrorStatus = 68;
/* Used by exitf(), and by fail() when errno is ENOMEM or EAGAIN. */
const int kRetryStatus = 69;

/*
 * Terminate the process with exit status `status` by issuing the
 * exit_group syscall directly instead of going through exit().
 * Never returns.
 */
__attribute__((noreturn)) void doexit(int status)
{
	volatile unsigned spin = 0;

	syscall(__NR_exit_group, status);
	/* Not expected to get here; spin so the noreturn promise still holds. */
	while (1)
		spin++;
}

/*
 * Report a fatal error and terminate the process.
 *
 * Flushes stdout, prints the printf-style message `msg` followed by
 * " (errno N)" to stderr, where N is the errno value on entry, and exits
 * through doexit(): with kRetryStatus when that errno is ENOMEM or EAGAIN,
 * otherwise with kFailStatus. Never returns.
 */
__attribute__((noreturn)) void fail(const char* msg, ...)
{
	/* Capture errno first: the stdio calls below may overwrite it. */
	const int saved_errno = errno;
	int exit_status;
	va_list ap;

	fflush(stdout);
	va_start(ap, msg);
	vfprintf(stderr, msg, ap);
	va_end(ap);
	fprintf(stderr, " (errno %d)\n", saved_errno);

	if (saved_errno == ENOMEM || saved_errno == EAGAIN)
		exit_status = kRetryStatus;
	else
		exit_status = kFailStatus;
	doexit(exit_status);
}

/*
 * Report an error and terminate the process with kRetryStatus.
 *
 * Flushes stdout, prints the printf-style message `msg` followed by
 * " (errno N)" to stderr, where N is the errno value on entry, then exits
 * through doexit(). Never returns.
 */
__attribute__((noreturn)) void exitf(const char* msg, ...)
{
	/* Capture errno first: the stdio calls below may overwrite it. */
	const int saved_errno = errno;
	va_list ap;

	fflush(stdout);
	va_start(ap, msg);
	vfprintf(stderr, msg, ap);
	va_end(ap);
	fprintf(stderr, " (errno %d)\n", saved_errno);

	doexit(kRetryStatus);
}

/* Non-zero enables debug() output; nothing in this file sets it. */
static int flag_debug;

/*
 * printf-style debug logging to stdout, flushed after every message.
 * Does nothing while flag_debug is zero.
 */
void debug(const char* msg, ...)
{
	if (flag_debug) {
		va_list ap;

		va_start(ap, msg);
		vfprintf(stdout, msg, ap);
		va_end(ap);
		fflush(stdout);
	}
}

/* Per-thread nesting count of active NONFAILING() regions. */
__thread int skip_segv;
/* Per-thread jump target saved by NONFAILING() via _setjmp(). */
__thread jmp_buf segv_env;

/*
 * SIGSEGV/SIGBUS handler (installed by install_segv_handler()).
 *
 * If the current thread is inside a NONFAILING() region (skip_segv != 0)
 * and the faulting address lies outside [1 MiB, 100 MiB], the fault is
 * skipped by jumping back to the _setjmp() point stored in segv_env.
 * Any other fault terminates the process via doexit(), using the signal
 * number as the exit status.
 *
 * sig:  signal number being delivered.
 * info: signal details; only si_addr (the faulting address) is read.
 * uctx: unused.
 */
static void segv_handler(int sig, siginfo_t* info, void* uctx)
{
	(void)uctx;

	void* fault = info->si_addr;
	uintptr_t addr = (uintptr_t)fault;
	const uintptr_t prog_start = 1 << 20;
	const uintptr_t prog_end = 100 << 20;
	if (__atomic_load_n(&skip_segv, __ATOMIC_RELAXED) &&
	    (addr < prog_start || addr > prog_end)) {
		/* %p requires a void* argument; passing a uintptr_t is a mismatch. */
		debug("SIGSEGV on %p, skipping\n", fault);
		_longjmp(segv_env, 1);
	}
	debug("SIGSEGV on %p, exiting\n", fault);
	/* doexit() is declared noreturn, so nothing after it can execute. */
	doexit(sig);
}

static void install_segv_handler()
{
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
sa.sa_sigaction = segv_handler;
sa.sa_flags = SA_NODEFER | SA_SIGINFO;
sigaction(SIGSEGV, &sa, NULL);
sigaction(SIGBUS, &sa, NULL);
}

/*
 * NONFAILING(stmt): execute `stmt`, tolerating faults raised while it runs.
 *
 * skip_segv is incremented for the duration of the statement and a jump
 * target is saved in segv_env; segv_handler() jumps back to it when it
 * decides a fault should be skipped, in which case the rest of `stmt` is
 * abandoned.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and `if (c) NONFAILING(x); else ...` parses correctly.
 * As a consequence, `stmt` must not use break/continue to reach a loop
 * that encloses the macro invocation.
 */
#define NONFAILING(...) \
	do { \
		__atomic_fetch_add(&skip_segv, 1, __ATOMIC_SEQ_CST); \
		if (_setjmp(segv_env) == 0) { \
			__VA_ARGS__; \
		} \
		__atomic_fetch_sub(&skip_segv, 1, __ATOMIC_SEQ_CST); \
	} while (0)

/* Mask of type `type` with the low `bf_len` bits set (bf_len must be < 64). */
#define BITMASK_LEN(type, bf_len) (type)((1ull << (bf_len)) - 1)

/* Mask of type `type` with `bf_len` bits set, starting at bit `bf_off`. */
#define BITMASK_LEN_OFF(type, bf_off, bf_len) \
	(type)(BITMASK_LEN(type, (bf_len)) << (bf_off))

/*
 * Store `val` into the object of type `type` at `addr`.
 *
 * With bf_off == 0 and bf_len == 0 the whole object is overwritten.
 * Otherwise only the bit-field of `bf_len` bits starting at bit `bf_off`
 * is replaced and all other bits are preserved.
 *
 * Wrapped in do { } while (0) so the macro is a single statement and can
 * be used safely as the body of an unbraced if/else.
 */
#define STORE_BY_BITMASK(type, addr, val, bf_off, bf_len) \
	do { \
		if ((bf_off) == 0 && (bf_len) == 0) { \
			*(type*)(addr) = (type)(val); \
		} else { \
			type new_val = *(type*)(addr); \
			new_val &= ~BITMASK_LEN_OFF(type, (bf_off), (bf_len)); \
			new_val |= ((type)(val) & BITMASK_LEN(type, (bf_len))) << (bf_off); \
			*(type*)(addr) = new_val; \
		} \
	} while (0)

/*
 * Issue raw syscall number `nr` with arguments a0..a5 and return the
 * result of syscall() converted to uintptr_t (so a failure, -1, becomes
 * the all-ones value). a6..a8 are accepted but never forwarded.
 */
static uintptr_t execute_syscall(int nr, uintptr_t a0, uintptr_t a1,
				 uintptr_t a2, uintptr_t a3,
				 uintptr_t a4, uintptr_t a5,
				 uintptr_t a6, uintptr_t a7,
				 uintptr_t a8)
{
	(void)a6;
	(void)a7;
	(void)a8;

	const long result = syscall(nr, a0, a1, a2, a3, a4, a5);
	return (uintptr_t)result;
}

/*
 * One-time setup of the top-level process:
 *  - set signals 0x20 and 0x21 to SIG_IGN through the raw rt_sigaction
 *    syscall (presumably because libc's sigaction() wrapper rejects these
 *    signal numbers; results are not checked),
 *  - install the SIGSEGV/SIGBUS handler,
 *  - create a fresh "./syzkaller.XXXXXX" directory, make it mode 0777 and
 *    chdir into it; any failure there is fatal via fail().
 */
static void setup_main_process()
{
	struct sigaction ignore;
	int signo;

	memset(&ignore, 0, sizeof(ignore));
	ignore.sa_handler = SIG_IGN;
	for (signo = 0x20; signo <= 0x21; signo++)
		syscall(SYS_rt_sigaction, signo, &ignore, NULL, 8);

	install_segv_handler();

	char workdir_template[] = "./syzkaller.XXXXXX";
	char* workdir = mkdtemp(workdir_template);
	if (workdir == NULL)
		fail("failed to mkdtemp");
	if (chmod(workdir, 0777) != 0)
		fail("failed to chmod");
	if (chdir(workdir) != 0)
		fail("failed to chdir");
}

static void loop();

/*
 * Best-effort confinement applied in the child before running the
 * reproducer; no return value is checked.
 *
 *  - request SIGKILL when the parent dies (PR_SET_PDEATHSIG),
 *  - move into a new process group and session,
 *  - cap address space (128 MiB), file size (1 MiB), stack (1 MiB) and
 *    core dumps (0),
 *  - call unshare() with CLONE_NEWNS, CLONE_NEWIPC and CLONE_IO.
 */
static void sandbox_common()
{
	static const struct {
		int resource;
		rlim_t limit;
	} limits[] = {
		{RLIMIT_AS, 128 << 20},
		{RLIMIT_FSIZE, 1 << 20},
		{RLIMIT_STACK, 1 << 20},
		{RLIMIT_CORE, 0},
	};
	size_t idx;

	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
	setpgrp();
	setsid();

	/* Soft and hard limits are set to the same value, in table order. */
	for (idx = 0; idx < sizeof(limits) / sizeof(limits[0]); idx++) {
		struct rlimit rlim;

		rlim.rlim_max = limits[idx].limit;
		rlim.rlim_cur = rlim.rlim_max;
		setrlimit(limits[idx].resource, &rlim);
	}

	unshare(CLONE_NEWNS);
	unshare(CLONE_NEWIPC);
	unshare(CLONE_IO);
}

/*
 * Fork a child that applies sandbox_common(), runs loop() once and then
 * exits with status 1 via doexit().
 *
 * executor_pid, enable_tun: unused here.
 * Returns fork()'s result to the caller: the child's pid, or -1 if the
 * fork failed. The child never returns from this function.
 */
static int do_sandbox_none(int executor_pid, bool enable_tun)
{
	const int child = fork();

	if (child != 0)
		return child;

	/* Child process from here on. */
	sandbox_common();
	loop();
	doexit(1);
}

/*
 * Results of the syscalls issued by loop(); slots that loop() does not
 * assign keep the all-ones pattern written by memset (i.e. -1).
 */
long r[10];
/*
 * The reproducer proper: one mmap, one socket, then three setsockopt calls
 * on that socket. Every syscall result is stored in r[] and never checked.
 *
 * NOTE(review): the raw constants below are annotated with the Linux names
 * they presumably correspond to on x86-64; confirm against the system
 * headers before relying on them. Do not reorder or "clean up" the calls:
 * the exact sequence is what triggers the report.
 */
void loop()
{
memset(r, -1, sizeof(r));
/*
 * Map 0x4000 bytes at 0x20000000, i.e. [0x20000000, 0x20004000).
 * prot 0x3 is presumably PROT_READ|PROT_WRITE, flags 0x32 presumably
 * MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, fd is -1.
 */
r[0] = execute_syscall(__NR_mmap, 0x20000000ul, 0x4000ul, 0x3ul,
0x32ul, 0xfffffffffffffffful, 0x0ul, 0, 0, 0);
/* socket(2, 0x80003, 2): presumably AF_INET, SOCK_RAW|SOCK_CLOEXEC, IPPROTO_IGMP. */
r[1] = execute_syscall(__NR_socket, 0x2ul, 0x80003ul, 0x2ul, 0, 0, 0,
0, 0, 0);
/*
 * 0x20f01000 lies outside the 0x4000-byte mapping created above, so this
 * store is expected to fault; NONFAILING lets segv_handler() skip it.
 */
NONFAILING(*(uint32_t*)0x20f01000 = (uint32_t)0x0);
/*
 * setsockopt(level 0, optname 0xc8 = 200, optlen 4). Presumably MRT_INIT
 * from <linux/mroute.h>, which would match the mrtsock_destruct frame in
 * the lockdep report this reproducer was posted with.
 */
r[3] = execute_syscall(__NR_setsockopt, r[1], 0x0ul, 0xc8ul,
0x20f01000ul, 0x4ul, 0, 0, 0, 0);
/* Fill a 12-byte option value in the last 12 bytes of the mapping's second page. */
NONFAILING(*(uint32_t*)0x20001ff4 = (uint32_t)0xa2090000);
NONFAILING(*(uint32_t*)0x20001ff8 = (uint32_t)0x0);
NONFAILING(*(uint32_t*)0x20001ffc = (uint32_t)0x9);
/* setsockopt(level 0, optname 0x23 = 35, optlen 12): presumably IP_ADD_MEMBERSHIP. */
r[7] = execute_syscall(__NR_setsockopt, r[1], 0x0ul, 0x23ul,
0x20001ff4ul, 0xcul, 0, 0, 0, 0);
NONFAILING(*(uint32_t*)0x20000000 = (uint32_t)0x0);
/*
 * setsockopt(level 0, optname 0x5, optlen 4) with value 0: presumably
 * IP_ROUTER_ALERT being switched off, i.e. the ip_ra_control() path named
 * in the lockdep report.
 */
r[9] = execute_syscall(__NR_setsockopt, r[1], 0x0ul, 0x5ul,
0x20000000ul, 0x4ul, 0, 0, 0, 0);
}
/*
 * Entry point: prepare the process, run the reproducer once in a forked
 * child and wait for that child to finish.
 *
 * Returns 0 once the child has been reaped (whatever its exit status),
 * and 1 if the child could not be created or waited for.
 */
int main()
{
	setup_main_process();
	int pid = do_sandbox_none(0, false);
	if (pid < 0) {
		/* fork() failed: nothing ran, so do not report success. */
		perror("fork");
		return 1;
	}
	int status = 0;
	for (;;) {
		int res = waitpid(pid, &status, __WALL);
		if (res == pid)
			break;
		/* Retry only on interruption; any other error would repeat forever. */
		if (res < 0 && errno != EINTR) {
			perror("waitpid");
			return 1;
		}
	}
	return 0;
}