possible deadlock in __wake_up_common_lock

From: chase xd
Date: Mon Dec 09 2024 - 07:03:55 EST


Hi, syzkaller reported this on LTS 6.1.119; it looks like a new bug:

Syzkaller hit 'possible deadlock in __wake_up_common_lock' bug.

============================================
WARNING: possible recursive locking detected
6.1.119-dirty #3 Not tainted
--------------------------------------------
syz-executor199/6820 is trying to acquire lock:
ffff88807c386378 (&ctx->cq_wait){....}-{2:2}, at:
__wake_up_common_lock+0xb8/0x140 kernel/sched/wait.c:137

but task is already holding lock:
ffff88807c386378 (&ctx->cq_wait){....}-{2:2}, at:
__wake_up_common_lock+0xb8/0x140 kernel/sched/wait.c:137

other info that might help us debug this:
Possible unsafe locking scenario:

CPU0
----
lock(&ctx->cq_wait);
lock(&ctx->cq_wait);

*** DEADLOCK ***

May be due to missing lock nesting notation

2 locks held by syz-executor199/6820:
#0: ffff88807c3860a8 (&ctx->uring_lock){+.+.}-{3:3}, at:
__do_sys_io_uring_enter+0x8fc/0x2130 io_uring/io_uring.c:3313
#1: ffff88807c386378 (&ctx->cq_wait){....}-{2:2}, at:
__wake_up_common_lock+0xb8/0x140 kernel/sched/wait.c:137

stack backtrace:
CPU: 7 PID: 6820 Comm: syz-executor199 Not tainted 6.1.119-dirty #3
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0x5b/0x85 lib/dump_stack.c:106
print_deadlock_bug kernel/locking/lockdep.c:2983 [inline]
check_deadlock kernel/locking/lockdep.c:3026 [inline]
validate_chain kernel/locking/lockdep.c:3812 [inline]
__lock_acquire.cold+0x219/0x3bd kernel/locking/lockdep.c:5049
lock_acquire kernel/locking/lockdep.c:5662 [inline]
lock_acquire+0x1e3/0x5e0 kernel/locking/lockdep.c:5627
__raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
_raw_spin_lock_irqsave+0x3d/0x60 kernel/locking/spinlock.c:162
__wake_up_common_lock+0xb8/0x140 kernel/sched/wait.c:137
__io_cqring_wake io_uring/io_uring.h:224 [inline]
__io_cqring_wake io_uring/io_uring.h:211 [inline]
io_req_local_work_add io_uring/io_uring.c:1135 [inline]
__io_req_task_work_add+0x4a4/0xd60 io_uring/io_uring.c:1146
io_poll_wake+0x3cb/0x550 io_uring/poll.c:465
__wake_up_common+0x14c/0x650 kernel/sched/wait.c:107
__wake_up_common_lock+0xd4/0x140 kernel/sched/wait.c:138
__io_cqring_wake io_uring/io_uring.h:224 [inline]
__io_cqring_wake io_uring/io_uring.h:211 [inline]
io_cqring_wake io_uring/io_uring.h:231 [inline]
io_cqring_ev_posted io_uring/io_uring.c:578 [inline]
__io_cq_unlock_post io_uring/io_uring.c:586 [inline]
__io_submit_flush_completions+0x778/0xba0 io_uring/io_uring.c:1346
io_submit_flush_completions io_uring/io_uring.c:159 [inline]
io_submit_state_end io_uring/io_uring.c:2203 [inline]
io_submit_sqes+0xa78/0x1ce0 io_uring/io_uring.c:2317
__do_sys_io_uring_enter+0x907/0x2130 io_uring/io_uring.c:3314
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
do_syscall_64+0x3a/0xb0 arch/x86/entry/common.c:81
entry_SYSCALL_64_after_hwframe+0x6e/0xd8
RIP: 0033:0x7fa54e70640d
Code: 28 c3 e8 46 1e 00 00 66 0f 1f 44 00 00 f3 0f 1e fa 48 89 f8 48
89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d
01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007ffd0ad80be8 EFLAGS: 00000246 ORIG_RAX: 00000000000001aa
RAX: ffffffffffffffda RBX: 00007ffd0ad80df8 RCX: 00007fa54e70640d
RDX: 0000000000000000 RSI: 000000000000331b RDI: 0000000000000003
RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001
R13: 00007ffd0ad80de8 R14: 00007fa54e783530 R15: 0000000000000001
</TASK>


Syzkaller reproducer:
# {Threaded:false Repeat:false RepeatTimes:0 Procs:1 Slowdown:1
Sandbox: SandboxArg:0 Leak:false NetInjection:false NetDevices:false
NetReset:false Cgroups:false BinfmtMisc:false CloseFDs:false
KCSAN:false DevlinkPCI:false NicVF:false USB:false VhciInjection:false
Wifi:false IEEE802154:false Sysctl:false Swap:false UseTmpDir:false
HandleSegv:false Repro:false Trace:false LegacyOptions:{Collide:false
Fault:false FaultCall:0 FaultNth:0}}
r0 = syz_io_uring_setup(0x100, &(0x7f0000000000)={0x0, 0x0, 0x3a40},
&(0x7f0000000180)=<r1=>0x0, &(0x7f00000001c0)=<r2=>0x0)
syz_io_uring_setup(0x255d, &(0x7f00000001c0)={0x0, 0x0, 0x40, 0x0,
0x3, 0x0, r0}, &(0x7f0000000140), &(0x7f00000024c0)=<r3=>0x0)
syz_io_uring_submit(r1, r3, &(0x7f00000000c0)=@IORING_OP_SEND={0x1a,
0x0, 0x0, 0xffffffffffffffff, 0x0, 0x0})
io_uring_register$IORING_REGISTER_ENABLE_RINGS(r0, 0xc, 0x0, 0x0)
syz_io_uring_submit(r1, r2,
&(0x7f0000000100)=@IORING_OP_READV=@use_registered_buffer={0x1, 0x0,
0x0, @fd=r0})
syz_io_uring_submit(r1, r2,
&(0x7f0000000100)=@IORING_OP_READV=@use_registered_buffer={0x1, 0x0,
0x0, @fd=r0})
io_uring_enter(r0, 0x331b, 0x0, 0x0, 0x0, 0x0)


C reproducer:
// autogenerated by syzkaller (https://github.com/google/syzkaller)

#define _GNU_SOURCE

#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

// Fallback syscall numbers for the three io_uring syscalls, used only when
// the headers included above do not already define them.
#ifndef __NR_io_uring_enter
#define __NR_io_uring_enter 426
#endif
#ifndef __NR_io_uring_register
#define __NR_io_uring_register 427
#endif
#ifndef __NR_io_uring_setup
#define __NR_io_uring_setup 425
#endif

// Hard-coded sizes and byte offsets describing the mapped io_uring regions.
// Used by the helpers below:
//   SIZEOF_IO_URING_SQE  - bytes copied per submission entry
//   SIZEOF_IO_URING_CQE  - per-entry size in the CQ ring size computation
//   SQ_TAIL_OFFSET       - where the SQ tail index is read/written in the ring
//   SQ_RING_MASK_OFFSET  - where the SQ index mask is read in the ring
// The remaining constants are not referenced by the code shown here.
// NOTE(review): the offsets presumably match the kernel's ring layout for
// the targeted kernel version -- confirm before reusing them elsewhere.
#define SIZEOF_IO_URING_SQE 64
#define SIZEOF_IO_URING_CQE 16
#define SQ_HEAD_OFFSET 0
#define SQ_TAIL_OFFSET 64
#define SQ_RING_MASK_OFFSET 256
#define SQ_RING_ENTRIES_OFFSET 264
#define SQ_FLAGS_OFFSET 276
#define SQ_DROPPED_OFFSET 272
#define CQ_HEAD_OFFSET 128
#define CQ_TAIL_OFFSET 192
#define CQ_RING_MASK_OFFSET 260
#define CQ_RING_ENTRIES_OFFSET 268
#define CQ_RING_OVERFLOW_OFFSET 284
#define CQ_FLAGS_OFFSET 280
#define CQ_CQES_OFFSET 320

/*
 * Submission-queue part of struct io_uring_params (embedded there as sq_off).
 *
 * The reproducer itself only reads `array`, which it treats as a byte offset
 * from the start of the mapped ring to the SQ index array.
 * NOTE(review): presumably mirrors the kernel uapi struct of the same name,
 * with each field being a byte offset into the ring mapping -- confirm.
 */
struct io_sqring_offsets {
    uint32_t head;
    uint32_t tail;
    uint32_t ring_mask;
    uint32_t ring_entries;
    uint32_t flags;
    uint32_t dropped;
    uint32_t array; /* offset of the SQ index array inside the ring mapping */
    uint32_t resv1;
    uint64_t resv2;
};

/*
 * Completion-queue part of struct io_uring_params (embedded there as cq_off).
 *
 * The reproducer itself only reads `cqes`, which it uses as the starting
 * offset of the CQE array when computing the size of the ring mapping.
 * NOTE(review): presumably mirrors the kernel uapi struct of the same name
 * -- confirm against the targeted kernel headers.
 */
struct io_cqring_offsets {
    uint32_t head;
    uint32_t tail;
    uint32_t ring_mask;
    uint32_t ring_entries;
    uint32_t overflow;
    uint32_t cqes; /* offset of the CQE array inside the ring mapping */
    uint64_t resv[2];
};

/*
 * Parameter block handed to the io_uring_setup syscall.
 *
 * syz_io_uring_setup() edits `flags` before the call and afterwards reads
 * sq_entries, cq_entries, sq_off.array and cq_off.cqes to size the mappings.
 * main() fills this structure through raw stores at fixed addresses, so the
 * field order and sizes here must not change.
 */
struct io_uring_params {
    uint32_t sq_entries;
    uint32_t cq_entries;
    uint32_t flags;
    uint32_t sq_thread_cpu;
    uint32_t sq_thread_idle;
    uint32_t features;
    uint32_t resv[4];
    struct io_sqring_offsets sq_off; /* submission ring layout */
    struct io_cqring_offsets cq_off; /* completion ring layout */
};

// mmap offsets used on the io_uring fd: IORING_OFF_SQ_RING selects the ring
// mapping, IORING_OFF_SQES selects the SQE array mapping.
#define IORING_OFF_SQ_RING 0
#define IORING_OFF_SQES 0x10000000ULL
// Setup flags that syz_io_uring_setup() always clears from the caller's
// params before invoking the syscall.
#define IORING_SETUP_SQE128 (1U << 10)
#define IORING_SETUP_CQE32 (1U << 11)

/*
 * Create an io_uring instance and map its ring and SQE array.
 *
 * a0: requested number of entries (truncated to 32 bits).
 * a1: address of a struct io_uring_params; its flags have
 *     IORING_SETUP_CQE32 and IORING_SETUP_SQE128 cleared before the call
 *     (presumably because the entry sizes used below are hard-coded).
 * a2: address of a void* that receives the ring mapping.
 * a3: address of a void* that receives the SQE array mapping.
 *
 * Returns the io_uring file descriptor, or -1 if the setup syscall or either
 * mmap fails. The output pointers are only meaningful on success.
 *
 * The syscall result is kept as a long: storing it in a uint32_t turned a
 * failing -1 into 4294967295 on LP64, so callers comparing against -1 could
 * never observe the failure, and the code went on to mmap an invalid fd and
 * write through the resulting MAP_FAILED pointer.
 */
static long syz_io_uring_setup(volatile long a0, volatile long a1,
                               volatile long a2, volatile long a3)
{
    uint32_t entries = (uint32_t)a0;
    struct io_uring_params* setup_params = (struct io_uring_params*)a1;
    void** ring_ptr_out = (void**)a2;
    void** sqes_ptr_out = (void**)a3;

    setup_params->flags &= ~(IORING_SETUP_CQE32 | IORING_SETUP_SQE128);

    long fd_io_uring = syscall(__NR_io_uring_setup, entries, setup_params);
    if (fd_io_uring < 0) {
        return -1;
    }

    /* One mapping covers both rings, so size it for the larger of the two. */
    uint32_t sq_ring_sz =
        setup_params->sq_off.array + setup_params->sq_entries * sizeof(uint32_t);
    uint32_t cq_ring_sz = setup_params->cq_off.cqes +
                          setup_params->cq_entries * SIZEOF_IO_URING_CQE;
    uint32_t ring_sz = sq_ring_sz > cq_ring_sz ? sq_ring_sz : cq_ring_sz;

    void* ring_ptr =
        mmap(0, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
             (int)fd_io_uring, IORING_OFF_SQ_RING);
    if (ring_ptr == MAP_FAILED) {
        close((int)fd_io_uring);
        return -1;
    }
    *ring_ptr_out = ring_ptr;

    uint32_t sqes_sz = setup_params->sq_entries * SIZEOF_IO_URING_SQE;
    void* sqes_ptr = mmap(0, sqes_sz, PROT_READ | PROT_WRITE,
                          MAP_SHARED | MAP_POPULATE, (int)fd_io_uring,
                          IORING_OFF_SQES);
    if (sqes_ptr == MAP_FAILED) {
        munmap(ring_ptr, ring_sz);
        close((int)fd_io_uring);
        return -1;
    }
    *sqes_ptr_out = sqes_ptr;

    /* Identity-map the SQ index array: slot i refers to SQE i. */
    uint32_t* array =
        (uint32_t*)((uintptr_t)ring_ptr + setup_params->sq_off.array);
    for (uint32_t index = 0; index < entries; index++) {
        array[index] = index;
    }

    return fd_io_uring;
}

/*
 * Queue one submission entry on an already-mapped ring.
 *
 * a0: address of the ring mapping.
 * a1: address of the SQE array mapping.
 * a2: address of the SIZEOF_IO_URING_SQE-byte entry to copy in.
 *
 * The entry is copied into the slot selected by (tail & ring mask), then the
 * tail is advanced by one with a release store so the copied entry is
 * published before the new tail value. Always returns 0.
 */
static long syz_io_uring_submit(volatile long a0, volatile long a1,
                                volatile long a2)
{
    char* ring_base = (char*)a0;
    char* sqes_base = (char*)a1;
    char* src_entry = (char*)a2;

    uint32_t mask = *(uint32_t*)(ring_base + SQ_RING_MASK_OFFSET);
    uint32_t* tail_ptr = (uint32_t*)(ring_base + SQ_TAIL_OFFSET);
    uint32_t slot = *tail_ptr & mask;

    memcpy(sqes_base + slot * SIZEOF_IO_URING_SQE, src_entry,
           SIZEOF_IO_URING_SQE);

    uint32_t new_tail = *tail_ptr + 1;
    __atomic_store_n(tail_ptr, new_tail, __ATOMIC_RELEASE);
    return 0;
}

// Results carried between the steps of main():
//   r[0] - fd returned by the first syz_io_uring_setup call
//   r[1] - ring mapping pointer from the first setup
//   r[2] - SQE array pointer from the first setup
//   r[3] - SQE array pointer from the second setup
uint64_t r[4] = {0xffffffffffffffff, 0x0, 0x0, 0x0};

// Replays the syzkaller program listed above: sets up two io_uring
// instances, queues three SQEs through the first instance's ring mapping,
// issues io_uring_register opcode 0xc on the first instance in between, and
// finally calls io_uring_enter on the first instance.
// Return values of the raw syscalls are not checked.
int main(void)
{
// Fixed address space for every hard-coded pointer below: a 16 MiB
// read/write/exec region at 0x20000000, with one inaccessible (prot=0) page
// directly below it and one directly above it.
syscall(__NR_mmap, /*addr=*/0x1ffff000ul, /*len=*/0x1000ul, /*prot=*/0ul,
/*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1,
/*offset=*/0ul);
syscall(__NR_mmap, /*addr=*/0x20000000ul, /*len=*/0x1000000ul,
/*prot=PROT_WRITE|PROT_READ|PROT_EXEC*/ 7ul,
/*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1,
/*offset=*/0ul);
syscall(__NR_mmap, /*addr=*/0x21000000ul, /*len=*/0x1000ul, /*prot=*/0ul,
/*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1,
/*offset=*/0ul);
intptr_t res = 0;
// Parameter block for the first ring at 0x20000000. Offsets follow the
// struct io_uring_params declared in this file: +4 cq_entries, +8 flags
// (0x3a40), +12 sq_thread_cpu, +16 sq_thread_idle, +24 resv[0] (-1),
// +28..+39 resv[1..3] (zeroed). sq_entries and features are not written.
*(uint32_t*)0x20000004 = 0;
*(uint32_t*)0x20000008 = 0x3a40;
*(uint32_t*)0x2000000c = 0;
*(uint32_t*)0x20000010 = 0;
*(uint32_t*)0x20000018 = -1;
memset((void*)0x2000001c, 0, 12);
res = -1;
res = syz_io_uring_setup(/*entries=*/0x100, /*params=*/0x20000000,
/*ring_ptr=*/0x20000180, /*sqes_ptr=*/0x200001c0);
// Save the fd and the two mapping pointers that the helper stored at
// 0x20000180 and 0x200001c0.
if (res != -1) {
r[0] = res;
r[1] = *(uint64_t*)0x20000180;
r[2] = *(uint64_t*)0x200001c0;
}
// Parameter block for the second ring at 0x200001c0. This reuses the
// address where the first setup stored its SQE-array pointer; that pointer
// has already been copied into r[2]. Fields: +4 cq_entries, +8 flags (0x40),
// +12 sq_thread_cpu, +16 sq_thread_idle (3), +24 resv[0] (first ring's fd),
// +28..+39 zeroed.
*(uint32_t*)0x200001c4 = 0;
*(uint32_t*)0x200001c8 = 0x40;
*(uint32_t*)0x200001cc = 0;
*(uint32_t*)0x200001d0 = 3;
*(uint32_t*)0x200001d8 = r[0];
memset((void*)0x200001dc, 0, 12);
res = -1;
res = syz_io_uring_setup(/*entries=*/0x255d, /*params=*/0x200001c0,
/*ring_ptr=*/0x20000140, /*sqes_ptr=*/0x200024c0);
// Only the second instance's SQE-array pointer is kept; its fd and the ring
// pointer stored at 0x20000140 are not used afterwards.
if (res != -1)
r[3] = *(uint64_t*)0x200024c0;
// 64-byte SQE at 0x200000c0: opcode byte 0x1a (labelled IORING_OP_SEND in
// the syzkaller listing above), 32-bit field at +4 set to -1, rest zero.
*(uint8_t*)0x200000c0 = 0x1a;
*(uint8_t*)0x200000c1 = 0;
*(uint16_t*)0x200000c2 = 0;
*(uint32_t*)0x200000c4 = -1;
*(uint64_t*)0x200000c8 = 0;
*(uint64_t*)0x200000d0 = 0;
*(uint32_t*)0x200000d8 = 0;
*(uint32_t*)0x200000dc = 0;
*(uint64_t*)0x200000e0 = 0;
*(uint16_t*)0x200000e8 = 0;
*(uint16_t*)0x200000ea = 0;
memset((void*)0x200000ec, 0, 20);
// Note the mix: the tail/mask come from the FIRST instance's ring mapping
// (r[1]) while the entry is copied into the SECOND instance's SQE array
// (r[3]).
syz_io_uring_submit(/*ring_ptr=*/r[1], /*sqes_ptr=*/r[3], /*sqe=*/0x200000c0);
// Register opcode 0xc on the first instance (labelled
// IORING_REGISTER_ENABLE_RINGS in the syzkaller listing above).
syscall(__NR_io_uring_register, /*fd=*/r[0], /*opcode=*/0xcul, /*arg=*/0ul,
/*nr_args=*/0ul);
// 64-byte SQE at 0x20000100: opcode byte 1 (labelled IORING_OP_READV in the
// listing), 32-bit field at +4 set to the first instance's own fd, rest
// zero. Queued on the first instance's ring and SQE array.
*(uint8_t*)0x20000100 = 1;
*(uint8_t*)0x20000101 = 0;
*(uint16_t*)0x20000102 = 0;
*(uint32_t*)0x20000104 = r[0];
*(uint64_t*)0x20000108 = 0;
*(uint64_t*)0x20000110 = 0;
*(uint32_t*)0x20000118 = 0;
*(uint32_t*)0x2000011c = 0;
*(uint64_t*)0x20000120 = 0;
*(uint16_t*)0x20000128 = 0;
*(uint16_t*)0x2000012a = 0;
memset((void*)0x2000012c, 0, 20);
syz_io_uring_submit(/*ring_ptr=*/r[1], /*sqes_ptr=*/r[2], /*sqe=*/0x20000100);
// The same SQE contents are rebuilt and queued a second time.
*(uint8_t*)0x20000100 = 1;
*(uint8_t*)0x20000101 = 0;
*(uint16_t*)0x20000102 = 0;
*(uint32_t*)0x20000104 = r[0];
*(uint64_t*)0x20000108 = 0;
*(uint64_t*)0x20000110 = 0;
*(uint32_t*)0x20000118 = 0;
*(uint32_t*)0x2000011c = 0;
*(uint64_t*)0x20000120 = 0;
*(uint16_t*)0x20000128 = 0;
*(uint16_t*)0x2000012a = 0;
memset((void*)0x2000012c, 0, 20);
syz_io_uring_submit(/*ring_ptr=*/r[1], /*sqes_ptr=*/r[2], /*sqe=*/0x20000100);
// Final step of the replay: io_uring_enter on the first instance with
// to_submit=0x331b and no flags; the reported call trace passes through
// __do_sys_io_uring_enter.
syscall(__NR_io_uring_enter, /*fd=*/r[0], /*to_submit=*/0x331b,
/*min_complete=*/0, /*flags=*/0ul, /*sigmask=*/0ul, /*size=*/0ul);
return 0;
}