Re: [PATCH] net/p9/trans_fd.c: fix double list_del()

From: Tomas Bortoli
Date: Tue Jul 24 2018 - 06:04:20 EST


On 07/24/2018 03:40 AM, jiangyiwen wrote:
> On 2018/7/23 20:19, Tomas Bortoli wrote:
>> A double list_del(&req->req_list) is possible in p9_fd_cancel() as
>> shown by Syzbot. To prevent it we have to ensure that we have the
>> client->lock when deleting the list. Furthermore, we have to update
>> the status of the request before releasing the lock, to prevent the
>> race.
>>
>> Signed-off-by: Tomas Bortoli <tomasbortoli@xxxxxxxxx>
>> Reported-by: syzbot+735d926e9d1317c3310c@xxxxxxxxxxxxxxxxxxxxxxxxx
>> ---
>> net/9p/trans_fd.c | 9 ++++-----
>> 1 file changed, 4 insertions(+), 5 deletions(-)
>>
>> diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
>> index a64b01c56e30..370c6c69a05c 100644
>> --- a/net/9p/trans_fd.c
>> +++ b/net/9p/trans_fd.c
>> @@ -199,15 +199,14 @@ static void p9_mux_poll_stop(struct p9_conn *m)
>> static void p9_conn_cancel(struct p9_conn *m, int err)
>> {
>> struct p9_req_t *req, *rtmp;
>> - unsigned long flags;
>> LIST_HEAD(cancel_list);
>>
>> p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);
>>
>> - spin_lock_irqsave(&m->client->lock, flags);
>> + spin_lock(&m->client->lock);
>>
>> if (m->err) {
>> - spin_unlock_irqrestore(&m->client->lock, flags);
>> + spin_unlock(&m->client->lock);
>> return;
>> }
>>
>> @@ -219,7 +218,6 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
>> list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
>> list_move(&req->req_list, &cancel_list);
>> }
>> - spin_unlock_irqrestore(&m->client->lock, flags);
>>
>> list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
>> p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req);
>> @@ -228,6 +226,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
>> req->t_err = err;
>> p9_client_cb(m->client, req, REQ_STATUS_ERROR);
>> }
>> + spin_unlock(&m->client->lock);
>
> If you want to expand the ranges of client->lock, the cancel_list will not
> be necessary, you can optimize this code.
>

Unfortunately, not. Moving the spin_lock() before the for makes the
crash appear again. This because the calls to list_move() in the for
before delete all the elements from req->req_list, so the list is empty,
another call to list_del() would trigger a double del.
That's why we hold the lock to update the status of all those requests..
otherwise we have again the race with p9_fd_cancel().
Crash log at the bottom.


> Thanks,
> Yiwen.
>
>> }
>>
>> static __poll_t
>> @@ -370,12 +369,12 @@ static void p9_read_work(struct work_struct *work)
>> if (m->req->status != REQ_STATUS_ERROR)
>> status = REQ_STATUS_RCVD;
>> list_del(&m->req->req_list);
>> - spin_unlock(&m->client->lock);
>> p9_client_cb(m->client, m->req, status);
>> m->rc.sdata = NULL;
>> m->rc.offset = 0;
>> m->rc.capacity = 0;
>> m->req = NULL;
>> + spin_unlock(&m->client->lock);
>> }
>>
>> end_clear:
>>
>
>

Crash:

syzkaller login: [ 55.691138] list_del corruption,
ffff88004de337a8->next is LIST_POISON1 (dead000000000100)
[ 55.693058] ------------[ cut here ]------------
[ 55.693910] kernel BUG at lib/list_debug.c:47!
[ 55.695060] invalid opcode: 0000 [#1] SMP KASAN
[ 55.696008] CPU: 1 PID: 9500 Comm: repro1 Not tainted 4.18.0-rc4+ #260
[ 55.696027] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 1.10.2-1 04/01/2014
[ 55.696027] RIP: 0010:__list_del_entry_valid+0xd3/0x150
[ 55.696027] Code: 00 00 00 49 8b 54 24 08 48 39 f2 75 3b 48 83 c4 08
b8 01 00 00 00 5b 41 5c 5d c3 4c 89 e2 48 c7 c7 80 06 b8 87 e8 21 c6 1d
fe <0f> 0b 48 c7 c7 e0 06 b8 87 e8 13 c6 1d fe 0f 0b 48 c7 c7 40 07 b8
[ 55.696027] RSP: 0018:ffff88004de2f198 EFLAGS: 00010282
[ 55.696027] RAX: 000000000000004e RBX: dead000000000200 RCX:
ffffffff815efe0e
[ 55.696027] RDX: 0000000000000000 RSI: 0000000000000000 RDI:
ffff88006c9265ac
[ 55.696027] RBP: ffff88004de2f1b0 R08: ffffed000d924fc1 R09:
0000000000000001
[ 55.696027] R10: ffffed000cdc94e8 R11: ffff88006c927e07 R12:
dead000000000100
[ 55.696027] R13: ffff880066e4a740 R14: ffff88004de337a8 R15:
ffff88004de2f240
[ 55.696027] FS: 00007efc61d2e700(0000) GS:ffff88006c900000(0000)
knlGS:0000000000000000
[ 55.696027] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 55.696027] CR2: 00007efc613ab330 CR3: 000000005e86f000 CR4:
00000000000006e0
[ 55.696027] Call Trace:
[ 55.696027] ? _raw_spin_lock+0x32/0x40
[ 55.696027] p9_fd_cancel+0xf3/0x390
[ 55.696027] ? p9_fd_request+0x238/0x3e0
[ 55.696027] ? p9_fd_close+0x5a0/0x5a0
[ 55.696027] p9_client_rpc+0xacf/0x11b0
[ 55.696027] ? p9_client_prepare_req.part.11+0xd20/0xd20
[ 55.696027] ? __fget+0x378/0x5a0
[ 55.696027] ? iterate_fd+0x400/0x400
[ 55.696027] ? finish_wait+0x4b0/0x4b0
[ 55.696027] ? rcu_read_lock_sched_held+0x108/0x120
[ 55.696027] ? p9_fd_cancel+0x390/0x390
[ 55.696027] p9_client_create+0xa33/0x1600
[ 55.696027] ? v9fs_drop_inode+0x100/0x140
[ 55.696027] ? p9_client_read+0xbe0/0xbe0
[ 55.724517] ? __sched_text_start+0x8/0x8
[ 55.724517] ? find_held_lock+0x35/0x1d0
[ 55.724517] ? __lockdep_init_map+0xe4/0x650
[ 55.724517] ? lockdep_init_map+0x9/0x10
[ 55.724517] ? kasan_check_write+0x14/0x20
[ 55.724517] ? __init_rwsem+0x1ce/0x2b0
[ 55.724517] ? do_raw_write_unlock+0x2a0/0x2a0
[ 55.724517] ? rcu_read_lock_sched_held+0x108/0x120
[ 55.724517] ? __kmalloc_track_caller+0x49f/0x760
[ 55.724517] ? save_stack+0xa3/0xd0
[ 55.724517] v9fs_session_init+0x218/0x1980
[ 55.724517] ? v9fs_session_init+0x218/0x1980
[ 55.724517] ? v9fs_show_options+0x740/0x740
[ 55.724517] ? kasan_check_read+0x11/0x20
[ 55.724517] ? rcu_is_watching+0x8c/0x150
[ 55.724517] ? rcu_pm_notify+0xc0/0xc0
[ 55.736879] ? v9fs_mount+0x62/0x880
[ 55.736879] ? rcu_read_lock_sched_held+0x108/0x120
[ 55.738600] ? kmem_cache_alloc_trace+0x48d/0x740
[ 55.738600] v9fs_mount+0x81/0x880
[ 55.738600] ? v9fs_mount+0x81/0x880
[ 55.738600] mount_fs+0x66/0x2f0
[ 55.738600] vfs_kern_mount.part.26+0xcc/0x4a0
[ 55.738600] ? may_umount+0xa0/0xa0
[ 55.738600] ? _raw_read_unlock+0x22/0x30
[ 55.738600] ? __get_fs_type+0x8a/0xc0
[ 55.738600] do_mount+0xd86/0x2e90
[ 55.738600] ? kasan_check_read+0x11/0x20
[ 55.738600] ? do_raw_spin_unlock+0xa7/0x330
[ 55.738600] ? copy_mount_string+0x40/0x40
[ 55.738600] ? copy_mount_options+0x5f/0x2e0
[ 55.738600] ? rcu_read_lock_sched_held+0x108/0x120
[ 55.738600] ? kmem_cache_alloc_trace+0x48d/0x740
[ 55.738600] ? copy_mount_options+0x1f7/0x2e0
[ 55.738600] ksys_mount+0xab/0x120
[ 55.738600] __x64_sys_mount+0xbe/0x150
[ 55.738600] ? trace_hardirqs_on_caller+0x421/0x5c0
[ 55.738600] do_syscall_64+0x18c/0x760
[ 55.738600] ? finish_task_switch+0x186/0x9f0
[ 55.738600] ? syscall_return_slowpath+0x560/0x560
[ 55.738600] ? syscall_return_slowpath+0x2b0/0x560
[ 55.738600] ? __switch_to_asm+0x34/0x70
[ 55.738600] ? prepare_exit_to_usermode+0x360/0x360
[ 55.738600] ? __switch_to_asm+0x34/0x70
[ 55.738600] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe
[ 55.738600] ? trace_hardirqs_off_thunk+0x1a/0x1c
[ 55.738600] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 55.738600] RIP: 0033:0x7efc61442b79
[ 55.763914] Code: f3 fa ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90
48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f
05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 8f c2 2a 00 31 d2 48 29 c2 64
[ 55.763914] RSP: 002b:00007efc61d2de88 EFLAGS: 00000246 ORIG_RAX:
00000000000000a5
[ 55.763914] RAX: ffffffffffffffda RBX: 0000000000000000 RCX:
00007efc61442b79
[ 55.769909] RDX: 0000000020000380 RSI: 0000000020000000 RDI:
0000000000000000
[ 55.769909] RBP: 00007efc61d2deb0 R08: 0000000020000240 R09:
00007efc61d2e9c0
[ 55.769909] R10: 0000000000000000 R11: 0000000000000246 R12:
00007ffd68da6aa0
[ 55.773854] R13: 00007efc61d2e9c0 R14: 00007efc61d39040 R15:
0000000000000003
[ 55.773854] Modules linked in:
[ 55.776650] ---[ end trace 8de8057bee332983 ]---
[ 55.777631] RIP: 0010:__list_del_entry_valid+0xd3/0x150
[ 55.778754] Code: 00 00 00 49 8b 54 24 08 48 39 f2 75 3b 48 83 c4 08
b8 01 00 00 00 5b 41 5c 5d c3 4c 89 e2 48 c7 c7 80 06 b8 87 e8 21 c6 1d
fe <0f> 0b 48 c7 c7 e0 06 b8 87 e8 13 c6 1d fe 0f 0b 48 c7 c7 40 07 b8
[ 55.782685] RSP: 0018:ffff88004de2f198 EFLAGS: 00010282
[ 55.783785] RAX: 000000000000004e RBX: dead000000000200 RCX:
ffffffff815efe0e
[ 55.785126] RDX: 0000000000000000 RSI: 0000000000000000 RDI:
ffff88006c9265ac
[ 55.786470] RBP: ffff88004de2f1b0 R08: ffffed000d924fc1 R09:
0000000000000001
[ 55.788007] R10: ffffed000cdc94e8 R11: ffff88006c927e07 R12:
dead000000000100
[ 55.789517] R13: ffff880066e4a740 R14: ffff88004de337a8 R15:
ffff88004de2f240
[ 55.791173] FS: 00007efc61d2e700(0000) GS:ffff88006c900000(0000)
knlGS:0000000000000000
[ 55.792746] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 55.793882] CR2: 00007efc613ab330 CR3: 000000005e86f000 CR4:
00000000000006e0
[ 55.795410] Kernel panic - not syncing: Fatal exception
[ 55.796384] Kernel Offset: disabled
[ 55.796384] Rebooting in 86400 seconds..