BUG_ON(!PageLocked(page)) in munlock_vma_page/migrate_pages/__block_write_begin

From: Dmitry Vyukov
Date: Tue Dec 15 2015 - 14:24:03 EST


Hello,

I am seeing lots of similar BUGs in different functions all pointing
to BUG_ON(!PageLocked(page)). I reproduced them on several recent
commits, including stock 6764e5ebd5c62236d082f9ae030674467d0b2779 (Dec
9) with no changes on top and no KASAN/etc.

------------[ cut here ]------------
kernel BUG at fs/buffer.c:1917!
invalid opcode: 0000 [#1] SMP
Modules linked in:
CPU: 3 PID: 17243 Comm: executor Not tainted 4.4.0-rc4+ #53
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
task: ffff8800347ac380 ti: ffff880033510000 task.ti: ffff880033510000
RIP: 0010:[<ffffffff8125cae0>] [
<ffffffff8125cae0>][< none >]
__block_write_begin+0x450/0x480 fs/buffer.c:1919
RSP: 0018:ffff880033513be8 EFLAGS: 00010246
RAX: 01fffc000000086c RBX: ffffea0000c57340 RCX: ffffffff812eb890
RDX: 0000000000001000 RSI: 0000000000000000 RDI: ffffea0000c57340
RBP: ffff880033513c90 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000001000
R13: 00000000fffffff2 R14: ffffffff812eb890 R15: ffff8800721ca4a0
FS: 000000000254f880(0063) GS:ffff88007fd00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 00000000007100b8 CR3: 0000000034b22000 CR4: 00000000000006e0
Stack:
ffff88003ff6aeb8 ffff880033513c68 0000000000000002 ffff880033513c40
ffffffff823f976e ffffea0000c57340 ffffea0000c57340 ffff8800721ca2b0
ffffffff812eb890 ffff8800721ca2b0 ffff8800721ca4a0 ffff880033513c90
Call Trace:
[<ffffffff8125cf3a>] block_page_mkwrite+0x9a/0xd0 fs/buffer.c:2449
[<ffffffff812f3960>] ext4_page_mkwrite+0x270/0x400 fs/ext4/inode.c:5286
[<ffffffff811da8e0>] do_page_mkwrite+0x40/0x80 mm/memory.c:1970
[< inline >] wp_page_shared mm/memory.c:2240
[<ffffffff811dcc36>] do_wp_page+0x356/0x4e0 mm/memory.c:2344
[< inline >] handle_pte_fault mm/memory.c:3311
[< inline >] __handle_mm_fault mm/memory.c:3413
[<ffffffff811de9e0>] handle_mm_fault+0xd70/0x1a70 mm/memory.c:3442
[<ffffffff810a647a>] __do_page_fault+0x18a/0x410 arch/x86/mm/fault.c:1238
[<ffffffff810a6773>] trace_do_page_fault+0x43/0xd0 arch/x86/mm/fault.c:1331
[<ffffffff810a1494>] do_async_page_fault+0x14/0x80 arch/x86/kernel/kvm.c:264
[<ffffffff82400778>] async_page_fault+0x28/0x30 arch/x86/entry/entry_64.S:988
Code: b4 44 8b 4d b0 4c 8b 55 a8 e9 3b fd ff ff 8b 55 bc 8b 75 b8 48
8b 7d 80 e8 5e ef ff ff e9 dc fe ff ff 0f 0b e8 de 08 41 00 0f 0b <0f>
0b be 0f 00 00 00 48 c7 c7 41 c4 95 82 4c 89 55 a8 44 89 4d
RIP [<ffffffff8125cae0>] __block_write_begin+0x450/0x480 fs/buffer.c:1919
RSP <ffff880033513be8>
---[ end trace c0d117b74b7a2ba6 ]---


------------[ cut here ]------------
kernel BUG at include/linux/swapops.h:106!
invalid opcode: 0000 [#1] SMP KASAN
Modules linked in:
CPU: 2 PID: 10089 Comm: syzkaller_execu Tainted: G W
4.4.0-rc4+ #161
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
task: ffff880037715e00 ti: ffff8800006a0000 task.ti: ffff8800006a0000
RIP: 0010:[<ffffffff816874db>] [<ffffffff816874db>]
try_to_unmap_one+0x74b/0x960
RSP: 0018:ffff8800006a6fb0 EFLAGS: 00010212
RAX: ffff880037715e00 RBX: ffff88005a650000 RCX: ffffc900048f6000
RDX: 0000000000003fff RSI: 0000000000004000 RDI: ffff88005a650130
RBP: ffff8800006a7078 R08: 0000000000000001 R09: 0000000000000000
R10: ffffed000fffec13 R11: 0000000000000000 R12: 0000000000000002
R13: 0000000000000002 R14: 80000000424ad007 R15: 0000000000000018
FS: 00007f84b59d1700(0000) GS:ffff88006da00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000020797ff8 CR3: 000000000fb77000 CR4: 00000000000006e0
Stack:
0000000000000000 ffff880000000000 0000000000000212 ffff880046dc4780
0000000000000001 1ffff100000d4dfe 00000000006f0000 ffffea0001092b40
0000000041b58ab3 ffffffff86a6af3b ffffffff81686d90 ffffffff81653e38
Call Trace:
[< inline >] rmap_walk_anon mm/rmap.c:1600
[<ffffffff81689a9e>] rmap_walk+0x54e/0xa10 mm/rmap.c:1668
[<ffffffff8168a599>] try_to_unmap+0xd9/0x180 mm/rmap.c:1495
[< inline >] __unmap_and_move mm/migrate.c:890
[< inline >] unmap_and_move mm/migrate.c:950
[<ffffffff816d8220>] migrate_pages+0xcc0/0x25b0 mm/migrate.c:1149
[<ffffffff8165090e>] compact_zone+0xe2e/0x2180 mm/compaction.c:1411
[<ffffffff81651d4a>] compact_zone_order+0xea/0x160 mm/compaction.c:1504
[<ffffffff81652793>] try_to_compact_pages+0x323/0xa60 mm/compaction.c:1555
[<ffffffff815f85ff>] __alloc_pages_direct_compact+0x7f/0x280
mm/page_alloc.c:2765
[< inline >] __alloc_pages_slowpath mm/page_alloc.c:3076
[<ffffffff815f9320>] __alloc_pages_nodemask+0xb20/0x15f0 mm/page_alloc.c:3235
[< inline >] __alloc_pages include/linux/gfp.h:415
[< inline >] __alloc_pages_node include/linux/gfp.h:428
[<ffffffff816beb3f>] alloc_pages_vma+0x49f/0x600 mm/mempolicy.c:2001
[<ffffffff816e6a1a>] do_huge_pmd_wp_page+0x9ba/0x1ab0 mm/huge_memory.c:1189
[< inline >] wp_huge_pmd mm/memory.c:3250
[< inline >] __handle_mm_fault mm/memory.c:3382
[<ffffffff816684f4>] handle_mm_fault+0x2b44/0x3a30 mm/memory.c:3442
[<ffffffff8121d786>] __do_page_fault+0x376/0x8d0 arch/x86/mm/fault.c:1238
[<ffffffff8121ddc3>] trace_do_page_fault+0xb3/0x3d0 arch/x86/mm/fault.c:1331
[<ffffffff81210c94>] do_async_page_fault+0x14/0x70 arch/x86/kernel/kvm.c:264
[<ffffffff85b694f8>] async_page_fault+0x28/0x30 arch/x86/entry/entry_64.S:988
[<ffffffff85b673f6>] entry_SYSCALL_64_fastpath+0x16/0x7a
arch/x86/entry/entry_64.S:185
Code: fc e7 ff f0 48 ff 8b 48 03 00 00 f0 48 ff 83 50 03 00 00 4c 89
e1 48 c1 e9 39 48 8d 14 09 4c 89 e1 e9 2d fd ff ff e8 a5 fc e7 ff <0f>
0b e8 9e fc e7 ff 48 89 df 48 8b b5 68 ff ff ff 48 8b 95 50
RIP [< inline >] make_migration_entry include/linux/swapops.h:106
RIP [<ffffffff816874db>] try_to_unmap_one+0x74b/0x960 mm/rmap.c:1390
RSP <ffff8800006a6fb0>
---[ end trace 4641abb232e75cf4 ]---


------------[ cut here ]------------
kernel BUG at mm/mlock.c:179!
invalid opcode: 0000 [#1] SMP KASAN
Modules linked in:
CPU: 3 PID: 13580 Comm: syzkaller_execu Tainted: G W
4.4.0-rc4+ #161
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
task: ffff88003d825e00 ti: ffff88003ab88000 task.ti: ffff88003ab88000
RIP: 0010:[<ffffffff8166fb84>] [<ffffffff8166fb84>]
munlock_vma_page+0x1c4/0x200
RSP: 0018:ffff88003ab8f4a8 EFLAGS: 00010297
RAX: ffffffff8166fb84 RBX: ffffea00017d0840 RCX: ffffc900070a6000
RDX: 0000000000000720 RSI: 0000000000000721 RDI: 000000000000001f
RBP: ffff88003ab8f4d8 R08: 0000000000000001 R09: 0000000000000001
R10: ffffed000fffec1b R11: 0000000000000000 R12: 00000000000007c0
R13: ffff88007fff7000 R14: ffff88006ad38d48 R15: ffff880031018010
FS: 00007fa3ef2d9700(0000) GS:ffff88006db00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000020002006 CR3: 00000000380b9000 CR4: 00000000000006e0
Stack:
00000000000007c0 ffff88006ad38cf8 ffffea00017d0840 0000000000000001
ffff88006ad38d48 ffff880031018010 ffff88003ab8f5c0 ffffffff8166010e
0000000020003000 ffff880036e098c0 ffff88002f22cbe8 80000000635c0067
Call Trace:
[<ffffffff8166010e>] wp_page_copy.isra.56+0x93e/0xe60 mm/memory.c:2171
[<ffffffff81663eaf>] do_wp_page+0x17f/0xf20 mm/memory.c:2354
[< inline >] handle_pte_fault mm/memory.c:3311
[< inline >] __handle_mm_fault mm/memory.c:3413
[<ffffffff81667716>] handle_mm_fault+0x1d66/0x3a30 mm/memory.c:3442
[<ffffffff8121d786>] __do_page_fault+0x376/0x8d0 arch/x86/mm/fault.c:1238
[<ffffffff8121ddc3>] trace_do_page_fault+0xb3/0x3d0 arch/x86/mm/fault.c:1331
[<ffffffff81210c94>] do_async_page_fault+0x14/0x70 arch/x86/kernel/kvm.c:264
[<ffffffff85b694f8>] async_page_fault+0x28/0x30 arch/x86/entry/entry_64.S:988
[< inline >] SYSC_poll fs/select.c:969
[<ffffffff817532bc>] SyS_poll+0xbc/0x3f0 fs/select.c:957
[<ffffffff85b673f6>] entry_SYSCALL_64_fastpath+0x16/0x7a
arch/x86/entry/entry_64.S:185
Code: 48 89 df e8 ff f3 ff ff 84 c0 74 ba e8 16 76 e9 ff 4c 89 ff e8
ee 6d 4f 04 48 89 df e8 b6 f2 ff ff e9 50 ff ff ff e8 fc 75 e9 ff <0f>
0b 48 89 df e8 12 ef 05 00 eb 88 e8 0b ef 05 00 e9 87 fe ff
RIP [<ffffffff8166fb84>] munlock_vma_page+0x1c4/0x200 mm/mlock.c:179
RSP <ffff88003ab8f4a8>
---[ end trace 76823d12a049bf21 ]---


Unfortunately I cannot reproduce it without syzkaller fuzzer. Here are
repro instructions using syzkaller infrastructure (you need Go1.5
toolchain installed and GOPATH env var setup):

$ go get github.com/google/syzkaller/tools/execprog
$ cd $GOPATH/src/github.com/google/syzkaller
$ make executor
Then copy $GOPATH/bin/execprog,
$GOPATH/src/github.com/google/syzkaller/bin/executor and the attached
file into the test machine. Run the following command inside of the
test machine:
$ ./execprog -executor ./executor -loop -procs 16 -cover=0 pagelocked

It may require a hour or so to reproduce.

Syzkaller's execprog will do something along the following lines (to
give you idea of what's stressed): create a bunch of threads; mmap 2
shared files per test process (32 in this case); spawn 16
subprocesses; these subprocess will spawn another subproccess; these
subprocesses will execute syscalls listed in the attached file in
different threads in somewhat chaotic order. I've tried to reproduce
it all in the attached mlocked.c file, but it did not trigger the BUGs
for me.

Attachment: pagelocked
Description: Binary data

// autogenerated by syzkaller (http://github.com/google/syzkaller)
#include <syscall.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

long r6 = -1;
long r8 = -1;
long r89 = -1;
long r93 = -1;

#define NMAP 3
#define SIZE (1<<20)

void *maps[NMAP];

void *thr(void *arg)
{
switch ((long)arg) {
case 0:
syscall(SYS_mmap, 0x20000000ul, 0x1000ul, 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul);
break;
case 1:
syscall(SYS_mmap, 0x20000000ul, 0x1000ul, 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul);
break;
case 2:
syscall(SYS_mmap, 0x20000000ul, 0x1000ul, 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul);
break;
case 3:
memcpy((void*)0x2000059f, "\x73\x79\x73\x74\x65\x6d\x74\x72\x75\x73\x74\x65\x64\x00", 14);
memcpy((void*)0x20000fd8, "\x6d\x69\x6d\x65\x5f\x74\x79\x70\x65\x70\x72\x6f\x63\x73\x65\x6c\x69\x6e\x75\x78\x2d\x5c\x2a\xc7\x6d\x69\x6d\x65\x5f\x74\x79\x70\x65\x2e\x2f\x65\x74\x68\x31\x00", 40);
memcpy((void*)0x20000624, "\x10\x22\x8c\x5c\x6c\xc8\x04\x26\x47\xa1\x41\xd1\x5a\x3e\x4e\x84\xdd\x39\x5d\x57\x09\x1d\x61\x0e\x51\xea\x31\xcd\xcf\x68\x91\x47\x66\xba\x2d\x9c\x23\x52\x26\x52\x86\x78\x29\x39\xa8\xaa\x95\x02\xbd\x29\x19\xd6\xc8\x76\x6a\x30\x4c\x77\x75\x2d\x91\x98\x77\x19\x29\x03\xe4\xf4\x49\x68\xde\xf8\x22\x8c\xec\xc3\x3f\x92\x3c\x46\x41\xba\x6c\x64\x52\x1e\x76\x4d\x71\x7d\xd2\x6c\x9a\x17\x85\x2a\x14\xca\x35\x65\xad\x32\x81\x16\xb1\xfa\x38\x94\xce\x41\x4b\x09\xfe\x03\xbf\x9f\x60\xcc\x76\x97\x5d\x7a\xd6\x66\x3e\xcb\x03\x4c\x58\xa3\xc2\xc5\xb6\x6f\xa2\xb3\x70\x4e\x0b\x67\x22\xfd\xa9\xba\x42\xf7\xb5\x0c\xdb\x22\xd1\x20", 152);
r6 = syscall(SYS_add_key, 0x2000059ful, 0x20000fd8ul, 0x20000624ul, 0x98ul, 0xde337a1cb4cf849dul, 0);
break;
case 4:
syscall(SYS_msync, 0x20c0f000ul, 0x4000ul, 0x1ul, 0, 0, 0);
break;
case 5:
r8 = syscall(SYS_gettid, 0, 0, 0, 0, 0, 0);
break;
case 6:
syscall(SYS_mmap, 0x20001000ul, 0x1000ul, 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul);
*(uint32_t*)0x20001fd0 = 0x30;
*(uint32_t*)0x20001fd4 = 0x0;
*(uint64_t*)0x20001fd8 = 0x1;
*(uint32_t*)0x20001fe0 = 0x2;
*(uint32_t*)0x20001fe4 = 0x2;
*(uint64_t*)0x20001fe8 = 0x7;
*(uint64_t*)0x20001ff0 = 0x1;
*(uint64_t*)0x20001ff8 = 0x9;
syscall(SYS_sched_setattr, r8, 0x20001fd0ul, 0x0ul, 0, 0, 0);
break;
case 7:
syscall(SYS_keyctl, 0xful, r6, 0x4ul, 0, 0, 0);
break;
case 8:
syscall(SYS_sync, 0, 0, 0, 0, 0, 0);
break;
case 9:
syscall(SYS_mmap, 0x20003000ul, 0x1000ul, 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul);
break;
case 10:
syscall(SYS_shmget, 0xfffffffffffff727ul, 0x1000ul, 0x80ul, 0x20001000ul, 0, 0);
break;
case 11:
syscall(SYS_tkill, r8, 0x4ul, 0, 0, 0, 0);
break;
case 12:
syscall(SYS_keyctl, 0x3ul, r6, 0, 0, 0, 0);
break;
case 13:
syscall(SYS_mmap, 0x20004000ul, 0x1000ul, 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul);
break;
case 14:
memcpy((void*)0x20004000, "\x2e\x2f\x66\x69\x6c\x65\x30\x00", 8);
r89 = syscall(SYS_openat, 0x1869ful, 0x20004000ul, 0x0ul, 0x140ul, 0, 0);
break;
case 15:
syscall(SYS_mmap, 0x20004000ul, 0x1000ul, 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul);
break;
case 16:
syscall(SYS_mmap, 0x20004000ul, 0x1000ul, 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul);
break;
case 17:
memcpy((void*)0x2000475e, "\x2e\x2f\x62\x75\x73\x00", 6);
r93 = syscall(SYS_open, 0x2000475eul, 0x101000ul, 0x50ul, 0, 0, 0);
break;
case 18:
syscall(SYS_mmap, 0x20004000ul, 0x1000ul, 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul);
break;
case 19:
memcpy((void*)0x20004000, "\x2e\x2f\x66\x69\x6c\x65\x30\x00", 8);
memcpy((void*)0x20004ffb, "\x2e\x2f\x66\x69\x6c\x65\x30\x00", 8);
syscall(SYS_linkat, r89, 0x20004000ul, r93, 0x20004ffbul, 0x1000ul, 0);
break;
case 20:
syscall(SYS_mmap, 0x20005000ul, 0x1000ul, 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul);
break;
case 21:
syscall(SYS_mmap, 0x20005000ul, 0x1000ul, 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul);
break;
case 22: {
unsigned long old, new;
old = 1;
new = 2;
migrate_pages(getpid(), 2, &old, &new);
break;
}
case 23: {
unsigned long old, new;
old = 2;
new = 1;
migrate_pages(getpid(), 2, &old, &new);
break;
}
}
return 0;
}

void worker(void)
{
const int N = 24;
const int K = 2;
int i, j;
pthread_t th[K*N];
void *p;

char buf[128];
sprintf(buf, "/tmp/myprivate%d", getpid());
int fd = open(buf, O_RDWR|O_CREAT, 0600);
ftruncate(fd, 100<<20);
p = mmap(0, 100<<20, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
if (p != MAP_FAILED) {
for (i = 0; i < (100<<20); i += (4<<10))
((volatile char*)p)[i]++;
munmap(p, 100<<20);
}
close(fd);
unlink(buf);

for (i = 0; i < NMAP; i++) {
for (j = 0; j < SIZE; j += 4<<10)
((volatile char*)maps[i])[j] = 1;
}

for (i = 0; i < K*N; i++) {
pthread_create(&th[i], 0, thr, (void*)(long)i);
usleep(20);
}

for (i = 0; i < K*N; i++)
pthread_join(th[i], 0);

}

void forkworker(void)
{
int i, j;

for (i = 0; i < NMAP; i++) {
for (j = 0; j < SIZE; j += 4<<10)
((volatile char*)maps[i])[j] = 1;
}

if (fork() == 0) {
worker();
exit(0);
}
}

int main(int argc, char **argv)
{
int i;

for (i = 0; i < NMAP; i++) {
char buf[128];
sprintf(buf, "/tmp/myshared%d", i);
int fd = open(buf, O_RDWR|O_CREAT, 0600);
ftruncate(fd, SIZE);
maps[i] = mmap(0, SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
}

for (i = 0; i < 8; i++)
forkworker();
for (;;)
if (wait(0) != -1)
forkworker();
}