Re: kvm: GPF in native_write_cr4

From: Dmitry Vyukov
Date: Tue Oct 31 2017 - 07:26:01 EST


-obsolete email address

On Tue, Oct 31, 2017 at 2:24 PM, Dmitry Vyukov <dvyukov@xxxxxxxxxx> wrote:
> On Tue, Oct 31, 2017 at 2:10 PM, Wanpeng Li <kernellwp@xxxxxxxxx> wrote:
>> 2017-10-31 17:59 GMT+08:00 Dmitry Vyukov <dvyukov@xxxxxxxxxx>:
>>> Hello,
>>>
>>> I am seeing the following crash on upstream
>>> 15f859ae5c43c7f0a064ed92d33f7a5bc5de6de0 (Oct 26).
>>> Reproducer:
>>> https://gist.githubusercontent.com/dvyukov/a9690f90c39c1e3b1b6c7acda2d5ef89/raw/33e07f3d6779005fc475764e0802e4a5aee8d0cf/gistfile1.txt
>>> I run qemu with -append "kvm-intel.nested=1" -enable-kvm -cpu host. My
>>> host cpu is E5-2690.
>>>
>>
>> I can't reproduce this w/ latest kvm/queue in both L0 and L1. In
>> addition, there is a recent commit that tries to fix cr4:
>> https://git.kernel.org/pub/scm/virt/kvm/kvm.git/commit/?id=8eb3f87d903168bdbd1222776a6b1e281f50513e
>> The testcase is complex; is the strace log below what you expected?
>
>
> I have that commit in my tree.
>
> I did a little testing and if I remove "-cpu host" from qemu command
> line, then crash does not happen. So my E5-2690 is somehow involved.
>
> The test case is actually for a different issue, but since recently
> lots of kvm-related programs trigger this bug for me. And it seems to
> be very easy to trigger. Here is my strace output:
>
>
> root@syzkaller:~# strace ./a.out
> execve("./a.out", ["./a.out"], [/* 11 vars */]) = 0
> uname({sys="Linux", node="syzkaller", ...}) = 0
> brk(0) = 0xe3c000
> brk(0xe3d1c0) = 0xe3d1c0
> arch_prctl(ARCH_SET_FS, 0xe3c880) = 0
> readlink("/proc/self/exe", "/root/a.out", 4096) = 11
> brk(0xe5e1c0) = 0xe5e1c0
> brk(0xe5f000) = 0xe5f000
> access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or directory)
> mmap(0x20000000, 11481088, PROT_READ|PROT_WRITE,
> MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x20000000
> openat(AT_FDCWD, "/dev/kvm", O_WRONLY) = 3
> ioctl(3, KVM_CREATE_VM, 0) = 4
> ioctl(4, KVM_CREATE_VCPU, 0) = 5
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c07f0) = 0
> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7ffcf97c0630) = 0
> ioctl(5, KVM_GET_SREGS, 0x7ffcf97c07f0) = 0
> open("/dev/kvm", O_RDWR) = 6
> ioctl(6, KVM_GET_SUPPORTED_CPUID, 0x7ffcf97c0930) = 0
> ioctl(5, KVM_SET_CPUID2, 0x7ffcf97c0930) = 0
> close(6) = 0
> ioctl(5, KVM_SET_MSRS, 0x7ffcf97c00f0) = 5
> ioctl(5, KVM_SET_SREGS, 0x7ffcf97c07f0) = 0
> ioctl(5, KVM_SET_REGS, 0x7ffcf97c0760) = 0
> mremap(0x20998000, 4096, 16384, MREMAP_MAYMOVE|MREMAP_FIXED,
> 0x200fa000) = 0x200fa000
> ioctl(5, KVM_RUN, 0) = 0
> mbind(0x20000000, 8192, MPOL_DEFAULT, 0x20001ff8, 2, MPOL_MF_MOVE) = 0
> exit_group(0) = ?
> [ 44.908130] kasan: CONFIG_KASAN_INLINE enabled
> [ 44.909140] kasan: GPF could be caused by NULL-ptr deref or user
> memory access
> [ 44.910615] general protection fault: 0000 [#1] SMP KASAN
> [ 44.911654] Modules linked in:
> [ 44.912241] CPU: 2 PID: 3074 Comm: a.out Not tainted 4.14.0-rc6+ #12
> [ 44.913556] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
> BIOS Bochs 01/01/2011
> [ 44.915146] task: ffff88006a8e0740 task.stack: ffff88006bab8000
> [ 44.916387] RIP: 0010:native_write_cr4+0x4/0x10
> [ 44.917324] RSP: 0018:ffff88006babf458 EFLAGS: 00010097
> [ 44.918373] RAX: ffff88006a8e0740 RBX: 00000000001606e0 RCX: 0000000000000000
> [ 44.919693] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00000000001606e0
> [ 44.921049] RBP: ffff88006babf458 R08: ffffffff8106cf91 R09: 0000000000000000
> [ 44.922417] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000002
> [ 44.923738] R13: ffff88006cb14828 R14: ffff88006cb14850 R15: ffff88006cb00000
> [ 44.925061] FS: 0000000000e3c880(0000) GS:ffff88006cb00000(0000)
> knlGS:0000000000000000
> [ 44.926652] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 44.927727] CR2: 0000000000000000 CR3: 0000000005a22004 CR4: 00000000001626e0
> [ 44.929067] Call Trace:
> [ 44.929627] hardware_disable+0x17f/0x210
> [ 44.930373] ? kvm_io_bus_get_dev+0x2a0/0x2a0
> [ 44.931148] kvm_arch_hardware_disable+0x35/0xd0
> [ 44.932022] ? kvm_io_bus_get_dev+0x2a0/0x2a0
> [ 44.932827] hardware_disable_nolock+0x30/0x40
> [ 44.933591] on_each_cpu+0xca/0x1b0
> [ 44.934283] hardware_disable_all_nolock+0x3e/0x50
> [ 44.935168] kvm_put_kvm+0x956/0xdf0
> [ 44.935781] ? kvm_clear_guest+0xb0/0xb0
> [ 44.936543] ? kvm_irqfd_release+0xd1/0x120
> [ 44.937360] ? lock_downgrade+0x990/0x990
> [ 44.938162] ? _raw_spin_unlock_irq+0x27/0x70
> [ 44.938903] ? kvm_irqfd_release+0xdd/0x120
> [ 44.939676] ? kvm_irqfd_release+0xdd/0x120
> [ 44.940459] ? kvm_put_kvm+0xdf0/0xdf0
> [ 44.941092] kvm_vm_release+0x42/0x50
> [ 44.941804] __fput+0x327/0x7e0
> [ 44.942447] ? fput+0x140/0x140
> [ 44.942966] ? check_same_owner+0x320/0x320
> [ 44.943738] ? _raw_spin_unlock_irq+0x27/0x70
> [ 44.944435] ____fput+0x15/0x20
> [ 44.945026] task_work_run+0x199/0x270
> [ 44.945734] ? task_work_cancel+0x210/0x210
> [ 44.946406] ? _raw_spin_unlock+0x22/0x30
> [ 44.947114] ? switch_task_namespaces+0x87/0xc0
> [ 44.947927] do_exit+0x9b5/0x1ad0
> [ 44.948463] ? lock_acquire+0x1d5/0x580
> [ 44.949149] ? lock_acquire+0x1d5/0x580
> [ 44.949776] ? mm_update_next_owner+0x930/0x930
> [ 44.950577] ? lock_release+0xa40/0xa40
> [ 44.951220] ? check_same_owner+0x320/0x320
> [ 44.951906] ? check_noncircular+0x20/0x20
> [ 44.952622] ? _raw_spin_unlock_irq+0x27/0x70
> [ 44.953402] ? recalc_sigpending_tsk+0x117/0x150
> [ 44.954328] ? ptrace_stop+0x631/0xa40
> [ 44.954916] ? find_held_lock+0x35/0x1d0
> [ 44.955617] ? ptrace_notify+0xee/0x130
> [ 44.956208] ? lock_downgrade+0x990/0x990
> [ 44.956862] ? do_raw_spin_trylock+0x190/0x190
> [ 44.957615] ? _raw_spin_unlock_irq+0x27/0x70
> [ 44.958300] ? trace_hardirqs_on_caller+0x421/0x5c0
> [ 44.959100] ? trace_hardirqs_on+0xd/0x10
> [ 44.959794] ? _raw_spin_unlock_irq+0x27/0x70
> [ 44.960512] ? ptrace_notify+0xee/0x130
> [ 44.961106] ? syscall_trace_enter+0x5d4/0x1290
> [ 44.961850] ? trace_raw_output_sys_exit+0x100/0x100
> [ 44.962624] ? exit_to_usermode_loop+0x8c/0x310
> [ 44.963328] do_group_exit+0x149/0x400
> [ 44.963977] ? trace_hardirqs_off+0xd/0x10
> [ 44.964590] ? exit_to_usermode_loop+0x198/0x310
> [ 44.965363] ? SyS_exit+0x30/0x30
> [ 44.965963] ? trace_event_raw_event_sys_exit+0x260/0x260
> [ 44.966742] ? do_syscall_64+0xb6/0x8d0
> [ 44.967376] ? do_group_exit+0x400/0x400
> [ 44.968026] SyS_exit_group+0x1d/0x20
> [ 44.968568] do_syscall_64+0x26c/0x8d0
> [ 44.969198] ? kill_pid+0x30/0x30
> [ 44.969703] ? syscall_return_slowpath+0x510/0x510
> [ 44.970471] ? syscall_return_slowpath+0x2b3/0x510
> [ 44.971196] ? prepare_exit_to_usermode+0x2d0/0x2d0
> [ 44.971924] ? return_from_SYSCALL_64+0x2d/0x7a
> [ 44.972665] ? trace_hardirqs_on_caller+0x421/0x5c0
> [ 44.973473] ? trace_hardirqs_on_thunk+0x1a/0x1c
> [ 44.974156] entry_SYSCALL64_slow_path+0x25/0x25
> [ 44.974890] RIP: 0033:0x443849
> [ 44.975331] RSP: 002b:00007ffcf97c1d48 EFLAGS: 00000246 ORIG_RAX:
> 00000000000000e7
> [ 44.976464] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000443849
> [ 44.977551] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
> [ 44.978646] RBP: 00000000004b5e88 R08: 000000000000003c R09: 00000000000000e7
> [ 44.979696] R10: ffffffffffffffc0 R11: 0000000000000246 R12: 0000000000000001
> [ 44.980755] R13: 00000000006c52c0 R14: 0000000000404890 R15: 0000000000000000
> [ 44.981825] Code: 0f 1f 80 00 00 00 00 55 48 89 e5 0f 20 d8 5d c3
> 0f 1f 80 00 00 00 00 55 48 89 e5 0f 22 df 5d c3 0f 1f 80 00 00 00 00
> 55 48 89 e5 <0f> 22 e7 5d c3 0f 1f 80 00 00 00 00 55 48 89 e5 44 0f 20
> c0 5d
> [ 44.984645] RIP: native_write_cr4+0x4/0x10 RSP: ffff88006babf458
> [ 44.985583] ---[ end trace 45dae0ba7fdb5e5f ]---
>
>
>
>
>
>> execve("./a.out", ["./a.out"], [/* 32 vars */]) = 0
>> uname({sysname="Linux", nodename="kernel", ...}) = 0
>> brk(NULL) = 0x1d42000
>> brk(0x1d431c0) = 0x1d431c0
>> arch_prctl(ARCH_SET_FS, 0x1d42880) = 0
>> readlink("/proc/self/exe", "/home/kernel/a.out", 4096) = 18
>> brk(0x1d641c0) = 0x1d641c0
>> brk(0x1d65000) = 0x1d65000
>> access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or directory)
>> mmap(0x20000000, 11481088, PROT_READ|PROT_WRITE,
>> MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x20000000
>> openat(AT_FDCWD, "/dev/kvm", O_WRONLY) = 3
>> ioctl(3, KVM_CREATE_VM or LOGGER_GET_LOG_BUF_SIZE, 0) = 4
>> ioctl(4, KVM_CREATE_VCPU, 0) = 5
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1170) = 0
>> ioctl(5, KVM_GET_SREGS, 0x7fff5e6c1330) = 0
>> open("/dev/kvm", O_RDWR) = 6
>> ioctl(6, KVM_GET_SUPPORTED_CPUID, 0x7fff5e6c1470) = 0
>> ioctl(5, KVM_SET_CPUID2, 0x7fff5e6c1470) = 0
>> close(6) = 0
>> ioctl(5, KVM_SET_MSRS, 0x7fff5e6c0c30) = 5
>> ioctl(5, KVM_SET_SREGS, 0x7fff5e6c1330) = 0
>> ioctl(5, KVM_SET_REGS, 0x7fff5e6c1230) = 0
>> mremap(0x20998000, 4096, 16384, MREMAP_MAYMOVE|MREMAP_FIXED,
>> 0x200fa000) = 0x200fa000
>> ioctl(5, KVM_RUN, 0) = 0
>> mbind(0x20000000, 8192, MPOL_DEFAULT, 0x20001ff8, 2, MPOL_MF_MOVE) = 0
>> exit_group(0) = ?
>> +++ exited with 0 +++
>> i
>>
>> Regards,
>> Wanpeng Li
>>
>>> general protection fault: 0000 [#1] SMP KASAN
>>> Modules linked in:
>>> CPU: 1 PID: 3064 Comm: a.out Not tainted 4.14.0-rc6+ #11
>>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
>>> task: ffff880064c203c0 task.stack: ffff880066718000
>>> RIP: 0010:native_write_cr4+0x4/0x10 arch/x86/include/asm/special_insns.h:75
>>> RSP: 0018:ffff88006671f598 EFLAGS: 00010097
>>> RAX: ffff880064c203c0 RBX: 00000000001606e0 RCX: 0000000000000000
>>> RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00000000001606e0
>>> RBP: ffff88006671f598 R08: 0000000000000006 R09: 0000000000000006
>>> R10: ffff880064c203c0 R11: 0000000000000000 R12: 0000000000000001
>>> R13: ffff88006ca94828 R14: ffff88006ca94850 R15: ffff88006ca80000
>>> FS: 00000000019cd880(0000) GS:ffff88006ca80000(0000) knlGS:0000000000000000
>>> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>> CR2: 0000000000000000 CR3: 0000000005a22001 CR4: 00000000001626e0
>>> Call Trace:
>>> __write_cr4 arch/x86/include/asm/paravirt.h:76 [inline]
>>> cr4_clear_bits arch/x86/include/asm/tlbflush.h:197 [inline]
>>> kvm_cpu_vmxoff arch/x86/kvm/vmx.c:3571 [inline]
>>> hardware_disable+0x197/0x210 arch/x86/kvm/vmx.c:3577
>>> kvm_arch_hardware_disable+0x35/0xd0 arch/x86/kvm/x86.c:7920
>>> hardware_disable_nolock+0x30/0x40
>>> arch/x86/kvm/../../../virt/kvm/kvm_main.c:3282
>>> on_each_cpu+0xca/0x1b0 kernel/smp.c:604
>>> hardware_disable_all_nolock+0x44/0x60
>>> arch/x86/kvm/../../../virt/kvm/kvm_main.c:3300
>>> hardware_disable_all arch/x86/kvm/../../../virt/kvm/kvm_main.c:3306 [inline]
>>> kvm_destroy_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:735 [inline]
>>> kvm_put_kvm+0x887/0xe00 arch/x86/kvm/../../../virt/kvm/kvm_main.c:748
>>> kvm_vm_release+0x42/0x50 arch/x86/kvm/../../../virt/kvm/kvm_main.c:759
>>> __fput+0x301/0x7e0 fs/file_table.c:210
>>> ____fput+0x15/0x20 fs/file_table.c:244
>>> task_work_run+0x19e/0x250 kernel/task_work.c:112
>>> exit_task_work include/linux/task_work.h:21 [inline]
>>> do_exit+0x99f/0x18b0 kernel/exit.c:865
>>> do_group_exit+0x14b/0x3f0 kernel/exit.c:968
>>> SYSC_exit_group kernel/exit.c:979 [inline]
>>> SyS_exit_group+0x1d/0x20 kernel/exit.c:977
>>> entry_SYSCALL_64_fastpath+0x1f/0xbe
>>> RIP: 0033:0x443849
>>> RSP: 002b:00007ffe58a95c78 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
>>> RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000443849
>>> RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
>>> RBP: 0000000000000086 R08: 000000000000003c R09: 00000000000000e7
>>> R10: ffffffffffffffc0 R11: 0000000000000246 R12: 0000000000000000
>>> R13: 0000000000404800 R14: 0000000000404890 R15: 0000000000000000
>>> Code: 0f 1f 80 00 00 00 00 55 48 89 e5 0f 20 d8 5d c3 0f 1f 80 00 00
>>> 00 00 55 48 89 e5 0f 22 df 5d c3 0f 1f 80 00 00 00 00 55 48 89 e5 <0f>
>>> 22 e7 5d c3 0f 1f 80 00 00 00 00 55 48 89 e5 44 0f 20 c0 5d
>>> RIP: native_write_cr4+0x4/0x10 arch/x86/include/asm/special_insns.h:75
>>> RSP: ffff88006671f598
>>> ---[ end trace 6f9dbcc14aa47936 ]---