Re: kvm: GPF in native_write_cr4
From: Dmitry Vyukov
Date: Mon Nov 06 2017 - 04:52:36 EST
On Tue, Oct 31, 2017 at 2:07 PM, Dmitry Vyukov <dvyukov@xxxxxxxxxx> wrote:
> On Tue, Oct 31, 2017 at 3:53 PM, Haozhong Zhang
> <haozhong.zhang@xxxxxxxxx> wrote:
>> Hi Wanpeng,
>>
>> On 10/31/17 19:10 +0800, Wanpeng Li wrote:
>>> 2017-10-31 17:59 GMT+08:00 Dmitry Vyukov <dvyukov@xxxxxxxxxx>:
>>> > Hello,
>>> >
>>> > I am seeing the following crash on upstream
>>> > 15f859ae5c43c7f0a064ed92d33f7a5bc5de6de0 (Oct 26).
>>> > Reproducer:
>>> > https://gist.githubusercontent.com/dvyukov/a9690f90c39c1e3b1b6c7acda2d5ef89/raw/33e07f3d6779005fc475764e0802e4a5aee8d0cf/gistfile1.txt
>>> > I run qemu with -append "kvm-intel.nested=1" -enable-kvm -cpu host. My
>>> > host cpu is E5-2690.
As +Jim pointed out, this is E5-2690 v3 to be more precise.
>>> I can't reproduce this w/ latest kvm/queue in both L0 and L1. In
>>> addition, there is a recent commit that tries to fix cr4.
>>> https://git.kernel.org/pub/scm/virt/kvm/kvm.git/commit/?id=8eb3f87d903168bdbd1222776a6b1e281f50513e
>>
>> The calltrace in this bug report is the same as what I got before
>> the above commit.
>>
>> In the previous bug, L0 KVM misused L2 CR4 as L1 CR4. When L1 KVM
>> tried to clear L1 CR4.VMXE in L1 VM shutdown path, L0 KVM considered
>> L1 intended to clear/set other bits as well (because the wrong L2
>> CR4 was used by L0 KVM as L1 CR4), but changes to extra bits may not
>> be allowed against other L1 states.
>>
>> In my previous fix, I tried to fix one place of such L1/L2 CR4
>> misuse. If there are no other places of CR4 misuse, you may have a look
>> at the guest states checked by kvm_set_cr4() against guest CR4
>> changes, and check whether L1 and L2 versions of any of them are
>> misused.
>>
>> It would make debugging easier if we can log which check fails in
>> kvm_set_cr4() when the calltrace appears (e.g., by adding a printk
>> before each return 1 in kvm_set_cr4()).
>>
>>
>> Haozhong
>
>
> To double-check I've also tried latest upstream
> 5f479447d983111c039f1d6d958553c1ad1b2ff1 (Oct 30) and the bug still
> reproduces. So it reproduces with the mentioned fix.
>
>
>
>>> The testcase is complex; is the strace log below what you expected?
>>>
>>> execve("./a.out", ["./a.out"], [/* 32 vars */]) = 0
>>> uname({sysname="Linux", nodename="kernel", ...}) = 0
>>> brk(NULL) = 0x1d42000
>>> brk(0x1d431c0) = 0x1d431c0
>>> arch_prctl(ARCH_SET_FS, 0x1d42880) = 0
>>> readlink("/proc/self/exe", "/home/kernel/a.out", 4096) = 18
>>> brk(0x1d641c0) = 0x1d641c0
>>> brk(0x1d65000) = 0x1d65000
>>> access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or directory)
>>> mmap(0x20000000, 11481088, PROT_READ|PROT_WRITE,
>>> MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x20000000
>>> openat(AT_FDCWD, "/dev/kvm", O_WRONLY) = 3
>>> ioctl(3, KVM_CREATE_VM or LOGGER_GET_LOG_BUF_SIZE, 0) = 4
>>> ioctl(4, KVM_CREATE_VCPU, 0) = 5
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1230) = 0
>>> ioctl(4, KVM_SET_USER_MEMORY_REGION, 0x7fff5e6c1170) = 0
>>> ioctl(5, KVM_GET_SREGS, 0x7fff5e6c1330) = 0
>>> open("/dev/kvm", O_RDWR) = 6
>>> ioctl(6, KVM_GET_SUPPORTED_CPUID, 0x7fff5e6c1470) = 0
>>> ioctl(5, KVM_SET_CPUID2, 0x7fff5e6c1470) = 0
>>> close(6) = 0
>>> ioctl(5, KVM_SET_MSRS, 0x7fff5e6c0c30) = 5
>>> ioctl(5, KVM_SET_SREGS, 0x7fff5e6c1330) = 0
>>> ioctl(5, KVM_SET_REGS, 0x7fff5e6c1230) = 0
>>> mremap(0x20998000, 4096, 16384, MREMAP_MAYMOVE|MREMAP_FIXED,
>>> 0x200fa000) = 0x200fa000
>>> ioctl(5, KVM_RUN, 0) = 0
>>> mbind(0x20000000, 8192, MPOL_DEFAULT 0x20001ff8, 2, MPOL_MF_MOVE) = 0
>>> exit_group(0) = ?
>>> +++ exited with 0 +++
>>>
>>>
>>> Regards,
>>> Wanpeng Li
>>>
>>> > general protection fault: 0000 [#1] SMP KASAN
>>> > Modules linked in:
>>> > CPU: 1 PID: 3064 Comm: a.out Not tainted 4.14.0-rc6+ #11
>>> > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
>>> > task: ffff880064c203c0 task.stack: ffff880066718000
>>> > RIP: 0010:native_write_cr4+0x4/0x10 arch/x86/include/asm/special_insns.h:75
>>> > RSP: 0018:ffff88006671f598 EFLAGS: 00010097
>>> > RAX: ffff880064c203c0 RBX: 00000000001606e0 RCX: 0000000000000000
>>> > RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00000000001606e0
>>> > RBP: ffff88006671f598 R08: 0000000000000006 R09: 0000000000000006
>>> > R10: ffff880064c203c0 R11: 0000000000000000 R12: 0000000000000001
>>> > R13: ffff88006ca94828 R14: ffff88006ca94850 R15: ffff88006ca80000
>>> > FS: 00000000019cd880(0000) GS:ffff88006ca80000(0000) knlGS:0000000000000000
>>> > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>> > CR2: 0000000000000000 CR3: 0000000005a22001 CR4: 00000000001626e0
>>> > Call Trace:
>>> > __write_cr4 arch/x86/include/asm/paravirt.h:76 [inline]
>>> > cr4_clear_bits arch/x86/include/asm/tlbflush.h:197 [inline]
>>> > kvm_cpu_vmxoff arch/x86/kvm/vmx.c:3571 [inline]
>>> > hardware_disable+0x197/0x210 arch/x86/kvm/vmx.c:3577
>>> > kvm_arch_hardware_disable+0x35/0xd0 arch/x86/kvm/x86.c:7920
>>> > hardware_disable_nolock+0x30/0x40
>>> > arch/x86/kvm/../../../virt/kvm/kvm_main.c:3282
>>> > on_each_cpu+0xca/0x1b0 kernel/smp.c:604
>>> > hardware_disable_all_nolock+0x44/0x60
>>> > arch/x86/kvm/../../../virt/kvm/kvm_main.c:3300
>>> > hardware_disable_all arch/x86/kvm/../../../virt/kvm/kvm_main.c:3306 [inline]
>>> > kvm_destroy_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:735 [inline]
>>> > kvm_put_kvm+0x887/0xe00 arch/x86/kvm/../../../virt/kvm/kvm_main.c:748
>>> > kvm_vm_release+0x42/0x50 arch/x86/kvm/../../../virt/kvm/kvm_main.c:759
>>> > __fput+0x301/0x7e0 fs/file_table.c:210
>>> > ____fput+0x15/0x20 fs/file_table.c:244
>>> > task_work_run+0x19e/0x250 kernel/task_work.c:112
>>> > exit_task_work include/linux/task_work.h:21 [inline]
>>> > do_exit+0x99f/0x18b0 kernel/exit.c:865
>>> > do_group_exit+0x14b/0x3f0 kernel/exit.c:968
>>> > SYSC_exit_group kernel/exit.c:979 [inline]
>>> > SyS_exit_group+0x1d/0x20 kernel/exit.c:977
>>> > entry_SYSCALL_64_fastpath+0x1f/0xbe
>>> > RIP: 0033:0x443849
>>> > RSP: 002b:00007ffe58a95c78 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
>>> > RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000443849
>>> > RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
>>> > RBP: 0000000000000086 R08: 000000000000003c R09: 00000000000000e7
>>> > R10: ffffffffffffffc0 R11: 0000000000000246 R12: 0000000000000000
>>> > R13: 0000000000404800 R14: 0000000000404890 R15: 0000000000000000
>>> > Code: 0f 1f 80 00 00 00 00 55 48 89 e5 0f 20 d8 5d c3 0f 1f 80 00 00
>>> > 00 00 55 48 89 e5 0f 22 df 5d c3 0f 1f 80 00 00 00 00 55 48 89 e5 <0f>
>>> > 22 e7 5d c3 0f 1f 80 00 00 00 00 55 48 89 e5 44 0f 20 c0 5d
>>> > RIP: native_write_cr4+0x4/0x10 arch/x86/include/asm/special_insns.h:75
>>> > RSP: ffff88006671f598
>>> > ---[ end trace 6f9dbcc14aa47936 ]---