Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

From: Gabriel L. Somlo
Date: Thu Mar 16 2017 - 17:15:24 EST


On Thu, Mar 16, 2017 at 04:17:11PM -0400, Gabriel L. Somlo wrote:
> On Thu, Mar 16, 2017 at 09:27:56PM +0200, Michael S. Tsirkin wrote:
> > On Thu, Mar 16, 2017 at 03:24:41PM -0400, Gabriel L. Somlo wrote:
> > > On Thu, Mar 16, 2017 at 08:29:32PM +0200, Michael S. Tsirkin wrote:
> > > > Let's take a step back and try to figure out how is
> > > > mwait called. How about dumping code of VCPUs
> > > > around mwait? gdb disa command will do this.
> > >
> > > Started guest with '-s', tried to attach from gdb with
> > > "target remote localhost:1234", got
> > > "remote 'g' packet reply is too long: <lengthy string of numbers>"
> >
> > Try
> >
> > set arch x86-64:x86-64
>
> 'set architecture i386:x86-64:intel' is what worked for me;
>
> Been rooting around for a while, can't find mwait or monitor :(
>
> Guess I'll have to recompile KVM to actually issue an invalid opcode,
> so OS X will print a panic message with the exact address :)
>
> Stay tuned...

OK, so I found a few instances of monitor/mwait. The one closest to where
a random interrupt from gdb landed was this one:

...
0xffffff7f813ff379: mov 0x90(%r15),%rax
0xffffff7f813ff380: mov 0x18(%rax),%rsi
0xffffff7f813ff384: xor %ecx,%ecx
0xffffff7f813ff386: mov %rsi,%rax
0xffffff7f813ff389: xor %edx,%edx
0xffffff7f813ff38b: monitor %rax,%rcx,%rdx
0xffffff7f813ff38e: test %r14,%r14
0xffffff7f813ff391: je 0xffffff7f813ff3ad
0xffffff7f813ff393: movq $0x0,0x8(%r14)
0xffffff7f813ff39b: movl $0x0,(%r14)
0xffffff7f813ff3a2: test %ebx,%ebx
0xffffff7f813ff3a4: je 0xffffff7f813ff3b2
0xffffff7f813ff3a6: mfence
0xffffff7f813ff3a9: wbinvd
0xffffff7f813ff3ab: jmp 0xffffff7f813ff3b2
0xffffff7f813ff3ad: cmpl $0x0,(%rsi)
0xffffff7f813ff3b0: jne 0xffffff7f813ff3d6
0xffffff7f813ff3b2: mov %r12d,%eax
0xffffff7f813ff3b5: imul $0x148,%rax,%rax
0xffffff7f813ff3bc: lea 0x153bd(%rip),%rcx # 0xffffff7f81414780
0xffffff7f813ff3c3: mov (%rcx),%rcx
0xffffff7f813ff3c6: mov 0x20(%rcx),%rcx
0xffffff7f813ff3ca: mov 0xc(%rcx,%rax,1),%eax
0xffffff7f813ff3ce: mov $0x1,%ecx
0xffffff7f813ff3d3: mwait %rax,%rcx
=> 0xffffff7f813ff3d6: lfence
0xffffff7f813ff3d9: rdtsc
0xffffff7f813ff3db: lfence
0xffffff7f813ff3de: mov %rax,%rbx
0xffffff7f813ff3e1: mov %rdx,%r15
...

Also, there were a few more within the range occupied by
AppleIntelCPUPowerManagement.kext (which provides the "smart"
idle loop used by OS X):


...
0xffffff7f813f799a: mov 0x90(%r15),%rax
0xffffff7f813f79a1: mov 0x18(%rax),%r15
0xffffff7f813f79a5: xor %ecx,%ecx
0xffffff7f813f79a7: mov %r15,%rax
0xffffff7f813f79aa: xor %edx,%edx
0xffffff7f813f79ac: monitor %rax,%rcx,%rdx
0xffffff7f813f79af: mov %r12d,%r12d
0xffffff7f813f79b2: imul $0x148,%r12,%r13
0xffffff7f813f79b9: lea 0x1cdc0(%rip),%rax # 0xffffff7f81414780
0xffffff7f813f79c0: mov (%rax),%rax
0xffffff7f813f79c3: mov 0x20(%rax),%rcx
0xffffff7f813f79c7: testb $0x10,0x2(%rcx,%r13,1)
0xffffff7f813f79cd: je 0xffffff7f813f79d5
0xffffff7f813f79cf: callq *0x80(%rax)
0xffffff7f813f79d5: test %r14,%r14
0xffffff7f813f79d8: je 0xffffff7f813f79f4
0xffffff7f813f79da: movq $0x0,0x8(%r14)
0xffffff7f813f79e2: movl $0x0,(%r14)
0xffffff7f813f79e9: test %ebx,%ebx
0xffffff7f813f79eb: je 0xffffff7f813f79fa
0xffffff7f813f79ed: mfence
0xffffff7f813f79f0: wbinvd
0xffffff7f813f79f2: jmp 0xffffff7f813f79fa
0xffffff7f813f79f4: cmpl $0x0,(%r15)
0xffffff7f813f79f8: jne 0xffffff7f813f7a15
0xffffff7f813f79fa: lea 0x1cd7f(%rip),%rax # 0xffffff7f81414780
0xffffff7f813f7a01: mov (%rax),%rax
0xffffff7f813f7a04: mov 0x20(%rax),%rax
0xffffff7f813f7a08: mov 0xc(%rax,%r13,1),%eax
0xffffff7f813f7a0d: mov $0x1,%ecx
0xffffff7f813f7a12: mwait %rax,%rcx
0xffffff7f813f7a15: lfence
0xffffff7f813f7a18: rdtsc
0xffffff7f813f7a1a: lfence
0xffffff7f813f7a1d: mov %rax,%rbx
0xffffff7f813f7a20: mov %rdx,%r15
...

...
0xffffff7f813f89c9: xor %ecx,%ecx
0xffffff7f813f89cb: mov %r13,%rax
0xffffff7f813f89ce: xor %edx,%edx
0xffffff7f813f89d0: monitor %rax,%rcx,%rdx
0xffffff7f813f89d3: mov %r12d,%r15d
0xffffff7f813f89d6: imul $0x148,%r15,%r12
0xffffff7f813f89dd: lea 0x1bd9c(%rip),%rax # 0xffffff7f81414780
0xffffff7f813f89e4: mov (%rax),%rax
0xffffff7f813f89e7: mov 0x20(%rax),%rcx
0xffffff7f813f89eb: testb $0x10,0x2(%rcx,%r12,1)
0xffffff7f813f89f1: je 0xffffff7f813f89f9
0xffffff7f813f89f3: callq *0x80(%rax)
0xffffff7f813f89f9: test %r14,%r14
0xffffff7f813f89fc: je 0xffffff7f813f8a18
0xffffff7f813f89fe: movq $0x0,0x8(%r14)
0xffffff7f813f8a06: movl $0x0,(%r14)
0xffffff7f813f8a0d: test %ebx,%ebx
0xffffff7f813f8a0f: je 0xffffff7f813f8a1f
0xffffff7f813f8a11: mfence
0xffffff7f813f8a14: wbinvd
0xffffff7f813f8a16: jmp 0xffffff7f813f8a1f
0xffffff7f813f8a18: cmpl $0x0,0x0(%r13)
0xffffff7f813f8a1d: jne 0xffffff7f813f8a3a
0xffffff7f813f8a1f: lea 0x1bd5a(%rip),%rax # 0xffffff7f81414780
0xffffff7f813f8a26: mov (%rax),%rax
0xffffff7f813f8a29: mov 0x20(%rax),%rax
0xffffff7f813f8a2d: mov 0xc(%rax,%r12,1),%eax
0xffffff7f813f8a32: mov $0x1,%ecx
0xffffff7f813f8a37: mwait %rax,%rcx
0xffffff7f813f8a3a: lfence
0xffffff7f813f8a3d: rdtsc
0xffffff7f813f8a3f: lfence
0xffffff7f813f8a42: mov %rax,%rbx
0xffffff7f813f8a45: mov %rdx,%r12
0xffffff7f813f8a48: shl $0x20,%r12
...

...
0xffffff7f81401c10: mov %r13,%rax
0xffffff7f81401c13: xor %edx,%edx
0xffffff7f81401c15: monitor %rax,%rcx,%rdx
0xffffff7f81401c18: mov %r12d,%r15d
0xffffff7f81401c1b: imul $0x148,%r15,%r12
0xffffff7f81401c22: lea 0x12b57(%rip),%rax # 0xffffff7f81414780
0xffffff7f81401c29: mov (%rax),%rax
0xffffff7f81401c2c: mov 0x20(%rax),%rcx
0xffffff7f81401c30: testb $0x10,0x2(%rcx,%r12,1)
0xffffff7f81401c36: je 0xffffff7f81401c3e
0xffffff7f81401c38: callq *0x80(%rax)
0xffffff7f81401c3e: test %r14,%r14
0xffffff7f81401c41: je 0xffffff7f81401c5d
0xffffff7f81401c43: movq $0x0,0x8(%r14)
0xffffff7f81401c4b: movl $0x0,(%r14)
0xffffff7f81401c52: test %ebx,%ebx
0xffffff7f81401c54: je 0xffffff7f81401c64
0xffffff7f81401c56: mfence
0xffffff7f81401c59: wbinvd
0xffffff7f81401c5b: jmp 0xffffff7f81401c64
0xffffff7f81401c5d: cmpl $0x0,0x0(%r13)
0xffffff7f81401c62: jne 0xffffff7f81401c7f
0xffffff7f81401c64: lea 0x12b15(%rip),%rax # 0xffffff7f81414780
0xffffff7f81401c6b: mov (%rax),%rax
0xffffff7f81401c6e: mov 0x20(%rax),%rax
0xffffff7f81401c72: mov 0xc(%rax,%r12,1),%eax
0xffffff7f81401c77: mov $0x1,%ecx
0xffffff7f81401c7c: mwait %rax,%rcx
0xffffff7f81401c7f: lfence
0xffffff7f81401c82: rdtsc
0xffffff7f81401c84: lfence
0xffffff7f81401c87: mov %rax,%rbx
0xffffff7f81401c8a: mov %rdx,%r12
0xffffff7f81401c8d: shl $0x20,%r12
0xffffff7f81401c91: lea 0xaf1c(%rip),%rax # 0xffffff7f8140cbb4
0xffffff7f81401c98: testb $0x1,(%rax)
...

If that's not enough context, I can email you the whole 'script'
output I collected...

HTH,
--Gabriel