Re: [PATCH] KVM: X86: Fix the decoding of segment overrides in 64bit mode

From: Wanpeng Li
Date: Mon Mar 26 2018 - 08:25:27 EST


2018-03-23 23:04 GMT+08:00 Paolo Bonzini <pbonzini@xxxxxxxxxx>:
> On 23/03/2018 15:27, Wanpeng Li wrote:
>> 2018-03-22 21:53 GMT+08:00 Andrew Cooper <andrew.cooper3@xxxxxxxxxx>:
>>> On 22/03/18 13:39, Wanpeng Li wrote:
>>>> 2018-03-22 20:38 GMT+08:00 Paolo Bonzini <pbonzini@xxxxxxxxxx>:
>>>>> On 22/03/2018 12:04, Andrew Cooper wrote:
>>>>>> We've got a Force Emulation Prefix (ud2a; .ascii "xen") for doing
>>>>>> magic. Originally, this was used for PV guests to explicitly request an
>>>>>> emulated CPUID, but I extended it to HVM guests for "emulate the next
>>>>>> instruction", after we had some guest user => guest kernel privilege
>>>>>> escalations because of incorrect emulation.
>>>>> Wanpeng, why don't you add it behind a new kvm module parameter? :)
>>>> Great point! I will have a try. Thanks Paolo and Andrew. :)
>>>
>>> Using the force emulation prefix requires intercepting #UD, which is in
>>> general a BadThing(tm) for security. Therefore, we have a build time
>>
>> Yeah. However, KVM already intercepts and emulates #UD by default; should we
>> add a new kvm module parameter to enable it, disabled by default?
>
> No, the module parameter should only be about the force-emulation prefix.

How about something like this? (EmulateOnUD is added to cpuid so that the
testcase below can exercise it.)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index dd88158..80da5c6 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4772,7 +4772,7 @@ static const struct opcode twobyte_table[256] = {
X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
/* 0xA0 - 0xA7 */
I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
- II(ImplicitOps, em_cpuid, cpuid),
+ II(EmulateOnUD | ImplicitOps, em_cpuid, cpuid),
F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9bc05f5..1825b45 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -108,6 +108,9 @@ module_param_named(enable_shadow_vmcs,
enable_shadow_vmcs, bool, S_IRUGO);
static bool __read_mostly nested = 0;
module_param(nested, bool, S_IRUGO);

+static bool __read_mostly fep = 0;
+module_param(fep, bool, S_IRUGO);
+
static u64 __read_mostly host_xss;

static bool __read_mostly enable_pml = 1;
@@ -6215,6 +6218,27 @@ static int handle_machine_check(struct kvm_vcpu *vcpu)
return 1;
}

+static int handle_ud(struct kvm_vcpu *vcpu)
+{
+ enum emulation_result er;
+
+ if (fep) { /* force-emulation prefix enabled via the module parameter */
+ char sig[5]; /* ud2; .ascii "kvm" -- only valid if the guest read succeeds */
+ struct x86_exception e;
+
+ if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt,
+ kvm_get_linear_rip(vcpu), sig, sizeof(sig), &e) == 0 &&
+ memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0)
+ kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
+ }
+ er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
+ if (er == EMULATE_USER_EXIT)
+ return 0;
+ if (er != EMULATE_DONE)
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+}
+
static int handle_exception(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6233,14 +6257,8 @@ static int handle_exception(struct kvm_vcpu *vcpu)
if (is_nmi(intr_info))
return 1; /* already handled by vmx_vcpu_run() */

- if (is_invalid_opcode(intr_info)) {
- er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
- if (er == EMULATE_USER_EXIT)
- return 0;
- if (er != EMULATE_DONE)
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 1;
- }
+ if (is_invalid_opcode(intr_info))
+ return handle_ud(vcpu);

error_code = 0;
if (intr_info & INTR_INFO_DELIVER_CODE_MASK)


The testcase:

#include <stdio.h>
#include <string.h>

#define HYPERVISOR_INFO 0x40000000

/*
 * Execute CPUID for leaf `idx` and store the resulting EAX/EBX/ECX/EDX
 * through the four pointer arguments.
 *
 * When `idx` is non-zero the instruction is preceded by the byte sequence
 * `ud2a; .ascii "kvm"`; when it is zero the `jz` skips that sequence and a
 * plain CPUID is executed.
 *
 * The leaf must be loaded into EAX, so the input is tied to operand 1
 * ("=a"); tying it to operand 0 would place it in EBX instead.  `test`
 * modifies the flags, hence the "cc" clobber.
 */
#define CPUID(idx, eax, ebx, ecx, edx)\
__asm__ __volatile__ (\
"test %1,%1;jz 1f; ud2a; .ascii \"kvm\"; 1: cpuid" \
:"=b" (*(ebx)), "=a" (*(eax)),"=c" (*(ecx)), "=d" (*(edx))\
:"1"((unsigned int)(idx)) : "cc");

/*
 * Issue CPUID for the HYPERVISOR_INFO leaf via the CPUID macro and print
 * "kvm guest" when the 12-byte signature assembled from EBX, ECX, EDX
 * matches KVM's, otherwise "bare hardware".
 *
 * Returns 0 in both cases.
 */
int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	char string[13];

	CPUID(HYPERVISOR_INFO, &eax, &ebx, &ecx, &edx);

	/*
	 * Assemble the signature with memcpy rather than storing through a
	 * cast (unsigned int *): the char buffer has no alignment guarantee
	 * and the cast store violates the aliasing rules.
	 */
	memcpy(string + 0, &ebx, sizeof(ebx));
	memcpy(string + 4, &ecx, sizeof(ecx));
	memcpy(string + 8, &edx, sizeof(edx));
	string[12] = 0;

	if (strncmp(string, "KVMKVMKVM\0\0\0", 12) == 0) {
		printf("kvm guest\n");
	} else {
		printf("bare hardware\n");
	}

	return 0;
}

Regards,
Wanpeng Li