Re: [PATCH v3 2/5] KVM: MMU: check guest CR3 reserved bits based on its physical address width.

From: Jim Mattson
Date: Fri Sep 15 2017 - 19:19:59 EST


On Thu, Aug 24, 2017 at 5:27 AM, Yu Zhang <yu.c.zhang@xxxxxxxxxxxxxxx> wrote:
> Currently, KVM uses CR3_L_MODE_RESERVED_BITS to check the
> reserved bits in CR3. Yet the length of reserved bits in
> guest CR3 should be based on the physical address width
> exposed to the VM. This patch changes CR3 check logic to
> calculate the reserved bits at runtime.
>
> Signed-off-by: Yu Zhang <yu.c.zhang@xxxxxxxxxxxxxxx>
> ---
> arch/x86/include/asm/kvm_host.h | 1 -
> arch/x86/kvm/emulate.c | 14 ++++++++++++--
> arch/x86/kvm/mmu.h | 3 +++
> arch/x86/kvm/x86.c | 8 ++++----
> 4 files changed, 19 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 6db0ed9..e716228 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -79,7 +79,6 @@
> | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
> | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
>
> -#define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL
> #define CR3_PCID_INVD BIT_64(63)
> #define CR4_RESERVED_BITS \
> (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 319d91f..a89b595 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -28,6 +28,7 @@
>
> #include "x86.h"
> #include "tss.h"
> +#include "mmu.h"
>
> /*
> * Operand types
> @@ -4097,8 +4098,17 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
> u64 rsvd = 0;
>
> ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
> - if (efer & EFER_LMA)
> - rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
> + if (efer & EFER_LMA) {
> + u64 maxphyaddr;
> + u32 eax = 0x80000008;
> +
> + if (ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL,
> + NULL, false))

Passing NULL for the address of ecx looks problematic to me.

We have:

static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool
check_limit)
{
return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit);
}

And:

bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
u32 *ecx, u32 *edx, bool check_limit)
{
u32 function = *eax, index = *ecx;
struct kvm_cpuid_entry2 *best;
bool entry_found = true;
...

Doesn't this immediately try to dereference a NULL pointer? How much
testing have you done of this code?

> + maxphyaddr = eax & 0xff;
> + else
> + maxphyaddr = 36;
> + rsvd = rsvd_bits(maxphyaddr, 62);
> + }
>
> if (new_val & rsvd)
> return emulate_gp(ctxt, 0);
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 3ed6192..67e7ec2 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -48,6 +48,9 @@
>
> static inline u64 rsvd_bits(int s, int e)
> {
> + if (e < s)
> + return 0;
> +
> return ((1ULL << (e - s + 1)) - 1) << s;
> }
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index cc2c7e4..79f5889 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -813,10 +813,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
> return 0;
> }
>
> - if (is_long_mode(vcpu)) {
> - if (cr3 & CR3_L_MODE_RESERVED_BITS)
> - return 1;
> - } else if (is_pae(vcpu) && is_paging(vcpu) &&
> + if (is_long_mode(vcpu) &&
> + (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 62)))
> + return 1;
> + else if (is_pae(vcpu) && is_paging(vcpu) &&
> !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
> return 1;
>
> --
> 2.5.0
>