Re: [PATCH] KVM: x86: fix bogus warning about reserved bits

From: Paolo Bonzini
Date: Tue Sep 22 2015 - 17:04:50 EST




On 22/09/2015 19:56, Borislav Petkov wrote:
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 69088a1ba509..3ce2b74c75dc 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -3322,7 +3322,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
> break;
>
> reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte,
> - leaf);
> + iterator.level);
> }
>
> walk_shadow_page_lockless_end(vcpu);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index c0b9ff3e1aec..a44f8fed9be1 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7063,13 +7063,16 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
> unsigned int id)
> {
> struct kvm_vcpu *vcpu;
> + int idx;
>
> if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
> printk_once(KERN_WARNING
> "kvm: SMP vm created on host with unstable TSC; "
> "guest TSC will not be reliable\n");
>
> + idx = srcu_read_lock(&kvm->srcu);
> vcpu = kvm_x86_ops->vcpu_create(kvm, id);
> + srcu_read_unlock(&kvm->srcu, idx);
>
> return vcpu;
> }

Yup, looks good.

Let's add more debugging output:

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3ce2b74c75dc..bf1122e9c7bf 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3268,23 +3268,28 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception);
}

-static bool
-__is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level)
+static u64
+rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level)
{
int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f;
+ u64 mask = rsvd_check->rsvd_bits_mask[bit7][level-1];
+
+ if (unlikely(pte & mask))
+ return mask;
+ if (unlikely(rsvd_check->bad_mt_xwr & (1ull << low6)))
+ return rsvd_check->bad_mt_xwr;

- return (pte & rsvd_check->rsvd_bits_mask[bit7][level-1]) |
- ((rsvd_check->bad_mt_xwr & (1ull << low6)) != 0);
+ return 0;
}

static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
{
- return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level);
+ return rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level) != 0;
}

-static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
+static u64 shadow_rsvd_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
{
- return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level);
+ return rsvd_bits_set(&mmu->shadow_zero_check, spte, level);
}

static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
@@ -3302,6 +3307,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
struct kvm_shadow_walk_iterator iterator;
u64 sptes[PT64_ROOT_LEVEL], spte = 0ull;
int root, leaf;
+ u64 result;
bool reserved = false;

if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
@@ -3321,15 +3327,20 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
if (!is_shadow_present_pte(spte))
break;

- reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte,
- iterator.level);
+ result = shadow_rsvd_bits_set(&vcpu->arch.mmu, spte,
+ iterator.level);
+ if (unlikely(result)) {
+ pr_err("%s: detect reserved bits on spte, addr 0x%llx "
+ "(level %d, 0x%llx)\n",
+ __func__, addr, iterator.level, result);
+ reserved = true;
+ }
}

walk_shadow_page_lockless_end(vcpu);

if (reserved) {
- pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
- __func__, addr);
+ pr_err("dump hierarchy:\n");
while (root > leaf) {
pr_err("------ spte 0x%llx level %d.\n",
sptes[root - 1], root);

> [ 49.456533] walk_shadow_page_get_mmio_spte: detect reserved bits on spte, addr 0xb8000, dump hierarchy:
> [ 49.465945] ------ spte 0x416ed9027 level 4.
> [ 49.470221] ------ spte 0x416888027 level 3.
> [ 49.474494] ------ spte 0x41694f027 level 2.
> [ 49.474495] ------ spte 0xffff0000000b8f67 level 1.

So the output is the same as before.

Just to be safe, can you try using "-cpu host" on the QEMU command
line and see if it changes anything? This would catch things such
as a guest CPU model that reports an Intel CPUID on an AMD host.

Paolo

> [ 49.474496] ------------[ cut here ]------------
> [ 49.474515] WARNING: CPU: 4 PID: 3540 at arch/x86/kvm/mmu.c:3385 handle_mmio_page_fault.part.57+0x1a/0x20 [kvm]()
> [ 49.474555] Modules linked in: tun sha256_ssse3 sha256_generic drbg binfmt_misc ipv6 vfat fat fuse dm_crypt dm_mod kvm_amd kvm crc32_pclmul aesni_intel aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd amd64_edac_mod fam15h_power k10temp edac_core amdkfd amd_iommu_v2 radeon acpi_cpufreq
> [ 49.474560] CPU: 4 PID: 3540 Comm: qemu-system-x86 Not tainted 4.3.0-rc2+ #2
> [ 49.474562] Hardware name: To be filled by O.E.M. To be filled by O.E.M./M5A97 EVO R2.0, BIOS 1503 01/16/2013
> [ 49.474569] ffffffffa032f8b2 ffff880416a73b78 ffffffff812c758a 0000000000000000
> [ 49.474574] ffff880416a73bb0 ffffffff810534c1 ffff8804171b0000 000000000000000f
> [ 49.474578] 00000000000b8000 0000000000000000 00000000ffffffff ffff880416a73bc0
> [ 49.474579] Call Trace:
> [ 49.474586] [<ffffffff812c758a>] dump_stack+0x4e/0x84
> [ 49.474589] [<ffffffff810534c1>] warn_slowpath_common+0x91/0xd0
> [ 49.474592] [<ffffffff810535ba>] warn_slowpath_null+0x1a/0x20
> [ 49.474603] [<ffffffffa0301a5a>] handle_mmio_page_fault.part.57+0x1a/0x20 [kvm]
> [ 49.474615] [<ffffffffa0309350>] tdp_page_fault+0x2a0/0x2b0 [kvm]
> [ 49.474620] [<ffffffff810a282d>] ? __lock_acquire+0x57d/0x17a0
> [ 49.474633] [<ffffffffa03035a5>] kvm_mmu_page_fault+0x35/0x240 [kvm]
> [ 49.474637] [<ffffffffa03886b8>] pf_interception+0x108/0x1d0 [kvm_amd]
> [ 49.474642] [<ffffffffa038ad10>] handle_exit+0x150/0xa40 [kvm_amd]
> [ 49.474662] [<ffffffffa02fa398>] ? kvm_arch_vcpu_ioctl_run+0x4c8/0x16f0 [kvm]
> [ 49.474674] [<ffffffffa02fa403>] kvm_arch_vcpu_ioctl_run+0x533/0x16f0 [kvm]
> [ 49.474686] [<ffffffffa02fa398>] ? kvm_arch_vcpu_ioctl_run+0x4c8/0x16f0 [kvm]
> [ 49.474690] [<ffffffff816bd852>] ? mutex_lock_killable_nested+0x312/0x480
> [ 49.474700] [<ffffffffa02e1979>] ? kvm_vcpu_ioctl+0x79/0x6f0 [kvm]
> [ 49.474705] [<ffffffff8107e133>] ? preempt_count_sub+0xb3/0x110
> [ 49.474715] [<ffffffffa02e1c3f>] kvm_vcpu_ioctl+0x33f/0x6f0 [kvm]
> [ 49.474719] [<ffffffff811939d7>] do_vfs_ioctl+0x2d7/0x530
> [ 49.474722] [<ffffffff8119f889>] ? __fget_light+0x29/0x90
> [ 49.474724] [<ffffffff81193c7c>] SyS_ioctl+0x4c/0x90
> [ 49.474729] [<ffffffff816c1a9b>] entry_SYSCALL_64_fastpath+0x16/0x73
> [ 49.474732] ---[ end trace 0e0be3552b84977c ]---
>
>
> Thanks.
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/