[PATCH v2 18/20] kvm: x86/mmu: Support MMIO in the TDP MMU
From: Ben Gardon
Date: Wed Oct 14 2020 - 14:27:54 EST
In order to support MMIO, KVM must be able to walk the TDP paging
structures to find mappings for a given GFN. Support this walk for
the TDP MMU.
Tested by running kvm-unit-tests and KVM selftests on an Intel Haswell
machine. This series introduced no new failures.
This series can be viewed in Gerrit at:
https://linux-review.googlesource.com/c/virt/kvm/kvm/+/2538
v2: Thanks to Dan Carpenter and the kernel test robot for finding that
'root' was used uninitialized in get_mmio_spte().
Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
Reported-by: kernel test robot <lkp@xxxxxxxxx>
Reported-by: Dan Carpenter <dan.carpenter@xxxxxxxxxx>
---
arch/x86/kvm/mmu/mmu.c | 70 ++++++++++++++++++++++++++------------
arch/x86/kvm/mmu/tdp_mmu.c | 18 ++++++++++
arch/x86/kvm/mmu/tdp_mmu.h | 2 ++
3 files changed, 69 insertions(+), 21 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 58d2412817c87..2e8bf8d19c35a 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3853,54 +3853,82 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
return vcpu_match_mmio_gva(vcpu, addr);
}
-/* return true if reserved bit is detected on spte. */
-static bool
-walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
+/*
+ * Return the level of the lowest level SPTE added to sptes.
+ * That SPTE may be non-present.
+ */
+static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
{
struct kvm_shadow_walk_iterator iterator;
- u64 sptes[PT64_ROOT_MAX_LEVEL], spte = 0ull;
- struct rsvd_bits_validate *rsvd_check;
- int root, leaf;
- bool reserved = false;
+ int leaf = vcpu->arch.mmu->root_level;
+ u64 spte;
- rsvd_check = &vcpu->arch.mmu->shadow_zero_check;
walk_shadow_page_lockless_begin(vcpu);
- for (shadow_walk_init(&iterator, vcpu, addr),
- leaf = root = iterator.level;
+ for (shadow_walk_init(&iterator, vcpu, addr);
shadow_walk_okay(&iterator);
__shadow_walk_next(&iterator, spte)) {
+ leaf = iterator.level;
spte = mmu_spte_get_lockless(iterator.sptep);
sptes[leaf - 1] = spte;
- leaf--;
if (!is_shadow_present_pte(spte))
break;
+ }
+
+ walk_shadow_page_lockless_end(vcpu);
+
+ return leaf;
+}
+
+/* Return true if a reserved bit is detected on a walked SPTE. */
+static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
+{
+ u64 sptes[PT64_ROOT_MAX_LEVEL];
+ struct rsvd_bits_validate *rsvd_check;
+ int root = vcpu->arch.mmu->root_level;
+ int leaf;
+ int level;
+ bool reserved = false;
+
+ if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) {
+ *sptep = 0ull;
+ return reserved;
+ }
+
+ if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
+ leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes);
+ else
+ leaf = get_walk(vcpu, addr, sptes);
+
+ rsvd_check = &vcpu->arch.mmu->shadow_zero_check;
+
+ for (level = root; level >= leaf; level--) {
+ if (!is_shadow_present_pte(sptes[level - 1]))
+ break;
/*
* Use a bitwise-OR instead of a logical-OR to aggregate the
* reserved bit and EPT's invalid memtype/XWR checks to avoid
* adding a Jcc in the loop.
*/
- reserved |= __is_bad_mt_xwr(rsvd_check, spte) |
- __is_rsvd_bits_set(rsvd_check, spte, iterator.level);
+ reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level - 1]) |
+ __is_rsvd_bits_set(rsvd_check, sptes[level - 1],
+ level);
}
- walk_shadow_page_lockless_end(vcpu);
-
if (reserved) {
pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
__func__, addr);
- while (root > leaf) {
+ for (level = root; level >= leaf; level--)
pr_err("------ spte 0x%llx level %d.\n",
- sptes[root - 1], root);
- root--;
- }
+ sptes[level - 1], level);
}
- *sptep = spte;
+ *sptep = sptes[leaf - 1];
+
return reserved;
}
@@ -3912,7 +3940,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
if (mmio_info_in_cache(vcpu, addr, direct))
return RET_PF_EMULATE;
- reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte);
+ reserved = get_mmio_spte(vcpu, addr, &spte);
if (WARN_ON(reserved))
return -EINVAL;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index c471f2e977d11..b1515b89606e1 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1128,3 +1128,21 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
return spte_set;
}
+/*
+ * Return the level of the lowest level SPTE added to sptes.
+ * That SPTE may be non-present.
+ */
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
+{
+ struct tdp_iter iter;
+ struct kvm_mmu *mmu = vcpu->arch.mmu;
+ int leaf = vcpu->arch.mmu->shadow_root_level;
+ gfn_t gfn = addr >> PAGE_SHIFT;
+
+ tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
+ leaf = iter.level;
+ sptes[leaf - 1] = iter.old_spte;
+ }
+
+ return leaf;
+}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index b66283db43221..f890048dfcba5 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -43,4 +43,6 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn);
+
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes);
#endif /* __KVM_X86_MMU_TDP_MMU_H */
--
2.28.0.1011.ga647a8990f-goog