[RFC 5/9] Modify the page fault path to meet the direct build EPT requirement

From: Yulei Zhang
Date: Wed Aug 05 2020 - 15:59:14 EST


From: Yulei Zhang <yuleixzhang@xxxxxxxxxxx>

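Refine the fast page fault code so that it can be used in either
normal EPT mode or direct build EPT mode. The fast path now derives
the gfn from the faulting GPA and the PTE level from the shadow walk
iterator instead of reading them from struct kvm_mmu_page, and
direct_page_fault() returns RET_PF_EMULATE in direct build EPT mode
when the fast path does not handle the fault.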

Signed-off-by: Yulei Zhang <yuleixzhang@xxxxxxxxxxx>
---
arch/x86/kvm/mmu/mmu.c | 30 +++++++++++++++++++++---------
1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index df703deac928..b59a4502d1f6 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3471,12 +3471,13 @@ static bool page_fault_can_be_fast(u32 error_code)
* someone else modified the SPTE from its original value.
*/
static bool
-fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, gpa_t gpa,
u64 *sptep, u64 old_spte, u64 new_spte)
{
gfn_t gfn;

- WARN_ON(!sp->role.direct);
+ WARN_ON(!vcpu->arch.direct_build_tdp &&
+ (!page_header(__pa(sptep))->role.direct));

/*
* Theoretically we could also set dirty bit (and flush TLB) here in
@@ -3498,7 +3499,7 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
* The gfn of direct spte is stable since it is
* calculated by sp->gfn.
*/
- gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
+ gfn = gpa >> PAGE_SHIFT;
kvm_vcpu_mark_page_dirty(vcpu, gfn);
}

@@ -3526,10 +3527,10 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
u32 error_code)
{
struct kvm_shadow_walk_iterator iterator;
- struct kvm_mmu_page *sp;
bool fault_handled = false;
u64 spte = 0ull;
uint retry_count = 0;
+ int pte_level = 0;

if (!page_fault_can_be_fast(error_code))
return false;
@@ -3539,12 +3540,20 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
do {
u64 new_spte;

- for_each_shadow_entry_lockless(vcpu, cr2_or_gpa, iterator, spte)
+ for_each_shadow_entry_lockless(vcpu, cr2_or_gpa, iterator, spte) {
if (!is_shadow_present_pte(spte))
break;
+ }
+
+ if (iterator.level < PT_PAGE_TABLE_LEVEL)
+ pte_level = PT_PAGE_TABLE_LEVEL;
+ else
+ pte_level = iterator.level;
+
+ WARN_ON(!vcpu->arch.direct_build_tdp &&
+ (pte_level != page_header(__pa(iterator.sptep))->role.level));

- sp = page_header(__pa(iterator.sptep));
- if (!is_last_spte(spte, sp->role.level))
+ if (!is_last_spte(spte, pte_level))
break;

/*
@@ -3587,7 +3596,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
*
* See the comments in kvm_arch_commit_memory_region().
*/
- if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+ if (pte_level > PT_PAGE_TABLE_LEVEL)
break;
}

@@ -3601,7 +3610,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
* since the gfn is not stable for indirect shadow page. See
* Documentation/virt/kvm/locking.txt to get more detail.
*/
- fault_handled = fast_pf_fix_direct_spte(vcpu, sp,
+ fault_handled = fast_pf_fix_direct_spte(vcpu, cr2_or_gpa,
iterator.sptep, spte,
new_spte);
if (fault_handled)
@@ -4153,6 +4162,9 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
if (fast_page_fault(vcpu, gpa, error_code))
return RET_PF_RETRY;

+ if (vcpu->arch.direct_build_tdp)
+ return RET_PF_EMULATE;
+
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();

--
2.17.1