[PATCH 13/22] KVM: x86/mmu: add support for nested MBEC

From: Paolo Bonzini

Date: Fri Mar 20 2026 - 20:12:42 EST


Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
arch/x86/kvm/mmu/paging_tmpl.h | 29 ++++++++++++++++++++---------
1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index c657ea90bb33..d50085308506 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -124,12 +124,17 @@ static inline void FNAME(protect_clean_gpte)(struct kvm_mmu *mmu, unsigned *acce
*access &= mask;
}

-static inline int FNAME(is_present_gpte)(unsigned long pte)
+static inline int FNAME(is_present_gpte)(struct kvm_mmu *mmu,
+ unsigned long pte)
{
#if PTTYPE != PTTYPE_EPT
return pte & PT_PRESENT_MASK;
#else
- return pte & 7;
+ /*
+ * For EPT, an entry is present if any of bits 2:0 are set.
+ * With mode-based execute control, bit 10 also indicates presence.
+ */
+ return pte & (7 | (mmu_has_mbec(mmu) ? VMX_EPT_USER_EXECUTABLE_MASK : 0));
#endif
}

@@ -152,7 +157,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp, u64 *spte,
u64 gpte)
{
- if (!FNAME(is_present_gpte)(gpte))
+ if (!FNAME(is_present_gpte)(vcpu->arch.mmu, gpte))
goto no_present;

/* Prefetch only accessed entries (unless A/D bits are disabled). */
@@ -173,14 +178,17 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
static inline unsigned FNAME(gpte_access)(u64 gpte)
{
unsigned access;
-#if PTTYPE == PTTYPE_EPT
/*
- * For now nested MBEC is not supported and permission_fault() ignores
- * ACC_USER_EXEC_MASK.
+ * Set bits in ACC_*_MASK even if they might not be used in the
+ * actual checks. For example, if EFER.NX is clear permission_fault()
+ * will ignore ACC_EXEC_MASK, and if MBEC is disabled it will
+ * ignore ACC_USER_EXEC_MASK.
*/
+#if PTTYPE == PTTYPE_EPT
access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
- ((gpte & VMX_EPT_READABLE_MASK) ? ACC_READ_MASK : 0);
+ ((gpte & VMX_EPT_READABLE_MASK) ? ACC_READ_MASK : 0) |
+ ((gpte & VMX_EPT_USER_EXECUTABLE_MASK) ? ACC_USER_EXEC_MASK : 0);
#else
/*
* P is set here, so the page is always readable and W/U/!NX represent
@@ -335,7 +343,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
if (walker->level == PT32E_ROOT_LEVEL) {
pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
trace_kvm_mmu_paging_element(pte, walker->level);
- if (!FNAME(is_present_gpte)(pte))
+ if (!FNAME(is_present_gpte)(mmu, pte))
goto error;
--walker->level;
}
@@ -417,7 +425,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
*/
pte_access = pt_access & (pte ^ walk_nx_mask);

- if (unlikely(!FNAME(is_present_gpte)(pte)))
+ if (unlikely(!FNAME(is_present_gpte)(mmu, pte)))
goto error;

if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte, walker->level))) {
@@ -514,6 +522,9 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
* ACC_*_MASK flags!
*/
walker->fault.exit_qualification |= EPT_VIOLATION_RWX_TO_PROT(pte_access);
+ if (mmu_has_mbec(mmu))
+ walker->fault.exit_qualification |=
+ EPT_VIOLATION_USER_EXEC_TO_PROT(pte_access);
}
#endif
walker->fault.address = addr;
--
2.52.0