Re: [PATCH 15/28] KVM: VMX: enable use of MBEC
From: mlevitsk
Date: Tue Jun 02 2026 - 10:37:44 EST
On Tue, 2026-05-05 at 21:52 +0200, Paolo Bonzini wrote:
> If available, set SECONDARY_EXEC_MODE_BASED_EPT_EXEC in the secondary
> execution controls.
>
> The changes are limited because the MMU is designed to create the same
> sPTEs independent of the MBEC setting. On hosts lacking support for
> MBEC, and in nested guests which cannot enable it as of this commit,
> the XU bit is ignored by the processor.
>
> Note that, as of this patch, MBEC is not available to L1 hypervisors
> for their guests.
>
> Tested-by: David Riley <d.riley@xxxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
> arch/x86/include/asm/vmx.h | 3 +++
> arch/x86/kvm/mmu.h | 5 +++++
> arch/x86/kvm/vmx/capabilities.h | 7 +++++++
> arch/x86/kvm/vmx/common.h | 10 +++++-----
> arch/x86/kvm/vmx/main.c | 9 +++++++++
> arch/x86/kvm/vmx/nested.c | 1 +
> arch/x86/kvm/vmx/vmx.c | 14 ++++++++++++++
> arch/x86/kvm/vmx/vmx.h | 1 +
> arch/x86/kvm/vmx/x86_ops.h | 1 +
> 9 files changed, 46 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 2b30b921b375..54aa5be50df9 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -619,9 +619,12 @@ enum vm_entry_failure_code {
> #define EPT_VIOLATION_GVA_TRANSLATED BIT(8)
>
> #define EPT_VIOLATION_RWX_TO_PROT(__epte) (((__epte) & VMX_EPT_RWX_MASK) << 3)
> +#define EPT_VIOLATION_USER_EXEC_TO_PROT(__epte) (((__epte) & VMX_EPT_USER_EXECUTABLE_MASK) >> 4)
>
> static_assert(EPT_VIOLATION_RWX_TO_PROT(VMX_EPT_RWX_MASK) ==
> (EPT_VIOLATION_PROT_READ | EPT_VIOLATION_PROT_WRITE | EPT_VIOLATION_PROT_EXEC));
> +static_assert(EPT_VIOLATION_USER_EXEC_TO_PROT(VMX_EPT_USER_EXECUTABLE_MASK) ==
> + (EPT_VIOLATION_PROT_USER_EXEC));
>
> /*
> * Exit Qualifications for NOTIFY VM EXIT
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index d8c13e43c2d7..23bc5b18efd0 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -83,6 +83,11 @@ static inline gfn_t kvm_mmu_max_gfn(void)
> return (1ULL << (max_gpa_bits - PAGE_SHIFT)) - 1;
> }
>
> +static inline bool mmu_has_mbec(struct kvm_mmu *mmu)
> +{
> + return mmu->root_role.cr4_smep;
> +}
Hi!
Sorry to complain again :), but in my opinion the name
'mmu_has_mbec' is also a bit confusing, especially since it will be true for NPT as well.
A comment saying that this function should only be called on an MMU that uses EPT would help.
If we go with my suggestion of a vendor-neutral name, though, I think we can eliminate this function
and just open-code the check:
if (mmu->root_role.has_user_exec_permission)
....
What do you think?
> +
> u8 kvm_mmu_get_max_tdp_level(void);
>
> void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
> diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
> index 7e59eb0f41bb..07469d1cfe74 100644
> --- a/arch/x86/kvm/vmx/capabilities.h
> +++ b/arch/x86/kvm/vmx/capabilities.h
> @@ -15,6 +15,7 @@ extern bool __read_mostly enable_ept;
> extern bool __read_mostly enable_unrestricted_guest;
> extern bool __read_mostly enable_ept_ad_bits;
> extern bool __read_mostly enable_pml;
> +extern bool __read_mostly enable_mbec;
> extern int __read_mostly pt_mode;
>
> #define PT_MODE_SYSTEM 0
> @@ -406,4 +407,10 @@ static inline bool cpu_has_notify_vmexit(void)
> SECONDARY_EXEC_NOTIFY_VM_EXITING;
> }
>
> +static inline bool cpu_has_ept_mbec(void)
> +{
> + return vmcs_config.cpu_based_2nd_exec_ctrl &
> + SECONDARY_EXEC_MODE_BASED_EPT_EXEC;
> +}
> +
> #endif /* __KVM_X86_VMX_CAPS_H */
> diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h
> index 1afbf272efae..40fa72f31fc7 100644
> --- a/arch/x86/kvm/vmx/common.h
> +++ b/arch/x86/kvm/vmx/common.h
> @@ -91,15 +91,15 @@ static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa,
> /* Is it a fetch fault? */
> error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
> ? PFERR_FETCH_MASK : 0;
> - /*
> - * ept page table entry is present?
> - * note: unconditionally clear USER_EXEC until mode-based
> - * execute control is implemented
> - */
> + /* ept page table entry is present? */
> error_code |= (exit_qualification &
> (EPT_VIOLATION_PROT_MASK & ~EPT_VIOLATION_PROT_USER_EXEC))
> ? PFERR_PRESENT_MASK : 0;
>
> + if (mmu_has_mbec(vcpu->arch.mmu))
> + error_code |= (exit_qualification & EPT_VIOLATION_PROT_USER_EXEC)
> + ? PFERR_PRESENT_MASK : 0;
> +
> if (exit_qualification & EPT_VIOLATION_GVA_IS_VALID)
> error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) ?
> PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
> diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> index dbebddf648be..83d9921277ea 100644
> --- a/arch/x86/kvm/vmx/main.c
> +++ b/arch/x86/kvm/vmx/main.c
> @@ -755,6 +755,14 @@ static int vt_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
> return vmx_set_identity_map_addr(kvm, ident_addr);
> }
>
> +static bool vt_tdp_has_smep(struct kvm *kvm)
> +{
> + if (is_td(kvm))
> + return false;
> +
> + return vmx_tdp_has_smep(kvm);
> +}
> +
> static u64 vt_get_l2_tsc_offset(struct kvm_vcpu *vcpu)
> {
> /* TDX doesn't support L2 guest at the moment. */
> @@ -966,6 +974,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
> .set_tss_addr = vt_op(set_tss_addr),
> .set_identity_map_addr = vt_op(set_identity_map_addr),
> .get_mt_mask = vmx_get_mt_mask,
> + .tdp_has_smep = vt_op(tdp_has_smep),
>
> .get_exit_info = vt_op(get_exit_info),
> .get_entry_info = vt_op(get_entry_info),
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index cd1924c6e075..299d4ca60fb3 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -2440,6 +2440,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
> SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
> SECONDARY_EXEC_APIC_REGISTER_VIRT |
> SECONDARY_EXEC_ENABLE_VMFUNC |
> + SECONDARY_EXEC_MODE_BASED_EPT_EXEC |
> SECONDARY_EXEC_DESC);
>
> if (nested_cpu_has(vmcs12,
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 337bbfecc021..f1d616f928a1 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -114,6 +114,9 @@ module_param(emulate_invalid_guest_state, bool, 0444);
> static bool __read_mostly fasteoi = 1;
> module_param(fasteoi, bool, 0444);
>
> +bool __read_mostly enable_mbec = 1;
> +module_param_named(mbec, enable_mbec, bool, 0444);
> +
> module_param(enable_apicv, bool, 0444);
> module_param(enable_ipiv, bool, 0444);
>
> @@ -2773,6 +2776,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
> return -EIO;
>
> vmx_cap->ept = 0;
> + _cpu_based_2nd_exec_control &= ~SECONDARY_EXEC_MODE_BASED_EPT_EXEC;
> _cpu_based_2nd_exec_control &= ~SECONDARY_EXEC_EPT_VIOLATION_VE;
> }
> if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) &&
> @@ -4735,6 +4739,9 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
> */
> exec_control &= ~SECONDARY_EXEC_ENABLE_VMFUNC;
>
> + if (!enable_mbec)
> + exec_control &= ~SECONDARY_EXEC_MODE_BASED_EPT_EXEC;
> +
> /* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP,
> * in vmx_set_cr4. */
> exec_control &= ~SECONDARY_EXEC_DESC;
> @@ -7823,6 +7830,11 @@ u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
> return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT);
> }
>
> +bool vmx_tdp_has_smep(struct kvm *kvm)
> +{
> + return enable_mbec;
> +}
> +
> static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx, u32 new_ctl)
> {
> /*
> @@ -8622,6 +8634,8 @@ __init int vmx_hardware_setup(void)
>
> if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
> enable_ept_ad_bits = 0;
> + if (!cpu_has_ept_mbec() || !enable_ept)
> + enable_mbec = 0;
>
> if (!cpu_has_vmx_unrestricted_guest() || !enable_ept)
> enable_unrestricted_guest = 0;
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index db84e8001da5..0a4e263c4095 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -567,6 +567,7 @@ static inline u8 vmx_get_rvi(void)
> SECONDARY_EXEC_ENABLE_VMFUNC | \
> SECONDARY_EXEC_BUS_LOCK_DETECTION | \
> SECONDARY_EXEC_NOTIFY_VM_EXITING | \
> + SECONDARY_EXEC_MODE_BASED_EPT_EXEC | \
> SECONDARY_EXEC_ENCLS_EXITING | \
> SECONDARY_EXEC_EPT_VIOLATION_VE)
>
> diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
> index d09abeac2b56..69cf276be88e 100644
> --- a/arch/x86/kvm/vmx/x86_ops.h
> +++ b/arch/x86/kvm/vmx/x86_ops.h
> @@ -103,6 +103,7 @@ void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
> int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
> int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr);
> u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
> +bool vmx_tdp_has_smep(struct kvm *kvm);
>
> void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
> u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code);
Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
Best regards,
Maxim Levitsky