Re: [PATCH 19/28] KVM: nVMX: advertise MBEC to nested guests
From: mlevitsk
Date: Tue Jun 02 2026 - 11:12:47 EST
On Tue, 2026-05-05 at 21:52 +0200, Paolo Bonzini wrote:
> From: Jon Kohler <jon@xxxxxxxxxxx>
>
> Advertise SECONDARY_EXEC_MODE_BASED_EPT_EXEC (MBEC) to userspace, which
> allows userspace to expose and advertise the feature to the guest.
>
> When the guest enables MBEC, the setting reaches the MMU via cr4_smep and
> reaches the processor when vmcs12->secondary_vm_exec_control is merged
> into the VMCS02's secondary VM execution controls.
>
> Signed-off-by: Jon Kohler <jon@xxxxxxxxxxx>
> Message-ID: <20251223054806.1611168-9-jon@xxxxxxxxxxx>
> Tested-by: David Riley <d.riley@xxxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
> arch/x86/kvm/mmu.h | 2 +-
> arch/x86/kvm/mmu/mmu.c | 7 ++++---
> arch/x86/kvm/mmu/spte.c | 10 ++++++----
> arch/x86/kvm/vmx/nested.c | 11 +++++++++++
> 4 files changed, 22 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 23bc5b18efd0..e1e3869f568b 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -100,7 +100,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
> unsigned long cr4, u64 efer, gpa_t nested_cr3);
> void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
> int huge_page_level, bool accessed_dirty,
> - gpa_t new_eptp);
> + bool mbec, gpa_t new_eptp);
> bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
> int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
> u64 fault_address, char *insn, int insn_len);
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index a5b68f18b220..ededc26c6675 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -5959,7 +5959,7 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_init_shadow_npt_mmu);
>
> static union kvm_cpu_role
> kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
> - bool execonly, u8 level)
> + bool execonly, u8 level, bool mbec)
> {
> union kvm_cpu_role role = {0};
>
> @@ -5969,6 +5969,7 @@ kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
> */
> WARN_ON_ONCE(is_smm(vcpu));
> role.base.level = level;
> + role.base.cr4_smep = mbec;
> role.base.has_4_byte_gpte = false;
> role.base.direct = false;
> role.base.ad_disabled = !accessed_dirty;
> @@ -5984,13 +5985,13 @@ kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
>
> void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
> int huge_page_level, bool accessed_dirty,
> - gpa_t new_eptp)
> + bool mbec, gpa_t new_eptp)
> {
> struct kvm_mmu *context = &vcpu->arch.guest_mmu;
> u8 level = vmx_eptp_page_walk_level(new_eptp);
> union kvm_cpu_role new_mode =
> kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty,
> - execonly, level);
> + execonly, level, mbec);
>
> if (new_mode.as_u64 != context->cpu_role.as_u64) {
> /* EPT, and thus nested EPT, does not consume CR0, CR4, nor EFER. */
> diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
> index f41573b0ccfa..d2f5f7dd8fe1 100644
> --- a/arch/x86/kvm/mmu/spte.c
> +++ b/arch/x86/kvm/mmu/spte.c
> @@ -517,10 +517,12 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits)
> * host's MBEC setting does not matter. On hardware without MBEC
> * the XU bit is reserved-as-ignored, and setting it does no harm.
> *
> - * For nested EPT MBEC is not supported, but bit 10 of the gPTE has
> - * no effect because (a) is_present_gpte() does not treat it as a
> - * present bit, and (b) permission_fault() uses an mmu->permissions[]
> - * array that effectively ignores ACC_USER_EXEC_MASK.
> + * For nested EPT, when MBEC is disabled by L1, correctness relies
> + * on (a) ignoring bit 10 of the gPTE in is_present_gpte(), rather
> + * than treating it as a present bit, and (b) permission_fault()
> + * using an mmu->permissions[] array that effectively ignores
> + * ACC_USER_EXEC_MASK. Bit 10 of the gPTE does end up mirrored
> + * in the sPTEs but is ignored because L2 runs with MBEC disabled.
Makes sense.
> */
> shadow_xu_mask = VMX_EPT_USER_EXECUTABLE_MASK;
> shadow_present_mask = VMX_EPT_SUPPRESS_VE_BIT;
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 84f5c25a1f12..bc1046f32ebc 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -469,6 +469,13 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
> vmcs12->guest_physical_address = fault->address;
> }
>
> +static inline bool nested_ept_mbec_enabled(struct kvm_vcpu *vcpu)
> +{
> + struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> +
> + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC);
> +}
> +
> static void nested_ept_new_eptp(struct kvm_vcpu *vcpu)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -477,6 +484,7 @@ static void nested_ept_new_eptp(struct kvm_vcpu *vcpu)
>
> kvm_init_shadow_ept_mmu(vcpu, execonly, ept_lpage_level,
> nested_ept_ad_enabled(vcpu),
> + nested_ept_mbec_enabled(vcpu),
> nested_ept_get_eptp(vcpu));
> }
>
> @@ -7257,6 +7265,9 @@ static void nested_vmx_setup_secondary_ctls(u32 ept_caps,
> msrs->ept_caps |= VMX_EPT_AD_BIT;
> }
>
> + if (enable_mbec)
> + msrs->secondary_ctls_high |=
> + SECONDARY_EXEC_MODE_BASED_EPT_EXEC;
> /*
> * Advertise EPTP switching irrespective of hardware support,
> * KVM emulates it in software so long as VMFUNC is supported.
Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
Best regards,
Maxim Levitsky