Re: [PATCH 25/28] KVM: x86/mmu: add support for GMET to NPT page table walks

From: mlevitsk

Date: Tue Jun 02 2026 - 10:46:24 EST


On Tue, 2026-05-05 at 21:52 +0200, Paolo Bonzini wrote:
> GMET allows page table entries to be created with U=0 in NPT.
> However, when GMET=1 U=0 only affects execution, not reads or
> writes.  Ignore user faults on non-fetch accesses for NPT GMET.
>
> Tested-by: David Riley <d.riley@xxxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |  2 ++
>  arch/x86/kvm/mmu.h              |  2 +-
>  arch/x86/kvm/mmu/mmu.c          | 18 ++++++++++++------
>  arch/x86/kvm/svm/nested.c       | 10 +++++++---
>  4 files changed, 22 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 7dde4ca87752..1da3d5c59e15 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -370,6 +370,8 @@ union kvm_mmu_page_role {
>   * cr4_smep is also set for EPT MBEC.  Because it affects
>   * which pages are considered non-present (bit 10 additionally
>   * must be zero if MBEC is on) it has to be in the base role.
> + * It also has to be in the base role for AMD GMET because
> + * kernel-executable pages need to have U=0 with GMET enabled.
>   */
>   unsigned cr4_smep:1;
>  
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 1b354e1f2d81..ddf4e467c071 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -97,7 +97,7 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits);
>  
>  void kvm_init_mmu(struct kvm_vcpu *vcpu);
>  void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr4,
> -      u64 efer, gpa_t nested_cr3);
> +      u64 efer, gpa_t nested_cr3, u64 misc_ctl);
>  void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
>        int huge_page_level, bool accessed_dirty,
>        bool mbec, gpa_t new_eptp);
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 5a796ae8c396..a283b5078c61 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -55,6 +55,7 @@
>  #include <asm/io.h>
>  #include <asm/set_memory.h>
>  #include <asm/spec-ctrl.h>
> +#include <asm/svm.h>
>  #include <asm/vmx.h>
>  
>  #include "trace.h"
> @@ -5572,7 +5573,7 @@ reset_ept_shadow_zero_bits_mask(struct kvm_mmu *context, bool execonly)
>   (14 & (access) ? 1 << 14 : 0) | \
>   (15 & (access) ? 1 << 15 : 0))
>  
> -static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
> +static void update_permission_bitmask(struct kvm_mmu *mmu, bool tdp, bool ept)

Hi!

I vote to call this 'npt' instead, because 'tdp' confuses me a lot,
it is used in kvm for so many things like the tdp_mmu and such.
What do you think?

>  {
>   unsigned index;
>  
> @@ -5633,7 +5634,12 @@ static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
>   /* Faults from kernel mode accesses to user pages */
>   u16 kf = (pfec & PFERR_USER_MASK) ? 0 : u;
>  
> - uf = (pfec & PFERR_USER_MASK) ? (u16)~u : 0;
> + /*

> + * For NPT GMET, U=0 does not affect reads and writes.  Fetches
> + * are handled below via cr4_smep.


While at it we might also want to add a comment saying that for regular NPT, U bit is checked,
but all NPT accesses are treated as user so it has to be effectively always 1.
(Assuming that APM is correct)
What do you think?

> + */
> + if (!(tdp && cr4_smep))
> + uf = (pfec & PFERR_USER_MASK) ? (u16)~u : 0;
>  
>   if (efer_nx)
>   ff = (pfec & PFERR_FETCH_MASK) ? (u16)~x : 0;
> @@ -5744,7 +5750,7 @@ static void reset_guest_paging_metadata(struct kvm_vcpu *vcpu,
>   return;
>  
>   reset_guest_rsvds_bits_mask(vcpu, mmu);
> - update_permission_bitmask(mmu, false);
> + update_permission_bitmask(mmu, mmu == &vcpu->arch.guest_mmu, false);
>   update_pkru_bitmask(mmu);
>  }
>  
> @@ -5940,7 +5946,7 @@ static void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu,
>  }
>  
>  void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr4,
> -      u64 efer, gpa_t nested_cr3)
> +      u64 efer, gpa_t nested_cr3, u64 misc_ctl)
>  {
>   struct kvm_mmu *context = &vcpu->arch.guest_mmu;
>   struct kvm_mmu_role_regs regs = {
> @@ -5953,7 +5959,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr4,
>  
>   /* NPT requires CR0.PG=1. */
>   WARN_ON_ONCE(cpu_role.base.direct || !cpu_role.base.guest_mode);
> - cpu_role.base.cr4_smep = false;
> + cpu_role.base.cr4_smep = (misc_ctl & SVM_MISC_ENABLE_GMET) != 0;
>  
>   root_role = cpu_role.base;
>   root_role.level = kvm_mmu_get_tdp_level(vcpu);
> @@ -6011,7 +6017,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
>   context->gva_to_gpa = ept_gva_to_gpa;
>   context->sync_spte = ept_sync_spte;
>  
> - update_permission_bitmask(context, true);
> + update_permission_bitmask(context, true, true);
>   context->pkru_mask = 0;
>   reset_rsvds_bits_mask_ept(vcpu, context, execonly, huge_page_level);
>   reset_ept_shadow_zero_bits_mask(context, execonly);
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index a1cffd274000..7adfa7da210d 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -95,7 +95,8 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
>   */
>   kvm_init_shadow_npt_mmu(vcpu, svm->vmcb01.ptr->save.cr4,
>   svm->vmcb01.ptr->save.efer,
> - svm->nested.ctl.nested_cr3);
> + svm->nested.ctl.nested_cr3,
> + svm->nested.ctl.misc_ctl);
>   vcpu->arch.mmu->get_guest_pgd     = nested_svm_get_tdp_cr3;
>   vcpu->arch.mmu->get_pdptr         = nested_svm_get_tdp_pdptr;
>   vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
> @@ -2076,12 +2077,15 @@ static gpa_t svm_translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa,
>         struct x86_exception *exception,
>         u64 pte_access)
>  {
> + struct vcpu_svm *svm = to_svm(vcpu);
>   struct kvm_mmu *mmu = vcpu->arch.mmu;
>  
>   BUG_ON(!mmu_is_nested(vcpu));
>  
> - /* NPT walks are always user-walks */
> - access |= PFERR_USER_MASK;
> + /* Non-GMET walks are always user-walks */
> + if (!(svm->nested.ctl.misc_ctl & SVM_MISC_ENABLE_GMET))
> + access |= PFERR_USER_MASK;

Tiny nitpick: maybe add a 'nested_gmet_enabled' similar to nested_npt_enabled?

Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>


Best regards,
Maxim Levitsky

> +
>   return mmu->gva_to_gpa(vcpu, mmu, gpa, access, exception);
>  }
>