Re: [PATCH 12/28] KVM: x86: make translate_nested_gpa vendor-specific

From: mlevitsk

Date: Tue Jun 02 2026 - 10:35:04 EST


On Tue, 2026-05-05 at 21:52 +0200, Paolo Bonzini wrote:
> EPT and NPT have different rules for passing PFERR_USER_MASK to the
> nested page table walk.  In particular, for final addresses EPT
> uses the U bit of the guest (nGVA->nGPA) walk.
>
> While at it, remove PFERR_USER_MASK from the VMX version of the
> function, since it is actually ignored by the tables that
> update_permission_bitmask() generates for EPT.

Hi!

Since this used to not to be the case, it means that in theory 
this patch series fixes a theoretical bug in which kvm_translate_gpa would fail on nested EPT 
if requested to do exec-only access, because it used to ask incorrectly
for PFERR_USER_MASK access which is used to be translated to a read access, 
and therefore  a guest PTE having only exec permission would fail the check.

I am not sure it is worth mentioning this, it is likely impossible to hit
this, it is just something that came up my mind when reviewing later patches.


>
> Tested-by: David Riley <d.riley@xxxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |  4 ++++
>  arch/x86/kvm/hyperv.c           |  3 ++-
>  arch/x86/kvm/mmu.h              |  9 +++------
>  arch/x86/kvm/svm/nested.c       | 15 +++++++++++++++
>  arch/x86/kvm/vmx/nested.c       | 12 ++++++++++++
>  arch/x86/kvm/x86.c              | 16 ----------------
>  6 files changed, 36 insertions(+), 23 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 8f2a1b915df9..62dc782b2dd3 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -2010,6 +2010,10 @@ struct kvm_x86_nested_ops {
>   struct kvm_nested_state *kvm_state);
>   bool (*get_nested_state_pages)(struct kvm_vcpu *vcpu);
>   int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
> + gpa_t (*translate_nested_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa,
> +       u64 access,
> +       struct x86_exception *exception,
> +       u64 pte_access);
>  
>   int (*enable_evmcs)(struct kvm_vcpu *vcpu,
>       uint16_t *vmcs_version);
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 53688f7b76eb..f35fae3a7b3d 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -2041,7 +2041,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
>   * read with kvm_read_guest().
>   */
>   if (!hc->fast && is_guest_mode(vcpu)) {
> - hc->ingpa = translate_nested_gpa(vcpu, hc->ingpa,
> + hc->ingpa = kvm_x86_ops.nested_ops->translate_nested_gpa(
> + vcpu, hc->ingpa,
>   PFERR_GUEST_FINAL_MASK, NULL, 0);
>   if (unlikely(hc->ingpa == INVALID_GPA))
>   return HV_STATUS_INVALID_HYPERCALL_INPUT;
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 635c2e5d8513..63be5c5efed9 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -294,10 +294,6 @@ static inline void kvm_update_page_stats(struct kvm *kvm, int level, int count)
>   atomic64_add(count, &kvm->stat.pages[level - 1]);
>  }
>  
> -gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u64 access,
> -    struct x86_exception *exception,
> -    u64 pte_access);
> -
>  static inline gpa_t kvm_translate_gpa(struct kvm_vcpu *vcpu,
>         struct kvm_mmu *mmu,
>         gpa_t gpa, u64 access,
> @@ -306,8 +302,9 @@ static inline gpa_t kvm_translate_gpa(struct kvm_vcpu *vcpu,
>  {
>   if (mmu != &vcpu->arch.nested_mmu)
>   return gpa;
> - return translate_nested_gpa(vcpu, gpa, access, exception,
> -     pte_access);
> + return kvm_x86_ops.nested_ops->translate_nested_gpa(vcpu, gpa, access,
> +     exception,
> +     pte_access);
>  }
>  
>  static inline bool kvm_has_mirrored_tdp(const struct kvm *kvm)
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 961804df5f45..df232153eb24 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -2071,8 +2071,23 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
>   return true;
>  }
>  
> +static gpa_t svm_translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa,
> +       u64 access,
> +       struct x86_exception *exception,
> +       u64 pte_access)
> +{
> + struct kvm_mmu *mmu = vcpu->arch.mmu;
> +
> + BUG_ON(!mmu_is_nested(vcpu));
> +
> + /* NPT walks are always user-walks */


Tiny nitpick: Maybe we can extend the comment above to explicitly mention that
even for the last, actual guest memory access, the CPU pretends to do a user access to the NPT?
(NPT is really weird...)

Something like that:

/* NPT walks are always user-walks, even for the actual guest linear memory access,
regardless of the actual guest access (user/supervisor) */


Unrelated, If I understand it correctly, without GMET, NPT *does* check the U bit, but
for all NPT walks, even for the last one, it requires the U bit to be set.

And with GMET, U bit is only used for execute access which can happen only
on the last level, and for reads/writes U bit is ignored.

To be honest, I haven't found this mentioned in the APM.
Is this mentioned somewhere in the APM?


> + access |= PFERR_USER_MASK;
> + return mmu->gva_to_gpa(vcpu, mmu, gpa, access, exception);
> +}
> +
>  struct kvm_x86_nested_ops svm_nested_ops = {
>   .leave_nested = svm_leave_nested,
> + .translate_nested_gpa = svm_translate_nested_gpa,
>   .is_exception_vmexit = nested_svm_is_exception_vmexit,
>   .check_events = svm_check_nested_events,
>   .triple_fault = nested_svm_triple_fault,
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 3fe88f29be7a..cd1924c6e075 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -7438,8 +7438,20 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
>   return 0;
>  }
>  
> +static gpa_t vmx_translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa,
> +       u64 access,
> +       struct x86_exception *exception,
> +       u64 pte_access)
> +{
> + struct kvm_mmu *mmu = vcpu->arch.mmu;
> +
> + BUG_ON(!mmu_is_nested(vcpu));
> + return mmu->gva_to_gpa(vcpu, mmu, gpa, access, exception);
> +}
> +
>  struct kvm_x86_nested_ops vmx_nested_ops = {
>   .leave_nested = vmx_leave_nested,
> + .translate_nested_gpa = vmx_translate_nested_gpa,
>   .is_exception_vmexit = nested_vmx_is_exception_vmexit,
>   .check_events = vmx_check_nested_events,
>   .has_events = vmx_has_nested_events,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 67979b7de5d6..7c6942afae81 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7848,22 +7848,6 @@ void kvm_get_segment(struct kvm_vcpu *vcpu,
>   kvm_x86_call(get_segment)(vcpu, var, seg);
>  }
>  
> -gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u64 access,
> -    struct x86_exception *exception,
> -    u64 pte_access)
> -{
> - struct kvm_mmu *mmu = vcpu->arch.mmu;
> - gpa_t t_gpa;
> -
> - BUG_ON(!mmu_is_nested(vcpu));
> -
> - /* NPT walks are always user-walks */
> - access |= PFERR_USER_MASK;
> - t_gpa  = mmu->gva_to_gpa(vcpu, mmu, gpa, access, exception);
> -
> - return t_gpa;
> -}
> -
>  gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
>         struct x86_exception *exception)
>  {


Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>

Best regards,
Maxim Levitsky