Re: [PATCH 12/28] KVM: x86: make translate_nested_gpa vendor-specific
From: mlevitsk
Date: Tue Jun 02 2026 - 10:35:04 EST
On Tue, 2026-05-05 at 21:52 +0200, Paolo Bonzini wrote:
> EPT and NPT have different rules for passing PFERR_USER_MASK to the
> nested page table walk. In particular, for final addresses EPT
> uses the U bit of the guest (nGVA->nGPA) walk.
>
> While at it, remove PFERR_USER_MASK from the VMX version of the
> function, since it is actually ignored by the tables that
> update_permission_bitmask() generates for EPT.
Hi!
Since this used to not to be the case, it means that in theory
this patch series fixes a theoretical bug in which kvm_translate_gpa would fail on nested EPT
if requested to do exec-only access, because it used to ask incorrectly
for PFERR_USER_MASK access which is used to be translated to a read access,
and therefore a guest PTE having only exec permission would fail the check.
I am not sure it is worth mentioning this, it is likely impossible to hit
this, it is just something that came up my mind when reviewing later patches.
>
> Tested-by: David Riley <d.riley@xxxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
> arch/x86/include/asm/kvm_host.h | 4 ++++
> arch/x86/kvm/hyperv.c | 3 ++-
> arch/x86/kvm/mmu.h | 9 +++------
> arch/x86/kvm/svm/nested.c | 15 +++++++++++++++
> arch/x86/kvm/vmx/nested.c | 12 ++++++++++++
> arch/x86/kvm/x86.c | 16 ----------------
> 6 files changed, 36 insertions(+), 23 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 8f2a1b915df9..62dc782b2dd3 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -2010,6 +2010,10 @@ struct kvm_x86_nested_ops {
> struct kvm_nested_state *kvm_state);
> bool (*get_nested_state_pages)(struct kvm_vcpu *vcpu);
> int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
> + gpa_t (*translate_nested_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa,
> + u64 access,
> + struct x86_exception *exception,
> + u64 pte_access);
>
> int (*enable_evmcs)(struct kvm_vcpu *vcpu,
> uint16_t *vmcs_version);
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 53688f7b76eb..f35fae3a7b3d 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -2041,7 +2041,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
> * read with kvm_read_guest().
> */
> if (!hc->fast && is_guest_mode(vcpu)) {
> - hc->ingpa = translate_nested_gpa(vcpu, hc->ingpa,
> + hc->ingpa = kvm_x86_ops.nested_ops->translate_nested_gpa(
> + vcpu, hc->ingpa,
> PFERR_GUEST_FINAL_MASK, NULL, 0);
> if (unlikely(hc->ingpa == INVALID_GPA))
> return HV_STATUS_INVALID_HYPERCALL_INPUT;
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 635c2e5d8513..63be5c5efed9 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -294,10 +294,6 @@ static inline void kvm_update_page_stats(struct kvm *kvm, int level, int count)
> atomic64_add(count, &kvm->stat.pages[level - 1]);
> }
>
> -gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u64 access,
> - struct x86_exception *exception,
> - u64 pte_access);
> -
> static inline gpa_t kvm_translate_gpa(struct kvm_vcpu *vcpu,
> struct kvm_mmu *mmu,
> gpa_t gpa, u64 access,
> @@ -306,8 +302,9 @@ static inline gpa_t kvm_translate_gpa(struct kvm_vcpu *vcpu,
> {
> if (mmu != &vcpu->arch.nested_mmu)
> return gpa;
> - return translate_nested_gpa(vcpu, gpa, access, exception,
> - pte_access);
> + return kvm_x86_ops.nested_ops->translate_nested_gpa(vcpu, gpa, access,
> + exception,
> + pte_access);
> }
>
> static inline bool kvm_has_mirrored_tdp(const struct kvm *kvm)
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 961804df5f45..df232153eb24 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -2071,8 +2071,23 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
> return true;
> }
>
> +static gpa_t svm_translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa,
> + u64 access,
> + struct x86_exception *exception,
> + u64 pte_access)
> +{
> + struct kvm_mmu *mmu = vcpu->arch.mmu;
> +
> + BUG_ON(!mmu_is_nested(vcpu));
> +
> + /* NPT walks are always user-walks */
Tiny nitpick: Maybe we can extend the comment above to explicitly mention that
even for the last, actual guest memory access, the CPU pretends to do a user access to the NPT?
(NPT is really weird...)
Something like that:
/* NPT walks are always user-walks, even for the actual guest linear memory access,
regardless of the actual guest access (user/supervisor) */
Unrelated, If I understand it correctly, without GMET, NPT *does* check the U bit, but
for all NPT walks, even for the last one, it requires the U bit to be set.
And with GMET, U bit is only used for execute access which can happen only
on the last level, and for reads/writes U bit is ignored.
To be honest, I haven't found this mentioned in the APM.
Is this mentioned somewhere in the APM?
> + access |= PFERR_USER_MASK;
> + return mmu->gva_to_gpa(vcpu, mmu, gpa, access, exception);
> +}
> +
> struct kvm_x86_nested_ops svm_nested_ops = {
> .leave_nested = svm_leave_nested,
> + .translate_nested_gpa = svm_translate_nested_gpa,
> .is_exception_vmexit = nested_svm_is_exception_vmexit,
> .check_events = svm_check_nested_events,
> .triple_fault = nested_svm_triple_fault,
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 3fe88f29be7a..cd1924c6e075 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -7438,8 +7438,20 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
> return 0;
> }
>
> +static gpa_t vmx_translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa,
> + u64 access,
> + struct x86_exception *exception,
> + u64 pte_access)
> +{
> + struct kvm_mmu *mmu = vcpu->arch.mmu;
> +
> + BUG_ON(!mmu_is_nested(vcpu));
> + return mmu->gva_to_gpa(vcpu, mmu, gpa, access, exception);
> +}
> +
> struct kvm_x86_nested_ops vmx_nested_ops = {
> .leave_nested = vmx_leave_nested,
> + .translate_nested_gpa = vmx_translate_nested_gpa,
> .is_exception_vmexit = nested_vmx_is_exception_vmexit,
> .check_events = vmx_check_nested_events,
> .has_events = vmx_has_nested_events,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 67979b7de5d6..7c6942afae81 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7848,22 +7848,6 @@ void kvm_get_segment(struct kvm_vcpu *vcpu,
> kvm_x86_call(get_segment)(vcpu, var, seg);
> }
>
> -gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u64 access,
> - struct x86_exception *exception,
> - u64 pte_access)
> -{
> - struct kvm_mmu *mmu = vcpu->arch.mmu;
> - gpa_t t_gpa;
> -
> - BUG_ON(!mmu_is_nested(vcpu));
> -
> - /* NPT walks are always user-walks */
> - access |= PFERR_USER_MASK;
> - t_gpa = mmu->gva_to_gpa(vcpu, mmu, gpa, access, exception);
> -
> - return t_gpa;
> -}
> -
> gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
> struct x86_exception *exception)
> {
Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
Best regards,
Maxim Levitsky