Re: [PATCH 0/7] KVM: x86: APX reg prep work

From: Paolo Bonzini

Date: Sat Apr 04 2026 - 01:17:16 EST


On Sat, Apr 4, 2026 at 12:05 AM Chang S. Bae <chang.seok.bae@xxxxxxxxx> wrote:
>
> On 4/3/2026 9:03 AM, Paolo Bonzini wrote:
> >
> > But until the kernel starts using APX, I would do the save/restore near
> > kvm_load_xfeatures(), because __vmx_vcpu_run()/__svm_vcpu_run() would
> > have to check whether xcr0.apx is set or not.
> Right, I'd much prefer this. Then, it requires auditing whether any
> fast-path handler could access EGPRs.
>
> But there are cases with the new {RD|WR}MSR (MSR_IMM) instructions that
> appear to access GPRs. Because of this, the EGPR saving/restoring needs
> to happen earlier.

You're right about fast paths... so something like the attached patch.
It is not too bad to translate into assembly, where it could use
alternatives (in the same way as
RESTORE_GUEST_SPEC_CTRL/RESTORE_GUEST_SPEC_CTRL_BODY) in place of
static_cpu_has(). Maybe it's best to bite the bullet and do it
already...

Paolo
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 959fcc01ee0f..9a1766037b6f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -887,6 +887,7 @@ struct kvm_vcpu_arch {
struct fpu_guest guest_fpu;

u64 xcr0;
+ u64 early_xcr0;
u64 guest_supported_xcr0;
u64 ia32_xss;
u64 guest_supported_xss;
@@ -2101,6 +2102,20 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);

int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);

+void __kvm_load_guest_apx(struct kvm_vcpu *vcpu);
+static inline void kvm_load_guest_apx(struct kvm_vcpu *vcpu)
+{
+ if (static_cpu_has(X86_FEATURE_APX))
+ __kvm_load_guest_apx(vcpu);
+}
+
+void __kvm_save_guest_apx(struct kvm_vcpu *vcpu);
+static inline void kvm_save_guest_apx(struct kvm_vcpu *vcpu)
+{
+ if (static_cpu_has(X86_FEATURE_APX))
+ __kvm_save_guest_apx(vcpu);
+}
+
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes);

diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index 657f5f743ed9..e44cfed94160 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -31,6 +31,7 @@
/* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */
#define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4)
#define X86_FEATURE_AVX_NE_CONVERT KVM_X86_FEATURE(CPUID_7_1_EDX, 5)
+#define KVM_X86_FEATURE_APX KVM_X86_FEATURE(CPUID_7_1_EDX, 7)
#define X86_FEATURE_AMX_COMPLEX KVM_X86_FEATURE(CPUID_7_1_EDX, 8)
#define X86_FEATURE_AVX_VNNI_INT16 KVM_X86_FEATURE(CPUID_7_1_EDX, 10)
#define X86_FEATURE_PREFETCHITI KVM_X86_FEATURE(CPUID_7_1_EDX, 14)
@@ -151,6 +152,7 @@ static __always_inline u32 __feature_translate(int x86_feature)
KVM_X86_TRANSLATE_FEATURE(TSA_SQ_NO);
KVM_X86_TRANSLATE_FEATURE(TSA_L1_NO);
KVM_X86_TRANSLATE_FEATURE(MSR_IMM);
+ KVM_X86_TRANSLATE_FEATURE(APX);
default:
return x86_feature;
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e6477affac9a..c0a8143f274c 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4359,6 +4359,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl)
update_debugctlmsr(svm->vmcb->save.dbgctl);

+ kvm_load_guest_apx(vcpu);
kvm_wait_lapic_expire(vcpu);

/*
@@ -4381,6 +4382,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
}
+ kvm_save_guest_apx(vcpu);
vcpu->arch.regs_dirty = 0;

if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 8b24e682535b..c4c0da9281c1 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7693,10 +7693,12 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
else if (force_immediate_exit)
smp_send_reschedule(vcpu->cpu);

+ kvm_load_guest_apx(vcpu);
kvm_wait_lapic_expire(vcpu);

/* The actual VMENTER/EXIT is in the .noinstr.text section. */
vmx_vcpu_enter_exit(vcpu, __vmx_vcpu_run_flags(vmx));
+ kvm_save_guest_apx(vcpu);

/* All fields are clean at this point */
if (kvm_is_using_evmcs()) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0757b93e528d..69abfdd946dd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1220,9 +1220,13 @@ static void kvm_load_xfeatures(struct kvm_vcpu *vcpu, bool load_guest)
if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE))
return;

- if (vcpu->arch.xcr0 != kvm_host.xcr0)
+ /*
+ * Do not load the definitive XCR0 yet; vcpu->arch.early_xcr0 keeps
+ * APX enabled so that the kernel can move to and from r16...r31.
+ */
+ if (vcpu->arch.early_xcr0 != kvm_host.xcr0)
xsetbv(XCR_XFEATURE_ENABLED_MASK,
- load_guest ? vcpu->arch.xcr0 : kvm_host.xcr0);
+ load_guest ? vcpu->arch.early_xcr0 : kvm_host.xcr0);

if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) &&
vcpu->arch.ia32_xss != kvm_host.xss)
@@ -1302,6 +1302,11 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)

vcpu->arch.xcr0 = xcr0;

+ /* APX is needed to save/restore registers for fast path WRMSR. */
+ vcpu->arch.early_xcr0 = xcr0;
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_APX))
+ vcpu->arch.early_xcr0 |= kvm_host.xcr0 & XFEATURE_MASK_APX;
+
if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
vcpu->arch.cpuid_dynamic_bits_dirty = true;
return 0;
@@ -11056,6 +11061,49 @@ static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
kvm_x86_call(set_apic_access_page_addr)(vcpu);
}

+/*
+ * Load guest EGPRs (r16-r31) before entry. Assumes the kernel does
+ * not use APX for now, so the host values need not be preserved. When
+ * the kernel starts using APX this needs to move into assembly, and
+ * KVM_GET/SET_XSAVE needs to fill in EGPRs from vcpu->arch.regs.
+ */
+void __kvm_load_guest_apx(struct kvm_vcpu *vcpu)
+{
+ /* Switch from early_xcr0 to the definitive guest XCR0. */
+ if (vcpu->arch.early_xcr0 != vcpu->arch.xcr0)
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
+
+ if (!(vcpu->arch.xcr0 & XFEATURE_MASK_APX))
+ return;
+
+ /* EGPR state must not be clobbered by an interrupt from here on. */
+ WARN_ON_ONCE(!irqs_disabled());
+
+ asm("mov %[r16], %%r16\n"
+ "mov %[r17], %%r17\n" // ...
+ : : [r16] "m" (vcpu->arch.regs[16]),
+ [r17] "m" (vcpu->arch.regs[17]));
+}
+
+/*
+ * Assuming the kernel does not use APX for now. When the kernel
+ * starts using APX this needs to move into assembly and zero out
+ * APX registers for the host. Note the "=m" output constraints:
+ * the moves store to vcpu->arch.regs, they do not read from it.
+ */
+void __kvm_save_guest_apx(struct kvm_vcpu *vcpu)
+{
+ /* Re-enable APX in XCR0 so the EGPR moves below are legal. */
+ if (vcpu->arch.early_xcr0 != vcpu->arch.xcr0)
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.early_xcr0);
+
+ if (!(vcpu->arch.xcr0 & XFEATURE_MASK_APX))
+ return;
+
+ WARN_ON_ONCE(!irqs_disabled());
+
+ asm volatile("mov %%r16, %[r16]\n"
+ "mov %%r17, %[r17]\n" // ...
+ : [r16] "=m" (vcpu->arch.regs[16]),
+ [r17] "=m" (vcpu->arch.regs[17]));
+}
+
/*
* Called within kvm->srcu read side.
* Returns 1 to let vcpu_run() continue the guest execution loop without