[PATCH v4 07/21] KVM: x86: Support APX state for XSAVE ABI

From: Chang S. Bae

Date: Mon May 11 2026 - 21:41:37 EST


Introduce a facility to copy APX state between the VCPU cache and the
userspace buffer, since APX state is kept in the VCPU cache rather than
in the guest fpstate.

The existing fpstate copy functions historically sync all XSTATEs
between userspace and kernel buffers [1]. Any additional state handling
logic should therefore be consistent with them -- i.e. it should validate
XSTATE_BV against the supported XCR0 mask.

Now that there are two copy paths, their invocations must be ordered
carefully:

* When exporting to userspace, the fpstate function should run first,
since it zeros out the area of any component that is either not present
or inactive. The VCPU cache function then copies its state into place.

* When importing from userspace, the VCPU cache function should run
first, as the fpstate function always clears XSTATE_BV[APX] so that APX
state is not saved in the fpstate storage.

[1] Except for PKRU state, which is stored in struct thread_struct.

Signed-off-by: Chang S. Bae <chang.seok.bae@xxxxxxxxx>
---
V3 -> V4: Do not reset XSTATE_BV[APX], now with PATCH6 (Paolo)
---
arch/x86/kvm/cpuid.c | 10 ++++++++
arch/x86/kvm/cpuid.h | 2 ++
arch/x86/kvm/x86.c | 58 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 70 insertions(+)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e69156b54cff..82cb7c8fbc07 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -59,6 +59,16 @@ void __init kvm_init_xstate_sizes(void)
}
}

+u32 xstate_size(unsigned int xfeature) /* returns the .eax field of xstate_sizes[@xfeature] */
+{ /* callers in this patch use the result as a byte count for memcpy() */
+ return xstate_sizes[xfeature].eax; /* NOTE(review): @xfeature is not range-checked here; confirm callers pass a valid index */
+}
+
+u32 xstate_offset(unsigned int xfeature) /* returns the .ebx field of xstate_sizes[@xfeature] */
+{ /* callers in this patch use the result as a byte offset into the XSAVE buffer */
+ return xstate_sizes[xfeature].ebx; /* NOTE(review): @xfeature is not range-checked here; confirm callers pass a valid index */
+}
+
u32 xstate_required_size(u64 xstate_bv, bool compacted)
{
u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 039b8e6f40ba..5ace99dd152b 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -64,6 +64,8 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,

void __init kvm_init_xstate_sizes(void);
u32 xstate_required_size(u64 xstate_bv, bool compacted);
+u32 xstate_size(unsigned int xfeature);
+u32 xstate_offset(unsigned int xfeature);

int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
int cpuid_query_maxguestphyaddr(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 48f259015ce4..3f029f9272a2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5805,6 +5805,48 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
return 0;
}

+#ifdef CONFIG_KVM_APX
+static void kvm_copy_vcpu_regs_to_uabi(struct kvm_vcpu *vcpu, void *buf, u64 supported_xcr0)
+{ /* Export the APX component from vcpu->arch.regs[] into the XSAVE-format buffer @buf. */
+ union fpregs_state *xstate = (union fpregs_state *)buf;
+
+ BUILD_BUG_ON(NR_VCPU_GENERAL_PURPOSE_REGS <= VCPU_REGS_R31); /* build fails unless the register count exceeds the R31 index */
+
+ if (!(supported_xcr0 & XFEATURE_MASK_APX)) /* APX not in @supported_xcr0: leave @buf untouched */
+ return;
+
+ memcpy(buf + xstate_offset(XFEATURE_APX), /* destination: APX component inside @buf */
+ &vcpu->arch.regs[VCPU_REGS_R16], /* source: cached registers starting at R16 */
+ xstate_size(XFEATURE_APX)); /* NOTE(review): no check here that offset + size fits within @buf; confirm callers guarantee it */
+
+ xstate->xsave.header.xfeatures |= XFEATURE_MASK_APX; /* set XSTATE_BV[APX] in the header */
+}
+
+static int kvm_copy_uabi_to_vcpu_regs(struct kvm_vcpu *vcpu, void *buf, u64 supported_xcr0)
+{ /* Import the APX component of @buf into vcpu->arch.regs[]; returns 0, or -EINVAL if APX is flagged but not in @supported_xcr0. */
+ union fpregs_state *xstate = (union fpregs_state *)buf;
+
+ if (!(xstate->xsave.header.xfeatures & XFEATURE_MASK_APX)) /* XSTATE_BV[APX] clear: nothing is copied */
+ return 0; /* NOTE(review): cached R16 and up are left as-is, not reset; confirm this is intended */
+
+ if (!(supported_xcr0 & XFEATURE_MASK_APX)) /* buffer flags APX but the supported mask does not allow it */
+ return -EINVAL;
+
+ BUILD_BUG_ON(NR_VCPU_GENERAL_PURPOSE_REGS <= VCPU_REGS_R31); /* build fails unless the register count exceeds the R31 index */
+
+ memcpy(&vcpu->arch.regs[VCPU_REGS_R16], /* destination: cached registers starting at R16 */
+ buf + xstate_offset(XFEATURE_APX), /* source: APX component inside @buf */
+ xstate_size(XFEATURE_APX));
+
+ return 0;
+}
+#else
+static void kvm_copy_vcpu_regs_to_uabi(struct kvm_vcpu *vcpu, void *buf, u64 supported_xcr0) { } /* !CONFIG_KVM_APX stub: no APX state to export */
+static int kvm_copy_uabi_to_vcpu_regs(struct kvm_vcpu *vcpu, void *buf, u64 supported_xcr0)
+{ /* !CONFIG_KVM_APX stub: no APX state to import, @buf is not inspected */
+ return 0; /* always reports success */
+}
+#endif

static int kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
u8 *state, unsigned int size)
@@ -5827,8 +5869,15 @@ static int kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0;

+ /*
+ * fpu_copy_guest_fpstate_to_uabi() zeroes any part of the userspace
+ * buffer that the guest fpstate does not populate, so invoke it before
+ * copying state that is kept outside fpstate, i.e. APX.
+ */
fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
supported_xcr0, vcpu->arch.pkru);
+ kvm_copy_vcpu_regs_to_uabi(vcpu, state, supported_xcr0);
+
return 0;
}

@@ -5843,6 +5892,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
union fpregs_state *xstate = (union fpregs_state *)guest_xsave->region;
+ int err;

if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0;
@@ -5854,6 +5904,14 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
*/
xstate->xsave.header.xfeatures &= ~vcpu->arch.guest_fpu.fpstate->xfd;

+ /*
+ * Copy APX state to the VCPU cache first: the fpstate copy below always
+ * clears XSTATE_BV[APX] so that APX is never saved in the fpstate storage.
+ */
+ err = kvm_copy_uabi_to_vcpu_regs(vcpu, guest_xsave->region, kvm_caps.supported_xcr0);
+ if (err)
+ return err;
+
return fpu_copy_uabi_to_guest_fpstate(&vcpu->arch.guest_fpu,
guest_xsave->region,
kvm_caps.supported_xcr0,
--
2.51.0