[PATCH -tip] x86/fpu: Rewrite XSTATE_{XSAVE,XRESTORE} macros as static inline functions

From: Uros Bizjak
Date: Mon Mar 17 2025 - 05:33:57 EST


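Rewrite the XSTATE_{XSAVE,XRESTORE} macros as static inline functions
so that the XSAVE variant can use asm goto to optimize exception
handling. The XRESTORE variant keeps its existing EX_TYPE_FPU_RESTORE
exception table fixup.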

The code that used XSTATE_XSAVE improves from:

3f1fe: 49 0f ae 64 24 40 xsave64 0x40(%r12)
3f204: 31 ed xor %ebp,%ebp
3f206: 85 ed test %ebp,%ebp
3f208: 75 37 jne 3f241 <...>

to just:

3f201: 48 0f ae 65 40 xsave64 0x40(%rbp)

because there is no longer any need to set and test a temporary
result variable.

bloat-o-meter reports a small code size improvement
(x86_64 defconfig, gcc-14.2.1):

add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-25 (-25)

Function old new delta
--------------------------------------------------------
save_fpregs_to_fpstate 141 135 -6
__fpu_restore_sig 1448 1429 -19

Total: Before=22809695, After=22809670, chg -0.00%

Signed-off-by: Uros Bizjak <ubizjak@xxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
---
arch/x86/kernel/fpu/xstate.h | 54 ++++++++++++++++++++----------------
1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 1418423bc4c9..c8e31005567a 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -131,32 +131,38 @@ static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u64 ma
*
* Use XSAVE as a fallback.
*/
-#define XSTATE_XSAVE(st, lmask, hmask, err) \
- asm volatile("1: " ALTERNATIVE_3(XSAVE, \
- XSAVEOPT, X86_FEATURE_XSAVEOPT, \
- XSAVEC, X86_FEATURE_XSAVEC, \
- XSAVES, X86_FEATURE_XSAVES) \
- "\n\t" \
- "xor %[err], %[err]\n" \
- "3:\n" \
- _ASM_EXTABLE_TYPE_REG(1b, 3b, EX_TYPE_EFAULT_REG, %[err]) \
- : [err] "=r" (err) \
- : [xa] "m" (*(st)), "a" (lmask), "d" (hmask) \
- : "memory")
+static __always_inline int __xstate_xsave(struct xregs_state *st,
+ u32 lmask, u32 hmask)
+{
+ asm goto("1: " ALTERNATIVE_3(XSAVE,
+ XSAVEOPT, X86_FEATURE_XSAVEOPT,
+ XSAVEC, X86_FEATURE_XSAVEC,
+ XSAVES, X86_FEATURE_XSAVES)
+ _ASM_EXTABLE(1b, %l[fault])
+ :
+ : [xa] "m" (*st), "a" (lmask), "d" (hmask)
+ : "memory"
+ : fault);
+ return 0;
+fault:
+ return -EFAULT;
+}

/*
* Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
* XSAVE area format.
*/
-#define XSTATE_XRESTORE(st, lmask, hmask) \
- asm volatile("1: " ALTERNATIVE(XRSTOR, \
- XRSTORS, X86_FEATURE_XSAVES) \
- "\n" \
- "3:\n" \
- _ASM_EXTABLE_TYPE(1b, 3b, EX_TYPE_FPU_RESTORE) \
- : \
- : [xa] "m" (*(st)), "a" (lmask), "d" (hmask) \
- : "memory")
+static __always_inline void __xstate_xrstor(struct xregs_state *st,
+ u32 lmask, u32 hmask)
+{
+ asm volatile("1: " ALTERNATIVE(XRSTOR,
+ XRSTORS, X86_FEATURE_XSAVES)
+ "3:\n"
+ _ASM_EXTABLE_TYPE(1b, 3b, EX_TYPE_FPU_RESTORE)
+ :
+ : [xa] "m" (*st), "a" (lmask), "d" (hmask)
+ : "memory");
+}

#if defined(CONFIG_X86_64) && defined(CONFIG_X86_DEBUG_FPU)
extern void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor);
@@ -208,7 +214,7 @@ static inline void os_xsave(struct fpstate *fpstate)
WARN_ON_FPU(!alternatives_patched);
xfd_validate_state(fpstate, mask, false);

- XSTATE_XSAVE(&fpstate->regs.xsave, lmask, hmask, err);
+ err = __xstate_xsave(&fpstate->regs.xsave, lmask, hmask);

/* We should never fault when copying to a kernel buffer: */
WARN_ON_FPU(err);
@@ -225,7 +231,7 @@ static inline void os_xrstor(struct fpstate *fpstate, u64 mask)
u32 hmask = mask >> 32;

xfd_validate_state(fpstate, mask, true);
- XSTATE_XRESTORE(&fpstate->regs.xsave, lmask, hmask);
+ __xstate_xrstor(&fpstate->regs.xsave, lmask, hmask);
}

/* Restore of supervisor state. Does not require XFD */
@@ -235,7 +241,7 @@ static inline void os_xrstor_supervisor(struct fpstate *fpstate)
u32 lmask = mask;
u32 hmask = mask >> 32;

- XSTATE_XRESTORE(&fpstate->regs.xsave, lmask, hmask);
+ __xstate_xrstor(&fpstate->regs.xsave, lmask, hmask);
}

/*
--
2.48.1