[PATCH v10 16/28] x86/fpu/xstate: Support both legacy and expanded signal XSTATE size

From: Chang S. Bae
Date: Wed Aug 25 2021 - 12:01:54 EST


Prepare to support two XSTATE sizes on the signal stack -- legacy and
expanded. Legacy programs have not requested access to AMX (or later
features), and the XSTATE on their signal stack can include up through
AVX-512.

Programs that request access to AMX (and/or later features) will have an
uncompressed XSTATE that includes those features. If such programs also
use sigaltstack, they must ensure that their sigaltstack is large
enough to handle that full XSTATE format. (This is most easily done by
using signal.h from glibc 2.34 or later.)

Introduce a new XSTATE size variable for the legacy stack and some helpers.

Signed-off-by: Chang S. Bae <chang.seok.bae@xxxxxxxxx>
Reviewed-by: Len Brown <len.brown@xxxxxxxxx>
Cc: x86@xxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
---
Changes from v9:
* Use get_group_state_perm() to check the permission.

Changes from v6:
* Massage the code comments.

Changes from v5:
* Added as a new patch.
---
arch/x86/include/asm/fpu/internal.h | 23 +++++++++--
arch/x86/include/asm/fpu/xstate.h | 5 ++-
arch/x86/kernel/fpu/init.c | 1 +
arch/x86/kernel/fpu/signal.c | 63 ++++++++++++++++++++---------
arch/x86/kernel/fpu/xstate.c | 15 +++++++
5 files changed, 82 insertions(+), 25 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 1aa8bc75b24d..a7e39862df30 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -337,15 +337,30 @@ static inline void os_xrstor(struct xregs_state *xstate, u64 mask)
*/
static inline int xsave_to_user_sigframe(struct xregs_state __user *buf)
{
+ u32 lmask, hmask;
+ u64 mask;
+ int err;
+
/*
* Include the features which are not xsaved/rstored by the kernel
* internally, e.g. PKRU. That's user space ABI and also required
* to allow the signal handler to modify PKRU.
*/
- u64 mask = xfeatures_mask_uabi();
- u32 lmask = mask;
- u32 hmask = mask >> 32;
- int err;
+ mask = xfeatures_mask_uabi();
+
+ /*
+ * Exclude dynamic user states for non-opt-in threads.
+ */
+ if (xfeatures_mask_user_dynamic) {
+ struct fpu *fpu = &current->thread.fpu;
+
+ mask &= fpu->dynamic_state_perm ?
+ fpu->state_mask :
+ ~xfeatures_mask_user_dynamic;
+ }
+
+ lmask = mask;
+ hmask = mask >> 32;

/*
* Clear the xsave header first, so that reserved fields are
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 450537b0b92f..d896061933bb 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -145,10 +145,13 @@ extern void __init update_regset_xstate_info(unsigned int size,
* contains all the enabled state components.
* @user_size: The size of user-space buffer for signal and
* ptrace frames, in the non-compacted format.
+ * @user_minsig_size: The non-compacted legacy xstate size for signal.
+ * Legacy programs do not request to access dynamic
+ * states.
*/
struct fpu_xstate_buffer_config {
unsigned int min_size, max_size;
- unsigned int user_size;
+ unsigned int user_size, user_minsig_size;
};

extern struct fpu_xstate_buffer_config fpu_buf_cfg;
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index cd1f3114f3ca..75bacda2ab87 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -210,6 +210,7 @@ static void __init fpu__init_system_xstate_size_legacy(void)
fpu_buf_cfg.min_size = xstate_size;
fpu_buf_cfg.max_size = xstate_size;
fpu_buf_cfg.user_size = xstate_size;
+ fpu_buf_cfg.user_minsig_size = xstate_size;
}

/* Legacy code to initialize eager fpu mode. */
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index fe2732db6d6b..8273e9122ccb 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -15,9 +15,25 @@
#include <asm/sigframe.h>
#include <asm/trace/fpu.h>

+/*
+ * Record the signal xstate size and feature bits. Exclude dynamic user
+ * states. See fpu__init_prepare_fx_sw_frame(). The opt-in tasks will
+ * dynamically adjust the data.
+ */
static struct _fpx_sw_bytes fx_sw_reserved __ro_after_init;
static struct _fpx_sw_bytes fx_sw_reserved_ia32 __ro_after_init;

+static unsigned int current_sig_xstate_size(void)
+{
+ return get_group_state_perm(current) & xfeatures_mask_user_dynamic ?
+ fpu_buf_cfg.user_size : fpu_buf_cfg.user_minsig_size;
+}
+
+static inline int extend_sig_xstate_size(unsigned int size)
+{
+ return use_xsave() ? size + FP_XSTATE_MAGIC2_SIZE : size;
+}
+
/*
* Check for the presence of extended state information in the
* user fpstate pointer in the sigcontext.
@@ -36,7 +52,7 @@ static inline int check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
/* Check for the first magic field and other error scenarios. */
if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
fx_sw->xstate_size < min_xstate_size ||
- fx_sw->xstate_size > fpu_buf_cfg.user_size ||
+ fx_sw->xstate_size > current_sig_xstate_size() ||
fx_sw->xstate_size > fx_sw->extended_size)
goto setfx;

@@ -94,20 +110,32 @@ static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)

static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
{
+ unsigned int current_xstate_size = current_sig_xstate_size();
struct xregs_state __user *x = buf;
- struct _fpx_sw_bytes *sw_bytes;
+ struct _fpx_sw_bytes sw_bytes;
u32 xfeatures;
int err;

- /* Setup the bytes not touched by the [f]xsave and reserved for SW. */
- sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved;
- err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes));
+ /*
+ * Setup the bytes not touched by the [f]xsave and reserved for SW.
+ *
+ * Use the recorded values if it matches with the current task. Otherwise,
+ * adjust it.
+ */
+ sw_bytes = ia32_frame ? fx_sw_reserved_ia32 : fx_sw_reserved;
+ if (sw_bytes.xstate_size != current_xstate_size) {
+ unsigned int default_xstate_size = sw_bytes.xstate_size;
+
+ sw_bytes.xfeatures = xfeatures_mask_uabi();
+ sw_bytes.xstate_size = current_xstate_size;
+ sw_bytes.extended_size += (current_xstate_size - default_xstate_size);
+ }
+ err = __copy_to_user(&x->i387.sw_reserved, &sw_bytes, sizeof(sw_bytes));

if (!use_xsave())
return err;

- err |= __put_user(FP_XSTATE_MAGIC2,
- (__u32 __user *)(buf + fpu_buf_cfg.user_size));
+ err |= __put_user(FP_XSTATE_MAGIC2, (__u32 __user *)(buf + current_xstate_size));

/*
* Read the xfeatures which we copied (directly from the cpu or
@@ -144,7 +172,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
else
err = fnsave_to_user_sigframe((struct fregs_state __user *) buf);

- if (unlikely(err) && __clear_user(buf, fpu_buf_cfg.user_size))
+ if (unlikely(err) && __clear_user(buf, current_sig_xstate_size()))
err = -EFAULT;
return err;
}
@@ -205,7 +233,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
fpregs_unlock();

if (ret) {
- if (!fault_in_pages_writeable(buf_fx, fpu_buf_cfg.user_size))
+ if (!fault_in_pages_writeable(buf_fx, current_sig_xstate_size()))
goto retry;
return -EFAULT;
}
@@ -418,18 +446,13 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
fpregs_unlock();
return ret;
}
-static inline int xstate_sigframe_size(void)
-{
- return use_xsave() ? fpu_buf_cfg.user_size + FP_XSTATE_MAGIC2_SIZE :
- fpu_buf_cfg.user_size;
-}

/*
* Restore FPU state from a sigframe:
*/
int fpu__restore_sig(void __user *buf, int ia32_frame)
{
- unsigned int size = xstate_sigframe_size();
+ unsigned int size = extend_sig_xstate_size(current_sig_xstate_size());
struct fpu *fpu = &current->thread.fpu;
void __user *buf_fx = buf;
bool ia32_fxstate = false;
@@ -476,7 +499,7 @@ unsigned long
fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
unsigned long *buf_fx, unsigned long *size)
{
- unsigned long frame_size = xstate_sigframe_size();
+ unsigned long frame_size = extend_sig_xstate_size(current_sig_xstate_size());

*buf_fx = sp = round_down(sp - frame_size, 64);
if (ia32_frame && use_fxsr()) {
@@ -491,7 +514,7 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame,

unsigned long fpu__get_fpstate_size(void)
{
- unsigned long ret = xstate_sigframe_size();
+ unsigned long ret = extend_sig_xstate_size(fpu_buf_cfg.user_size);

/*
* This space is needed on (most) 32-bit kernels, or when a 32-bit
@@ -516,12 +539,12 @@ unsigned long fpu__get_fpstate_size(void)
*/
void fpu__init_prepare_fx_sw_frame(void)
{
- int ext_size = fpu_buf_cfg.user_size + FP_XSTATE_MAGIC2_SIZE;
- int xstate_size = fpu_buf_cfg.user_size;
+ int ext_size = fpu_buf_cfg.user_minsig_size + FP_XSTATE_MAGIC2_SIZE;
+ int xstate_size = fpu_buf_cfg.user_minsig_size;

fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
fx_sw_reserved.extended_size = ext_size;
- fx_sw_reserved.xfeatures = xfeatures_mask_uabi();
+ fx_sw_reserved.xfeatures = xfeatures_mask_uabi() & ~xfeatures_mask_user_dynamic;
fx_sw_reserved.xstate_size = xstate_size;

if (IS_ENABLED(CONFIG_IA32_EMULATION) ||
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index fefa6a139e96..bb31ef8a45b5 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -820,6 +820,21 @@ static int __init init_xstate_size(void)
* User space is always in standard format.
*/
fpu_buf_cfg.user_size = xsave_size;
+
+ /*
+ * The minimum signal xstate size is for non-opt-in user threads
+ * that do not access dynamic states.
+ */
+ if (xfeatures_mask_user_dynamic) {
+ int nr = fls64(xfeatures_mask_uabi() & ~xfeatures_mask_user_dynamic) - 1;
+ unsigned int size, offset, ecx, edx;
+
+ cpuid_count(XSTATE_CPUID, nr, &size, &offset, &ecx, &edx);
+ fpu_buf_cfg.user_minsig_size = offset + size;
+ } else {
+ fpu_buf_cfg.user_minsig_size = xsave_size;
+ }
+
return 0;
}

--
2.17.1