[PATCH v4 01/10] x86/xsaves: Define and use user_xstate_size for xstate size in signal context

From: Yu-cheng Yu
Date: Fri Mar 04 2016 - 13:16:12 EST


If "xsaves" is enabled, kernel always uses compacted format of xsave area.
But user space still uses standard format of xsave area. Thus, xstate size
in kernel's xsave area is smaller than xstate size in user's xsave area.
The xstate in signal frame should be in standard format for user's signal
handler to access.

In no "xsaves" case, xsave area in both user space and kernel space are in
standard format. Therefore, user's and kernel's xstate sizes are equal.

In "xsaves" case, xsave area in user space is in standard format while
xsave area in kernel space is in compacted format. Therefore, kernel's
xstate size is smaller than user's xstate size.

So here is the problem: currently kernel assumes its own xstate size is
signal frame's xstate size. This is not a problem in no "xsaves" case.
It is an issue in "xsaves" case because kernel's xstate size is smaller
than user's xstate size. In fpu__alloc_mathframe(), a smaller fpstate
buffer is allocated for the standard format xstate in signal frame.
Then kernel saves only part of xstate registers into this smaller
user's fpstate buffer and user will see part of the xstate registers in
signal context. Similar issue happens after returning from signal handler:
kernel will only restore part of xstate registers from user's fpstate
buffer in signal frame.

This patch defines and uses user_xstate_size for xstate size in signal
frame. It's read from returned value in ebx from CPUID leaf 0x0D subleaf
0x0. This is maximum size required by enabled states in XCR0 and may be
different from ecx when states at the end of the xsave area are not
enabled. This value indicates the size required for XSAVE to save all
supported user states in legacy/standard format.

Signed-off-by: Fenghua Yu <fenghua.yu@xxxxxxxxx>
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@xxxxxxxxx>
Reviewed-by: Dave Hansen <dave.hansen@xxxxxxxxx>
---
arch/x86/include/asm/fpu/xstate.h | 1 -
arch/x86/include/asm/processor.h | 1 +
arch/x86/kernel/fpu/init.c | 5 ++-
arch/x86/kernel/fpu/signal.c | 26 ++++++++++----
arch/x86/kernel/fpu/xstate.c | 71 ++++++++++++++++++++++++---------------
5 files changed, 67 insertions(+), 37 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index af30fde..c6667f2 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -39,7 +39,6 @@
#define REX_PREFIX
#endif

-extern unsigned int xstate_size;
extern u64 xfeatures_mask;
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 20c11d1..01e127e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -366,6 +366,7 @@ DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
#endif /* X86_64 */

extern unsigned int xstate_size;
+extern unsigned int user_xstate_size;

struct perf_event;

diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 6d9f0a7..4ac2561 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -193,7 +193,7 @@ static void __init fpu__init_task_struct_size(void)
}

/*
- * Set up the xstate_size based on the legacy FPU context size.
+ * Set up the user and kernel xstate_size based on the legacy FPU context size.
*
* We set this up first, and later it will be overwritten by
* fpu__init_system_xstate() if the CPU knows about xstates.
@@ -224,6 +224,9 @@ static void __init fpu__init_system_xstate_size_legacy(void)
else
xstate_size = sizeof(struct fregs_state);
}
+
+ user_xstate_size = xstate_size;
+
/*
* Quirk: we don't yet handle the XSAVES* instructions
* correctly, as we don't correctly convert between
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 31c6a60..ee6d662 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -31,7 +31,7 @@ static inline int check_for_xstate(struct fxregs_state __user *buf,
/* Check for the first magic field and other error scenarios. */
if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
fx_sw->xstate_size < min_xstate_size ||
- fx_sw->xstate_size > xstate_size ||
+ fx_sw->xstate_size > user_xstate_size ||
fx_sw->xstate_size > fx_sw->extended_size)
return -1;

@@ -88,7 +88,7 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
if (!use_xsave())
return err;

- err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));
+ err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + user_xstate_size));

/*
* Read the xfeatures which we copied (directly from the cpu or
@@ -125,7 +125,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
else
err = copy_fregs_to_user((struct fregs_state __user *) buf);

- if (unlikely(err) && __clear_user(buf, xstate_size))
+ if (unlikely(err) && __clear_user(buf, user_xstate_size))
err = -EFAULT;
return err;
}
@@ -175,8 +175,19 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
if (ia32_fxstate)
copy_fxregs_to_kernel(&tsk->thread.fpu);
} else {
+ /*
+ * It is a *bug* if kernel uses compacted-format for xsave
+ * area and we copy it out directly to a signal frame. It
+ * should have been handled above by saving the registers
+ * directly.
+ */
+ if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+ WARN_ONCE(1, "x86/fpu: saving compacted-format xsave area to a signal frame!\n");
+ return -1;
+ }
+
fpstate_sanitize_xstate(&tsk->thread.fpu);
- if (__copy_to_user(buf_fx, xsave, xstate_size))
+ if (__copy_to_user(buf_fx, xsave, user_xstate_size))
return -1;
}

@@ -341,7 +352,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)

static inline int xstate_sigframe_size(void)
{
- return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
+ return use_xsave() ? user_xstate_size + FP_XSTATE_MAGIC2_SIZE :
+ user_xstate_size;
}

/*
@@ -385,12 +397,12 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
*/
void fpu__init_prepare_fx_sw_frame(void)
{
- int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
+ int size = user_xstate_size + FP_XSTATE_MAGIC2_SIZE;

fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
fx_sw_reserved.extended_size = size;
fx_sw_reserved.xfeatures = xfeatures_mask;
- fx_sw_reserved.xstate_size = xstate_size;
+ fx_sw_reserved.xstate_size = user_xstate_size;

if (config_enabled(CONFIG_IA32_EMULATION) ||
config_enabled(CONFIG_X86_32)) {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index d425cda5..9d41c0f 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -35,6 +35,8 @@ static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] =
static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];

+unsigned int user_xstate_size;
+
/*
* Clear all of the X86_FEATURE_* bits that are unavailable
* when the CPU has no XSAVE support.
@@ -159,7 +161,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
*/
while (xfeatures) {
if (xfeatures & 0x1) {
- int offset = xstate_offsets[feature_bit];
+ int offset = xstate_comp_offsets[feature_bit];
int size = xstate_sizes[feature_bit];

memcpy((void *)fx + offset,
@@ -518,8 +520,9 @@ static void do_extra_xstate_size_checks(void)
XSTATE_WARN_ON(paranoid_xstate_size != xstate_size);
}

+
/*
- * Calculate total size of enabled xstates in XCR0/xfeatures_mask.
+ * Get total size of enabled xstates in XCR0/xfeatures_mask.
*
* Note the SDM's wording here. "sub-function 0" only enumerates
* the size of the *user* states. If we use it to size a buffer
@@ -529,34 +532,33 @@ static void do_extra_xstate_size_checks(void)
* Note that we do not currently set any bits on IA32_XSS so
* 'XCR0 | IA32_XSS == XCR0' for now.
*/
-static unsigned int __init calculate_xstate_size(void)
+static unsigned int __init get_xsaves_size(void)
{
unsigned int eax, ebx, ecx, edx;
- unsigned int calculated_xstate_size;
+ /*
+ * - CPUID function 0DH, sub-function 1:
+ * EBX enumerates the size (in bytes) required by
+ * the XSAVES instruction for an XSAVE area
+ * containing all the state components
+ * corresponding to bits currently set in
+ * XCR0 | IA32_XSS.
+ */
+ cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
+ return ebx;
+}

- if (!cpu_has_xsaves) {
- /*
- * - CPUID function 0DH, sub-function 0:
- * EBX enumerates the size (in bytes) required by
- * the XSAVE instruction for an XSAVE area
- * containing all the *user* state components
- * corresponding to bits currently set in XCR0.
- */
- cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
- calculated_xstate_size = ebx;
- } else {
- /*
- * - CPUID function 0DH, sub-function 1:
- * EBX enumerates the size (in bytes) required by
- * the XSAVES instruction for an XSAVE area
- * containing all the state components
- * corresponding to bits currently set in
- * XCR0 | IA32_XSS.
- */
- cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
- calculated_xstate_size = ebx;
- }
- return calculated_xstate_size;
+static unsigned int __init get_xsave_size(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+ /*
+ * - CPUID function 0DH, sub-function 0:
+ * EBX enumerates the size (in bytes) required by
+ * the XSAVE instruction for an XSAVE area
+ * containing all the *user* state components
+ * corresponding to bits currently set in XCR0.
+ */
+ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+ return ebx;
}

/*
@@ -576,7 +578,15 @@ static bool is_supported_xstate_size(unsigned int test_xstate_size)
static int init_xstate_size(void)
{
/* Recompute the context size for enabled features: */
- unsigned int possible_xstate_size = calculate_xstate_size();
+ unsigned int possible_xstate_size;
+ unsigned int xsave_size;
+
+ xsave_size = get_xsave_size();
+
+ if (cpu_has_xsaves)
+ possible_xstate_size = get_xsaves_size();
+ else
+ possible_xstate_size = xsave_size;

/* Ensure we have the space to store all enabled: */
if (!is_supported_xstate_size(possible_xstate_size))
@@ -588,6 +598,11 @@ static int init_xstate_size(void)
*/
xstate_size = possible_xstate_size;
do_extra_xstate_size_checks();
+
+ /*
+ * User space is always in standard format.
+ */
+ user_xstate_size = xsave_size;
return 0;
}

--
1.9.1