[PATCH v3 Bugfix 2/6] x86/xsaves: Define and use user_xstate_size for xstate size in signal context

From: Fenghua Yu
Date: Fri May 08 2015 - 17:36:28 EST


From: Fenghua Yu <fenghua.yu@xxxxxxxxx>

If "xsaves" is enabled, kernel always uses compact format of xsave area.
But user space still uses standard format of xsave area. Thus, xstate size
in kernel's xsave area is smaller than xstate size in user's xsave area.
xstate in signal frame should be in standard format for user's signal
handler to access.

In no "xsaves" case, xsave area in both user space and kernel space are in
standard format. Therefore, user's and kernel's xstate sizes are equal.

In "xsaves" case, xsave area in user space is in standard format while
xsave area in kernel space is in compact format. Therefore, kernel's
xstate size is less than user's xstate size.

So here is the problem: currently kernel uses the kernel's xstate size
for xstate size in signal frame. This is not a problem in no "xsaves" case.
But it is an issue in "xsaves" case because kernel's xstate size is smaller
than user's xstate size. When setting up signal math frame in
alloc_ mathframe(), the fpstate is in standard format; but a smaller size
of fpstate buffer is allocated in signal frame for standard format
xstate. Then kernel saves only part of xstate registers into this smaller
user's fpstate buffer and user will see part of the xstate registers in
signal context. Similar issue happens after returning from signal handler:
kernel will only restore part of xstate registers from user's fpstate
buffer in signal frame.

This patch defines and uses user_xstate_size for xstate size in signal
frame. It's read from returned value in ebx from CPUID leaf 0x0D subleaf
0x0. This is maximum size required by enabled states in XCR0 and may be
different from ecx when states at the end of the xsave area are not
enabled. This value indicates the size required for XSAVE to save all
supported user states in legacy/standard format.

Signed-off-by: Fenghua Yu <fenghua.yu@xxxxxxxxx>
Reviewed-by: Dave Hansen <dave.hansen@xxxxxxxxx>
---
arch/x86/include/asm/fpu-internal.h | 3 ++-
arch/x86/include/asm/processor.h | 1 +
arch/x86/include/asm/xsave.h | 1 -
arch/x86/kernel/i387.c | 3 +++
arch/x86/kernel/xsave.c | 29 +++++++++++++++++++++--------
5 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index da5e967..c00c769 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -496,7 +496,8 @@ extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size);

static inline int xstate_sigframe_size(void)
{
- return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
+ return use_xsave() ? user_xstate_size + FP_XSTATE_MAGIC2_SIZE :
+ user_xstate_size;
}

static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 23ba676..576ff8c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -483,6 +483,7 @@ DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
#endif /* X86_64 */

extern unsigned int xstate_size;
+extern unsigned int user_xstate_size;
extern void free_thread_xstate(struct task_struct *);
extern struct kmem_cache *task_xstate_cachep;

diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index c9a6d68..7799d18 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -44,7 +44,6 @@
#define REX_PREFIX
#endif

-extern unsigned int xstate_size;
extern u64 pcntxt_mask;
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
extern struct xsave_struct *init_xstate_buf;
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 00918327..8a7b96b 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -166,6 +166,7 @@ static void init_thread_xstate(void)
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
xstate_size = sizeof(struct i387_soft_struct);
+ user_xstate_size = xstate_size;
return;
}

@@ -173,6 +174,8 @@ static void init_thread_xstate(void)
xstate_size = sizeof(struct i387_fxsave_struct);
else
xstate_size = sizeof(struct i387_fsave_struct);
+
+ user_xstate_size = xstate_size;
}

/*
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 3c0a9d1..f99a6b7 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -29,6 +29,7 @@ static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
static unsigned int *xstate_offsets, *xstate_sizes;
static unsigned int xstate_comp_offsets[sizeof(pcntxt_mask)*8];
static unsigned int xstate_features;
+unsigned int user_xstate_size;

/*
* If a processor implementation discern that a processor state component is
@@ -85,7 +86,7 @@ void __sanitize_i387_state(struct task_struct *tsk)
*/
while (xstate_bv) {
if (xstate_bv & 0x1) {
- int offset = xstate_offsets[feature_bit];
+ int offset = xstate_comp_offsets[feature_bit];
int size = xstate_sizes[feature_bit];

memcpy(((void *) fx) + offset,
@@ -116,7 +117,7 @@ static inline int check_for_xstate(struct i387_fxsave_struct __user *buf,
/* Check for the first magic field and other error scenarios. */
if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
fx_sw->xstate_size < min_xstate_size ||
- fx_sw->xstate_size > xstate_size ||
+ fx_sw->xstate_size > user_xstate_size ||
fx_sw->xstate_size > fx_sw->extended_size)
return -1;

@@ -173,7 +174,8 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
if (!use_xsave())
return err;

- err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));
+ err |= __put_user(FP_XSTATE_MAGIC2,
+ (__u32 *)(buf + user_xstate_size));

/*
* Read the xstate_bv which we copied (directly from the cpu or
@@ -210,7 +212,7 @@ static inline int save_user_xstate(struct xsave_struct __user *buf)
else
err = fsave_user((struct i387_fsave_struct __user *) buf);

- if (unlikely(err) && __clear_user(buf, xstate_size))
+ if (unlikely(err) && __clear_user(buf, user_xstate_size))
err = -EFAULT;
return err;
}
@@ -260,8 +262,16 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
if (ia32_fxstate)
fpu_fxsave(&tsk->thread.fpu);
} else {
+ /*
+ * When we come here, kernel uses standard format for
+ * xsave area and xstate size should be the same for
+ * both kernel and user space. Warn if it's not the
+ * case.
+ */
+ WARN_ONCE(user_xstate_size != kernel_xstate_size,
+ "Wrong kernel xstate size!\n");
sanitize_i387_state(tsk);
- if (__copy_to_user(buf_fx, xsave, xstate_size))
+ if (__copy_to_user(buf_fx, xsave, user_xstate_size))
return -1;
}

@@ -434,7 +444,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
static void prepare_fx_sw_frame(void)
{
int fsave_header_size = sizeof(struct i387_fsave_struct);
- int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
+ int size = user_xstate_size + FP_XSTATE_MAGIC2_SIZE;

if (config_enabled(CONFIG_X86_32))
size += fsave_header_size;
@@ -442,7 +452,7 @@ static void prepare_fx_sw_frame(void)
fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
fx_sw_reserved.extended_size = size;
fx_sw_reserved.xstate_bv = pcntxt_mask;
- fx_sw_reserved.xstate_size = xstate_size;
+ fx_sw_reserved.xstate_size = user_xstate_size;

if (config_enabled(CONFIG_IA32_EMULATION)) {
fx_sw_reserved_ia32 = fx_sw_reserved;
@@ -576,14 +586,18 @@ __setup("eagerfpu=", eager_fpu_setup);

/*
* Calculate total size of enabled xstates in XCR0/pcntxt_mask.
+ *
+ * XCR0/pcntxt_mask should not be changed after this.
*/
static void __init init_xstate_size(void)
{
unsigned int eax, ebx, ecx, edx;
int i;

+ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+ user_xstate_size = ebx;
+
if (!cpu_has_xsaves) {
- cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
xstate_size = ebx;
return;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/