[RFT PATCH v1 3/7] arm64: implement basic lazy save and restore for FPSIMD registers

From: Jiang Liu
Date: Fri Sep 27 2013 - 04:08:56 EST


From: Jiang Liu <jiang.liu@xxxxxxxxxx>

Implement basic lazy save and restore for the FPSIMD registers: FPSIMD
state is restored only on demand, and is saved only if it has actually
been loaded onto the hardware.

Signed-off-by: Jiang Liu <jiang.liu@xxxxxxxxxx>
Cc: Jiang Liu <liuj97@xxxxxxxxx>
---
arch/arm64/include/asm/fpsimd.h | 17 ++---
arch/arm64/kernel/fpsimd.c | 150 ++++++++++++++++++++++++++++++++++++++--
arch/arm64/kernel/process.c | 4 +-
arch/arm64/kernel/signal.c | 13 ++--
arch/arm64/kernel/signal32.c | 13 ++--
5 files changed, 164 insertions(+), 33 deletions(-)

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 4c2bc80..725b225 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -35,6 +35,7 @@ struct fpsimd_state {
__uint128_t vregs[32];
u32 fpsr;
u32 fpcr;
+ bool on_hw; /* soft state: whether loaded onto hw */
};
};
};
@@ -54,21 +55,15 @@ struct fpsimd_state {

struct task_struct;

-/* Clear FP status register, so it doesn't affect new FP context */
-static inline void fpsimd_init_hw_state(void)
-{
- int val = AARCH64_FPCR_DEFAULT_VAL;
-
- asm ("msr fpcr, %x0\n"
- "msr fpsr, xzr\n"
- : : "r"(val));
-}
-
extern void fpsimd_save_state(struct fpsimd_state *state);
extern void fpsimd_load_state(struct fpsimd_state *state);
-
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);
+extern void fpsimd_dup_state(struct fpsimd_state *src,
+ struct fpsimd_state *dst);
+extern void fpsimd_save_sigctx(struct fpsimd_state *state);
+extern void fpsimd_prepare_sigctx(struct fpsimd_state *ctx);
+extern void fpsimd_restore_sigctx(struct fpsimd_state *ctx);

#endif

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 12a25e5..2208ba3 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -4,6 +4,8 @@
* Copyright (C) 2012 ARM Ltd.
* Author: Catalin Marinas <catalin.marinas@xxxxxxx>
*
+ * Copyright (C) Jiang Liu <jiang.liu@xxxxxxxxxx>
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
@@ -22,6 +24,7 @@
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/hardirq.h>
+#include <linux/jump_label.h>

#include <asm/fpsimd.h>
#include <asm/cputype.h>
@@ -33,13 +36,91 @@
#define FPEXC_IXF (1 << 4)
#define FPEXC_IDF (1 << 7)

+static struct static_key fpsimd_lazy_mode = STATIC_KEY_INIT_FALSE;
+
+static inline void fpsimd_set_on_hw(struct fpsimd_state *state)
+{
+ state->on_hw = true;
+}
+
+static inline void fpsimd_clear_on_hw(struct fpsimd_state *state)
+{
+ state->on_hw = false;
+}
+
+static inline bool fpsimd_is_on_hw(struct fpsimd_state *state)
+{
+ return state->on_hw;
+}
+
+/* Clear FP status register, so it doesn't affect new FP context */
+static inline void fpsimd_init_hw_state(void)
+{
+ int val = AARCH64_FPCR_DEFAULT_VAL;
+
+ asm ("msr fpcr, %x0\n"
+ "msr fpsr, xzr\n"
+ : : "r"(val));
+}
+
+static inline void fpsimd_enable_trap(void)
+{
+ u32 __val;
+
+ asm volatile ("mrs %0, cpacr_el1\n"
+ "and %w0, %w0, #0xFFCFFFFF\n" /* clear CPACR_EL1 bits [21:20] */
+ "msr cpacr_el1, %0"
+ : "=&r" (__val));
+}
+
+static inline void fpsimd_disable_trap(void)
+{
+ u32 __val;
+
+ asm volatile ("mrs %0, cpacr_el1\n"
+ "orr %w0, %w0, #0x00300000\n" /* set CPACR_EL1 bits [21:20] */
+ "msr cpacr_el1, %0\n"
+ "isb" : "=&r" (__val)); /* sync: callers use FP regs right after */
+}
+
+/*
+ * In lazy mode the load is deferred to the FP/ASIMD access trap, and callers
+ * must disable preemption around fpsimd_{load,save}_state_lazy().
+ */
+static void fpsimd_load_state_lazy(struct fpsimd_state *state)
+{
+ if (static_key_false(&fpsimd_lazy_mode)) {
+ fpsimd_clear_on_hw(state); /* mark as not loaded onto hw */
+ fpsimd_enable_trap(); /* do_fpsimd_acc() loads it on the trap */
+ } else {
+ fpsimd_load_state(state);
+ }
+}
+
+static void fpsimd_save_state_lazy(struct fpsimd_state *state)
+{
+ if (static_key_false(&fpsimd_lazy_mode)) {
+ if (!fpsimd_is_on_hw(state))
+ return; /* not loaded onto hw, so nothing to save */
+ }
+
+ fpsimd_save_state(state);
+}
+
/*
* Trapped FP/ASIMD access.
*/
void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
{
- /* TODO: implement lazy context saving/restoring */
- WARN_ON(1);
+ struct fpsimd_state *state = &current->thread.fpsimd_state;
+
+ if (static_key_false(&fpsimd_lazy_mode)) {
+ fpsimd_disable_trap();
+ fpsimd_load_state(state);
+ fpsimd_set_on_hw(state);
+ } else {
+ WARN_ON(1);
+ }
}

/*
@@ -73,9 +154,9 @@ void fpsimd_thread_switch(struct task_struct *next)
{
/* check if not kernel threads */
if (current->mm)
- fpsimd_save_state(&current->thread.fpsimd_state);
+ fpsimd_save_state_lazy(&current->thread.fpsimd_state);
if (next->mm)
- fpsimd_load_state(&next->thread.fpsimd_state);
+ fpsimd_load_state_lazy(&next->thread.fpsimd_state);
}

void fpsimd_flush_thread(void)
@@ -87,7 +168,59 @@ void fpsimd_flush_thread(void)
#if (AARCH64_FPCR_DEFAULT_VAL != 0)
state->fpcr = AARCH64_FPCR_DEFAULT_VAL;
#endif
- fpsimd_load_state(state);
+ fpsimd_load_state_lazy(state);
+ preempt_enable();
+}
+
+/*
+ * 'src' has already been copied into 'dst' when this is called, so we only need
+ * to save the FPSIMD registers into 'dst' if 'src' is loaded on hardware.
+ */
+void fpsimd_dup_state(struct fpsimd_state *src, struct fpsimd_state *dst)
+{
+ BUG_ON(src != &current->thread.fpsimd_state);
+ if (static_key_false(&fpsimd_lazy_mode)) {
+ preempt_disable();
+ if (fpsimd_is_on_hw(src))
+ fpsimd_save_state(dst);
+ fpsimd_clear_on_hw(dst); /* the copy itself is not loaded onto hw */
+ preempt_enable();
+ } else {
+ fpsimd_save_state(dst); /* non-lazy mode: always save */
+ }
+}
+
+void fpsimd_save_sigctx(struct fpsimd_state *state)
+{
+ preempt_disable();
+ fpsimd_save_state_lazy(state);
+ preempt_enable();
+}
+
+/* Reset FPSR/FPCR for the new context; the old one is already in the sigframe. */
+void fpsimd_prepare_sigctx(struct fpsimd_state *ctx)
+{
+ if (static_key_false(&fpsimd_lazy_mode)) {
+ preempt_disable();
+ if (fpsimd_is_on_hw(ctx)) {
+ fpsimd_init_hw_state(); /* on hw: reset the registers */
+ } else {
+ ctx->fpsr = 0; /* not on hw: reset the saved copy instead */
+ ctx->fpcr = AARCH64_FPCR_DEFAULT_VAL;
+ }
+ preempt_enable();
+ } else {
+ fpsimd_init_hw_state();
+ }
+}
+
+void fpsimd_restore_sigctx(struct fpsimd_state *ctx)
+{
+ struct fpsimd_state *state = &current->thread.fpsimd_state;
+
+ preempt_disable();
+ *state = *ctx;
+ fpsimd_load_state_lazy(state);
preempt_enable();
}

@@ -103,7 +236,10 @@ void kernel_neon_begin(void)
preempt_disable();

if (current->mm)
- fpsimd_save_state(&current->thread.fpsimd_state);
+ fpsimd_save_state_lazy(&current->thread.fpsimd_state);
+
+ if (static_key_false(&fpsimd_lazy_mode))
+ fpsimd_disable_trap();

fpsimd_init_hw_state();
}
@@ -112,7 +248,7 @@ EXPORT_SYMBOL(kernel_neon_begin);
void kernel_neon_end(void)
{
if (current->mm)
- fpsimd_load_state(&current->thread.fpsimd_state);
+ fpsimd_load_state_lazy(&current->thread.fpsimd_state);

preempt_enable();
}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 7ae8a1f..0176fac 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -195,8 +195,10 @@ void release_thread(struct task_struct *dead_task)

int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
- fpsimd_save_state(&current->thread.fpsimd_state);
+ BUG_ON(src != current);
*dst = *src;
+ fpsimd_dup_state(&src->thread.fpsimd_state, &dst->thread.fpsimd_state);
+
return 0;
}

diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 6d80612..b6fe0d1 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -51,8 +51,7 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
int err;

/* dump the hardware registers to the fpsimd_state structure */
- fpsimd_save_state(fpsimd);
- fpsimd_init_hw_state();
+ fpsimd_save_sigctx(fpsimd);

/* copy the FP and status/control registers */
err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs));
@@ -63,6 +62,9 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
__put_user_error(FPSIMD_MAGIC, &ctx->head.magic, err);
__put_user_error(sizeof(struct fpsimd_context), &ctx->head.size, err);

+ if (!err)
+ fpsimd_prepare_sigctx(fpsimd);
+
return err ? -EFAULT : 0;
}

@@ -87,11 +89,8 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
__get_user_error(fpsimd.fpcr, &ctx->fpcr, err);

/* load the hardware registers from the fpsimd_state structure */
- if (!err) {
- preempt_disable();
- fpsimd_load_state(&fpsimd);
- preempt_enable();
- }
+ if (!err)
+ fpsimd_restore_sigctx(&fpsimd);

return err ? -EFAULT : 0;
}
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index cb2cb41..8b4cb89 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -247,8 +247,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
* Note that this also saves V16-31, which aren't visible
* in AArch32.
*/
- fpsimd_save_state(fpsimd);
- fpsimd_init_hw_state();
+ fpsimd_save_sigctx(fpsimd);

/* Place structure header on the stack */
__put_user_error(magic, &frame->magic, err);
@@ -276,6 +275,9 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
__put_user_error(0, &frame->ufp_exc.fpinst, err);
__put_user_error(0, &frame->ufp_exc.fpinst2, err);

+ if (!err)
+ fpsimd_prepare_sigctx(fpsimd);
+
return err ? -EFAULT : 0;
}

@@ -311,11 +313,8 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
* We don't need to touch the exception register, so
* reload the hardware state.
*/
- if (!err) {
- preempt_disable();
- fpsimd_load_state(&fpsimd);
- preempt_enable();
- }
+ if (!err)
+ fpsimd_restore_sigctx(&fpsimd);

return err ? -EFAULT : 0;
}
--
1.8.1.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/