[PATCH v2] MIPS: tidy up FPU context switching

From: Paul Burton
Date: Fri May 22 2015 - 06:42:50 EST


Rather than saving the scalar FP or vector context in the assembly
resume function, reuse the existing C code we have in fpu.h to do
exactly that. This reduces duplication, results in a much easier to read
resume function & should allow the compiler to optimise out more MSA
code due to is_msa_enabled()/cpu_has_msa being known-zero at compile
time for kernels without MSA support.

As a side effect this correctly disables MSA on the first entry to a
task, in which case resume will "return" to ret_from_kernel_thread or
ret_from_fork in order to call schedule_tail. This would lead to the
disable_msa call in switch_to not being executed, as reported by Leonid.

Signed-off-by: Paul Burton <paul.burton@xxxxxxxxxx>
Reported-by: Leonid Yegoshin <leonid.yegoshin@xxxxxxxxxx>
---
How about this (lightly tested for the moment) alternative to 10082?

Changes in v2:
- Introduce lose_fpu_inatomic to skip the preempt_{en,dis}able calls
and operate on the provided struct task_struct, which should be prev
rather than current.

arch/mips/include/asm/fpu.h | 21 ++++++++++++--------
arch/mips/include/asm/switch_to.h | 21 ++++----------------
arch/mips/kernel/r4k_switch.S | 41 +--------------------------------------
3 files changed, 18 insertions(+), 65 deletions(-)

diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index 084780b..87e8c78 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -164,25 +164,30 @@ static inline int own_fpu(int restore)
return ret;
}

-static inline void lose_fpu(int save)
+static inline void lose_fpu_inatomic(int save, struct task_struct *tsk)
{
- preempt_disable();
if (is_msa_enabled()) {
if (save) {
- save_msa(current);
- current->thread.fpu.fcr31 =
+ save_msa(tsk);
+ tsk->thread.fpu.fcr31 =
read_32bit_cp1_register(CP1_STATUS);
}
disable_msa();
- clear_thread_flag(TIF_USEDMSA);
+ clear_tsk_thread_flag(tsk, TIF_USEDMSA);
__disable_fpu();
} else if (is_fpu_owner()) {
if (save)
- _save_fp(current);
+ _save_fp(tsk);
__disable_fpu();
}
- KSTK_STATUS(current) &= ~ST0_CU1;
- clear_thread_flag(TIF_USEDFPU);
+ KSTK_STATUS(tsk) &= ~ST0_CU1;
+ clear_tsk_thread_flag(tsk, TIF_USEDFPU);
+}
+
+static inline void lose_fpu(int save)
+{
+ preempt_disable();
+ lose_fpu_inatomic(save, current);
preempt_enable();
}

diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index e92d6c4b..c429d1a 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -16,29 +16,21 @@
#include <asm/watch.h>
#include <asm/dsp.h>
#include <asm/cop2.h>
-#include <asm/msa.h>
+#include <asm/fpu.h>

struct task_struct;

-enum {
- FP_SAVE_NONE = 0,
- FP_SAVE_VECTOR = -1,
- FP_SAVE_SCALAR = 1,
-};
-
/**
* resume - resume execution of a task
* @prev: The task previously executed.
* @next: The task to begin executing.
* @next_ti: task_thread_info(next).
- * @fp_save: Which, if any, FP context to save for prev.
*
* This function is used whilst scheduling to save the context of prev & load
* the context of next. Returns prev.
*/
extern asmlinkage struct task_struct *resume(struct task_struct *prev,
- struct task_struct *next, struct thread_info *next_ti,
- s32 fp_save);
+ struct task_struct *next, struct thread_info *next_ti);

extern unsigned int ll_bit;
extern struct task_struct *ll_task;
@@ -86,8 +78,8 @@ do { if (cpu_has_rw_llb) { \
#define switch_to(prev, next, last) \
do { \
u32 __c0_stat; \
- s32 __fpsave = FP_SAVE_NONE; \
__mips_mt_fpaff_switch_to(prev); \
+ lose_fpu_inatomic(1, prev); \
if (cpu_has_dsp) \
__save_dsp(prev); \
if (cop2_present && (KSTK_STATUS(prev) & ST0_CU2)) { \
@@ -99,12 +91,7 @@ do { \
write_c0_status(__c0_stat & ~ST0_CU2); \
} \
__clear_software_ll_bit(); \
- if (test_and_clear_tsk_thread_flag(prev, TIF_USEDFPU)) \
- __fpsave = FP_SAVE_SCALAR; \
- if (test_and_clear_tsk_thread_flag(prev, TIF_USEDMSA)) \
- __fpsave = FP_SAVE_VECTOR; \
- (last) = resume(prev, next, task_thread_info(next), __fpsave); \
- disable_msa(); \
+ (last) = resume(prev, next, task_thread_info(next)); \
} while (0)

#define finish_arch_switch(prev) \
diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index 04cbbde3..92cd051 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S
@@ -34,7 +34,7 @@
#ifndef USE_ALTERNATE_RESUME_IMPL
/*
* task_struct *resume(task_struct *prev, task_struct *next,
- * struct thread_info *next_ti, s32 fp_save)
+ * struct thread_info *next_ti)
*/
.align 5
LEAF(resume)
@@ -43,45 +43,6 @@
cpu_save_nonscratch a0
LONG_S ra, THREAD_REG31(a0)

- /*
- * Check whether we need to save any FP context. FP context is saved
- * iff the process has used the context with the scalar FPU or the MSA
- * ASE in the current time slice, as indicated by _TIF_USEDFPU and
- * _TIF_USEDMSA respectively. switch_to will have set fp_save
- * accordingly to an FP_SAVE_ enum value.
- */
- beqz a3, 2f
-
- /*
- * We do. Clear the saved CU1 bit for prev, such that next time it is
- * scheduled it will start in userland with the FPU disabled. If the
- * task uses the FPU then it will be enabled again via the do_cpu trap.
- * This allows us to lazily restore the FP context.
- */
- PTR_L t3, TASK_THREAD_INFO(a0)
- LONG_L t0, ST_OFF(t3)
- li t1, ~ST0_CU1
- and t0, t0, t1
- LONG_S t0, ST_OFF(t3)
-
- /* Check whether we're saving scalar or vector context. */
- bgtz a3, 1f
-
- /* Save 128b MSA vector context + scalar FP control & status. */
- .set push
- SET_HARDFLOAT
- cfc1 t1, fcr31
- msa_save_all a0
- .set pop /* SET_HARDFLOAT */
-
- sw t1, THREAD_FCR31(a0)
- b 2f
-
-1: /* Save 32b/64b scalar FP context. */
- fpu_save_double a0 t0 t1 # c0_status passed in t0
- # clobbers t1
-2:
-
#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
PTR_LA t8, __stack_chk_guard
LONG_L t9, TASK_STACK_CANARY(a1)
--
2.4.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/