[GIT PULL] x86/fpu for v2.6.32

From: Ingo Molnar
Date: Fri Sep 11 2009 - 15:44:51 EST


Linus,

Please pull the x86-fpu-for-linus git tree from:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git x86-fpu-for-linus

Thanks,

Ingo

------------------>
Jeremy Fitzhardinge (4):
x86: split out core __math_state_restore
x86-32: make sure clts is batched during context switch
x86-64: move unlazy_fpu() into lazy cpu state part of context switch
x86-64: move clts into batch cpu state updates when preloading fpu


arch/x86/include/asm/i387.h | 1 +
arch/x86/kernel/process_32.c | 27 ++++++++++++++++-----------
arch/x86/kernel/process_64.c | 33 +++++++++++++++++++++------------
arch/x86/kernel/traps.c | 33 +++++++++++++++++++++++----------
4 files changed, 61 insertions(+), 33 deletions(-)

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 175adf5..2e75292 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -26,6 +26,7 @@ extern void fpu_init(void);
extern void mxcsr_feature_mask_init(void);
extern int init_fpu(struct task_struct *child);
extern asmlinkage void math_state_restore(void);
+extern void __math_state_restore(void);
extern void init_thread_xstate(void);
extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 59f4524..a80eddd 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -350,14 +350,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*next = &next_p->thread;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ bool preload_fpu;

/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

- __unlazy_fpu(prev_p);
+ /*
+ * If the task has used fpu the last 5 timeslices, just do a full
+ * restore of the math state immediately to avoid the trap; the
+ * chances of needing FPU soon are obviously high now
+ */
+ preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;

+ __unlazy_fpu(prev_p);

/* we're going to use this soon, after a few expensive things */
- if (next_p->fpu_counter > 5)
+ if (preload_fpu)
prefetch(next->xstate);

/*
@@ -398,6 +405,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
__switch_to_xtra(prev_p, next_p, tss);

+ /* If we're going to preload the fpu context, make sure clts
+ is run while we're batching the cpu state updates. */
+ if (preload_fpu)
+ clts();
+
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
@@ -407,15 +419,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
arch_end_context_switch(next_p);

- /* If the task has used fpu the last 5 timeslices, just do a full
- * restore of the math state immediately to avoid the trap; the
- * chances of needing FPU soon are obviously high now
- *
- * tsk_used_math() checks prevent calling math_state_restore(),
- * which can sleep in the case of !tsk_used_math()
- */
- if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
- math_state_restore();
+ if (preload_fpu)
+ __math_state_restore();

/*
* Restore %gs if needed (which is common)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ebefb54..a28279d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -386,9 +386,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
unsigned fsindex, gsindex;
+ bool preload_fpu;
+
+ /*
+ * If the task has used fpu the last 5 timeslices, just do a full
+ * restore of the math state immediately to avoid the trap; the
+ * chances of needing FPU soon are obviously high now
+ */
+ preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;

/* we're going to use this soon, after a few expensive things */
- if (next_p->fpu_counter > 5)
+ if (preload_fpu)
prefetch(next->xstate);

/*
@@ -419,6 +427,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)

load_TLS(next, cpu);

+ /* Must be after DS reload */
+ unlazy_fpu(prev_p);
+
+ /* Make sure cpu is ready for new context */
+ if (preload_fpu)
+ clts();
+
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
@@ -459,9 +474,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
prev->gsindex = gsindex;

- /* Must be after DS reload */
- unlazy_fpu(prev_p);
-
/*
* Switch the PDA and FPU contexts.
*/
@@ -480,15 +492,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev_p, next_p, tss);

- /* If the task has used fpu the last 5 timeslices, just do a full
- * restore of the math state immediately to avoid the trap; the
- * chances of needing FPU soon are obviously high now
- *
- * tsk_used_math() checks prevent calling math_state_restore(),
- * which can sleep in the case of !tsk_used_math()
+ /*
+ * Preload the FPU context, now that we've determined that the
+ * task is likely to be using it.
*/
- if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
- math_state_restore();
+ if (preload_fpu)
+ __math_state_restore();
return prev_p;
}

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5f935f0..71b9166 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -814,6 +814,28 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
}

/*
+ * __math_state_restore assumes that cr0.TS is already clear and the
+ * fpu state is all ready for use. Used during context switch.
+ */
+void __math_state_restore(void)
+{
+ struct thread_info *thread = current_thread_info();
+ struct task_struct *tsk = thread->task;
+
+ /*
+ * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+ */
+ if (unlikely(restore_fpu_checking(tsk))) {
+ stts();
+ force_sig(SIGSEGV, tsk);
+ return;
+ }
+
+ thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
+ tsk->fpu_counter++;
+}
+
+/*
* 'math_state_restore()' saves the current math information in the
* old math state array, and gets the new ones from the current task
*
@@ -844,17 +866,8 @@ asmlinkage void math_state_restore(void)
}

clts(); /* Allow maths ops (or we recurse) */
- /*
- * Paranoid restore. send a SIGSEGV if we fail to restore the state.
- */
- if (unlikely(restore_fpu_checking(tsk))) {
- stts();
- force_sig(SIGSEGV, tsk);
- return;
- }

- thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
- tsk->fpu_counter++;
+ __math_state_restore();
}
EXPORT_SYMBOL_GPL(math_state_restore);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/