Please test: SMP+lazy FPU restore patch for 2.1.95

Adam J. Richter (adam@yggdrasil.com)
Sun, 12 Apr 1998 12:21:16 -0700


Here is a patch that I believe implements the lazy FPU
restore on SMP kernels. I would appreciate it if people would
test and benchmark this patch. I do not even know if it compiles
for sparc, arm, powerpc, etc., so I am particularly interested in
having people on those architectures give it a whirl. For x86,
I can confirm that it compiles without warnings and does not
crash my machine, but also seems to make no difference on the
Byte Magazine nbench FPU index, which is the only benchmark that
I have tried.

Benefits:

1. A single CPU machine running an SMP kernel now
gets the same lazy FPU saving and lazy FPU restore
benefits as on a non-SMP kernel.

2. A multiprocessor machine gets lazy restores, but still
no lazy saves. That is, the FPU state is saved back to
the task_struct, so that the process can be run on another
CPU if need be, but the FPU restore is avoided if
the running process was the most recent process to
use the FPU and it had not executed any floating
point instructions on another CPU in the interim.

Cool implementation notes:

1. There is a lock for each CPU's FPU context, but
the lock is only used at the loading of a
context into the FPU and when a process is
destroyed. At all other times, the only writes
being done are by a processor clearing its own
FPU context, and this can be (and is) safely
done without the need to acquire the lock,
(because the only value that one CPU ever
writes to another CPU's FPU context is NULL, the
same value that is being written when the lock
is not being acquired).

2. If a process executes some floating point
instructions on CPU A, then some integer code
while on other CPU(s), and then returns to CPU A,
this code is smart enough to still avoid doing an
FPU restore into CPU A if the process's floating
point context is still loaded and current. It
also knows not to do this if any floating point
instructions were executed by that process while
on another CPU.

Anyhow, please let me know if this code works and if
it helps performance.

Adam J. Richter __ ______________ 4880 Stevens Creek Blvd, Suite 205
adam@yggdrasil.com \ / San Jose, California 95129-1034
+1 408 261-6630 | g g d r a s i l United States of America
fax +1 408 261-6631 "Free Software For The Rest Of Us."
-------------------------CUT HERE------------------------------------

diff -u -r --new-file linux/arch/alpha/kernel/smp.c linux.hacked/arch/alpha/kernel/smp.c
--- linux/arch/alpha/kernel/smp.c Fri Apr 10 12:08:01 1998
+++ linux.hacked/arch/alpha/kernel/smp.c Fri Apr 10 09:37:46 1998
@@ -356,14 +356,14 @@
if (user) {
if (current->priority < DEF_PRIORITY) {
kstat.cpu_nice++;
- kstat.per_cpu_nice[cpu]++;
+ per_cpu[cpu].kstat.nice++;
} else {
kstat.cpu_user++;
- kstat.per_cpu_user[cpu]++;
+ per_cpu[cpu].kstat.user++;
}
} else {
kstat.cpu_system++;
- kstat.per_cpu_system[cpu]++;
+ per_cpu[cpu].kstat.system++;
}
spin_unlock(&ticker_lock);
}
diff -u -r --new-file linux/arch/arm/kernel/process.c linux.hacked/arch/arm/kernel/process.c
--- linux/arch/arm/kernel/process.c Fri Apr 10 12:08:03 1998
+++ linux.hacked/arch/arm/kernel/process.c Fri Apr 10 09:52:03 1998
@@ -154,8 +154,14 @@
*/
void exit_thread(void)
{
- if (last_task_used_math == current)
- last_task_used_math = NULL;
+ int cpu;
+ for(cpu = 0; cpu < NR_CPUS; cpu++) {
+ spin_lock(&per_cpu[cpu].fpu.lock);
+ if (per_cpu[cpu].fpu.task == current) {
+ per_cpu[cpu].fpu.task = NULL;
+ }
+ spin_unlock(&per_cpu[cpu].fpu.lock);
+ }
}

void flush_thread(void)
@@ -164,7 +170,7 @@

for (i = 0; i < 8; i++)
current->debugreg[i] = 0;
- if (last_task_used_math == current)
+ if (is_last_math_task(current))
last_task_used_math = NULL;
current->used_math = 0;
current->flags &= ~PF_USEDFPU;
@@ -190,7 +196,7 @@
/*
* Save current math state in p->tss.fpe_save if not already there.
*/
- if (last_task_used_math == current)
+ if (is_last_math_task(current))
fpe_save (&p->tss.fpstate.soft);

return 0;
@@ -204,7 +210,7 @@
int fpvalid = 0;

if (current->used_math) {
- if (last_task_used_math == current)
+ if (is_last_math_task(current))
fpe_save (&current->tss.fpstate.soft);

memcpy (fp, &current->tss.fpstate.soft, sizeof (fp));
diff -u -r --new-file linux/arch/arm/kernel/traps.c linux.hacked/arch/arm/kernel/traps.c
--- linux/arch/arm/kernel/traps.c Fri Apr 10 12:08:03 1998
+++ linux.hacked/arch/arm/kernel/traps.c Fri Apr 10 09:51:39 1998
@@ -234,14 +234,21 @@
*/
asmlinkage void math_state_restore (void)
{
- if (last_task_used_math == current)
+ if (is_last_math_task(current))
return;
if (last_task_used_math)
/*
* Save current fp state into last_task_used_math->tss.fpe_save
*/
fpe_save (&last_task_used_math->tss.fpstate.soft);
+
+#ifdef __SMP__
+ current->last_math_cpu = smp_processor_id();
+#endif /* __SMP__ */
+
+ spin_lock(&per_cpu[smp_processor_id()].fpu.lock);
last_task_used_math = current;
+ spin_unlock(&per_cpu[smp_processor_id()].fpu.lock);
if (current->used_math) {
/*
* Restore current fp state from current->tss.fpe_save
diff -u -r --new-file linux/arch/i386/kernel/process.c linux.hacked/arch/i386/kernel/process.c
--- linux/arch/i386/kernel/process.c Fri Apr 10 12:08:06 1998
+++ linux.hacked/arch/i386/kernel/process.c Fri Apr 10 09:50:08 1998
@@ -424,9 +424,15 @@

void exit_thread(void)
{
- /* forget lazy i387 state */
- if (last_task_used_math == current)
- last_task_used_math = NULL;
+ int cpu;
+ /* forget lazy i387 state on all CPU's. */
+ for(cpu = 0; cpu < NR_CPUS; cpu++) {
+ spin_lock(&per_cpu[cpu].fpu.lock);
+ if (per_cpu[cpu].fpu.task == current) {
+ per_cpu[cpu].fpu.task = NULL;
+ }
+ spin_unlock(&per_cpu[cpu].fpu.lock);
+ }
/* forget local segments */
__asm__ __volatile__("movl %w0,%%fs ; movl %w0,%%gs ; lldt %w0"
: /* no outputs */
@@ -463,10 +469,11 @@
*/
#ifdef __SMP__
if (current->flags & PF_USEDFPU) {
+ last_task_used_math = NULL;
stts();
}
#else
- if (last_task_used_math == current) {
+ if (is_last_math_task(current)) {
last_task_used_math = NULL;
stts();
}
@@ -525,7 +532,7 @@
*/
p->tss.bitmap = sizeof(struct thread_struct);

- if (last_task_used_math == current)
+ if (is_last_math_task(current))
__asm__("clts ; fnsave %0 ; frstor %0":"=m" (p->tss.i387));

return 0;
@@ -540,7 +547,7 @@

if ((fpvalid = current->used_math) != 0) {
if (boot_cpu_data.hard_math) {
- if (last_task_used_math == current) {
+ if (is_last_math_task(current)) {
__asm__("clts ; fsave %0; fwait": :"m" (*fpu));
}
else
diff -u -r --new-file linux/arch/i386/kernel/ptrace.c linux.hacked/arch/i386/kernel/ptrace.c
--- linux/arch/i386/kernel/ptrace.c Fri Apr 10 12:08:06 1998
+++ linux.hacked/arch/i386/kernel/ptrace.c Thu Apr 9 19:14:09 1998
@@ -624,7 +624,7 @@
#ifdef CONFIG_MATH_EMULATION
if ( boot_cpu_data.hard_math ) {
#endif
- if (last_task_used_math == child) {
+ if (is_last_math_task(child)) {
clts();
__asm__("fnsave %0; fwait":"=m" (child->tss.i387.hard));
last_task_used_math = NULL;
@@ -652,7 +652,7 @@
#ifdef CONFIG_MATH_EMULATION
if ( boot_cpu_data.hard_math ) {
#endif
- if (last_task_used_math == child) {
+ if (is_last_math_task(child)) {
/* Discard the state of the FPU */
last_task_used_math = NULL;
}
diff -u -r --new-file linux/arch/i386/kernel/signal.c linux.hacked/arch/i386/kernel/signal.c
--- linux/arch/i386/kernel/signal.c Fri Apr 10 12:08:06 1998
+++ linux.hacked/arch/i386/kernel/signal.c Thu Apr 9 19:12:57 1998
@@ -148,10 +148,11 @@
{
#ifdef __SMP__
if (current->flags & PF_USEDFPU) {
+ last_task_used_math = NULL;
stts();
}
#else
- if (current == last_task_used_math) {
+ if (is_last_math_task(current)) {
last_task_used_math = NULL;
stts();
}
@@ -296,11 +297,13 @@
#ifdef __SMP__
if (current->flags & PF_USEDFPU) {
__asm__ __volatile__("fnsave %0":"=m"(current->tss.i387.hard));
+ last_task_used_math = NULL;
+ __asm__ __volatile__("fwait"); /* not needed on 486+ */
stts();
current->flags &= ~PF_USEDFPU;
}
#else
- if (current == last_task_used_math) {
+ if (is_last_math_task(current)) {
__asm__ __volatile__("fnsave %0":"=m"(current->tss.i387.hard));
last_task_used_math = NULL;
__asm__ __volatile__("fwait"); /* not needed on 486+ */
diff -u -r --new-file linux/arch/i386/kernel/smp.c linux.hacked/arch/i386/kernel/smp.c
--- linux/arch/i386/kernel/smp.c Fri Apr 10 12:08:06 1998
+++ linux.hacked/arch/i386/kernel/smp.c Fri Apr 10 09:37:07 1998
@@ -1398,14 +1398,14 @@
}
if (p->priority < DEF_PRIORITY) {
kstat.cpu_nice += user;
- kstat.per_cpu_nice[cpu] += user;
+ per_cpu[cpu].kstat.nice += user;
} else {
kstat.cpu_user += user;
- kstat.per_cpu_user[cpu] += user;
+ per_cpu[cpu].kstat.user += user;
}

kstat.cpu_system += system;
- kstat.per_cpu_system[cpu] += system;
+ per_cpu[cpu].kstat.system += system;

}
prof_counter[cpu]=prof_multiplier[cpu];
diff -u -r --new-file linux/arch/i386/kernel/traps.c linux.hacked/arch/i386/kernel/traps.c
--- linux/arch/i386/kernel/traps.c Fri Apr 10 12:08:06 1998
+++ linux.hacked/arch/i386/kernel/traps.c Fri Apr 10 09:49:35 1998
@@ -313,16 +313,12 @@

lock_kernel();
clts();
-#ifdef __SMP__
- task = current;
-#else
task = last_task_used_math;
last_task_used_math = NULL;
if (!task) {
__asm__("fnclex");
goto out;
}
-#endif
/*
* Save the info for the exception handler
*/
@@ -333,9 +329,7 @@
force_sig(SIGFPE, task);
task->tss.trap_no = 16;
task->tss.error_code = 0;
-#ifndef __SMP__
out:
-#endif
unlock_kernel();
}

@@ -373,15 +367,23 @@
* case we swap processors. We also don't use the coprocessor
* timer - IRQ 13 mode isn't used with SMP machines (thank god).
*/
-#ifndef __SMP__
- if (last_task_used_math == current)
+ if (is_last_math_task(current))
return;
if (last_task_used_math)
__asm__("fnsave %0":"=m" (last_task_used_math->tss.i387));
else
__asm__("fnclex");
+
+ /* We spin lock here to avoid a potential conflict with
+ the code for destroying defunction threads in process.c,
+ which writes NULL into the math_task field of other processors.
+ */
+#ifdef __SMP__
+ current->last_math_cpu = smp_processor_id();
+#endif /* __SMP__ */
+ spin_lock(&per_cpu[smp_processor_id()].fpu.lock);
last_task_used_math = current;
-#endif
+ spin_unlock(&per_cpu[smp_processor_id()].fpu.lock);

if(current->used_math)
__asm__("frstor %0": :"m" (current->tss.i387));
diff -u -r --new-file linux/arch/ppc/kernel/align.c linux.hacked/arch/ppc/kernel/align.c
--- linux/arch/ppc/kernel/align.c Fri Apr 10 12:08:20 1998
+++ linux.hacked/arch/ppc/kernel/align.c Thu Apr 9 19:06:44 1998
@@ -194,7 +194,7 @@
return -EFAULT; /* bad address */
}

- if ((flags & F) && last_task_used_math == current)
+ if ((flags & F) && is_last_math_task(current))
giveup_fpu();

if (flags & M)
diff -u -r --new-file linux/arch/ppc/kernel/process.c linux.hacked/arch/ppc/kernel/process.c
--- linux/arch/ppc/kernel/process.c Fri Apr 10 12:08:21 1998
+++ linux.hacked/arch/ppc/kernel/process.c Fri Apr 10 09:49:17 1998
@@ -77,7 +77,7 @@
int
dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs)
{
- if (last_task_used_math == current)
+ if (is_last_math_task(current))
giveup_fpu();
memcpy(fpregs, &current->tss.fpr[0], sizeof(*fpregs));
return 1;
@@ -228,13 +228,20 @@

void exit_thread(void)
{
- if (last_task_used_math == current)
- last_task_used_math = NULL;
+ int cpu;
+
+ for(cpu = 0; cpu < NR_CPUS; cpu++) {
+ spin_lock(&per_cpu[cpu].fpu.lock);
+ if (per_cpu[cpu].fpu.task == current) {
+ per_cpu[cpu].fpu.task = NULL;
+ }
+ spin_unlock(&per_cpu[cpu].fpu.lock);
+ }
}

void flush_thread(void)
{
- if (last_task_used_math == current)
+ if (is_last_math_task(current))
last_task_used_math = NULL;
}

diff -u -r --new-file linux/arch/ppc/kernel/ptrace.c linux.hacked/arch/ppc/kernel/ptrace.c
--- linux/arch/ppc/kernel/ptrace.c Fri Apr 10 12:08:21 1998
+++ linux.hacked/arch/ppc/kernel/ptrace.c Thu Apr 9 18:59:31 1998
@@ -390,7 +390,7 @@
tmp = get_reg(child, addr);
}
else if (addr >= PT_FPR0 && addr <= PT_FPSCR) {
- if (last_task_used_math == child)
+ if (is_last_math_task(child))
giveup_fpu();
tmp = ((long *)child->tss.fpr)[addr - PT_FPR0];
}
@@ -423,7 +423,7 @@
goto out;
}
if (addr >= PT_FPR0 && addr < PT_FPR0 + 64) {
- if (last_task_used_math == child)
+ if (is_last_math_task(child))
giveup_fpu();
((long *)child->tss.fpr)[addr - PT_FPR0] = data;
ret = 0;
diff -u -r --new-file linux/arch/ppc/kernel/signal.c linux.hacked/arch/ppc/kernel/signal.c
--- linux/arch/ppc/kernel/signal.c Fri Apr 10 12:08:21 1998
+++ linux.hacked/arch/ppc/kernel/signal.c Thu Apr 9 18:59:09 1998
@@ -201,7 +201,7 @@
if (sc == (struct sigcontext_struct *)(sigctx.regs)) {
/* Last stacked signal - restore registers */
sr = (struct sigregs *) sigctx.regs;
- if (last_task_used_math == current)
+ if (is_last_math_task(current))
giveup_fpu();
if (copy_from_user(saved_regs, &sr->gp_regs,
sizeof(sr->gp_regs)))
@@ -249,7 +249,7 @@

if (verify_area(VERIFY_WRITE, frame, sizeof(*frame)))
goto badframe;
- if (last_task_used_math == current)
+ if (is_last_math_task(current))
giveup_fpu();
if (__copy_to_user(&frame->gp_regs, regs, GP_REGS_SIZE)
|| __copy_to_user(&frame->fp_regs, current->tss.fpr,
diff -u -r --new-file linux/arch/ppc/kernel/traps.c linux.hacked/arch/ppc/kernel/traps.c
--- linux/arch/ppc/kernel/traps.c Fri Apr 10 12:08:21 1998
+++ linux.hacked/arch/ppc/kernel/traps.c Thu Apr 9 18:58:39 1998
@@ -170,7 +170,7 @@
{
int fixed;

- if (last_task_used_math == current)
+ if (is_last_math_task(current))
giveup_fpu();
fixed = fix_alignment(regs);
if (fixed == 1) {
diff -u -r --new-file linux/arch/sparc/kernel/process.c linux.hacked/arch/sparc/kernel/process.c
--- linux/arch/sparc/kernel/process.c Fri Apr 10 12:08:23 1998
+++ linux.hacked/arch/sparc/kernel/process.c Fri Apr 10 09:48:50 1998
@@ -325,6 +325,7 @@
*/
void exit_thread(void)
{
+ int cpu;
#ifndef __SMP__
if(last_task_used_math == current) {
#else
@@ -334,9 +335,14 @@
put_psr(get_psr() | PSR_EF);
fpsave(&current->tss.float_regs[0], &current->tss.fsr,
&current->tss.fpqueue[0], &current->tss.fpqdepth);
-#ifndef __SMP__
- last_task_used_math = NULL;
-#else
+ for(cpu = 0; cpu < NR_CPUS; cpu++) {
+ spin_lock(&per_cpu[cpu].fpu.lock);
+ if (per_cpu[cpu].fpu.task == current) {
+ per_cpu[cpu].fpu.task = NULL;
+ }
+ spin_unlock(&per_cpu[cpu].fpu.lock);
+ }
+#ifdef __SMP__
current->flags &= ~PF_USEDFPU;
#endif
}
@@ -359,9 +365,8 @@
put_psr(get_psr() | PSR_EF);
fpsave(&current->tss.float_regs[0], &current->tss.fsr,
&current->tss.fpqueue[0], &current->tss.fpqdepth);
-#ifndef __SMP__
last_task_used_math = NULL;
-#else
+#ifdef __SMP__
current->flags &= ~PF_USEDFPU;
#endif
}
diff -u -r --new-file linux/arch/sparc/kernel/signal.c linux.hacked/arch/sparc/kernel/signal.c
--- linux/arch/sparc/kernel/signal.c Fri Apr 10 12:08:23 1998
+++ linux.hacked/arch/sparc/kernel/signal.c Thu Apr 9 18:53:48 1998
@@ -181,6 +181,9 @@
{
#ifdef __SMP__
- if (current->flags & PF_USEDFPU)
+ if (current->flags & PF_USEDFPU) {
+ last_task_used_math = 0;
regs->psr &= ~PSR_EF;
+ }
#else
if (current == last_task_used_math) {
@@ -418,6 +419,7 @@
put_psr(get_psr() | PSR_EF);
fpsave(&current->tss.float_regs[0], &current->tss.fsr,
&current->tss.fpqueue[0], &current->tss.fpqdepth);
+ last_task_used_math = 0;
regs->psr &= ~(PSR_EF);
current->flags &= ~(PF_USEDFPU);
}
diff -u -r --new-file linux/arch/sparc/kernel/traps.c linux.hacked/arch/sparc/kernel/traps.c
--- linux/arch/sparc/kernel/traps.c Fri Apr 10 12:08:23 1998
+++ linux.hacked/arch/sparc/kernel/traps.c Fri Apr 10 09:47:52 1998
@@ -192,16 +192,23 @@

put_psr(get_psr() | PSR_EF); /* Allow FPU ops. */
regs->psr |= PSR_EF;
-#ifndef __SMP__
- if(last_task_used_math == current)
+ if(is_last_math_task(current))
goto out;
- if(last_task_used_math) {
+ if(smp_num_cpus == 1 && last_task_used_math) {
+ /* For multi-CPUs, the fpu state is saved by switch_to().*/
/* Other processes fpu state, save away */
struct task_struct *fptask = last_task_used_math;
fpsave(&fptask->tss.float_regs[0], &fptask->tss.fsr,
&fptask->tss.fpqueue[0], &fptask->tss.fpqdepth);
}
+
+#ifdef __SMP__
+ current->last_math_cpu = smp_processor_id();
+#endif /* __SMP__ */
+
+ spin_lock(&per_cpu[smp_processor_id()].fpu.lock);
last_task_used_math = current;
+ spin_unlock(&per_cpu[smp_processor_id()].fpu.lock);
if(current->used_math) {
fpload(&current->tss.float_regs[0], &current->tss.fsr);
} else {
@@ -209,18 +216,10 @@
fpload(&init_fregs[0], &init_fsr);
current->used_math = 1;
}
-#else
- if(!current->used_math) {
- fpload(&init_fregs[0], &init_fsr);
- current->used_math = 1;
- } else {
- fpload(&current->tss.float_regs[0], &current->tss.fsr);
- }
+#ifdef __SMP__
current->flags |= PF_USEDFPU;
#endif
-#ifndef __SMP__
out:
-#endif
unlock_kernel();
}

@@ -233,22 +232,14 @@
unsigned long psr)
{
static calls = 0;
-#ifndef __SMP__
struct task_struct *fpt = last_task_used_math;
-#else
- struct task_struct *fpt = current;
-#endif
lock_kernel();
put_psr(get_psr() | PSR_EF);
/* If nobody owns the fpu right now, just clear the
* error into our fake static buffer and hope it don't
* happen again. Thank you crashme...
*/
-#ifndef __SMP__
if(!fpt) {
-#else
- if(!(fpt->flags & PF_USEDFPU)) {
-#endif
fpsave(&fake_regs[0], &fake_fsr, &fake_queue[0], &fake_depth);
regs->psr &= ~PSR_EF;
goto out;
@@ -275,9 +266,7 @@
goto out;
}
send_sig(SIGFPE, fpt, 1);
-#ifndef __SMP__
last_task_used_math = NULL;
-#endif
regs->psr &= ~PSR_EF;
if(calls > 0)
calls=0;
diff -u -r --new-file linux/fs/proc/array.c linux.hacked/fs/proc/array.c
--- linux/fs/proc/array.c Fri Apr 10 12:10:06 1998
+++ linux.hacked/fs/proc/array.c Fri Apr 10 09:37:17 1998
@@ -240,12 +240,12 @@
for (i = 0 ; i < smp_num_cpus; i++)
len += sprintf(buffer + len, "cpu%d %u %u %u %lu\n",
i,
- kstat.per_cpu_user[cpu_logical_map(i)],
- kstat.per_cpu_nice[cpu_logical_map(i)],
- kstat.per_cpu_system[cpu_logical_map(i)],
- jiffies - ( kstat.per_cpu_user[cpu_logical_map(i)] \
- + kstat.per_cpu_nice[cpu_logical_map(i)] \
- + kstat.per_cpu_system[cpu_logical_map(i)]));
+ per_cpu[cpu_logical_map(i)].kstat.user,
+ per_cpu[cpu_logical_map(i)].kstat.nice,
+ per_cpu[cpu_logical_map(i)].kstat.system,
+ jiffies - ( per_cpu[cpu_logical_map(i)].kstat.user \
+ + per_cpu[cpu_logical_map(i)].kstat.nice \
+ + per_cpu[cpu_logical_map(i)].kstat.system));
len += sprintf(buffer + len,
"disk %u %u %u %u\n"
"disk_rio %u %u %u %u\n"
diff -u -r --new-file linux/include/asm-i386/system.h linux.hacked/include/asm-i386/system.h
--- linux/include/asm-i386/system.h Fri Apr 10 12:10:19 1998
+++ linux.hacked/include/asm-i386/system.h Fri Apr 10 09:58:05 1998
@@ -53,7 +53,6 @@
*/


-#ifdef __SMP__
/*
* Keep the lock depth straight. If we switch on an interrupt from
* kernel->user task we need to lose a depth, and if we switch the
@@ -70,7 +69,7 @@
*/

#define switch_to(prev,next) do { \
- if(prev->flags&PF_USEDFPU) \
+ if(smp_num_cpus > 1 && prev->flags&PF_USEDFPU) \
{ \
__asm__ __volatile__("fnsave %0":"=m" (prev->tss.i387.hard)); \
__asm__ __volatile__("fwait"); \
@@ -80,27 +79,9 @@
: /* no output */ \
:"m" (*(((char *)&next->tss.tr)-4)), \
"c" (next)); \
- /* Now maybe reload the debug registers */ \
- if(prev->debugreg[7]){ \
- loaddebug(prev,0); \
- loaddebug(prev,1); \
- loaddebug(prev,2); \
- loaddebug(prev,3); \
- loaddebug(prev,6); \
- loaddebug(prev,7); \
+ if (is_last_math_task(prev)) { \
+ __asm__("clts"); \
} \
-} while (0)
-
-#else
-#define switch_to(prev,next) do { \
-__asm__("ljmp %0\n\t" \
- "cmpl %1,"SYMBOL_NAME_STR(last_task_used_math)"\n\t" \
- "jne 1f\n\t" \
- "clts\n" \
- "1:" \
- : /* no outputs */ \
- :"m" (*(((char *)&next->tss.tr)-4)), \
- "r" (prev), "r" (next)); \
/* Now maybe reload the debug registers */ \
if(prev->debugreg[7]){ \
loaddebug(prev,0); \
@@ -111,7 +92,6 @@
loaddebug(prev,7); \
} \
} while (0)
-#endif

#define _set_base(addr,base) \
__asm__("movw %%dx,%0\n\t" \
diff -u -r --new-file linux/include/asm-sparc/system.h linux.hacked/include/asm-sparc/system.h
--- linux/include/asm-sparc/system.h Fri Apr 10 12:10:32 1998
+++ linux.hacked/include/asm-sparc/system.h Fri Apr 10 10:02:10 1998
@@ -55,9 +55,8 @@
extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
void *fpqueue, unsigned long *fpqdepth);

-#ifdef __SMP__
#define SWITCH_ENTER \
- if(prev->flags & PF_USEDFPU) { \
+ if(smp_num_cpus > 1 && prev->flags & PF_USEDFPU) { \
put_psr(get_psr() | PSR_EF); \
fpsave(&prev->tss.float_regs[0], &prev->tss.fsr, \
&prev->tss.fpqueue[0], &prev->tss.fpqdepth); \
@@ -65,11 +64,7 @@
prev->tss.kregs->psr &= ~PSR_EF; \
}

-#define SWITCH_DO_LAZY_FPU
-#else
-#define SWITCH_ENTER
-#define SWITCH_DO_LAZY_FPU if(last_task_used_math != next) next->tss.kregs->psr&=~PSR_EF;
-#endif
+#define SWITCH_DO_LAZY_FPU if(!is_last_math_task(next)) next->tss.kregs->psr&=~PSR_EF;

/* Much care has gone into this code, do not touch it. */
#define switch_to(prev, next) do { \
diff -u -r --new-file linux/include/linux/kernel_stat.h linux.hacked/include/linux/kernel_stat.h
--- linux/include/linux/kernel_stat.h Fri Apr 10 12:10:43 1998
+++ linux.hacked/include/linux/kernel_stat.h Fri Apr 10 09:32:10 1998
@@ -16,9 +16,6 @@

struct kernel_stat {
unsigned int cpu_user, cpu_nice, cpu_system;
- unsigned int per_cpu_user[NR_CPUS],
- per_cpu_nice[NR_CPUS],
- per_cpu_system[NR_CPUS];
unsigned int dk_drive[DK_NDRIVE];
unsigned int dk_drive_rio[DK_NDRIVE];
unsigned int dk_drive_wio[DK_NDRIVE];
diff -u -r --new-file linux/include/linux/per_cpu.h linux.hacked/include/linux/per_cpu.h
--- linux/include/linux/per_cpu.h Wed Dec 31 16:00:00 1969
+++ linux.hacked/include/linux/per_cpu.h Fri Apr 10 10:41:00 1998
@@ -0,0 +1,41 @@
+#ifndef _LINUX_PER_CPU_H
+#define _LINUX_PER_CPU_H
+
+#include <asm/irq.h>
+#include <asm/smp.h>
+#include <asm/cache.h>
+#include <asm/spinlock.h>
+
+#ifdef __KERNEL__
+
+struct per_cpu_stats {
+ unsigned int user;
+ unsigned int nice;
+ unsigned int system;
+ unsigned int irqs[NR_IRQS];
+};
+
+struct per_cpu_fpu_context { /* fpu = Floating Point Unit */
+ spinlock_t lock;
+ struct task_struct *task;
+};
+
+#ifdef L1_CACHE_BYTES
+#define L1_CACHE_FILL(n) \
+ unsigned char _filler_[L1_CACHE_BYTES > (n) ? L1_CACHE_BYTES - (n) : 0]
+#else
+#define L1_CACHE_FILL(n) /* nothing */
+#endif /* L1_CACHE_BYTES */
+
+extern struct per_cpu {
+ struct per_cpu_stats kstat;
+ struct per_cpu_fpu_context fpu;
+ L1_CACHE_FILL(sizeof(struct per_cpu_stats) +
+ sizeof(struct per_cpu_fpu_context));
+} per_cpu[NR_CPUS];
+
+#undef L1_CACHE_FILL
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_PER_CPU_H */
+
diff -u -r --new-file linux/include/linux/sched.h linux.hacked/include/linux/sched.h
--- linux/include/linux/sched.h Fri Apr 10 12:10:45 1998
+++ linux.hacked/include/linux/sched.h Fri Apr 10 10:41:04 1998
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/times.h>
+#include <linux/per_cpu.h>

#include <asm/system.h>
#include <asm/semaphore.h>
@@ -285,6 +286,7 @@
int has_cpu;
int processor;
int last_processor;
+ int last_math_cpu; /* Last CPU on which math code was executed.*/
int lock_depth; /* Lock depth. We can context switch in and out of holding a syscall kernel lock... */
/* Spinlocks for various pieces or per-task state. */
spinlock_t sigmask_lock; /* Protects signal and blocked */
@@ -376,7 +378,6 @@

extern struct mm_struct init_mm;
extern struct task_struct *task[NR_TASKS];
-extern struct task_struct *last_task_used_math;

extern struct task_struct **tarray_freelist;
extern spinlock_t taskslot_lock;
@@ -438,6 +439,20 @@
extern int charge_uid(struct task_struct *p, int count);

#include <asm/current.h>
+
+#define last_task_used_math (per_cpu[smp_processor_id()].fpu.task)
+#ifdef __SMP__
+# define last_math_on_this_cpu(task) \
+ ((task)->last_math_cpu == smp_processor_id())
+#else
+# define last_math_on_this_cpu(task) 1
+#endif
+static inline int
+is_last_math_task(struct task_struct *task) {
+ return (last_task_used_math == (task) &&
+ last_math_on_this_cpu((task)));
+}
+

extern unsigned long volatile jiffies;
extern unsigned long itimer_ticks;
diff -u -r --new-file linux/kernel/sched.c linux.hacked/kernel/sched.c
--- linux/kernel/sched.c Fri Apr 10 12:10:52 1998
+++ linux.hacked/kernel/sched.c Fri Apr 10 09:47:15 1998
@@ -98,8 +98,6 @@
* via the SMP irq return path.
*/

-struct task_struct *last_task_used_math = NULL;
-
struct task_struct * task[NR_TASKS] = {&init_task, };

struct kernel_stat kstat = { 0 };
@@ -1583,6 +1581,8 @@
read_unlock(&tasklist_lock);
}

+struct per_cpu per_cpu[NR_CPUS];
+
__initfunc(void sched_init(void))
{
/*
@@ -1600,6 +1600,12 @@

for(nr = 0; nr < PIDHASH_SZ; nr++)
pidhash[nr] = NULL;
+
+ memset(per_cpu, 0, sizeof(per_cpu));
+ for(nr = 0; nr < NR_CPUS; nr++) {
+ spin_lock_init(&per_cpu[nr].fpu.lock);
+ per_cpu[nr].fpu.task = NULL;
+ }

init_bh(TIMER_BH, timer_bh);
init_bh(TQUEUE_BH, tqueue_bh);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu