[RFC] exit_thread() speedups in x86 process.c
From: cutaway
Date: Mon Jun 13 2005 - 19:36:31 EST
In the current exit_thread() implementation, it appears that including the
I/O port map teardown code within exit_thread() generates enough autovar
data that the compiler needs to spill 4 registers to the stack, resulting in
(4) PUSHes on entry and (4) POPs on exit.
When I tried extracting the map teardown into a separate function, the
situation changed dramatically to where NO REGISTERS were being
pushed/popped in the normal path entry/exit.
Below are the original generated code, the code my proposal generates, and
the #ifdef'd change that produced this elimination of the PUSHes/POPs.
Unless I'm on drugs, this looks like a solid winner in a fairly important
code path :)
--------- Original exit_thread() -------
615 .globl exit_thread
616 .type exit_thread,@function
617 exit_thread:
618 02cc 55 pushl %ebp
619 02cd 57 pushl %edi
620 02ce 56 pushl %esi
621 02cf 53 pushl %ebx
622 02d0 B800E0FF movl $-8192,%eax
blah, blah...
629 02e5 85C0 testl %eax,%eax
630 02e7 7507 jne .L1675
631 .L1657:
632 02e9 5B popl %ebx
633 02ea 5E popl %esi
634 02eb 5F popl %edi
635 02ec 5D popl %ebp
636 02ed C3 ret
637 02ee 89F6 .p2align 2
638 .L1675:
639 02f0 50 pushl %eax
640 02f1 E8FCFFFF call kfree
...Lots of stuff here to tear down port maps...
--------- Proposed exit_thread() -------
655 .globl exit_thread
656 .type exit_thread,@function
657 exit_thread:
///////////////////////////////////////
// Note how all PUSH/POP's are
// gone from the mainline code now
///////////////////////////////////////
658 0340 B800E0FF movl $-8192,%eax
658 FF
659
660 0345 21E0 andl %esp,%eax
661
662 0347 8B00 movl (%eax),%eax
663 0349 05C00100 addl $448,%eax
663 00
664 034e 8B907C02 movl 636(%eax),%edx
664 0000
665 0354 85D2 testl %edx,%edx
666 0356 7504 jne .L1676
667 0358 C3 ret
668 0359 8D7600 .p2align 2
669 .L1676:
670 035c 50 pushl %eax
671 035d E86AFFFF call NukePortMap
671 FF
672 0362 58 popl %eax
673 0363 C3 ret
---- This is the change that eliminates the PUSH/POP's ---
#ifdef __TONYI__
/*
 * Slow-path helper for exit_thread(): free the I/O port bitmap attached to
 * thread @t and wipe the matching state in the TSS of the CPU we are
 * running on.  All TSS accesses happen between get_cpu() and put_cpu().
 *
 * NOTE(review): the register-save win in exit_thread() presumably depends
 * on the compiler keeping this static helper out of line -- confirm that it
 * is not inlined back into the caller at the optimisation level in use.
 */
static void NukePortMap(struct thread_struct *t)
{
	int this_cpu = get_cpu();
	struct tss_struct *cpu_tss = &per_cpu(init_tss, this_cpu);

	/* Thread side: release the bitmap and reset its bookkeeping. */
	kfree(t->io_bitmap_ptr);
	t->io_bitmap_ptr = NULL;
	t->io_bitmap_max = 0;

	/*
	 * TSS side: overwrite the first io_bitmap_max bytes with 0xff
	 * (using the length recorded before it is zeroed below), then
	 * drop the owner and mark the bitmap offset as invalid.
	 */
	memset(cpu_tss->io_bitmap, 0xff, cpu_tss->io_bitmap_max);
	cpu_tss->io_bitmap_max = 0;
	cpu_tss->io_bitmap_owner = NULL;
	cpu_tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;

	put_cpu();
}
#endif
/*
 * Release per-thread data owned by the exiting (current) task.
 *
 * The only resource handled here is the optional I/O port bitmap.  When
 * the thread has none, the function returns without doing any work.  With
 * __TONYI__ defined the teardown is delegated to NukePortMap(); otherwise
 * the same steps are performed inline.
 */
void exit_thread(void)
{
	struct thread_struct *t = &current->thread;

	/* Tear down the io port bitmap, if this thread ever allocated one. */
	if (unlikely(t->io_bitmap_ptr != NULL)) {
#ifdef __TONYI__
		NukePortMap(t);
#else
		int this_cpu = get_cpu();
		struct tss_struct *cpu_tss = &per_cpu(init_tss, this_cpu);

		/* Thread side: release the bitmap and reset its bookkeeping. */
		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		t->io_bitmap_max = 0;

		/*
		 * The TSS of this CPU carries bitmap state too: fill the
		 * first io_bitmap_max bytes with 0xff, then clear the
		 * length and owner and invalidate the bitmap offset.
		 */
		memset(cpu_tss->io_bitmap, 0xff, cpu_tss->io_bitmap_max);
		cpu_tss->io_bitmap_max = 0;
		cpu_tss->io_bitmap_owner = NULL;
		cpu_tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;

		put_cpu();
#endif
	}
}
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/