[tip:x86/process] x86/process: Optimize TIF checks in __switch_to_xtra()

From: tip-bot for Kyle Huey
Date: Sat Mar 11 2017 - 06:51:53 EST


Commit-ID: af8b3cd3934ec60f4c2a420d19a9d416554f140b
Gitweb: http://git.kernel.org/tip/af8b3cd3934ec60f4c2a420d19a9d416554f140b
Author: Kyle Huey <me@xxxxxxxxxxxx>
AuthorDate: Tue, 14 Feb 2017 00:11:02 -0800
Committer: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CommitDate: Sat, 11 Mar 2017 12:45:17 +0100

x86/process: Optimize TIF checks in __switch_to_xtra()

Help the compiler to avoid reevaluating the thread flags for each checked
bit by reordering the bit checks and providing an explicit xor for
evaluation.

With default defconfigs for each arch,

x86_64: arch/x86/kernel/process.o
text data bss dec hex
3056 8577 16 11649 2d81 Before
3024 8577 16 11617 2d61 After

i386: arch/x86/kernel/process.o
text data bss dec hex
2957 8673 8 11638 2d76 Before
2925 8673 8 11606 2d56 After

Originally-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Kyle Huey <khuey@xxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Link: http://lkml.kernel.org/r/20170214081104.9244-2-khuey@xxxxxxxxxxxx
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>

---
arch/x86/kernel/process.c | 65 ++++++++++++++++++++++++++---------------------
1 file changed, 36 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index f675915..ea9ea25 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -182,54 +182,61 @@ int set_tsc_mode(unsigned int val)
return 0;
}

+static inline void switch_to_bitmap(struct tss_struct *tss,
+ struct thread_struct *prev,
+ struct thread_struct *next,
+ unsigned long tifp, unsigned long tifn)
+{
+ if (tifn & _TIF_IO_BITMAP) {
+ /*
+ * Copy the relevant range of the IO bitmap.
+ * Normally this is 128 bytes or less:
+ */
+ memcpy(tss->io_bitmap, next->io_bitmap_ptr,
+ max(prev->io_bitmap_max, next->io_bitmap_max));
+ /*
+ * Make sure that the TSS limit is correct for the CPU
+ * to notice the IO bitmap.
+ */
+ refresh_tss_limit();
+ } else if (tifp & _TIF_IO_BITMAP) {
+ /*
+ * Clear any possible leftover bits:
+ */
+ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
+ }
+}
+
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss)
{
struct thread_struct *prev, *next;
+ unsigned long tifp, tifn;

prev = &prev_p->thread;
next = &next_p->thread;

- if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
- test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
+ tifn = READ_ONCE(task_thread_info(next_p)->flags);
+ tifp = READ_ONCE(task_thread_info(prev_p)->flags);
+ switch_to_bitmap(tss, prev, next, tifp, tifn);
+
+ propagate_user_return_notify(prev_p, next_p);
+
+ if ((tifp ^ tifn) & _TIF_BLOCKSTEP) {
unsigned long debugctl = get_debugctlmsr();

debugctl &= ~DEBUGCTLMSR_BTF;
- if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
+ if (tifn & _TIF_BLOCKSTEP)
debugctl |= DEBUGCTLMSR_BTF;
-
update_debugctlmsr(debugctl);
}

- if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
- test_tsk_thread_flag(next_p, TIF_NOTSC)) {
- /* prev and next are different */
- if (test_tsk_thread_flag(next_p, TIF_NOTSC))
+ if ((tifp ^ tifn) & _TIF_NOTSC) {
+ if (tifn & _TIF_NOTSC)
hard_disable_TSC();
else
hard_enable_TSC();
}
-
- if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
- /*
- * Copy the relevant range of the IO bitmap.
- * Normally this is 128 bytes or less:
- */
- memcpy(tss->io_bitmap, next->io_bitmap_ptr,
- max(prev->io_bitmap_max, next->io_bitmap_max));
-
- /*
- * Make sure that the TSS limit is correct for the CPU
- * to notice the IO bitmap.
- */
- refresh_tss_limit();
- } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
- /*
- * Clear any possible leftover bits:
- */
- memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
- }
- propagate_user_return_notify(prev_p, next_p);
}

/*