[tip:x86/asm] x86/arch_prctl/64: Remove FSBASE/GSBASE < 4G optimization

From: tip-bot for Andy Lutomirski
Date: Fri Apr 29 2016 - 06:51:20 EST


Commit-ID: 731e33e39a5b95ad77017811b3ced32ecf9dc666
Gitweb: http://git.kernel.org/tip/731e33e39a5b95ad77017811b3ced32ecf9dc666
Author: Andy Lutomirski <luto@xxxxxxxxxx>
AuthorDate: Tue, 26 Apr 2016 12:23:28 -0700
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Fri, 29 Apr 2016 11:56:41 +0200

x86/arch_prctl/64: Remove FSBASE/GSBASE < 4G optimization

As far as I know, the optimization doesn't work on any modern distro
because modern distros use high addresses for ASLR. Remove it.

The ptrace code was either wrong or very strange, but the behavior
with this patch should be essentially identical to the behavior
without this patch unless user code goes out of its way to mislead
ptrace.

On newer CPUs, once the FSGSBASE instructions are enabled, we won't
want to use the optimized variant anyway.

This isn't actually much of a performance regression, it has no effect
on normal dynamically linked programs, and it's a considerably
simplification. It also removes some nasty special cases from code
that is already way too full of special cases for comfort.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Brian Gerst <brgerst@xxxxxxxxx>
Cc: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
Cc: H. Peter Anvin <hpa@xxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Link: http://lkml.kernel.org/r/dd1599b08866961dba9d2458faa6bbd7fba471d7.1461698311.git.luto@xxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
arch/x86/include/asm/segment.h | 7 -----
arch/x86/kernel/process_64.c | 71 +++++++-----------------------------------
arch/x86/kernel/ptrace.c | 44 ++++----------------------
3 files changed, 17 insertions(+), 105 deletions(-)

diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index e1a4afd..1549caa0 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -208,13 +208,6 @@
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU*8 + 3)

-/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */
-#define FS_TLS 0
-#define GS_TLS 1
-
-#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
-#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
-
#endif

#ifndef CONFIG_PARAVIRT
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 24d1b7f..864fe2c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -136,25 +136,6 @@ void release_thread(struct task_struct *dead_task)
}
}

-static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
-{
- struct user_desc ud = {
- .base_addr = addr,
- .limit = 0xfffff,
- .seg_32bit = 1,
- .limit_in_pages = 1,
- .useable = 1,
- };
- struct desc_struct *desc = t->thread.tls_array;
- desc += tls;
- fill_ldt(desc, &ud);
-}
-
-static inline u32 read_32bit_tls(struct task_struct *t, int tls)
-{
- return get_desc_base(&t->thread.tls_array[tls]);
-}
-
int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
unsigned long arg, struct task_struct *p, unsigned long tls)
{
@@ -554,25 +535,12 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
if (addr >= TASK_SIZE_OF(task))
return -EPERM;
cpu = get_cpu();
- /* handle small bases via the GDT because that's faster to
- switch. */
- if (addr <= 0xffffffff) {
- set_32bit_tls(task, GS_TLS, addr);
- if (doit) {
- load_TLS(&task->thread, cpu);
- load_gs_index(GS_TLS_SEL);
- }
- task->thread.gsindex = GS_TLS_SEL;
- task->thread.gs = 0;
- } else {
- task->thread.gsindex = 0;
- task->thread.gs = addr;
- if (doit) {
- load_gs_index(0);
- ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
- }
+ task->thread.gsindex = 0;
+ task->thread.gs = addr;
+ if (doit) {
+ load_gs_index(0);
+ ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
}
- put_cpu();
break;
case ARCH_SET_FS:
/* Not strictly needed for fs, but do it for symmetry
@@ -580,25 +548,12 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
if (addr >= TASK_SIZE_OF(task))
return -EPERM;
cpu = get_cpu();
- /* handle small bases via the GDT because that's faster to
- switch. */
- if (addr <= 0xffffffff) {
- set_32bit_tls(task, FS_TLS, addr);
- if (doit) {
- load_TLS(&task->thread, cpu);
- loadsegment(fs, FS_TLS_SEL);
- }
- task->thread.fsindex = FS_TLS_SEL;
- task->thread.fs = 0;
- } else {
- task->thread.fsindex = 0;
- task->thread.fs = addr;
- if (doit) {
- /* set the selector to 0 to not confuse
- __switch_to */
- loadsegment(fs, 0);
- ret = wrmsrl_safe(MSR_FS_BASE, addr);
- }
+ task->thread.fsindex = 0;
+ task->thread.fs = addr;
+ if (doit) {
+ /* set the selector to 0 to not confuse __switch_to */
+ loadsegment(fs, 0);
+ ret = wrmsrl_safe(MSR_FS_BASE, addr);
}
put_cpu();
break;
@@ -606,8 +561,6 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
unsigned long base;
if (doit)
rdmsrl(MSR_FS_BASE, base);
- else if (task->thread.fsindex == FS_TLS_SEL)
- base = read_32bit_tls(task, FS_TLS);
else
base = task->thread.fs;
ret = put_user(base, (unsigned long __user *)addr);
@@ -617,8 +570,6 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
unsigned long base;
if (doit)
rdmsrl(MSR_KERNEL_GS_BASE, base);
- else if (task->thread.gsindex == GS_TLS_SEL)
- base = read_32bit_tls(task, GS_TLS);
else
base = task->thread.gs;
ret = put_user(base, (unsigned long __user *)addr);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0f4d2a5..e72ab40 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -303,29 +303,11 @@ static int set_segment_reg(struct task_struct *task,

switch (offset) {
case offsetof(struct user_regs_struct,fs):
- /*
- * If this is setting fs as for normal 64-bit use but
- * setting fs_base has implicitly changed it, leave it.
- */
- if ((value == FS_TLS_SEL && task->thread.fsindex == 0 &&
- task->thread.fs != 0) ||
- (value == 0 && task->thread.fsindex == FS_TLS_SEL &&
- task->thread.fs == 0))
- break;
task->thread.fsindex = value;
if (task == current)
loadsegment(fs, task->thread.fsindex);
break;
case offsetof(struct user_regs_struct,gs):
- /*
- * If this is setting gs as for normal 64-bit use but
- * setting gs_base has implicitly changed it, leave it.
- */
- if ((value == GS_TLS_SEL && task->thread.gsindex == 0 &&
- task->thread.gs != 0) ||
- (value == 0 && task->thread.gsindex == GS_TLS_SEL &&
- task->thread.gs == 0))
- break;
task->thread.gsindex = value;
if (task == current)
load_gs_index(task->thread.gsindex);
@@ -453,31 +435,17 @@ static unsigned long getreg(struct task_struct *task, unsigned long offset)
#ifdef CONFIG_X86_64
case offsetof(struct user_regs_struct, fs_base): {
/*
- * do_arch_prctl may have used a GDT slot instead of
- * the MSR. To userland, it appears the same either
- * way, except the %fs segment selector might not be 0.
+ * XXX: This will not behave as expected if called on
+ * current or if fsindex != 0.
*/
- unsigned int seg = task->thread.fsindex;
- if (task->thread.fs != 0)
- return task->thread.fs;
- if (task == current)
- asm("movl %%fs,%0" : "=r" (seg));
- if (seg != FS_TLS_SEL)
- return 0;
- return get_desc_base(&task->thread.tls_array[FS_TLS]);
+ return task->thread.fs;
}
case offsetof(struct user_regs_struct, gs_base): {
/*
- * Exactly the same here as the %fs handling above.
+ * XXX: This will not behave as expected if called on
+ * current or if fsindex != 0.
*/
- unsigned int seg = task->thread.gsindex;
- if (task->thread.gs != 0)
- return task->thread.gs;
- if (task == current)
- asm("movl %%gs,%0" : "=r" (seg));
- if (seg != GS_TLS_SEL)
- return 0;
- return get_desc_base(&task->thread.tls_array[GS_TLS]);
+ return task->thread.gs;
}
#endif
}