[tip:x86/asm] x86/vdso: Remove runtime 32-bit vDSO selection

From: tip-bot for Andy Lutomirski
Date: Wed Oct 07 2015 - 12:19:40 EST


Commit-ID: 0a6d1fa0d2b48fbae444e46e7f37a4832b2f8bdf
Gitweb: http://git.kernel.org/tip/0a6d1fa0d2b48fbae444e46e7f37a4832b2f8bdf
Author: Andy Lutomirski <luto@xxxxxxxxxx>
AuthorDate: Mon, 5 Oct 2015 17:47:56 -0700
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Wed, 7 Oct 2015 11:34:08 +0200

x86/vdso: Remove runtime 32-bit vDSO selection

32-bit userspace will now always see the same vDSO, which is
exactly what used to be the int80 vDSO. Subsequent patches will
clean it up and make it support SYSENTER and SYSCALL using
alternatives.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Brian Gerst <brgerst@xxxxxxxxx>
Cc: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
Cc: H. Peter Anvin <hpa@xxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: linux-kernel@xxxxxxxxxxxxxxx
Link: http://lkml.kernel.org/r/e7e6b3526fa442502e6125fe69486aab50813c32.1444091584.git.luto@xxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
arch/x86/entry/vdso/Makefile | 35 ++-----
arch/x86/entry/vdso/vdso2c.c | 1 -
arch/x86/entry/vdso/vdso32-setup.c | 28 +----
arch/x86/entry/vdso/vdso32/syscall.S | 75 -------------
arch/x86/entry/vdso/vdso32/sysenter.S | 116 ---------------------
.../entry/vdso/vdso32/{int80.S => system_call.S} | 0
arch/x86/entry/vdso/vma.c | 13 +--
arch/x86/ia32/ia32_signal.c | 4 +-
arch/x86/include/asm/elf.h | 2 +-
arch/x86/include/asm/vdso.h | 9 +-
arch/x86/kernel/signal.c | 4 +-
arch/x86/xen/setup.c | 13 +--
12 files changed, 21 insertions(+), 279 deletions(-)

diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index a3d0767..3bfb39e 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -19,9 +19,7 @@ obj-y += vma.o
# vDSO images to build
vdso_img-$(VDSO64-y) += 64
vdso_img-$(VDSOX32-y) += x32
-vdso_img-$(VDSO32-y) += 32-int80
-vdso_img-$(CONFIG_IA32_EMULATION) += 32-syscall
-vdso_img-$(VDSO32-y) += 32-sysenter
+vdso_img-$(VDSO32-y) += 32

obj-$(VDSO32-y) += vdso32-setup.o

@@ -122,15 +120,6 @@ $(obj)/%.so: $(obj)/%.so.dbg
$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
$(call if_changed,vdso)

-#
-# Build multiple 32-bit vDSO images to choose from at boot time.
-#
-vdso32.so-$(VDSO32-y) += int80
-vdso32.so-$(CONFIG_IA32_EMULATION) += syscall
-vdso32.so-$(VDSO32-y) += sysenter
-
-vdso32-images = $(vdso32.so-y:%=vdso32-%.so)
-
CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1

@@ -139,14 +128,12 @@ VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1
override obj-dirs = $(dir $(obj)) $(obj)/vdso32/

targets += vdso32/vdso32.lds
-targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o)
+targets += vdso32/note.o vdso32/vclock_gettime.o vdso32/system_call.o
targets += vdso32/vclock_gettime.o

-$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%)
-
KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
-$(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
-$(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
+$(obj)/vdso32.so.dbg: KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
+$(obj)/vdso32.so.dbg: asflags-$(CONFIG_X86_64) += -m32

KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
@@ -157,13 +144,13 @@ KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
-$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
+$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)

-$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
- $(obj)/vdso32/vdso32.lds \
- $(obj)/vdso32/vclock_gettime.o \
- $(obj)/vdso32/note.o \
- $(obj)/vdso32/%.o
+$(obj)/vdso32.so.dbg: FORCE \
+ $(obj)/vdso32/vdso32.lds \
+ $(obj)/vdso32/vclock_gettime.o \
+ $(obj)/vdso32/note.o \
+ $(obj)/vdso32/system_call.o
$(call if_changed,vdso)

#
@@ -206,4 +193,4 @@ $(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE
PHONY += vdso_install $(vdso_img_insttargets)
vdso_install: $(vdso_img_insttargets) FORCE

-clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64* vdso-image-*.c vdsox32.so*
+clean-files := vdso32.so vdso32.so.dbg vdso64* vdso-image-*.c vdsox32.so*
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 8627db2..2637eb1 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -98,7 +98,6 @@ struct vdso_sym required_syms[] = {
"VDSO_FAKE_SECTION_TABLE_END", false
},
{"VDSO32_NOTE_MASK", true},
- {"VDSO32_SYSENTER_RETURN", true},
{"__kernel_vsyscall", true},
{"__kernel_sigreturn", true},
{"__kernel_rt_sigreturn", true},
diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
index e904c27..08a317a 100644
--- a/arch/x86/entry/vdso/vdso32-setup.c
+++ b/arch/x86/entry/vdso/vdso32-setup.c
@@ -48,35 +48,9 @@ __setup("vdso32=", vdso32_setup);
__setup_param("vdso=", vdso_setup, vdso32_setup, 0);
#endif

-#ifdef CONFIG_X86_64
-
-#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32))
-#define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32))
-
-#else /* CONFIG_X86_32 */
-
-#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP))
-#define vdso32_syscall() (0)
-
-#endif /* CONFIG_X86_64 */
-
-#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
-const struct vdso_image *selected_vdso32;
-#endif
-
int __init sysenter_setup(void)
{
-#ifdef CONFIG_COMPAT
- if (vdso32_syscall())
- selected_vdso32 = &vdso_image_32_syscall;
- else
-#endif
- if (vdso32_sysenter())
- selected_vdso32 = &vdso_image_32_sysenter;
- else
- selected_vdso32 = &vdso_image_32_int80;
-
- init_vdso_image(selected_vdso32);
+ init_vdso_image(&vdso_image_32);

return 0;
}
diff --git a/arch/x86/entry/vdso/vdso32/syscall.S b/arch/x86/entry/vdso/vdso32/syscall.S
deleted file mode 100644
index 6b286bb..0000000
--- a/arch/x86/entry/vdso/vdso32/syscall.S
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Code for the vDSO. This version uses the syscall instruction.
- *
- * First get the common code for the sigreturn entry points.
- * This must come first.
- */
-#define SYSCALL_ENTER_KERNEL syscall
-#include "sigreturn.S"
-
-#include <asm/segment.h>
-
- .text
- .globl __kernel_vsyscall
- .type __kernel_vsyscall,@function
- ALIGN
-__kernel_vsyscall:
-.LSTART_vsyscall:
- push %ebp
-.Lpush_ebp:
- movl %ecx, %ebp
- syscall
- movl %ebp, %ecx
- popl %ebp
-.Lpop_ebp:
- ret
-.LEND_vsyscall:
- .size __kernel_vsyscall,.-.LSTART_vsyscall
-
- .section .eh_frame,"a",@progbits
-.LSTARTFRAME:
- .long .LENDCIE-.LSTARTCIE
-.LSTARTCIE:
- .long 0 /* CIE ID */
- .byte 1 /* Version number */
- .string "zR" /* NUL-terminated augmentation string */
- .uleb128 1 /* Code alignment factor */
- .sleb128 -4 /* Data alignment factor */
- .byte 8 /* Return address register column */
- .uleb128 1 /* Augmentation value length */
- .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
- .byte 0x0c /* DW_CFA_def_cfa */
- .uleb128 4
- .uleb128 4
- .byte 0x88 /* DW_CFA_offset, column 0x8 */
- .uleb128 1
- .align 4
-.LENDCIE:
-
- .long .LENDFDE1-.LSTARTFDE1 /* Length FDE */
-.LSTARTFDE1:
- .long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */
- .long .LSTART_vsyscall-. /* PC-relative start address */
- .long .LEND_vsyscall-.LSTART_vsyscall
- .uleb128 0 /* Augmentation length */
- /* What follows are the instructions for the table generation.
- We have to record all changes of the stack pointer. */
- .byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .uleb128 8
- .byte 0x85, 0x02 /* DW_CFA_offset %ebp -8 */
- .byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */
- .byte 0xc5 /* DW_CFA_restore %ebp */
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .uleb128 4
- .align 4
-.LENDFDE1:
- .previous
-
- /*
- * Pad out the segment to match the size of the sysenter.S version.
- */
-VDSO32_vsyscall_eh_frame_size = 0x40
- .section .data,"aw",@progbits
- .space VDSO32_vsyscall_eh_frame_size-(.LENDFDE1-.LSTARTFRAME), 0
- .previous
diff --git a/arch/x86/entry/vdso/vdso32/sysenter.S b/arch/x86/entry/vdso/vdso32/sysenter.S
deleted file mode 100644
index e354bce..0000000
--- a/arch/x86/entry/vdso/vdso32/sysenter.S
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Code for the vDSO. This version uses the sysenter instruction.
- *
- * First get the common code for the sigreturn entry points.
- * This must come first.
- */
-#include "sigreturn.S"
-
-/*
- * The caller puts arg2 in %ecx, which gets pushed. The kernel will use
- * %ecx itself for arg2. The pushing is because the sysexit instruction
- * (found in entry.S) requires that we clobber %ecx with the desired %esp.
- * User code might expect that %ecx is unclobbered though, as it would be
- * for returning via the iret instruction, so we must push and pop.
- *
- * The caller puts arg3 in %edx, which the sysexit instruction requires
- * for %eip. Thus, exactly as for arg2, we must push and pop.
- *
- * Arg6 is different. The caller puts arg6 in %ebp. Since the sysenter
- * instruction clobbers %esp, the user's %esp won't even survive entry
- * into the kernel. We store %esp in %ebp. Code in entry.S must fetch
- * arg6 from the stack.
- *
- * You can not use this vsyscall for the clone() syscall because the
- * three words on the parent stack do not get copied to the child.
- */
- .text
- .globl __kernel_vsyscall
- .type __kernel_vsyscall,@function
- ALIGN
-__kernel_vsyscall:
-.LSTART_vsyscall:
- push %ecx
-.Lpush_ecx:
- push %edx
-.Lpush_edx:
- push %ebp
-.Lenter_kernel:
- movl %esp,%ebp
- sysenter
-
- /* 7: align return point with nop's to make disassembly easier */
- .space 7,0x90
-
- /* 14: System call restart point is here! (SYSENTER_RETURN-2) */
- int $0x80
- /* 16: System call normal return point is here! */
-VDSO32_SYSENTER_RETURN: /* Symbol used by sysenter.c via vdso32-syms.h */
- pop %ebp
-.Lpop_ebp:
- pop %edx
-.Lpop_edx:
- pop %ecx
-.Lpop_ecx:
- ret
-.LEND_vsyscall:
- .size __kernel_vsyscall,.-.LSTART_vsyscall
- .previous
-
- .section .eh_frame,"a",@progbits
-.LSTARTFRAMEDLSI:
- .long .LENDCIEDLSI-.LSTARTCIEDLSI
-.LSTARTCIEDLSI:
- .long 0 /* CIE ID */
- .byte 1 /* Version number */
- .string "zR" /* NUL-terminated augmentation string */
- .uleb128 1 /* Code alignment factor */
- .sleb128 -4 /* Data alignment factor */
- .byte 8 /* Return address register column */
- .uleb128 1 /* Augmentation value length */
- .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
- .byte 0x0c /* DW_CFA_def_cfa */
- .uleb128 4
- .uleb128 4
- .byte 0x88 /* DW_CFA_offset, column 0x8 */
- .uleb128 1
- .align 4
-.LENDCIEDLSI:
- .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
-.LSTARTFDEDLSI:
- .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
- .long .LSTART_vsyscall-. /* PC-relative start address */
- .long .LEND_vsyscall-.LSTART_vsyscall
- .uleb128 0
- /* What follows are the instructions for the table generation.
- We have to record all changes of the stack pointer. */
- .byte 0x40 + (.Lpush_ecx-.LSTART_vsyscall) /* DW_CFA_advance_loc */
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x08 /* RA at offset 8 now */
- .byte 0x40 + (.Lpush_edx-.Lpush_ecx) /* DW_CFA_advance_loc */
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x0c /* RA at offset 12 now */
- .byte 0x40 + (.Lenter_kernel-.Lpush_edx) /* DW_CFA_advance_loc */
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x10 /* RA at offset 16 now */
- .byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */
- /* Finally the epilogue. */
- .byte 0x40 + (.Lpop_ebp-.Lenter_kernel) /* DW_CFA_advance_loc */
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x0c /* RA at offset 12 now */
- .byte 0xc5 /* DW_CFA_restore %ebp */
- .byte 0x40 + (.Lpop_edx-.Lpop_ebp) /* DW_CFA_advance_loc */
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x08 /* RA at offset 8 now */
- .byte 0x40 + (.Lpop_ecx-.Lpop_edx) /* DW_CFA_advance_loc */
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x04 /* RA at offset 4 now */
- .align 4
-.LENDFDEDLSI:
- .previous
-
- /*
- * Emit a symbol with the size of this .eh_frame data,
- * to verify it matches the other versions.
- */
-VDSO32_vsyscall_eh_frame_size = (.LENDFDEDLSI-.LSTARTFRAMEDLSI)
diff --git a/arch/x86/entry/vdso/vdso32/int80.S b/arch/x86/entry/vdso/vdso32/system_call.S
similarity index 100%
rename from arch/x86/entry/vdso/vdso32/int80.S
rename to arch/x86/entry/vdso/vdso32/system_call.S
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 4345431..64df471 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -180,21 +180,10 @@ up_fail:
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
static int load_vdso32(void)
{
- int ret;
-
if (vdso32_enabled != 1) /* Other values all mean "disabled" */
return 0;

- ret = map_vdso(selected_vdso32, false);
- if (ret)
- return ret;
-
- if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN)
- current_thread_info()->sysenter_return =
- current->mm->context.vdso +
- selected_vdso32->sym_VDSO32_SYSENTER_RETURN;
-
- return 0;
+ return map_vdso(&vdso_image_32, false);
}
#endif

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index a0a19b7..e6a5c275 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -289,7 +289,7 @@ int ia32_setup_frame(int sig, struct ksignal *ksig,
/* Return stub is in 32bit vsyscall page */
if (current->mm->context.vdso)
restorer = current->mm->context.vdso +
- selected_vdso32->sym___kernel_sigreturn;
+ vdso_image_32.sym___kernel_sigreturn;
else
restorer = &frame->retcode;
}
@@ -368,7 +368,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
restorer = ksig->ka.sa.sa_restorer;
else
restorer = current->mm->context.vdso +
- selected_vdso32->sym___kernel_rt_sigreturn;
+ vdso_image_32.sym___kernel_rt_sigreturn;
put_user_ex(ptr_to_compat(restorer), &frame->pretcode);

/*
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 141c561..2ee05c4 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -328,7 +328,7 @@ else \

#define VDSO_ENTRY \
((unsigned long)current->mm->context.vdso + \
- selected_vdso32->sym___kernel_vsyscall)
+ vdso_image_32.sym___kernel_vsyscall)

struct linux_binprm;

diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 8021bd2..5bcb1de 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -26,7 +26,6 @@ struct vdso_image {
long sym___kernel_sigreturn;
long sym___kernel_rt_sigreturn;
long sym___kernel_vsyscall;
- long sym_VDSO32_SYSENTER_RETURN;
};

#ifdef CONFIG_X86_64
@@ -38,13 +37,7 @@ extern const struct vdso_image vdso_image_x32;
#endif

#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
-extern const struct vdso_image vdso_image_32_int80;
-#ifdef CONFIG_COMPAT
-extern const struct vdso_image vdso_image_32_syscall;
-#endif
-extern const struct vdso_image vdso_image_32_sysenter;
-
-extern const struct vdso_image *selected_vdso32;
+extern const struct vdso_image vdso_image_32;
#endif

extern void __init init_vdso_image(const struct vdso_image *image);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index da52e6b..d87ce92 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -299,7 +299,7 @@ __setup_frame(int sig, struct ksignal *ksig, sigset_t *set,

if (current->mm->context.vdso)
restorer = current->mm->context.vdso +
- selected_vdso32->sym___kernel_sigreturn;
+ vdso_image_32.sym___kernel_sigreturn;
else
restorer = &frame->retcode;
if (ksig->ka.sa.sa_flags & SA_RESTORER)
@@ -363,7 +363,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,

/* Set up to return from userspace. */
restorer = current->mm->context.vdso +
- selected_vdso32->sym___kernel_rt_sigreturn;
+ vdso_image_32.sym___kernel_rt_sigreturn;
if (ksig->ka.sa.sa_flags & SA_RESTORER)
restorer = ksig->ka.sa.sa_restorer;
put_user_ex(restorer, &frame->pretcode);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 1c30e4a..63320b6 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -965,17 +965,8 @@ char * __init xen_auto_xlated_memory_setup(void)
static void __init fiddle_vdso(void)
{
#ifdef CONFIG_X86_32
- /*
- * This could be called before selected_vdso32 is initialized, so
- * just fiddle with both possible images. vdso_image_32_syscall
- * can't be selected, since it only exists on 64-bit systems.
- */
- u32 *mask;
- mask = vdso_image_32_int80.data +
- vdso_image_32_int80.sym_VDSO32_NOTE_MASK;
- *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
- mask = vdso_image_32_sysenter.data +
- vdso_image_32_sysenter.sym_VDSO32_NOTE_MASK;
+ u32 *mask = vdso_image_32.data +
+ vdso_image_32.sym_VDSO32_NOTE_MASK;
*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
#endif
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/