[tip:x86/mm] x86/vdso: Add mremap hook to vm_special_mapping

From: tip-bot for Dmitry Safonov
Date: Fri Jul 08 2016 - 09:17:07 EST


Commit-ID: b059a453b1cf1c8453c2b2ed373d3147d6264ebd
Gitweb: http://git.kernel.org/tip/b059a453b1cf1c8453c2b2ed373d3147d6264ebd
Author: Dmitry Safonov <dsafonov@xxxxxxxxxxxxx>
AuthorDate: Tue, 28 Jun 2016 14:35:38 +0300
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Fri, 8 Jul 2016 14:17:51 +0200

x86/vdso: Add mremap hook to vm_special_mapping

Add possibility for 32-bit user-space applications to move
the vDSO mapping.

Previously, when a user-space app called mremap() for the vDSO
address, in the syscall return path it would land on the previous
address of the vDSOpage, resulting in segmentation violation.

Now it lands fine and returns to userspace with a remapped vDSO.

This will also fix the context.vdso pointer for 64-bit, which does
not affect the user of vDSO after mremap() currently, but this
may change in the future.

As suggested by Andy, return -EINVAL for mremap() that would
split the vDSO image: that operation cannot possibly result in
a working system so reject it.

Renamed and moved the text_mapping structure declaration inside
map_vdso(), as it used only there and now it complements the
vvar_mapping variable.

There is still a problem for remapping the vDSO in glibc
applications: the linker relocates addresses for syscalls
on the vDSO page, so you need to relink with the new
addresses.

Without that the next syscall through glibc may fail:

Program received signal SIGSEGV, Segmentation fault.
#0 0xf7fd9b80 in __kernel_vsyscall ()
#1 0xf7ec8238 in _exit () from /usr/lib32/libc.so.6

Signed-off-by: Dmitry Safonov <dsafonov@xxxxxxxxxxxxx>
Acked-by: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: 0x7f454c46@xxxxxxxxx
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Brian Gerst <brgerst@xxxxxxxxx>
Cc: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
Cc: H. Peter Anvin <hpa@xxxxxxxxx>
Cc: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: linux-mm@xxxxxxxxx
Link: http://lkml.kernel.org/r/20160628113539.13606-2-dsafonov@xxxxxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
arch/x86/entry/vdso/vma.c | 47 ++++++++++++++++++++++++++++++++++++++++++-----
include/linux/mm_types.h | 3 +++
mm/mmap.c | 10 ++++++++++
3 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index ab220ac..3329844 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -12,6 +12,7 @@
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
+#include <linux/ptrace.h>
#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
@@ -97,10 +98,40 @@ static int vdso_fault(const struct vm_special_mapping *sm,
return 0;
}

-static const struct vm_special_mapping text_mapping = {
- .name = "[vdso]",
- .fault = vdso_fault,
-};
+static void vdso_fix_landing(const struct vdso_image *image,
+ struct vm_area_struct *new_vma)
+{
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+ if (in_ia32_syscall() && image == &vdso_image_32) {
+ struct pt_regs *regs = current_pt_regs();
+ unsigned long vdso_land = image->sym_int80_landing_pad;
+ unsigned long old_land_addr = vdso_land +
+ (unsigned long)current->mm->context.vdso;
+
+ /* Fixing userspace landing - look at do_fast_syscall_32 */
+ if (regs->ip == old_land_addr)
+ regs->ip = new_vma->vm_start + vdso_land;
+ }
+#endif
+}
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
+{
+ unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+ const struct vdso_image *image = current->mm->context.vdso_image;
+
+ if (image->size != new_size)
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
+ return -EFAULT;
+
+ vdso_fix_landing(image, new_vma);
+ current->mm->context.vdso = (void __user *)new_vma->vm_start;
+
+ return 0;
+}

static int vvar_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -151,6 +182,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
struct vm_area_struct *vma;
unsigned long addr, text_start;
int ret = 0;
+
+ static const struct vm_special_mapping vdso_mapping = {
+ .name = "[vdso]",
+ .fault = vdso_fault,
+ .mremap = vdso_mremap,
+ };
static const struct vm_special_mapping vvar_mapping = {
.name = "[vvar]",
.fault = vvar_fault,
@@ -185,7 +222,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
image->size,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- &text_mapping);
+ &vdso_mapping);

if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index ca3e517..917f2b6 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -594,6 +594,9 @@ struct vm_special_mapping {
int (*fault)(const struct vm_special_mapping *sm,
struct vm_area_struct *vma,
struct vm_fault *vmf);
+
+ int (*mremap)(const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma);
};

enum tlb_flush_reason {
diff --git a/mm/mmap.c b/mm/mmap.c
index de2c176..234edff 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2943,9 +2943,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma)
return ((struct vm_special_mapping *)vma->vm_private_data)->name;
}

+static int special_mapping_mremap(struct vm_area_struct *new_vma)
+{
+ struct vm_special_mapping *sm = new_vma->vm_private_data;
+
+ if (sm->mremap)
+ return sm->mremap(sm, new_vma);
+ return 0;
+}
+
static const struct vm_operations_struct special_mapping_vmops = {
.close = special_mapping_close,
.fault = special_mapping_fault,
+ .mremap = special_mapping_mremap,
.name = special_mapping_name,
};