Re: 2.6.23-mm1

From: Jiri Kosina
Date: Fri Oct 19 2007 - 17:55:31 EST


On Wed, 17 Oct 2007, KAMEZAWA Hiroyuki wrote:

> > hm, I guess this is probably due to pie-randomization patch, right?
> > (could you please try reverting it, to see whether things get back to
> > normal).
> Maybe this can be fix.

Andrew,

below is a fixed version with patch from Kamezawa Hiroyuki incorporated.
It fixes the small regression Kamezawa found just at the time you sent
merge request for this patch to Linus -- that ia32 ELF binaires on x86_64
were able to allocate only about 2/3 of memory they were able to allocate
without this patch. Apart from this fix, the patch is the same as it has
been in -mm tree for quite some time.

It'd be great if it could make it for 2.6.24, if feasible. Thanks.


From: Jiri Kosina <jkosina@xxxxxxx>
Subject: PIE executable randomization

This patch is using mmap()'s randomization functionality in such a way
that it maps the main executable of (specially compiled/linked -pie/-fpie)
ET_DYN binaries onto a random address (in cases in which mmap() is allowed
to perform a randomization).

The code has been extraced from Ingo's exec-shield patch
http://people.redhat.com/mingo/exec-shield/

[akpm@xxxxxxxxxxxxxxxxxxxx: fix used-uninitialsied warning]
[kamezawa.hiroyu@xxxxxxxxxxxxxx: fixed ia32 ELF on x86_64 handling]
Signed-off-by: Jiri Kosina <jkosina@xxxxxxx>

diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c
index f6ae3ec..3db699b 100644
--- a/arch/ia64/ia32/binfmt_elf32.c
+++ b/arch/ia64/ia32/binfmt_elf32.c
@@ -226,7 +226,7 @@ elf32_set_personality (void)
}

static unsigned long
-elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
+elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type, unsigned long unused)
{
unsigned long pgoff = (eppnt->p_vaddr) & ~IA32_PAGE_MASK;

diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 907942e..95485e6 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -12,6 +12,7 @@
#include <linux/file.h>
#include <linux/utsname.h>
#include <linux/personality.h>
+#include <linux/random.h>

#include <asm/uaccess.h>
#include <asm/ia32.h>
@@ -65,6 +66,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
unsigned long *end)
{
if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) {
+ unsigned long new_begin;
/* This is usually used needed to map code in small
model, so it needs to be in the first 31bit. Limit
it to that. This means we need to move the
@@ -74,6 +76,11 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
of playground for now. -AK */
*begin = 0x40000000;
*end = 0x80000000;
+ if (current->flags & PF_RANDOMIZE) {
+ new_begin = randomize_range(*begin, *begin + 0x02000000, 0);
+ if (new_begin)
+ *begin = new_begin;
+ }
} else {
*begin = TASK_UNMAPPED_BASE;
*end = TASK_SIZE;
@@ -143,6 +150,97 @@ full_search:
}
}

+
+unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ const unsigned long len, const unsigned long pgoff,
+ const unsigned long flags)
+{
+ struct vm_area_struct *vma;
+ struct mm_struct *mm = current->mm;
+ unsigned long addr = addr0;
+
+ /* requested length too big for entire address space */
+ if (len > TASK_SIZE)
+ return -ENOMEM;
+
+ if (flags & MAP_FIXED)
+ return addr;
+
+ /* for MAP_32BIT mappings we force the legact mmap base */
+ if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT))
+ goto bottomup;
+
+ /* requesting a specific address */
+ if (addr) {
+ addr = PAGE_ALIGN(addr);
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr &&
+ (!vma || addr + len <= vma->vm_start))
+ return addr;
+ }
+
+ /* check if free_area_cache is useful for us */
+ if (len <= mm->cached_hole_size) {
+ mm->cached_hole_size = 0;
+ mm->free_area_cache = mm->mmap_base;
+ }
+
+ /* either no address requested or can't fit in requested address hole */
+ addr = mm->free_area_cache;
+
+ /* make sure it can fit in the remaining address space */
+ if (addr > len) {
+ vma = find_vma(mm, addr-len);
+ if (!vma || addr <= vma->vm_start)
+ /* remember the address as a hint for next time */
+ return (mm->free_area_cache = addr-len);
+ }
+
+ if (mm->mmap_base < len)
+ goto bottomup;
+
+ addr = mm->mmap_base-len;
+
+ do {
+ /*
+ * Lookup failure means no vma is above this address,
+ * else if new region fits below vma->vm_start,
+ * return with success:
+ */
+ vma = find_vma(mm, addr);
+ if (!vma || addr+len <= vma->vm_start)
+ /* remember the address as a hint for next time */
+ return (mm->free_area_cache = addr);
+
+ /* remember the largest hole we saw so far */
+ if (addr + mm->cached_hole_size < vma->vm_start)
+ mm->cached_hole_size = vma->vm_start - addr;
+
+ /* try just below the current vma->vm_start */
+ addr = vma->vm_start-len;
+ } while (len < vma->vm_start);
+
+bottomup:
+ /*
+ * A failed mmap() very likely causes application failure,
+ * so fall back to the bottom-up function here. This scenario
+ * can happen with large stack limits and large mmap()
+ * allocations.
+ */
+ mm->cached_hole_size = ~0UL;
+ mm->free_area_cache = TASK_UNMAPPED_BASE;
+ addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
+ /*
+ * Restore the topdown base:
+ */
+ mm->free_area_cache = mm->mmap_base;
+ mm->cached_hole_size = ~0UL;
+
+ return addr;
+}
+
+
asmlinkage long sys_uname(struct new_utsname __user * name)
{
int err;
diff --git a/arch/x86/mm/mmap_64.c b/arch/x86/mm/mmap_64.c
index 80bba0d..6ffc045 100644
--- a/arch/x86/mm/mmap_64.c
+++ b/arch/x86/mm/mmap_64.c
@@ -1,29 +1,115 @@
-/* Copyright 2005 Andi Kleen, SuSE Labs.
- * Licensed under GPL, v.2
+/*
+ * linux/arch/x86-64/mm/mmap.c
+ *
+ * flexible mmap layout support
+ *
+ * Based on code by Ingo Molnar and Andi Kleen, copyrighted
+ * as follows:
+ *
+ * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
+ * All Rights Reserved.
+ * Copyright 2005 Andi Kleen, SUSE Labs.
+ * Copyright 2007 Jiri Kosina, SUSE Labs.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
*/
+
+#include <linux/personality.h>
#include <linux/mm.h>
-#include <linux/sched.h>
#include <linux/random.h>
+#include <linux/limits.h>
+#include <linux/sched.h>
#include <asm/ia32.h>

-/* Notebook: move the mmap code from sys_x86_64.c over here. */
+/*
+ * Top of mmap area (just below the process stack).
+ *
+ * Leave an at least ~128 MB hole.
+ */
+#define MIN_GAP (128*1024*1024)
+#define MAX_GAP (TASK_SIZE/6*5)

-void arch_pick_mmap_layout(struct mm_struct *mm)
+static inline unsigned long mmap_base(void)
+{
+ unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
+
+ if (gap < MIN_GAP)
+ gap = MIN_GAP;
+ else if (gap > MAX_GAP)
+ gap = MAX_GAP;
+
+ return TASK_SIZE - (gap & PAGE_MASK);
+}
+
+static inline int mmap_is_32(void)
{
#ifdef CONFIG_IA32_EMULATION
- if (current_thread_info()->flags & _TIF_IA32)
- return ia32_pick_mmap_layout(mm);
+ if (test_thread_flag(TIF_IA32))
+ return 1;
#endif
- mm->mmap_base = TASK_UNMAPPED_BASE;
+ return 0;
+}
+
+static inline int mmap_is_legacy(void)
+{
+ if (current->personality & ADDR_COMPAT_LAYOUT)
+ return 1;
+
+ if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY)
+ return 1;
+
+ return sysctl_legacy_va_layout;
+}
+
+/*
+ * This function, called very early during the creation of a new
+ * process VM image, sets up which VM layout function to use:
+ */
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+ int rnd = 0;
if (current->flags & PF_RANDOMIZE) {
/* Add 28bit randomness which is about 40bits of address space
because mmap base has to be page aligned.
- or ~1/128 of the total user VM
- (total user address space is 47bits) */
- unsigned rnd = get_random_int() & 0xfffffff;
- mm->mmap_base += ((unsigned long)rnd) << PAGE_SHIFT;
+ or ~1/128 of the total user VM
+ (total user address space is 47bits) */
+ rnd = get_random_int() & 0xfffffff;
}
- mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
-}

+ /*
+ * Fall back to the standard layout if the personality
+ * bit is set, or if the expected stack growth is unlimited:
+ */
+ if (mmap_is_32()) {
+#ifdef CONFIG_IA32_EMULATION
+ /* ia32_pick_mmap_layout has its own. */
+ return ia32_pick_mmap_layout(mm);
+#endif
+ } else if(mmap_is_legacy()) {
+ mm->mmap_base = TASK_UNMAPPED_BASE;
+ mm->get_unmapped_area = arch_get_unmapped_area;
+ mm->unmap_area = arch_unmap_area;
+ } else {
+ mm->mmap_base = mmap_base();
+ mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+ mm->unmap_area = arch_unmap_area_topdown;
+ if (current->flags & PF_RANDOMIZE)
+ rnd = -rnd;
+ }
+ if (current->flags & PF_RANDOMIZE) {
+ mm->mmap_base += ((long)rnd) << PAGE_SHIFT;
+ }
+}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6e2f3b8..3e0cafc 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -45,7 +45,7 @@

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
-static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
+static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long);

/*
* If we don't support core dumping, then supply a NULL so we
@@ -298,33 +298,70 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
- struct elf_phdr *eppnt, int prot, int type)
+ struct elf_phdr *eppnt, int prot, int type,
+ unsigned long total_size)
{
unsigned long map_addr;
- unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
+ unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
+ unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
+ addr = ELF_PAGESTART(addr);
+ size = ELF_PAGEALIGN(size);

- down_write(&current->mm->mmap_sem);
/* mmap() will return -EINVAL if given a zero size, but a
* segment with zero filesize is perfectly valid */
- if (eppnt->p_filesz + pageoffset)
- map_addr = do_mmap(filep, ELF_PAGESTART(addr),
- eppnt->p_filesz + pageoffset, prot, type,
- eppnt->p_offset - pageoffset);
- else
- map_addr = ELF_PAGESTART(addr);
+ if (!size)
+ return addr;
+
+ down_write(&current->mm->mmap_sem);
+ /*
+ * total_size is the size of the ELF (interpreter) image.
+ * The _first_ mmap needs to know the full size, otherwise
+ * randomization might put this image into an overlapping
+ * position with the ELF binary image. (since size < total_size)
+ * So we first map the 'big' image - and unmap the remainder at
+ * the end. (which unmap is needed for ELF images with holes.)
+ */
+ if (total_size) {
+ total_size = ELF_PAGEALIGN(total_size);
+ map_addr = do_mmap(filep, addr, total_size, prot, type, off);
+ if (!BAD_ADDR(map_addr))
+ do_munmap(current->mm, map_addr+size, total_size-size);
+ } else
+ map_addr = do_mmap(filep, addr, size, prot, type, off);
+
up_write(&current->mm->mmap_sem);
return(map_addr);
}

#endif /* !elf_map */

+static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
+{
+ int i, first_idx = -1, last_idx = -1;
+
+ for (i = 0; i < nr; i++) {
+ if (cmds[i].p_type == PT_LOAD) {
+ last_idx = i;
+ if (first_idx == -1)
+ first_idx = i;
+ }
+ }
+ if (first_idx == -1)
+ return 0;
+
+ return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
+ ELF_PAGESTART(cmds[first_idx].p_vaddr);
+}
+
+
/* This is much more generalized than the library routine read function,
so we keep this separate. Technically the library read function
is only provided so that we can read a.out libraries that have
an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
- struct file *interpreter, unsigned long *interp_load_addr)
+ struct file *interpreter, unsigned long *interp_map_addr,
+ unsigned long no_base)
{
struct elf_phdr *elf_phdata;
struct elf_phdr *eppnt;
@@ -332,6 +369,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
int load_addr_set = 0;
unsigned long last_bss = 0, elf_bss = 0;
unsigned long error = ~0UL;
+ unsigned long total_size;
int retval, i, size;

/* First of all, some simple consistency checks */
@@ -370,6 +408,12 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
goto out_close;
}

+ total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
+ if (!total_size) {
+ error = -EINVAL;
+ goto out_close;
+ }
+
eppnt = elf_phdata;
for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
if (eppnt->p_type == PT_LOAD) {
@@ -387,9 +431,14 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
vaddr = eppnt->p_vaddr;
if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
elf_type |= MAP_FIXED;
+ else if (no_base && interp_elf_ex->e_type == ET_DYN)
+ load_addr = -vaddr;

map_addr = elf_map(interpreter, load_addr + vaddr,
- eppnt, elf_prot, elf_type);
+ eppnt, elf_prot, elf_type, total_size);
+ total_size = 0;
+ if (!*interp_map_addr)
+ *interp_map_addr = map_addr;
error = map_addr;
if (BAD_ADDR(map_addr))
goto out_close;
@@ -455,8 +504,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
goto out_close;
}

- *interp_load_addr = load_addr;
- error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;
+ error = load_addr;

out_close:
kfree(elf_phdata);
@@ -553,7 +601,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
int elf_exec_fileno;
int retval, i;
unsigned int size;
- unsigned long elf_entry, interp_load_addr = 0;
+ unsigned long elf_entry;
+ unsigned long interp_load_addr = 0;
unsigned long start_code, end_code, start_data, end_data;
unsigned long reloc_func_desc = 0;
char passed_fileno[6];
@@ -825,9 +874,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
current->mm->start_stack = bprm->p;

/* Now we do a little grungy work by mmaping the ELF image into
- the correct location in memory. At this point, we assume that
- the image should be loaded at fixed address, not at a variable
- address. */
+ the correct location in memory. */
for(i = 0, elf_ppnt = elf_phdata;
i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
int elf_prot = 0, elf_flags;
@@ -881,11 +928,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
* default mmap base, as well as whatever program they
* might try to exec. This is because the brk will
* follow the loader, and is not movable. */
+#ifdef CONFIG_X86
+ load_bias = 0;
+#else
load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
+#endif
}

error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
- elf_prot, elf_flags);
+ elf_prot, elf_flags,0);
if (BAD_ADDR(error)) {
send_sig(SIGKILL, current, 0);
retval = IS_ERR((void *)error) ?
@@ -961,13 +1012,25 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
}

if (elf_interpreter) {
- if (interpreter_type == INTERPRETER_AOUT)
+ if (interpreter_type == INTERPRETER_AOUT) {
elf_entry = load_aout_interp(&loc->interp_ex,
interpreter);
- else
+ } else {
+ unsigned long uninitialized_var(interp_map_addr);
+
elf_entry = load_elf_interp(&loc->interp_elf_ex,
interpreter,
- &interp_load_addr);
+ &interp_map_addr,
+ load_bias);
+ if (!IS_ERR((void *)elf_entry)) {
+ /*
+ * load_elf_interp() returns relocation
+ * adjustment
+ */
+ interp_load_addr = elf_entry;
+ elf_entry += loc->interp_elf_ex.e_entry;
+ }
+ }
if (BAD_ADDR(elf_entry)) {
force_sig(SIGSEGV, current);
retval = IS_ERR((void *)elf_entry) ?
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index a79f535..3e5f4f8 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -410,6 +410,7 @@ pte_t *lookup_address(unsigned long addr);
remap_pfn_range(vma, vaddr, pfn, size, prot)

#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN

#define pgtable_cache_init() do { } while (0)
#define check_pgt_cache() do { } while (0)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/