[PATCH v10 2/2] mm/mremap: optionally randomize mremap(..., MREMAP_MAYMOVE)

From: Topi Miettinen
Date: Sun Jan 24 2021 - 07:44:18 EST


New sysctl kernel.randomize_mremap, when set, can be used to force
mremap(..., MREMAP_MAYMOVE) to always move the mappings even if not
necessary. In addition to improved address space layout randomization,
this can expose bugs where the caller is not actually expecting a
moved mapping, even though this may sometimes happen without this
flag.

Example:
$ cat mremap.c
#define _GNU_SOURCE
#include <stddef.h>
#include <sys/mman.h>

int main(void) {
void *addr = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
addr = mremap(addr, 4096, 8192, MREMAP_MAYMOVE);
mremap(addr, 4096, 4096, MREMAP_MAYMOVE);
return 0;
}
$ gcc -o mremap mremap.c
$ strace -e mmap,mremap ./mremap
mmap(NULL, 4096, PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x68a16298d000
mremap(0x68a16298d000, 4096, 8192, MREMAP_MAYMOVE) = 0x68a16298d000
mremap(0x68a16298d000, 4096, 4096, MREMAP_MAYMOVE) = 0x68a16298d000

Setting the sysctl enables randomization:
$ sudo sysctl kernel.randomize_mremap=1
$ strace -e mmap,mremap ./mremap
mmap(NULL, 4096, PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x6366429cf000
mremap(0x6366429cf000, 4096, 8192, MREMAP_MAYMOVE) = 0x70aa47ad0000
mremap(0x70aa47ad0000, 4096, 4096, MREMAP_MAYMOVE) = 0x5b37dc166000

CC: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
CC: Jann Horn <jannh@xxxxxxxxxx>
CC: Kees Cook <keescook@xxxxxxxxxxxx>
CC: Matthew Wilcox <willy@xxxxxxxxxxxxx>
CC: Mike Rapoport <rppt@xxxxxxxxxx>
CC: Linux API <linux-api@xxxxxxxxxxxxxxx>
Signed-off-by: Topi Miettinen <toiwoton@xxxxxxxxx>
---
Documentation/admin-guide/sysctl/kernel.rst | 9 +++++++
include/linux/mm.h | 2 ++
kernel/sysctl.c | 7 ++++++
mm/mremap.c | 26 +++++++++++++++++++--
4 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index c13f865c806c..eeca8c8f96d0 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1029,6 +1029,15 @@ defined, these additional entries are present:
number of cycles between interrupts used to feed the pool.


+randomize_mremap
+==================
+
+This option, when set, can be used to force mremap(...,
+MREMAP_MAYMOVE) to always move the mappings even if not necessary.
+In addition to improved address space layout randomization, this can
+expose bugs where the caller is not actually expecting a moved
+mapping, even though this may sometimes happen without this flag.
+
randomize_va_space
==================

diff --git a/include/linux/mm.h b/include/linux/mm.h
index b4915412abbe..98aa466c2901 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2990,6 +2990,8 @@ void drop_slab_node(int nid);
extern int randomize_va_space;
#endif

+extern int randomize_mremap;
+
const char * arch_vma_name(struct vm_area_struct *vma);
#ifdef CONFIG_MMU
void print_vma_addr(char *prefix, unsigned long rip);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index afad085960b8..02bd9ba89f27 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2429,6 +2429,13 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "randomize_mremap",
+ .data = &randomize_mremap,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#endif
#if defined(CONFIG_S390) && defined(CONFIG_SMP)
{
diff --git a/mm/mremap.c b/mm/mremap.c
index 138abbae4f75..386da905f39f 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -648,6 +648,14 @@ static int vma_expandable(struct vm_area_struct *vma, unsigned long delta)
return 1;
}

+/*
+ * Force mremap(..., MREMAP_MAYMOVE) to always move the mappings even
+ * if not necessary. This can expose bugs where the caller is not
+ * actually expecting a moved mapping, even though this may sometimes
+ * happen without this flag.
+ */
+int randomize_mremap __read_mostly = 0;
+
/*
* Expand (or shrink) an existing mapping, potentially moving it at the
* same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
@@ -665,6 +673,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
unsigned long charged = 0;
bool locked = false;
bool downgraded = false;
+ bool randomize = false;
struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
LIST_HEAD(uf_unmap_early);
LIST_HEAD(uf_unmap);
@@ -720,6 +729,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
goto out;
}

+ randomize = (flags & MREMAP_MAYMOVE) && randomize_mremap;
/*
* Always allow a shrinking remap: that just unmaps
* the unnecessary pages..
@@ -730,7 +740,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
int retval;

retval = __do_munmap(mm, addr+new_len, old_len - new_len,
- &uf_unmap, true);
+ &uf_unmap, !randomize);
if (retval < 0 && old_len != new_len) {
ret = retval;
goto out;
@@ -738,6 +748,16 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
} else if (retval == 1)
downgraded = true;
ret = addr;
+
+ /*
+ * Caller is happy with a new address, so let's move
+ * even if not necessary
+ */
+ if (randomize)
+ ret = mremap_to(addr, new_len, 0, new_len,
+ &locked, flags, &uf, &uf_unmap_early,
+ &uf_unmap);
+
goto out;
}

@@ -751,8 +771,10 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
}

/* old_len exactly to the end of the area..
+ * But when randomizing, don't just expand the mapping if
+ * caller is happy with a moved and resized mapping
*/
- if (old_len == vma->vm_end - addr) {
+ if (old_len == vma->vm_end - addr && !randomize) {
/* can we just expand the current mapping? */
if (vma_expandable(vma, new_len - old_len)) {
int pages = (new_len - old_len) >> PAGE_SHIFT;
--
2.29.2