[PATCH] mremap: add a 2s delay for MAP_FIXED case

From: Aaron Lu
Date: Thu Nov 17 2016 - 02:11:08 EST


Add a 2s delay before the TLB flush of the old range in the MREMAP_FIXED
case (mremap_to()) to enlarge the race window so that the race can be
hit from user space.

Signed-off-by: Aaron Lu <aaron.lu@xxxxxxxxx>
---
fs/exec.c | 2 +-
include/linux/mm.h | 2 +-
mm/mremap.c | 19 ++++++++++++-------
3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 4e497b9ee71e..1e49ce9a23bd 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -619,7 +619,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* process cleanup to remove whatever mess we made.
*/
if (length != move_page_tables(vma, old_start,
- vma, new_start, length, false))
+ vma, new_start, length, false, false))
return -ENOMEM;

lru_add_drain();
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a92c8d73aeaf..5e35fe3d914a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1392,7 +1392,7 @@ int vma_is_stack_for_current(struct vm_area_struct *vma);
extern unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long old_addr, struct vm_area_struct *new_vma,
unsigned long new_addr, unsigned long len,
- bool need_rmap_locks);
+ bool need_rmap_locks, bool delay);
extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
unsigned long end, pgprot_t newprot,
int dirty_accountable, int prot_numa);
diff --git a/mm/mremap.c b/mm/mremap.c
index da22ad2a5678..8e35279ca622 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -22,6 +22,7 @@
#include <linux/mmu_notifier.h>
#include <linux/uaccess.h>
#include <linux/mm-arch-hooks.h>
+#include <linux/delay.h>

#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
@@ -166,7 +167,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long old_addr, struct vm_area_struct *new_vma,
unsigned long new_addr, unsigned long len,
- bool need_rmap_locks)
+ bool need_rmap_locks, bool delay)
{
unsigned long extent, next, old_end;
pmd_t *old_pmd, *new_pmd;
@@ -224,8 +225,11 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
new_vma, new_pmd, new_addr, need_rmap_locks);
need_flush = true;
}
- if (likely(need_flush))
+ if (likely(need_flush)) {
+ if (delay)
+ msleep(2000);
flush_tlb_range(vma, old_end-len, old_addr);
+ }

mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);

@@ -234,7 +238,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,

static unsigned long move_vma(struct vm_area_struct *vma,
unsigned long old_addr, unsigned long old_len,
- unsigned long new_len, unsigned long new_addr, bool *locked)
+ unsigned long new_len, unsigned long new_addr,
+ bool *locked, bool delay)
{
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *new_vma;
@@ -273,7 +278,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
return -ENOMEM;

moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len,
- need_rmap_locks);
+ need_rmap_locks, delay);
if (moved_len < old_len) {
err = -ENOMEM;
} else if (vma->vm_ops && vma->vm_ops->mremap) {
@@ -287,7 +292,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
* and then proceed to unmap new area instead of old.
*/
move_page_tables(new_vma, new_addr, vma, old_addr, moved_len,
- true);
+ true, delay);
vma = new_vma;
old_len = new_len;
old_addr = new_addr;
@@ -442,7 +447,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
if (offset_in_page(ret))
goto out1;

- ret = move_vma(vma, addr, old_len, new_len, new_addr, locked);
+ ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, true);
if (!(offset_in_page(ret)))
goto out;
out1:
@@ -576,7 +581,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
goto out;
}

- ret = move_vma(vma, addr, old_len, new_len, new_addr, &locked);
+ ret = move_vma(vma, addr, old_len, new_len, new_addr, &locked, false);
}
out:
if (offset_in_page(ret)) {
--
2.5.5


--AqsLC8rIMeq19msA
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="raceremap.c"

#define _GNU_SOURCE
#define _XOPEN_SOURCE 500
#include <sched.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <assert.h>
#include <sys/io.h>

/* Size in bytes of the slice of the file that each CPU works on. */
#define BUFLEN 4096

/*
 * Template handed to mkstemp() in testcase_prepare(); mkstemp() rewrites
 * the trailing XXXXXX in place, so afterwards this holds the real path.
 */
static char wistmpfile[] = "/mnt/willitscale.XXXXXX";

/* NOTE(review): not referenced in this file; presumably read by the test harness - confirm. */
char *testcase_description = "mremap";

/* Start of the shared file mapping created in testcase_prepare(). */
char *buf;
/* Fixed destination address passed to mremap() in testcase(). */
char *newbuf = (char *)0x700000000000;
/* Size in bytes of the backing file and of the mapping (128 slices of 4096 bytes). */
#define FILE_SIZE (4096*128)

/*
 * Busy-wait by reading I/O port 0x80 `ms` times.
 *
 * NOTE(review): despite the parameter name nothing here measures
 * milliseconds; the delay is simply the cost of `ms` port reads on the
 * machine at hand - calibrate if the exact duration matters.
 *
 * Aborts the process if access to the port range cannot be obtained.
 */
static void mdelay(int ms)
{
	int i;

	/*
	 * Gain I/O permission for the delay.  The call is deliberately kept
	 * out of assert(): with NDEBUG an assert() expression is not
	 * evaluated, which would skip ioperm() and make inb() fault.
	 */
	if (ioperm(0x80, 8, 1) != 0) {
		perror("ioperm");
		abort();
	}

	for (i = 0; i < ms; i++)
		inb(0x80);
}

/*
 * Create the temporary backing file, extend it to FILE_SIZE bytes and map
 * it shared and writable; the mapping address is stored in `buf`.
 *
 * Aborts the process if any of the steps fails.
 */
void testcase_prepare(void)
{
	int fd = mkstemp(wistmpfile);

	if (fd < 0) {
		perror("mkstemp");
		abort();
	}

	/*
	 * Writing a single byte at the last offset grows the file to
	 * FILE_SIZE bytes.  This must not live inside assert(), otherwise
	 * an NDEBUG build would leave the file empty.
	 */
	if (pwrite(fd, "X", 1, FILE_SIZE - 1) != 1) {
		perror("pwrite");
		abort();
	}

	buf = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE,
		   MAP_SHARED, fd, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		abort();
	}

	/* The descriptor is no longer needed once the mapping exists. */
	close(fd);
}

/*
 * Hand-shake between the runners in testcase(): set to 1 once a page has
 * been dirtied, set to 2 right before cpu 0 calls mremap().
 * NOTE(review): this only works if all runners share one address space,
 * and volatile gives no inter-thread ordering guarantee - confirm both
 * against the harness.
 */
static volatile int step = 0;

/*
 * Body of one runner of the mremap/TLB-flush race reproducer.
 *
 * The runner on cpu 0 waits until some other runner has dirtied its page,
 * then moves the whole mapping to `newbuf` with mremap() and sleeps in
 * pause().  Every other runner dirties its own BUFLEN-sized slice through
 * the old address, waits for the remap to start, cleans the page, writes
 * through the old address again and finally reads the file back to see
 * whether that last write reached it.  Such a runner ends with exit(0).
 *
 * @iterations: unused by this testcase.
 *
 * NOTE(review): the hand-shake through `step` and the shared mapping
 * assume that all runners live in one address space - confirm against
 * the harness.
 */
void testcase(unsigned long long *iterations)
{
	int cpu = sched_getcpu();
	int fd = open(wistmpfile, O_RDWR);
	off_t offset;
	long counterread = 0;
	long *counterbuf;

	(void)iterations;
	assert(fd >= 0);

	/*
	 * The mapping only holds FILE_SIZE / BUFLEN slices; refuse to run
	 * on a CPU number that has no slice instead of touching memory
	 * outside of the mapping.
	 */
	assert(cpu >= 0 && cpu < FILE_SIZE / BUFLEN);

	/*
	 * Derive the offset from the CPU number sampled above rather than
	 * sampling again, so that `cpu` and `offset` always agree.
	 */
	offset = (off_t)cpu * BUFLEN;
	counterbuf = (void *)&buf[offset];

	printf("cpu%d runs\n", cpu);

	while (1) {
		ssize_t nread;

		if (cpu == 0) {
			void *tmpbuf;

			// wait for step 1 done
			while (step < 1);

			// step 2: start mremap to have the old PTE emptied
			printf("cpu%d: going to remap\n", cpu);
			step = 2;
			tmpbuf = mremap(buf, FILE_SIZE, FILE_SIZE,
					MREMAP_FIXED | MREMAP_MAYMOVE,
					newbuf);
			assert(tmpbuf == newbuf);
			printf("cpu%d: remap done\n", cpu);
			pause();
		}

		// step 1: dirty the old PTE
		*counterbuf = 1;

		step = 1;
		while (step < 2);

		// step 3: clean this page
		// delay a little while to give mremap some time
		// to empty the old PTE and setup new PTE
		mdelay(1000);
		printf("cpu%d: going to clean the page\n", cpu);
		posix_fadvise(fd, offset, BUFLEN, POSIX_FADV_DONTNEED);

		// step 4: now the page is cleaned, its new PTE is
		// write protected but since mremap didn't flush tlb
		// for the old PTE yet, we could still access the old
		// addr and that will not dirty anything
		printf("cpu%d: going to write 2\n", cpu);
		*counterbuf = 2;
		printf("cpu%d wrote 2\n", cpu);

		// step 5: drop this page from page cache and then
		// read it back to verify if the last write gets lost
		// munmap the page first, or the FADV_DONTNEED won't
		// kick the page out of page cache
		munmap(newbuf + offset, BUFLEN);
		posix_fadvise(fd, offset, BUFLEN, POSIX_FADV_DONTNEED);
		// pread() returns ssize_t; compare in the signed domain
		nread = pread(fd, &counterread, sizeof(counterread), offset);
		assert(nread == (ssize_t)sizeof(counterread));

		if (counterread != 2) {
			printf("*cpu%d wrote 2 gets lost\n", cpu);
			fflush(stdout);
		}
		exit(0);
	}
}

/* Remove the temporary backing file named by wistmpfile. */
void testcase_cleanup(void)
{
	const char *path = wistmpfile;

	/* Best effort: the result of unlink() is not examined. */
	(void)unlink(path);
}

--AqsLC8rIMeq19msA--