Re: [PATCH 01/13] mm: Update ptep_get_lockless()s comment

From: Nadav Amit
Date: Sat Oct 29 2022 - 20:18:48 EST


On Oct 29, 2022, at 12:14 PM, Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:

> It didn't fail for me before, it doesn't fail for me with those patches.

For the record, I tried to run the PoC on another machine, and it indeed did
not fail.

It turns out I had a small bug in one of the mechanisms that were intended to
make the failure more likely (I should have mapped again or madvised the full
HPAGE_SIZE, so that zap_pte_range() takes longer and the race becomes more
probable).

I am still trying to figure out how to address this issue, and whether the
fact that some rmap_walk() paths do not use PVMW_SYNC is an issue.

---

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <time.h>
#include <unistd.h>

/*
 * Report a failed call and terminate: prints msg followed by the errno
 * description (via perror) and exits with EXIT_FAILURE.
 */
#define handle_error(msg) \
do { perror(msg); exit(EXIT_FAILURE); } while (0)

/* Start of the HPAGE_SIZE file mapping; assigned by main() before the threads are created. */
void *p;
/* Set to true by main() after its sleep to make the worker threads leave their loops. */
volatile bool stop = false;
/* PID of the forked flushing process; assigned by main() after fork(). */
pid_t flusher_pid;
/* Descriptor of the backing file given on the command line. */
int fd;

/*
 * Geometry the test assumes: 4096-byte pages and 512 pages per PMD, so
 * HPAGE_SIZE is the size of the region the test maps and remaps.
 */
#define PAGE_SIZE (4096ul)
#define PAGES_PER_PMD (512)
#define HPAGE_SIZE (PAGE_SIZE * PAGES_PER_PMD)

// Comment out MUNMAP_TEST to run the MADV_DONTNEED variant of remap_thread()
#define MUNMAP_TEST

/*
 * Thread body: until `stop` is set, keep writing the byte 5 at offset 64 of
 * every page of the mapping at `p` except page 0 (the first word of page 0
 * is the one checking_thread() writes and verifies).
 *
 * arg is unused. Always returns NULL.
 */
void *dirtying_thread(void *arg)
{
	(void)arg;

	for (; !stop; ) {
		for (int page = 1; page < PAGES_PER_PMD; page++) {
			volatile char *target =
				(volatile char *)p + (page * PAGE_SIZE) + 64;

			*target = 5;
		}
	}

	return NULL;
}

/*
 * Thread body: until `stop` is set, store an incrementing counter into the
 * first unsigned long of the mapping at `p` and read it straight back. If the
 * value read differs from the value just written, report the mismatch, stop
 * the flushing process and terminate the whole program.
 *
 * arg is unused. Returns NULL when stopped without detecting a mismatch.
 */
void *checking_thread(void *arg)
{
	volatile unsigned long *ul_p = (volatile unsigned long *)p;
	unsigned long cnt = 0;

	(void)arg;

	while (!stop) {
		unsigned long seen;

		*ul_p = cnt;
		/*
		 * Read once, so the value printed is the one that actually
		 * failed the comparison rather than a later re-read.
		 */
		seen = *ul_p;
		if (seen != cnt) {
			printf("FAILED: expected %lu, got %lu\n", cnt, seen);
			/*
			 * The threads start before main() forks, so flusher_pid
			 * may still be 0 here; kill(0, ...) would signal the
			 * entire process group instead of the flusher.
			 */
			if (flusher_pid > 0)
				kill(flusher_pid, SIGTERM);
			exit(0);
		}
		cnt++;
	}
	return NULL;
}

/*
 * Thread body: until `stop` is set, keep tearing down the page mappings
 * behind `p`.
 *
 * With MUNMAP_TEST defined, the region is replaced in place by a new
 * populated shared mapping of `fd` (MAP_FIXED over the old one). Otherwise
 * the region is dropped with MADV_DONTNEED, followed by a 10000 ns sleep.
 *
 * arg is unused. Any failing call terminates the program via handle_error().
 * Always returns NULL.
 */
void *remap_thread(void *arg)
{
	(void)arg;

#ifdef MUNMAP_TEST
	for (; !stop; ) {
		void *fresh = mmap(p, HPAGE_SIZE, PROT_READ | PROT_WRITE,
				   MAP_SHARED | MAP_FIXED | MAP_POPULATE, fd, 0);

		if (fresh == MAP_FAILED)
			handle_error("remap_thread");
	}
#else
	struct timespec pause = { .tv_nsec = 10000 };

	for (; !stop; ) {
		if (madvise(p, HPAGE_SIZE, MADV_DONTNEED) < 0)
			handle_error("MADV_DONTNEED");
		nanosleep(&pause, NULL);
	}
#endif

	return NULL;
}

/*
 * Body of the forked flushing process. Drops its own page mappings for the
 * region once, then loops forever: msync() the first page of the mapping and
 * ask the kernel to drop the first page of the file from the page cache.
 *
 * Never returns normally; it ends when a call fails (handle_error() exits)
 * or when the parent sends SIGTERM.
 */
void flushing_process(void)
{
	int err;

	// Remove the pages to speed up rmap_walk and allow to drop caches.
	if (madvise(p, HPAGE_SIZE, MADV_DONTNEED) < 0)
		handle_error("MADV_DONTNEED");

	while (true) {
		if (msync(p, PAGE_SIZE, MS_SYNC))
			handle_error("msync");

		/*
		 * posix_fadvise() returns the error number instead of setting
		 * errno, so hand it to perror() explicitly.
		 */
		err = posix_fadvise(fd, 0, PAGE_SIZE, POSIX_FADV_DONTNEED);
		if (err) {
			errno = err;
			handle_error("posix_fadvise");
		}
	}
}

/*
 * Test driver.
 *
 * Usage: <prog> <filename>
 *
 * 1. Opens (creating if needed) the file and writes PAGES_PER_PMD zeroed
 *    pages to it.
 * 2. Reserves 2 * HPAGE_SIZE of PROT_NONE address space, picks the
 *    HPAGE_SIZE-aligned address inside it and maps the file there (shared,
 *    populated). The result is published in `p`.
 * 3. Starts the dirtying, checking and remapping threads.
 * 4. Forks the flushing process and records its PID in `flusher_pid`.
 * 5. Sleeps 60 seconds, stops and joins the threads, terminates the flusher.
 *
 * Exits with EXIT_FAILURE on a usage or setup error. Exits with 0 when the
 * run ends without checking_thread() having reported a mismatch.
 */
int main(int argc, char *argv[])
{
	void *(*thread_funcs[])(void *) = {
		&dirtying_thread,
		&checking_thread,
		&remap_thread,
	};
	int n_threads = sizeof(thread_funcs) / sizeof(*thread_funcs);
	pthread_t *threads;
	unsigned long addr;
	char *page;
	void *ptr;
	pid_t pid;
	int r, i;

	if (argc < 2) {
		fprintf(stderr, "usages: %s [filename]\n", argv[0]);
		exit(EXIT_FAILURE);
	}

	threads = calloc(n_threads, sizeof(*threads));
	if (!threads)
		handle_error("calloc threads");

	fd = open(argv[1], O_RDWR|O_CREAT, 0666);
	if (fd == -1)
		handle_error("open fd");

	/* Zero-filled so that no uninitialised heap contents reach the file. */
	page = calloc(1, PAGE_SIZE);
	if (!page)
		handle_error("calloc page");

	for (i = 0; i < PAGES_PER_PMD; i++) {
		if (write(fd, page, PAGE_SIZE) != (ssize_t)PAGE_SIZE)
			handle_error("write");
	}
	free(page);

	/*
	 * Reserve twice the needed size so that an HPAGE_SIZE-aligned window
	 * of HPAGE_SIZE bytes is guaranteed to lie inside the reservation.
	 */
	ptr = mmap(NULL, HPAGE_SIZE * 2, PROT_NONE, MAP_PRIVATE|MAP_ANON,
		   -1, 0);
	if (ptr == MAP_FAILED)
		handle_error("mmap anon");

	/* Round up to the next HPAGE_SIZE boundary. */
	addr = ((unsigned long)ptr + HPAGE_SIZE - 1) & ~(HPAGE_SIZE - 1);
	printf("starting...\n");

	ptr = mmap((void *)addr, HPAGE_SIZE, PROT_READ|PROT_WRITE,
		   MAP_SHARED|MAP_FIXED|MAP_POPULATE, fd, 0);
	if (ptr == MAP_FAILED)
		handle_error("mmap file - start");

	p = ptr;

	for (i = 0; i < n_threads; i++) {
		r = pthread_create(&threads[i], NULL, thread_funcs[i], NULL);
		if (r) {
			/* pthread_create() returns the error; it does not set errno. */
			errno = r;
			handle_error("pthread_create");
		}
	}

	// Run the flushing process in a different process, so msync() would
	// not require mmap_lock.
	pid = fork();
	if (pid < 0) {
		/*
		 * Must not fall through: flusher_pid == -1 would make the
		 * kill() below signal every process this user may signal.
		 */
		handle_error("fork");
	}
	if (pid == 0)
		flushing_process();
	flusher_pid = pid;

	sleep(60);

	stop = true;
	for (i = 0; i < n_threads; i++)
		pthread_join(threads[i], NULL);
	free(threads);
	kill(flusher_pid, SIGTERM);
	printf("Finished without an error\n");

	exit(0);
}