Incorrect RSS page accounting of processes with multiple mapping pages

From: Shu Ming
Date: Mon Feb 08 2021 - 01:18:18 EST


Hi,

I believe the RES (resident memory) page accounting is unexpected when
the same pages are mapped multiple times. The sample code is pasted
below. In the sample code, the same 1g of pages is mapped three times.
I expected the process to be charged 1g of RES pages, but the top
command showed 3g of RES pages.

memfd.c
----
#include <stdio.h>
#include <stddef.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/memfd.h>
#include "memfd.h"

/* Size in bytes of the memfd and of each mapping of it: 2^30 bytes (1 GiB). */
const size_t SIZE = (size_t)1 << 30;

/*
 * Reproducer: create a SIZE-byte memfd, map it three times with MAP_SHARED,
 * then write through all three mappings forever so that the resident-memory
 * figure of this process can be watched from outside (e.g. with top).
 *
 * Returns 1 if creating or sizing the memfd, or any of the three mmap()
 * calls, fails. Otherwise it never returns: the touch loop is intentionally
 * endless.
 */
int main(void) {
    /* Number of int slots in one mapping (was hard-coded as SIZE / 4). */
    const size_t units = SIZE / sizeof(int);
    size_t step = 0;

    /* Replacing the MFD_ALLOW_SEALING flag with 0 doesn't seem to change anything. */
    int fd = memfd_create("testmemfd", MFD_ALLOW_SEALING);
    if (fd == -1) {
        perror("memfd_create");
        return 1;
    }
    if (ftruncate(fd, SIZE) == -1) {
        perror("ftruncate");
        return 1;
    }

    /*
     * Three independent shared mappings of the same file range [0, SIZE).
     * Bail out on failure: continuing would write through MAP_FAILED.
     */
    int *data1 = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (data1 == MAP_FAILED) {
        perror("mmap");
        return 1;
    }
    int *data2 = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (data2 == MAP_FAILED) {
        perror("mmap");
        return 1;
    }
    int *data3 = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (data3 == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    /*
     * close(fd) is deliberately left disabled here; per the original report,
     * removing close(fd) or the mmap() code doesn't seem to change anything.
     */

    printf("%d\n", fd);

    /* Walk every int slot of all three mappings, wrapping around at the end. */
    while (1) {
        step = step % units;
        data1[step] = 1;
        data2[step] = 2;
        data3[step] = 3;
        step++;
    }
}

----
memfd.h

#ifndef _MEMFD_H
#define _MEMFD_H

/*
 * SPDX-License-Identifier: Unlicense
 *
 * No glibc wrappers exist for memfd_create(2), so provide our own.
 *
 * Also define memfd fcntl sealing macros. While they are already
 * defined in the kernel header file <linux/fcntl.h>, that file as
 * a whole conflicts with the original glibc header <fcntl.h>.
 */

/*
 * Issue the memfd_create system call directly through syscall(),
 * forwarding `name` and `flags` unchanged.
 *
 * Returns the value produced by syscall(), narrowed to int.
 */
static inline int memfd_create(const char *name, unsigned int flags)
{
	const long rc = syscall(__NR_memfd_create, name, flags);

	return (int)rc;
}

/*
 * Base value that the sealing fcntl command numbers below are offset from.
 * Defined here as 1024 only when no earlier header has already defined it.
 */
#ifndef F_LINUX_SPECIFIC_BASE
#define F_LINUX_SPECIFIC_BASE 1024
#endif

/*
 * Sealing command numbers (F_ADD_SEALS, F_GET_SEALS) and seal flag bits
 * (F_SEAL_*). The whole group is guarded by F_ADD_SEALS alone: if that
 * macro is already defined, none of the other five are defined here.
 */
#ifndef F_ADD_SEALS
#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)

#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
#define F_SEAL_GROW 0x0004 /* prevent file from growing */
#define F_SEAL_WRITE 0x0008 /* prevent writes */
#endif

#endif /* _MEMFD_H */