Re: [patch 0/2] mm: reduce reclaim stalls with heavy anon and dirty cache

From: Tejun Heo
Date: Fri Jan 24 2014 - 18:32:08 EST


On Fri, Jan 24, 2014 at 05:21:44PM -0500, Tejun Heo wrote:
> The trigger conditions seem quite plausible - high anon memory usage
> w/ heavy buffered IO and swap configured - and it's highly likely that
> this is happening in the wild too. (this can happen with copying
> large files to usb sticks too, right?)

So, just tested with the usb stick and these two patches, while not
perfect, make a world of difference. The problem is really easy to
reproduce on my machine which has 8gig of memory with the two attached
test programs.

* run "test-membloat 4300" and wait for it to report completion.

* run "test-latency"

Mount a slow USB stick and copy a large (multi-gig) file to it.
test-latency tries to print out a dot every 10ms but will report a
log2 number if the latency becomes more than twice as high - i.e. 4
means it took 2^4 * 10ms to complete a loop which is supposed to take
slightly longer than 10ms (10ms sleep + 4 page faults). My USB stick
can only do a couple of mbytes/s and without these patches the machine
becomes basically useless. It's just not usable; it stutters more
than it runs until the whole file finishes copying.

Because I've been using tmpfs as build target for a while, I've been
experiencing this occasionally and secretly growing bitter
disappointment towards the linux kernel which developed into
self-loathing to the point where I found booting into win8 consoling
after looking at my machine stuttering for 45mins while it was
repartitioning the hard drive to make room for steamos. Oh the irony.
I had to stay in fetal position for a while afterwards. It was a
crisis.

With the patches applied, for both heavy harddrive IO and
copy-large-file-to-slow-USB cases, the behavior is vastly improved.
It does stutter for a while once memory is filled up but stabilizes
after a little over ten seconds and then stays responsive. While it isn't
perfect, it's not completely ridiculous as before.

So, lots of kudos to Johannes for *finally* fixing the issue and I
strongly believe this is something we should consider for -stable even
if that takes a considerable amount of effort to verify it's not too
harmful for other workloads.

Thanks a lot.

--
tejun
#include <stdio.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <time.h>
#include <math.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * Number of characters in the inclusive range 'a'..'z'.
 * NOTE(review): not referenced by any code visible in this file; it looks
 * like a leftover - confirm before removing.
 */
#define NR_ALPHAS ('z' - 'a' + 1)

/*
 * test-latency: repeatedly sleep for a fixed interval, fault in a few freshly
 * mapped anonymous pages, and report how long each iteration took relative to
 * the requested interval.
 *
 * Usage: test-latency [interval-in-msecs]	(default: 10)
 *
 * One character is written to stdout per iteration:
 *   '.'       the iteration took less than four times the interval
 *   '2'..'9'  floor(log2(elapsed / interval)), capped at 9
 *
 * Runs until killed.  Returns 1 on a usage error or when a system call
 * fails.
 */
int main(int argc, char **argv)
{
	enum { PAGE_BYTES = 4096, NR_PAGES = 4 };
	const size_t map_size = (size_t)PAGE_BYTES * NR_PAGES;
	struct timespec intv_ts = { 0 }, ts;
	unsigned long long time0, time1;
	long long msecs = 10;

	if (argc > 1) {
		char *end;

		/* strtoll + end check rejects trailing garbage such as "10ms" */
		msecs = strtoll(argv[1], &end, 10);
		if (end == argv[1] || *end != '\0' || msecs <= 0) {
			fprintf(stderr, "test-latency [interval-in-msecs]\n");
			return 1;
		}
	}

	intv_ts.tv_sec = msecs / 1000;
	intv_ts.tv_nsec = (msecs % 1000) * 1000000;

	if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
		perror("clock_gettime");
		return 1;
	}
	time1 = ts.tv_sec * 1000000000LLU + ts.tv_nsec;

	for (;;) {
		unsigned long long ratio;
		char *map, *p;
		int idx = 0;
		char c;

		nanosleep(&intv_ts, NULL);

		map = mmap(NULL, map_size, PROT_READ | PROT_WRITE,
			   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (map == MAP_FAILED) {
			perror("mmap");
			return 1;
		}

		/* one write per page so that every page takes a fault */
		for (p = map; p < map + map_size; p += PAGE_BYTES)
			*(volatile unsigned long *)(void *)p = 0xdeadbeef;

		munmap(map, map_size);

		time0 = time1;
		if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
			perror("clock_gettime");
			return 1;
		}
		time1 = ts.tv_sec * 1000000000LLU + ts.tv_nsec;

		/* how many whole intervals this iteration took */
		ratio = (time1 - time0) / (unsigned long long)msecs / 1000000;

		/*
		 * Integer floor(log2(ratio)).  Unlike log2() from <math.h> this
		 * is well defined for ratio == 0 (e.g. when nanosleep() returns
		 * early), where it leaves idx at 0.
		 */
		while (ratio > 1) {
			ratio >>= 1;
			idx++;
		}

		if (idx <= 1) {
			c = '.';
		} else {
			if (idx > 9)
				idx = 9;
			c = '0' + idx;
		}

		/* nothing useful can be reported once stdout is gone */
		if (write(STDOUT_FILENO, &c, 1) < 0) {
			perror("write");
			return 1;
		}
	}
}
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * test-membloat: map SIZE_IN_MBYTES megabytes, write to the mapping at every
 * 4096-byte step so the memory is faulted in, report completion and then
 * sleep forever so the memory stays in use.
 *
 * Usage: test-membloat SIZE_IN_MBYTES [FILENAME]
 *
 * Without FILENAME the mapping is private anonymous memory.  With FILENAME
 * the file is created (or truncated), extended to the requested size and
 * mapped shared.
 *
 * Returns 1 on a usage error or when a system call fails; otherwise it does
 * not return.
 */
int main(int argc, char **argv)
{
	enum { PAGE_BYTES = 4096 };
	struct timespec ts_100s = { .tv_sec = 100 };
	long mbytes = 0, cnt = 0;
	size_t map_bytes;
	char *map, *p;
	int fd = -1;
	int flags;

	if (argc >= 2) {
		char *end;

		/* strtol + end check rejects trailing garbage such as "4300M" */
		mbytes = strtol(argv[1], &end, 10);
		if (end == argv[1] || *end != '\0')
			mbytes = 0;
	}

	/* the upper bound keeps the conversion to bytes below from overflowing */
	if (mbytes <= 0 || (unsigned long)mbytes > ((size_t)-1 >> 20)) {
		fprintf(stderr, "test-membloat SIZE_IN_MBYTES [FILENAME]\n");
		return 1;
	}
	map_bytes = (size_t)mbytes << 20;

	if (argc >= 3) {
		fd = open(argv[2], O_CREAT|O_TRUNC|O_RDWR, S_IRWXU);
		if (fd < 0) {
			perror("open");
			return 1;
		}

		/* a size that does not fit in off_t is left for ftruncate() to reject */
		if (ftruncate(fd, (off_t)map_bytes)) {
			perror("ftruncate");
			return 1;
		}

		flags = MAP_SHARED;
	} else {
		flags = MAP_ANONYMOUS | MAP_PRIVATE;
	}

	map = mmap(NULL, map_bytes, PROT_READ | PROT_WRITE, flags, fd, 0);
	if (map == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	for (p = map; p < map + map_bytes; p += PAGE_BYTES) {
		*(volatile unsigned long *)(void *)p = 0xdeadbeef;
		cnt++;
	}

	printf("faulted in %ld mbytes, %ld pages\n", mbytes, cnt);
	/*
	 * The process never exits normally, so without an explicit flush the
	 * message would stay in the stdio buffer whenever stdout is a pipe or
	 * a file.
	 */
	fflush(stdout);

	/* keep the mapping alive until the process is killed */
	for (;;)
		nanosleep(&ts_100s, NULL);
}