Re: [PATCH] mm: throttle and inc min_seq when both page types reach MIN_NR_GENS

From: Zhaoyang Huang
Date: Wed Oct 09 2024 - 21:28:29 EST


On Thu, Oct 10, 2024 at 4:52 AM Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> wrote:
>
> On Wed, 9 Oct 2024 15:49:53 +0800 "zhaoyang.huang" <zhaoyang.huang@xxxxxxxxxx> wrote:
>
> > From: Zhaoyang Huang <zhaoyang.huang@xxxxxxxxxx>
> >
> > The test case in [1] leads to a system hang, which is caused by a local
> > watchdog thread being starved for over 20s on a 5.5GB RAM ANDROID15(v6.6)
> > system. This commit solves the issue by having the reclaimer be throttled
> > and increasing min_seq if both page types reach MIN_NR_GENS, which may
> > introduce a livelock of switching type while holding lruvec->lru_lock.
> >
> > [1]
> > Launch the command below 8 times simultaneously; each instance allocates
> > 1GB of virtual memory and accesses it from user space.
> > $ costmem -c1024000 -b12800 -o0 &
> >
>
> That looks like a pretty simple testcase. Do people know where to get
> `costmem' from?
Sorry, I have only just realized that this is an internal test tool
integrated into the SDK by our folks. Here is an old version of costmem
which I can share:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>

/* Size of each allocated block, in KB (set with -b). */
unsigned int block_size = 64;
/* Total amount of memory to allocate, in KB (set with -c). */
unsigned int cost_size = 1024 * 1024;
/* Value written to /proc/<pid>/oom_adj for this process (set with -o). */
int oom_adj = 15;

/* Print the command-line synopsis and an example invocation to stdout. */
static void usage(void)
{
    fputs("Usage:\n"
          " costmem [-ccost_size(KB) -bblock_size(KB) -oOom_adj(-16 to 15)]\n"
          " such as: costmem -c2048 -b128 -o15\n",
          stdout);
}

/*
 * Parse the -c, -b and -o command-line options and store their values in
 * the globals cost_size, block_size and oom_adj respectively.
 *
 * Each option argument is converted with atoi(); any other value returned
 * by getopt() results in no action here.
 */
void process_options(int argc, char **argv)
{
    for (;;) {
        int ch = getopt(argc, argv, "c:b:o:");

        /* getopt() returns -1 once all options have been consumed. */
        if (ch == -1)
            return;

        if (ch == 'c')
            cost_size = (unsigned int)atoi(optarg);
        else if (ch == 'b')
            block_size = (unsigned int)atoi(optarg);
        else if (ch == 'o')
            oom_adj = atoi(optarg);
    }
}

/*
 * costmem entry point.
 *
 * Writes oom_adj to /proc/<pid>/oom_adj, then allocates cost_size KB of
 * memory in block_size KB chunks, locking each chunk with mlock() and
 * zero-filling it. Afterwards it sleeps forever so that the memory stays
 * held until the process is killed.
 *
 * Returns -EINVAL on a usage error, on an out-of-range option, or when
 * open(), malloc() or mlock() fails. It does not return once every block
 * has been allocated.
 */
int main(int argc, char *argv[])
{
    unsigned int i, max;
    size_t block_bytes;
    char *memory;
    int rval = -EINVAL;
    char text[128] = {0};
    int fd;
    pid_t pid = getpid();

    if (argc < 2) {
        usage();
        return rval;
    } else if (argc == 2) {
        /*
         * A single argument never starts a run: usage is shown if it
         * contains "help", and the program exits either way.
         */
        if (strstr(argv[1], "help"))
            usage();
        return rval;
    }

    process_options(argc, argv);
    if (oom_adj < -16 || oom_adj > 15) {
        printf("Oom_adj must between -16 to 15\n");
        return rval;
    }

    /* A zero block size would divide by zero when computing the block count. */
    if (block_size == 0) {
        printf("block_size must be greater than 0\n");
        return rval;
    }

    /* Reject block sizes whose byte count does not fit in size_t. */
    if (block_size > (size_t)-1 / 1024) {
        printf("block_size is too large\n");
        return rval;
    }
    block_bytes = (size_t)block_size * 1024;

    snprintf(text, sizeof(text), "/proc/%d/oom_adj", (int)pid);

    fd = open(text, O_WRONLY);
    if (-1 == fd) {
        perror("open");
        return rval;
    }

    /* Failing to set oom_adj is reported but does not abort the run. */
    snprintf(text, sizeof(text), "%d", oom_adj);
    if (write(fd, text, strlen(text)) == -1)
        perror("write");
    close(fd);

    printf("Cost mem %u KB, %u KB per Block, oom_adj %d\n", cost_size,
           block_size, oom_adj);

    max = cost_size / block_size;

    /* The blocks are never freed: they must stay allocated and locked. */
    for (i = 0; i < max; i++) {
        unsigned int done = i + 1; /* number of blocks allocated so far */

        memory = malloc(block_bytes);
        if (NULL == memory) {
            perror("malloc");
            return rval;
        }

        if (mlock(memory, block_bytes) == -1) {
            perror("mlock");
            return rval;
        }

        memset(memory, 0, block_bytes);

        /* done <= max, so block_size * done never exceeds cost_size. */
        printf("%uKB,", block_size * done);
        if (9 == done % 10)
            printf("\n");
    }

    /* Report the amount actually allocated: max blocks, not max + 1. */
    printf("Have malloc and mlock %u KB mem\n", block_size * max);
    printf("Have malloc and mlock %u KB mem\n", block_size * max);
    printf("Have malloc and mlock %u KB mem\n", block_size * max);
    /* Flush so the summary is visible even when stdout is not a terminal. */
    fflush(stdout);

    i = 0;
    while (1) {
        sleep(20);
        i++;
        printf(".");
        if (9 == i % 10)
            printf("Please Ctrl+c to kill this APP\n");
        /* Without a newline the dot would otherwise sit in the buffer. */
        fflush(stdout);
    }
    return 0;
}

>
> > --- a/mm/vmscan.c
> > +++ b/mm/vmscan.c
>
> This is a somewhat serious issue, so I'll add the patch for some
> testing, but I'll await feedback from MGLRU developers before
> proceeding further, thanks.
IMHO, MGLRU currently lacks a mechanism like 'too_many_isolated'.
Should we do it this way, or in some other way?
>