Re: [PATCH v2] mm/swap: fix swap_info_struct race between swapoff and get_swap_pages()
From: Aaron Lu
Date: Thu Apr 06 2023 - 08:13:14 EST
On Wed, Apr 05, 2023 at 12:08:47AM +0800, Rongwei Wang wrote:
> Hello
>
> I have fixed up some things based on Patch v1. To help all readers
> and reviewers
>
> reproduce this bug, I am sharing a reproducer here:
I reproduced this problem under a VM this way:
$ sudo ./stress-ng --swap 1
// on another terminal
$ for i in `seq 8`; do ./swap & done
Looks simpler than yours :-)
(I didn't realize you had posted your reproducer here since I'm not CCed,
and I only found it after I had come up with mine.)
Then the warning message normally appears within a few seconds.
Here is the code for the above swap prog:
#include <stdio.h>
#include <stddef.h>
#include <sys/mman.h>
/* Length in bytes of the anonymous mapping that is dirtied and paged out. */
#define SIZE 0x100000

/*
 * Swap test program: map SIZE bytes of private anonymous memory, then loop
 * forever writing one byte every 0x1000 bytes of the mapping (presumably one
 * write per page -- confirm the page size on the target) and calling
 * madvise(MADV_PAGEOUT) on the whole range.
 *
 * Returns -1 if mmap() fails. Otherwise it only returns when madvise()
 * fails, passing back madvise()'s non-zero return value after reporting
 * the error with perror().
 */
int main(void)
{
	char *buf;
	int off, rc;

	buf = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return -1;
	}

	for (;;) {
		/* Write to the mapping again before each page-out request. */
		for (off = 0; off < SIZE; off += 0x1000)
			buf[off] = 1;

		rc = madvise(buf, SIZE, MADV_PAGEOUT);
		if (rc != 0) {
			perror("madvise");
			return rc;
		}
	}
}
Unfortunately, this test prog does not work on kernels before v5.4 because
MADV_PAGEOUT was introduced in v5.4. I tested on v5.4 and the problem is
also there.
I haven't found a way to trigger swapping while swap devices come and go on
kernels before v5.4; I tried putting the test prog in a memcg with a memory
limit, but then the prog is easily killed because there is nowhere to swap out to.
>
> swap_bomb.sh
>
> #!/usr/bin/env bash
>
> stress-ng -a 1 --class vm -t 12h --metrics --times -x bigheap,stackmmap,mlock,vm-splice,mmapaddr,mmapfixed,mmapfork,mmaphuge,mmapmany,mprotect,mremap,msync,msyncmany,physpage,tmpfs,vm-addr,vm-rw,brk,vm-segv,userfaultfd,malloc,stack,munmap,dev-shm,bad-altstack,shm-sysv,pageswap,madvise,vm,shm,env,mmap
> --verify -v &
> stress-ng -a 1 --class vm -t 12h --metrics --times -x bigheap,stackmmap,mlock,vm-splice,mmapaddr,mmapfixed,mmapfork,mmaphuge,mmapmany,mprotect,mremap,msync,msyncmany,physpage,tmpfs,vm-addr,vm-rw,brk,vm-segv,userfaultfd,malloc,stack,munmap,dev-shm,bad-altstack,shm-sysv,pageswap,madvise,vm,shm,env,mmap
> --verify -v &
> stress-ng -a 1 --class vm -t 12h --metrics --times -x bigheap,stackmmap,mlock,vm-splice,mmapaddr,mmapfixed,mmapfork,mmaphuge,mmapmany,mprotect,mremap,msync,msyncmany,physpage,tmpfs,vm-addr,vm-rw,brk,vm-segv,userfaultfd,malloc,stack,munmap,dev-shm,bad-altstack,shm-sysv,pageswap,madvise,vm,shm,env,mmap
> --verify -v &
> stress-ng -a 1 --class vm -t 12h --metrics --times -x bigheap,stackmmap,mlock,vm-splice,mmapaddr,mmapfixed,mmapfork,mmaphuge,mmapmany,mprotect,mremap,msync,msyncmany,physpage,tmpfs,vm-addr,vm-rw,brk,vm-segv,userfaultfd,malloc,stack,munmap,dev-shm,bad-altstack,shm-sysv,pageswap,madvise,vm,shm,env,mmap
> --verify -v
>
>
> madvise_shared.c
>
> #include <stdio.h>
> #include <stdlib.h>
> #include <sys/mman.h>
> #include <unistd.h>
>
> #define MSIZE (1024 * 1024 * 2)
>
> int main()
> {
> char *shm_addr;
> unsigned long i;
>
> while (1) {
> // Map shared memory segment
> shm_addr =
> mmap(NULL, MSIZE, PROT_READ | PROT_WRITE,
> MAP_SHARED | MAP_ANONYMOUS, -1, 0);
> if (shm_addr == MAP_FAILED) {
> perror("Failed to map shared memory segment");
> exit(EXIT_FAILURE);
> }
>
> for (i = 0; i < MSIZE; i++) {
> shm_addr[i] = 1;
> }
>
> // Advise kernel on usage pattern of shared memory
> if (madvise(shm_addr, MSIZE, MADV_PAGEOUT) == -1) {
> perror
> ("Failed to advise kernel on shared memory
> usage");
> exit(EXIT_FAILURE);
> }
>
> for (i = 0; i < MSIZE; i++) {
> shm_addr[i] = 1;
> }
>
> // Advise kernel on usage pattern of shared memory
> if (madvise(shm_addr, MSIZE, MADV_PAGEOUT) == -1) {
> perror
> ("Failed to advise kernel on shared memory
> usage");
> exit(EXIT_FAILURE);
> }
> // Use shared memory
> printf("Hello, shared memory: 0x%lx\n", shm_addr);
>
> // Unmap shared memory segment
> if (munmap(shm_addr, MSIZE) == -1) {
> perror("Failed to unmap shared memory segment");
> exit(EXIT_FAILURE);
> }
> }
>
> return 0;
> }
>
> The bug reproduces more quickly (in about 2~5 minutes) if more instances of
> swap_bomb.sh and madvise_shared are run concurrently.
>
> Thanks.