Re: [PATCH v3] coredump: exit_files() in coredump_wait() if MMF_DUMP_MAPPED_SHARED is not set

From: Xin Zhao

Date: Fri Jun 19 2026 - 23:05:43 EST


On Fri, 19 Jun 2026 19:33:16 +0200 Mateusz Guzik <mjguzik@xxxxxxxxx> wrote:

> The claim is the coredump takes "some time" without specifying what kind
> of window is it (seconds, minutes?), nor where said time is spent.
>
> For example it is known that a big mmapped areas are slow to dump even
> if they are sparsely populated.
>
> So I would suggest profiling what exactly happens in this case. It is
> virtually guaranteed the time can be shortened, but it is plausible even
> with fixups it will be too long.

I tested it by running a first testflock instance, waiting until it printed
'get flock success!', and then running a second testflock instance.
The second instance sends signal 6 (SIGABRT) to the first one to trigger a
coredump. The second instance then waited 11 seconds to get the lock. (The
wait may be shorter if I do not busy-poll for the lock, i.e. if I add a sleep
in the while loop, because the two testflock instances are both started over
ssh and share the same CPU; but it still waits a long time, about 5 seconds.)

./testflock
get flock success!
Aborted (core dumped)

./testflock
get flock fail!
lock_pid=8724
get flock success!
waitms=11443


The testflock program (this test program only uses about 500MB of heap
memory; the memory used by our other business processes exceeds this value):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/file.h>
#include <errno.h>
#include <time.h>
#include <signal.h>
#include <sys/syscall.h>

/* Read CLOCK_MONOTONIC and return the reading in whole milliseconds. */
static inline long get_msec(void)
{
	enum { MS_PER_SEC = 1000, NS_PER_MS = 1000000 };
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);

	/* Seconds scaled up to ms, plus the nanosecond part scaled down to ms. */
	return now.tv_sec * MS_PER_SEC + now.tv_nsec / NS_PER_MS;
}

/*
 * fcntl() command numbers used by this test; each is defined here only when
 * the system headers do not already provide it.
 * NOTE(review): presumably these match the interface added by the patch under
 * discussion -- confirm the values against the patched kernel's uapi headers.
 */
#ifndef F_GETFD_EX
#define F_GETFD_EX 18
#endif

#ifndef F_SETFD_EX
#define F_SETFD_EX 19
#endif

/*
 * Flag value that main() passes to fcntl(fd, F_SETFD_EX, ...).
 * NOTE(review): its meaning is not visible in this file; presumably a
 * "close on coredump" style flag -- confirm against the kernel patch.
 */
#define FD_CLOBCOR 2

/*
 * Lock hand-over latency test.
 *
 * Allocates and touches 512 MiB of heap, opens the lock file, and then
 * busy-polls fcntl(F_SETLK) for a whole-file write lock:
 *
 *   - When the lock is obtained, prints "get flock success!" (followed by
 *     "waitms=<n>" if a previous holder was found) and then sleeps forever
 *     while still holding the lock.
 *   - On the first EAGAIN/EACCES failure, prints "get flock fail!", looks up
 *     the current holder with F_GETLK, starts the timer and, unless argv[1]
 *     is "0", sends SIGABRT to the holder. The holder's pid is printed.
 *
 * Returns non-zero if the allocation, the open() or a fcntl(F_SETLK) call
 * fails with EBADF/EINVAL; otherwise it never returns.
 */
int main(int argc, char *argv[])
{
	const char *path = "/map/zhaoxin/flock_tmp";
	const size_t size_mb = 512;
	const size_t size = size_mb * 1024 * 1024;
	struct flock fl = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 0,	/* 0 means "to end of file": lock the whole file */
	};
	volatile char *heap;
	size_t i;
	long start = 0;
	int send_signal = 1;
	int holder_checked = 0;
	int lock_pid = 0;
	int fd;

	if (argc == 2 && strcmp(argv[1], "0") == 0) {
		printf("Do not send signal when fail!\n");
		send_signal = 0;
	}

	heap = malloc(size);
	if (!heap) {
		perror("malloc 失败");
		return 1;
	}
	/*
	 * Touch every byte so the whole allocation is really backed by memory.
	 * The stores go through a volatile pointer so the compiler cannot drop
	 * the loop even though the buffer is never read back.
	 */
	for (i = 0; i < size; i++)
		heap[i] = (char)i;

	/* Open (creating if needed) the lock file; F_WRLCK needs write access. */
	fd = open(path, O_CREAT | O_RDWR, 0777);
	if (fd < 0) {
		perror("open");
		exit(EXIT_FAILURE);
	}

	/*
	 * Best effort: a kernel without this command rejects it, which must not
	 * stop the test, so only report the failure and carry on.
	 */
	if (fcntl(fd, F_SETFD_EX, FD_CLOBCOR) < 0)
		perror("fcntl(F_SETFD_EX)");

	for (;;) {
		struct flock holder;
		int err;

		if (fcntl(fd, F_SETLK, &fl) == 0) {
			printf("get flock success!\n");
			if (start)
				printf("waitms=%ld\n", get_msec() - start);
			/* We never exit normally, so push the output out now. */
			fflush(stdout);
			for (;;)
				sleep(1000);
		}

		/* errno is only meaningful here, right after a failed call. */
		err = errno;
		if (err == EBADF || err == EINVAL) {
			printf("errno=%d\n", err);
			return EXIT_FAILURE;
		}

		/* Only look up and signal the holder once; afterwards just poll. */
		if (holder_checked || (err != EAGAIN && err != EACCES))
			continue;
		holder_checked = 1;

		printf("get flock fail!\n");

		memset(&holder, 0, sizeof(holder));
		holder.l_type = F_WRLCK;
		holder.l_whence = SEEK_SET;
		holder.l_start = 0;
		holder.l_len = 0;
		if (fcntl(fd, F_GETLK, &holder) < 0) {
			perror("fcntl(F_GETLK)");
			continue;
		}

		if (holder.l_type != F_UNLCK) {
			lock_pid = holder.l_pid;
			start = get_msec();

			/* tgkill(tgid, tid, sig): target the holder's main thread. */
			if (send_signal &&
			    syscall(SYS_tgkill, lock_pid, lock_pid, SIGABRT) < 0)
				perror("tgkill");
		}
		printf("lock_pid=%d\n", lock_pid);
	}
}

Thanks
Xin Zhao