Re: [PATCH -mm v2 1/3] mm/oom_kill: remove the wrong fatal_signal_pending() check in oom_kill_process()

From: Tetsuo Handa
Date: Fri Oct 02 2015 - 08:33:31 EST


Michal Hocko wrote:
> > Since T sends SIGKILL to all clone(CLONE_VM) tasks upon coredump, P needs
> > to do
>
> It does that only to all threads in the _same_ thread group AFAIU.

I'm confused. What do you mean by the _same_ thread group?

I can observe that SIGKILL is sent to all

clone(CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)
clone(CLONE_SIGHAND | CLONE_VM)
clone(CLONE_VM)

threads upon coredump.

---------- testing program start ----------
#define _GNU_SOURCE
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include <fcntl.h>
#include <sched.h>
#include <sys/mman.h>

/*
 * Entry point handed to clone() by main(). After a two second sleep it
 * loops forever, mapping /proc/self/exe 10000 times (one 4096-byte,
 * read-only, private mapping each) and then unmapping every one of them.
 *
 * The argument is ignored. The while (1) loop has no exit, so the final
 * return statement is never reached.
 */
static int file_mapper(void *unused)
{
/* NOTE(review): the open() result is not checked before being used by mmap(). */
const int fd = open("/proc/self/exe", O_RDONLY);
/* main() allocates only 4 * 1024 bytes of stack for each child. */
void *ptr[10000]; /* Will cause SIGSEGV due to stack overflow */
int i;
/* NOTE(review): presumably gives main() time to finish creating the children - confirm. */
sleep(2);
while (1) {
/* Map phase: the mmap() results are stored unchecked. */
for (i = 0; i < 10000; i++)
ptr[i] = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd,
0);
/* Unmap phase: release everything recorded by the loop above. */
for (i = 0; i < 10000; i++)
munmap(ptr[i], 4096);
}
return 0;
}

/*
 * Create three batches of five children that each run file_mapper(), then
 * block in pause() forever.
 *
 * The batches differ only in the flags passed to clone():
 *   1. CLONE_THREAD | CLONE_SIGHAND | CLONE_VM
 *   2. CLONE_SIGHAND | CLONE_VM
 *   3. CLONE_VM
 *
 * A failed malloc() or clone() abandons the remainder of the current batch
 * only; the following batches are still attempted. Child stacks are never
 * freed.
 */
int main(int argc, char *argv[])
{
	enum { STACK_BYTES = 4 * 1024, CHILDREN_PER_BATCH = 5 };
	static const int batch_flags[] = {
		CLONE_THREAD | CLONE_SIGHAND | CLONE_VM,
		CLONE_SIGHAND | CLONE_VM,
		CLONE_VM,
	};
	const size_t nr_batches = sizeof(batch_flags) / sizeof(batch_flags[0]);
	size_t batch;

	for (batch = 0; batch < nr_batches; batch++) {
		int spawned;

		for (spawned = 0; spawned < CHILDREN_PER_BATCH; spawned++) {
			char *stack = malloc(STACK_BYTES);

			if (!stack)
				break;
			/* clone() is handed the end of the allocation as the child stack. */
			if (clone(file_mapper, stack + STACK_BYTES,
				  batch_flags[batch], NULL) == -1)
				break;
		}
	}

	/* Keep the parent alive; pause() is simply re-entered whenever it returns. */
	for (;;)
		pause();
	return 0;
}
---------- testing program end ----------

---------- debug printk() patch start ----------
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -295,6 +295,8 @@ static int zap_process(struct task_struct *start, int exit_code, int flags)
for_each_thread(start, t) {
task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
if (t != current && t->mm) {
+ printk(KERN_INFO "Setting SIGKILL to %s(%u)\n",
+ t->comm, t->pid);
sigaddset(&t->pending.signal, SIGKILL);
signal_wake_up(t, 1);
nr++;
---------- debug printk() patch end ----------

---------- kernel log start ----------
[ 4829.770899] a.out[11614]: segfault at 1e1f768 ip 00000000004007be sp 0000000001e1f770 error 6 in a.out[400000+1000]
[ 4829.774190] Setting SIGKILL to a.out(11613)
[ 4829.775954] Setting SIGKILL to a.out(11615)
[ 4829.777191] Setting SIGKILL to a.out(11616)
[ 4829.778381] Setting SIGKILL to a.out(11617)
[ 4829.779537] Setting SIGKILL to a.out(11618)
[ 4829.781057] Setting SIGKILL to a.out(11619)
[ 4829.782236] Setting SIGKILL to a.out(11620)
[ 4829.783401] Setting SIGKILL to a.out(11621)
[ 4829.784569] Setting SIGKILL to a.out(11622)
[ 4829.785700] Setting SIGKILL to a.out(11623)
[ 4829.786848] Setting SIGKILL to a.out(11624)
[ 4829.788001] Setting SIGKILL to a.out(11625)
[ 4829.789132] Setting SIGKILL to a.out(11626)
[ 4829.790332] Setting SIGKILL to a.out(11627)
[ 4829.791593] Setting SIGKILL to a.out(11628)
[ 4829.792941] a.out[11624]: segfault at 1e29808 ip 00000000004007be sp 0000000001e29810 error 6 in a.out[400000+1000]
[ 4829.795493] a.out[11622]: segfault at 1e277e8 ip 00000000004007be sp 0000000001e277f0 error 6
[ 4829.797171] a.out[11623]: segfault at 1e287f8 ip 00000000004007be sp 0000000001e28800 error 6
[ 4829.797545] a.out[11621]: segfault at 1e267d8 ip 00000000004007be sp 0000000001e267e0 error 6
[ 4829.797547] a.out[11618]: segfault at 1e237a8 ip 00000000004007be sp 0000000001e237b0 error 6
[ 4829.797548] a.out[11617]: segfault at 1e22798 ip 00000000004007be sp 0000000001e227a0 error 6
[ 4829.797550] a.out[11619]: segfault at 1e247b8 ip 00000000004007be sp 0000000001e247c0 error 6
[ 4829.797552] a.out[11620]: segfault at 1e257c8 ip 00000000004007be sp 0000000001e257d0 error 6
[ 4829.802631] a.out[11615]: segfault at 1e20778 ip 00000000004007be sp 0000000001e20780 error 6
[ 4829.802633] in a.out[400000+1000]
[ 4829.802639] in a.out[400000+1000]
[ 4829.802642] in a.out[400000+1000]
[ 4829.802655] in a.out[400000+1000]
[ 4829.802659] in a.out[400000+1000]
[ 4829.802662] in a.out[400000+1000]
[ 4829.814605] in a.out[400000+1000]
[ 4829.819500] in a.out[400000+1000]
---------- kernel log end ----------
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/