[PATCH] pid: add handling of too many zombie processes
From: liuq
Date: Wed Feb 08 2023 - 04:49:26 EST
A common situation is that a parent process forks many child
processes to execute tasks but never calls wait()/waitpid() when
those children exit, so a large number of child processes are left
behind as zombie processes.
If the number of processes in the system then reaches
kernel.pid_max, every new fork() fails and the system is no longer
able to execute any command (unless an old process exits and frees
a pid).
eg:
[root@lq-workstation ~]# ls
-bash: fork: retry: Resource temporarily unavailable
-bash: fork: retry: Resource temporarily unavailable
-bash: fork: retry: Resource temporarily unavailable
-bash: fork: retry: Resource temporarily unavailable
-bash: fork: Resource temporarily unavailable
[root@lq-workstation ~]# reboot
-bash: fork: retry: Resource temporarily unavailable
-bash: fork: retry: Resource temporarily unavailable
-bash: fork: retry: Resource temporarily unavailable
-bash: fork: retry: Resource temporarily unavailable
-bash: fork: Resource temporarily unavailable
This patch handles the situation in alloc_pid(): when pid allocation
fails with -ENOSPC, it looks for the process with the most zombie
children and, if that process has more than 10 of them (or some other
reasonable threshold?), kills it so that its zombie children are
reaped and their pids are released.
Signed-off-by: liuq <liuq131@xxxxxxxxxxxxxxx>
---
include/linux/mm.h | 2 ++
kernel/pid.c | 6 +++-
mm/oom_kill.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 77 insertions(+), 1 deletion(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8f857163ac89..afcff08a3878 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1940,6 +1940,8 @@ static inline void clear_page_pfmemalloc(struct page *page)
* Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
*/
extern void pagefault_out_of_memory(void);
+extern void pid_max_oom_check(struct pid_namespace *ns);
+
#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
#define offset_in_thp(page, p) ((unsigned long)(p) & (thp_size(page) - 1))
diff --git a/kernel/pid.c b/kernel/pid.c
index 3fbc5e46b721..1a9a60e19ab6 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -237,7 +237,11 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
idr_preload_end();
if (nr < 0) {
- retval = (nr == -ENOSPC) ? -EAGAIN : nr;
+ retval = nr;
+ if (nr == -ENOSPC) {
+ retval = -EAGAIN;
+ pid_max_oom_check(tmp);
+ }
goto out_free;
}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 1276e49b31b0..18d05d706f48 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -1260,3 +1260,73 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
return -ENOSYS;
#endif /* CONFIG_MMU */
}
+
+/*
+ * oom_pid_evaluate_task - count the zombie children of one process and
+ * remember the process with the most zombie children seen so far.
+ * @p: process to examine
+ * @max_zombie_task: in/out; set to @p when @p beats the current maximum
+ * @max_zombie_num: in/out; zombie-child count belonging to *@max_zombie_task
+ *
+ * A child is linked on the ->children list of the thread that forked it,
+ * not on the list of the thread-group leader, so every thread of @p is
+ * walked; looking at p->children alone misses children created by the
+ * other threads of the process.
+ *
+ * The ->children lists are modified under tasklist_lock and are not
+ * RCU-protected, so the caller is expected to hold tasklist_lock (at
+ * least for reading) around this call.
+ */
+static void oom_pid_evaluate_task(struct task_struct *p,
+		struct task_struct **max_zombie_task, int *max_zombie_num)
+{
+	struct task_struct *t;
+	struct task_struct *child;
+	int zombie_num = 0;
+
+	for_each_thread(p, t) {
+		list_for_each_entry(child, &t->children, sibling) {
+			if (child->exit_state == EXIT_ZOMBIE)
+				zombie_num++;
+		}
+	}
+
+	/* Strictly greater: on a tie the task found first is kept. */
+	if (zombie_num > *max_zombie_num) {
+		*max_zombie_num = zombie_num;
+		*max_zombie_task = p;
+	}
+}
+/* A process needs more than this many zombie children to be picked. */
+#define MAX_ZOMBIE_NUM 10
+
+/*
+ * pid_max_bad_process - pick the process with the most zombie children.
+ * @ns: pid namespace in which pid allocation failed
+ *
+ * Global init and kernel threads are never picked: killing init brings
+ * the machine down and kernel threads cannot be killed. Tasks without a
+ * pid in @ns are skipped as well; they would be reported as pid 0 and
+ * live outside the namespace that ran out of pids.
+ *
+ * NOTE(review): the zombie count itself is not filtered by namespace, and
+ * the scan visits every task on each failed allocation with no rate limit.
+ *
+ * Return: the chosen task with a reference held (the caller owns it and
+ * must drop it or pass it on), or NULL when no eligible process has more
+ * than MAX_ZOMBIE_NUM zombie children.
+ */
+static struct task_struct *pid_max_bad_process(struct pid_namespace *ns)
+{
+	struct task_struct *max_zombie_task = NULL;
+	struct task_struct *p;
+	int max_zombie_num = 0;
+
+	/*
+	 * tasklist_lock, not just RCU: the ->children lists walked by
+	 * oom_pid_evaluate_task() are only stable under tasklist_lock.
+	 */
+	read_lock(&tasklist_lock);
+	for_each_process(p) {
+		if (is_global_init(p) || (p->flags & PF_KTHREAD))
+			continue;
+		if (!task_pid_nr_ns(p, ns))
+			continue;
+		oom_pid_evaluate_task(p, &max_zombie_task, &max_zombie_num);
+	}
+
+	/*
+	 * Pin the winner before dropping the lock; without a reference the
+	 * task could exit and be freed while the caller still uses it.
+	 */
+	if (max_zombie_num > MAX_ZOMBIE_NUM)
+		get_task_struct(max_zombie_task);
+	else
+		max_zombie_task = NULL;
+	read_unlock(&tasklist_lock);
+
+	if (max_zombie_task)
+		pr_info("process %d has %d zombie child\n",
+			task_pid_nr_ns(max_zombie_task, ns), max_zombie_num);
+
+	return max_zombie_task;
+}
+
+/*
+ * pid_max_oom_kill_process - kill @task through the OOM killer.
+ * @task: victim; the caller passes in a reference
+ *
+ * The caller's reference on @task is consumed on every path, including
+ * the ones that give up without killing anything.
+ */
+static void pid_max_oom_kill_process(struct task_struct *task)
+{
+	struct oom_control oc = {
+		.zonelist = NULL,
+		.nodemask = NULL,
+		.memcg = NULL,
+		.gfp_mask = 0,
+		.order = 0,
+		.chosen = task,
+	};
+
+	if (mem_cgroup_oom_synchronize(true))
+		goto out_put;
+
+	/* Best effort: do not wait behind an OOM kill already in progress. */
+	if (!mutex_trylock(&oom_lock))
+		goto out_put;
+
+	/*
+	 * oom_kill_process() drops the reference on oc.chosen itself.
+	 * The victim is not the allocating task, so the message no longer
+	 * claims "oom_kill_allocating_task".
+	 */
+	oom_kill_process(&oc, "Out of pid max");
+	mutex_unlock(&oom_lock);
+	return;
+
+out_put:
+	put_task_struct(task);
+}
+
+/*
+ * pid_max_oom_check - react to pid exhaustion in @ns.
+ * @ns: pid namespace in which pid allocation failed with -ENOSPC
+ *
+ * Looks for a process hoarding zombie children and, if one is found,
+ * kills it so that its children can be reaped and their pids released.
+ * Does nothing when no process exceeds the zombie threshold.
+ */
+void pid_max_oom_check(struct pid_namespace *ns)
+{
+	struct task_struct *p = pid_max_bad_process(ns);
+
+	if (!p)
+		return;
+
+	pr_info("oom_kill process %d\n", task_pid_nr_ns(p, ns));
+	/* Hands our reference on p over to the callee. */
+	pid_max_oom_kill_process(p);
+}
--
2.27.0