[PATCH] mm: introduce oom_kill_disable sysctl knob

From: Minchan Kim
Date: Fri Nov 06 2020 - 15:32:45 EST


It is hard to run tests that are supposed to work under heavy memory
pressure (e.g., by injecting a memory hog), because the out-of-memory
killer readily kills one of the processes. Either the system breaks,
or it loses the memory-pressure state because plenty of memory
becomes free right after the kill.
Even though we could set the oom_score_adj of existing processes to
-1000, that would not cover processes forked later for the job.

This knob is handy for keeping the system under memory pressure.

Signed-off-by: Minchan Kim <minchan@xxxxxxxxxx>
---
Documentation/admin-guide/sysctl/vm.rst | 14 ++++++++++++++
include/linux/mm.h | 2 ++
include/linux/oom.h | 1 +
kernel/sysctl.c | 9 +++++++++
mm/oom_kill.c | 24 ++++++++++++++++++++++++
5 files changed, 50 insertions(+)

diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index f455fa00c00f..49dcedfaf0c0 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -694,6 +694,20 @@ is used in oom_kill_allocating_task.

The default value is 0.

+oom_kill_disable
+================
+
+This disables or enables OOM killing in out-of-memory situations.
+
+If this is set to one, the OOM killer is disabled, so an OOM kill never
+happens in out-of-memory situations. This can put the system into a
+dangerous state because memory allocations may fail, so users should
+be careful when using it.
+
+If this is set to zero, the OOM killer is enabled so OOM kill happens
+in out-of-memory situations.
+
+The default value is 0.

overcommit_kbytes
=================
diff --git a/include/linux/mm.h b/include/linux/mm.h
index db6ae4d3fb4e..a98400cee341 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -215,6 +215,8 @@ int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *,
loff_t *);
int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
loff_t *);
+int oom_kill_disable_handler(struct ctl_table *, int, void *, size_t *,
+ loff_t *);

#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))

diff --git a/include/linux/oom.h b/include/linux/oom.h
index 2db9a1432511..0f378498e6aa 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -126,5 +126,6 @@ extern struct task_struct *find_lock_task_mm(struct task_struct *p);
/* sysctls */
extern int sysctl_oom_dump_tasks;
extern int sysctl_oom_kill_allocating_task;
+extern int sysctl_oom_kill_disable;
extern int sysctl_panic_on_oom;
#endif /* _INCLUDE_LINUX_OOM_H */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index afad085960b8..1fe872fe1c05 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2690,6 +2690,15 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "oom_kill_disable",
+ .data = &sysctl_oom_kill_disable,
+ .maxlen = sizeof(sysctl_oom_kill_disable),
+ .mode = 0644,
+ .proc_handler = oom_kill_disable_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
{
.procname = "oom_dump_tasks",
.data = &sysctl_oom_dump_tasks,
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 8b84661a6410..0f48cdeeb1e7 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -53,6 +53,7 @@

int sysctl_panic_on_oom;
int sysctl_oom_kill_allocating_task;
+int sysctl_oom_kill_disable;
int sysctl_oom_dump_tasks = 1;

/*
@@ -72,6 +73,29 @@ static inline bool is_memcg_oom(struct oom_control *oc)
return oc->memcg != NULL;
}

+/**
+ * oom_kill_disable_handler - sysctl handler for the oom_kill_disable knob
+ * @table: sysctl table entry being accessed
+ * @write: non-zero when userspace is writing a new value
+ * @buffer: buffer holding the value to parse (write) or to fill (read)
+ * @lenp: in/out length of @buffer
+ * @ppos: in/out file position
+ *
+ * The value is parsed into a local copy and only stored in
+ * sysctl_oom_kill_disable once the OOM killer has actually been switched,
+ * so a failed write does not leave the knob reporting a state that was
+ * never applied.
+ *
+ * Return: 0 on success, the error from proc_dointvec_minmax() if the
+ * access fails, or -EBUSY if the OOM killer could not be disabled or
+ * oom_lock could not be taken.
+ */
+int oom_kill_disable_handler(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table tmp = *table;
+	int val = sysctl_oom_kill_disable;
+	int ret;
+
+	/* Parse into (or read from) a local copy rather than the live knob. */
+	tmp.data = &val;
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+	if (ret || !write)
+		return ret;
+
+	if (val == 1) {
+		/* HZ is handed to oom_killer_disable() as its timeout. */
+		if (!oom_killer_disable(HZ))
+			return -EBUSY;
+	} else {
+		/*
+		 * NOTE(review): oom_lock presumably keeps this from racing
+		 * with an OOM kill that is in progress - confirm.
+		 */
+		if (mutex_lock_killable(&oom_lock))
+			return -EBUSY;
+		oom_killer_enable();
+		mutex_unlock(&oom_lock);
+	}
+
+	/* The switch succeeded; publish the new value. */
+	sysctl_oom_kill_disable = val;
+	return 0;
+}
+
#ifdef CONFIG_NUMA
/**
* oom_cpuset_eligible() - check task eligiblity for kill
--
2.29.1.341.ge80a0c044ae-goog