Re: [RFC -v2] panic_on_oom_timeout

From: Tetsuo Handa
Date: Sat Jun 20 2015 - 03:58:17 EST


Tetsuo Handa wrote:
> One case is that the system can not panic of threads are unable to call
> out_of_memory() for some reason.
^ if

> Well, if without analysis purpose,
>
> if (time_after(jiffies, oom_start + sysctl_panic_on_oom_timeout * HZ))
> panic();
>
> (that is, pass the jiffies as of calling out_of_memory() for the first time
> of this memory allocation request as an argument to out_of_memory(), and
> compare at check_panic_on_oom()) is sufficient? Very simple implementation
> because we do not use mod_timer()/del_timer().

Here is an untested patch.

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index b5b4278..4c64b92 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -355,7 +355,7 @@ static void moom_callback(struct work_struct *ignored)
{
mutex_lock(&oom_lock);
if (!out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL),
- GFP_KERNEL, 0, NULL, true))
+ GFP_KERNEL, 0, NULL, true, NULL))
pr_info("OOM request ignored because killer is disabled\n");
mutex_unlock(&oom_lock);
}
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 7deecb7..75525e9 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -64,14 +64,16 @@ extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,

extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
int order, const nodemask_t *nodemask,
- struct mem_cgroup *memcg);
+ struct mem_cgroup *memcg,
+ const unsigned long *oom_start);

extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
unsigned long totalpages, const nodemask_t *nodemask,
bool force_kill);

extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
- int order, nodemask_t *mask, bool force_kill);
+ int order, nodemask_t *mask, bool force_kill,
+ const unsigned long *oom_start);

extern void exit_oom_victim(void);

@@ -99,4 +101,5 @@ static inline bool task_will_free_mem(struct task_struct *task)
extern int sysctl_oom_dump_tasks;
extern int sysctl_oom_kill_allocating_task;
extern int sysctl_panic_on_oom;
+extern int sysctl_panic_on_oom_timeout;
#endif /* _INCLUDE_LINUX_OOM_H */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c566b56..74a1b68 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1162,6 +1162,14 @@ static struct ctl_table vm_table[] = {
.extra2 = &two,
},
{
+ .procname = "panic_on_oom_timeout",
+ .data = &sysctl_panic_on_oom_timeout,
+ .maxlen = sizeof(sysctl_panic_on_oom_timeout),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ },
+ {
.procname = "oom_kill_allocating_task",
.data = &sysctl_oom_kill_allocating_task,
.maxlen = sizeof(sysctl_oom_kill_allocating_task),
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index acb93c5..ab1ae3e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1563,7 +1563,8 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
goto unlock;
}

- check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg);
+ check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg,
+ NULL);
totalpages = mem_cgroup_get_limit(memcg) ? : 1;
for_each_mem_cgroup_tree(iter, memcg) {
struct css_task_iter it;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index dff991e..9d30f2e 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -40,6 +40,7 @@
#include <trace/events/oom.h>

int sysctl_panic_on_oom;
+int sysctl_panic_on_oom_timeout;
int sysctl_oom_kill_allocating_task;
int sysctl_oom_dump_tasks = 1;

@@ -602,7 +603,8 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
*/
void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
int order, const nodemask_t *nodemask,
- struct mem_cgroup *memcg)
+ struct mem_cgroup *memcg,
+ const unsigned long *oom_start)
{
if (likely(!sysctl_panic_on_oom))
return;
@@ -614,6 +616,14 @@ void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
*/
if (constraint != CONSTRAINT_NONE)
return;
+ /*
+ * panic_on_oom_timeout only affects panic_on_oom == 1 and
+ * CONSTRAINT_NONE.
+ */
+ if (sysctl_panic_on_oom_timeout && oom_start &&
+ time_before(jiffies,
+ *oom_start + sysctl_panic_on_oom_timeout * HZ))
+ return;
}
dump_header(NULL, gfp_mask, order, memcg, nodemask);
panic("Out of memory: %s panic_on_oom is enabled\n",
@@ -641,6 +651,8 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
* @order: amount of memory being requested as a power of 2
* @nodemask: nodemask passed to page allocator
* @force_kill: true if a task must be killed, even if others are exiting
+ * @oom_start: Pointer to jiffies as of calling this function for the first
+ * time of this memory allocation request. Ignored if NULL.
*
* If we run out of memory, we have the choice between either
* killing a random task (bad), letting the system crash (worse)
@@ -648,7 +660,8 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
* don't have to be perfect here, we just have to be good.
*/
bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
- int order, nodemask_t *nodemask, bool force_kill)
+ int order, nodemask_t *nodemask, bool force_kill,
+ const unsigned long *oom_start)
{
const nodemask_t *mpol_mask;
struct task_struct *p;
@@ -687,7 +700,8 @@ bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
constraint = constrained_alloc(zonelist, gfp_mask, nodemask,
&totalpages);
mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL;
- check_panic_on_oom(constraint, gfp_mask, order, mpol_mask, NULL);
+ check_panic_on_oom(constraint, gfp_mask, order, mpol_mask, NULL,
+ oom_start);

if (sysctl_oom_kill_allocating_task && current->mm &&
!oom_unkillable_task(current, NULL, nodemask) &&
@@ -734,7 +748,7 @@ void pagefault_out_of_memory(void)
if (!mutex_trylock(&oom_lock))
return;

- if (!out_of_memory(NULL, 0, 0, NULL, false)) {
+ if (!out_of_memory(NULL, 0, 0, NULL, false, NULL)) {
/*
* There shouldn't be any user tasks runnable while the
* OOM killer is disabled, so the current task has to
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 73aa335..3a75fe8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2678,7 +2678,9 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)

static inline struct page *
__alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
- const struct alloc_context *ac, unsigned long *did_some_progress)
+ const struct alloc_context *ac,
+ unsigned long *did_some_progress,
+ unsigned long *oom_start)
{
struct page *page;

@@ -2731,7 +2733,10 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
goto out;
}
/* Exhausted what can be done so it's blamo time */
- if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false)
+ if (!*oom_start)
+ *oom_start = jiffies;
+ if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false,
+ oom_start)
|| WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
*did_some_progress = 1;
out:
@@ -2968,6 +2973,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
enum migrate_mode migration_mode = MIGRATE_ASYNC;
bool deferred_compaction = false;
int contended_compaction = COMPACT_CONTENDED_NONE;
+ unsigned long oom_start = 0;

/*
* In the slowpath, we sanity check order to avoid ever trying to
@@ -3122,7 +3128,8 @@ retry:
}

/* Reclaim has failed us, start killing things */
- page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
+ page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress,
+ &oom_start);
if (page)
goto got_pg;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/