Re: [patch 5/5] oom: invoke OOM killer from pagefault handler

From: Kirill Korotaev
Date: Thu Oct 12 2006 - 11:06:49 EST


Nick,

AFAICS, the single-page allocation done in the page fault handler
can fail in only one case: the OOM killer has already picked current
as its victim. So if the allocation failed, current should already
have TIF_MEMDIE set, and we should just kill current.
Selecting another process to kill when the page fault fails means
taking a second victim in addition to the one already being killed.

my 2 cents.

Thanks,
Kirill

> Rather than have the pagefault handler kill a process directly if it gets a
> VM_FAULT_OOM, have it call into the OOM killer.
>
> Only converted a few architectures so far - this is just an RFC.
>
> Index: linux-2.6/mm/oom_kill.c
> ===================================================================
> --- linux-2.6.orig/mm/oom_kill.c
> +++ linux-2.6/mm/oom_kill.c
> @@ -376,6 +376,57 @@ int unregister_oom_notifier(struct notif
> }
> EXPORT_SYMBOL_GPL(unregister_oom_notifier);
>
> +/*
> + * Must be called with cpuset_lock and tasklist_lock held for read.
> + */
> +void __out_of_memory(void)
> +{
> + unsigned long points = 0;
> + struct task_struct *p;
> +
> + if (sysctl_panic_on_oom)
> + panic("out of memory. panic_on_oom is selected\n");
> +retry:
> + /*
> + * Rambo mode: Shoot down a process and hope it solves whatever
> + * issues we may have.
> + */
> + p = select_bad_process(&points);
> +
> + if (PTR_ERR(p) == -1UL)
> + return;
> +
> + /* Found nothing?!?! Either we hang forever, or we panic. */
> + if (!p) {
> + read_unlock(&tasklist_lock);
> + cpuset_unlock();
> + panic("Out of memory and no killable processes...\n");
> + }
> +
> + if (oom_kill_process(p, points, "Out of memory"))
> + goto retry;
> +}
> +
> +/*
> + * pagefault handler calls into here because it is out of memory but
> + * doesn't know exactly how or why.
> + */
> +void pagefault_out_of_memory(void)
> +{
> + if (printk_ratelimit()) {
> + printk(KERN_WARNING "%s invoked oom-killer from pagefault: "
> + "oomkilladj=%d\n", current->oomkilladj);
> + dump_stack();
> + show_mem();
> + }
> +
> + cpuset_lock();
> + read_lock(&tasklist_lock);
> + __out_of_memory();
> + read_unlock(&tasklist_lock);
> + cpuset_unlock();
> +}
> +
> /**
> * out_of_memory - kill the "best" process when we run out of memory
> *
> @@ -386,8 +437,6 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifie
> */
> void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
> {
> - struct task_struct *p;
> - unsigned long points = 0;
> unsigned long freed = 0;
>
> blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
> @@ -412,42 +461,18 @@ void out_of_memory(struct zonelist *zone
> */
> switch (constrained_alloc(zonelist, gfp_mask)) {
> case CONSTRAINT_MEMORY_POLICY:
> - oom_kill_process(current, points,
> - "No available memory (MPOL_BIND)");
> + oom_kill_process(current, 0, "No available memory (MPOL_BIND)");
> break;
>
> case CONSTRAINT_CPUSET:
> - oom_kill_process(current, points,
> - "No available memory in cpuset");
> + oom_kill_process(current, 0, "No available memory in cpuset");
> break;
>
> case CONSTRAINT_NONE:
> - if (sysctl_panic_on_oom)
> - panic("out of memory. panic_on_oom is selected\n");
> -retry:
> - /*
> - * Rambo mode: Shoot down a process and hope it solves whatever
> - * issues we may have.
> - */
> - p = select_bad_process(&points);
> -
> - if (PTR_ERR(p) == -1UL)
> - goto out;
> -
> - /* Found nothing?!?! Either we hang forever, or we panic. */
> - if (!p) {
> - read_unlock(&tasklist_lock);
> - cpuset_unlock();
> - panic("Out of memory and no killable processes...\n");
> - }
> -
> - if (oom_kill_process(p, points, "Out of memory"))
> - goto retry;
> -
> + __out_of_memory();
> break;
> }
>
> -out:
> read_unlock(&tasklist_lock);
> cpuset_unlock();
>
> Index: linux-2.6/arch/alpha/mm/fault.c
> ===================================================================
> --- linux-2.6.orig/arch/alpha/mm/fault.c
> +++ linux-2.6/arch/alpha/mm/fault.c
> @@ -143,7 +143,6 @@ do_page_fault(unsigned long address, uns
> goto bad_area;
> }
>
> - survive:
> /* If for any reason at all we couldn't handle the fault,
> make sure we exit gracefully rather than endlessly redo
> the fault. */
> @@ -190,19 +189,13 @@ do_page_fault(unsigned long address, uns
> die_if_kernel("Oops", regs, cause, (unsigned long*)regs - 16);
> do_exit(SIGKILL);
>
> - /* We ran out of memory, or some other thing happened to us that
> - made us unable to handle the page fault gracefully. */
> + /*
> + * We ran out of memory, call the OOM killer, and return to userspace
> + * (the fault will be retried if we weren't killed)
> + */
> out_of_memory:
> - if (is_init(current)) {
> - yield();
> - down_read(&mm->mmap_sem);
> - goto survive;
> - }
> - printk(KERN_ALERT "VM: killing process %s(%d)\n",
> - current->comm, current->pid);
> - if (!user_mode(regs))
> - goto no_context;
> - do_exit(SIGKILL);
> + pagefault_out_of_memory();
> + return;
>
> do_sigbus:
> /* Send a sigbus, regardless of whether we were in kernel
> Index: linux-2.6/arch/i386/mm/fault.c
> ===================================================================
> --- linux-2.6.orig/arch/i386/mm/fault.c
> +++ linux-2.6/arch/i386/mm/fault.c
> @@ -444,7 +444,6 @@ good_area:
> goto bad_area;
> }
>
> - survive:
> /*
> * If for any reason at all we couldn't handle the fault,
> * make sure we exit gracefully rather than endlessly redo
> @@ -583,21 +582,14 @@ no_context:
> bust_spinlocks(0);
> do_exit(SIGKILL);
>
> -/*
> - * We ran out of memory, or some other thing happened to us that made
> - * us unable to handle the page fault gracefully.
> - */
> out_of_memory:
> + /*
> + * We ran out of memory, call the OOM killer, and return to userspace
> + * (the fault will be retried if we weren't killed)
> + */
> up_read(&mm->mmap_sem);
> - if (is_init(tsk)) {
> - yield();
> - down_read(&mm->mmap_sem);
> - goto survive;
> - }
> - printk("VM: killing process %s\n", tsk->comm);
> - if (error_code & 4)
> - do_exit(SIGKILL);
> - goto no_context;
> + pagefault_out_of_memory();
> + return;
>
> do_sigbus:
> up_read(&mm->mmap_sem);
> Index: linux-2.6/arch/ia64/mm/fault.c
> ===================================================================
> --- linux-2.6.orig/arch/ia64/mm/fault.c
> +++ linux-2.6/arch/ia64/mm/fault.c
> @@ -155,7 +155,6 @@ ia64_do_page_fault (unsigned long addres
> if ((vma->vm_flags & mask) != mask)
> goto bad_area;
>
> - survive:
> /*
> * If for any reason at all we couldn't handle the fault, make
> * sure we exit gracefully rather than endlessly redo the
> @@ -280,13 +279,10 @@ ia64_do_page_fault (unsigned long addres
>
> out_of_memory:
> up_read(&mm->mmap_sem);
> - if (is_init(current)) {
> - yield();
> - down_read(&mm->mmap_sem);
> - goto survive;
> - }
> - printk(KERN_CRIT "VM: killing process %s\n", current->comm);
> - if (user_mode(regs))
> - do_exit(SIGKILL);
> - goto no_context;
> + /*
> + * We ran out of memory, call the OOM killer, and return to userspace
> + * (the fault will be retried if we weren't killed)
> + */
> + pagefault_out_of_memory();
> + return;
> }
> Index: linux-2.6/arch/powerpc/mm/fault.c
> ===================================================================
> --- linux-2.6.orig/arch/powerpc/mm/fault.c
> +++ linux-2.6/arch/powerpc/mm/fault.c
> @@ -342,7 +342,6 @@ good_area:
> * make sure we exit gracefully rather than endlessly redo
> * the fault.
> */
> - survive:
> switch (handle_mm_fault(mm, vma, address, is_write)) {
>
> case VM_FAULT_MINOR:
> @@ -380,21 +379,14 @@ bad_area_nosemaphore:
>
> return SIGSEGV;
>
> -/*
> - * We ran out of memory, or some other thing happened to us that made
> - * us unable to handle the page fault gracefully.
> - */
> out_of_memory:
> + /*
> + * We ran out of memory, call the OOM killer, and return to userspace
> + * (the fault will be retried if we weren't killed)
> + */
> up_read(&mm->mmap_sem);
> - if (is_init(current)) {
> - yield();
> - down_read(&mm->mmap_sem);
> - goto survive;
> - }
> - printk("VM: killing process %s\n", current->comm);
> - if (user_mode(regs))
> - do_exit(SIGKILL);
> - return SIGKILL;
> + pagefault_out_of_memory();
> + return 0;
>
> do_sigbus:
> up_read(&mm->mmap_sem);
> Index: linux-2.6/arch/x86_64/mm/fault.c
> ===================================================================
> --- linux-2.6.orig/arch/x86_64/mm/fault.c
> +++ linux-2.6/arch/x86_64/mm/fault.c
> @@ -407,7 +407,6 @@ asmlinkage void __kprobes do_page_fault(
> if (unlikely(in_atomic() || !mm))
> goto bad_area_nosemaphore;
>
> - again:
> /* When running in the kernel we expect faults to occur only to
> * addresses in user space. All other faults represent errors in the
> * kernel and should generate an OOPS. Unfortunatly, in the case of an
> @@ -574,20 +573,14 @@ no_context:
> oops_end(flags);
> do_exit(SIGKILL);
>
> -/*
> - * We ran out of memory, or some other thing happened to us that made
> - * us unable to handle the page fault gracefully.
> - */
> out_of_memory:
> + /*
> + * We ran out of memory, call the OOM killer, and return to userspace
> + * (the fault will be retried if we weren't killed)
> + */
> up_read(&mm->mmap_sem);
> - if (is_init(current)) {
> - yield();
> - goto again;
> - }
> - printk("VM: killing process %s\n", tsk->comm);
> - if (error_code & 4)
> - do_exit(SIGKILL);
> - goto no_context;
> + pagefault_out_of_memory();
> + return;
>
> do_sigbus:
> up_read(&mm->mmap_sem);
> Index: linux-2.6/include/linux/mm.h
> ===================================================================
> --- linux-2.6.orig/include/linux/mm.h
> +++ linux-2.6/include/linux/mm.h
> @@ -617,6 +617,11 @@ static inline int page_mapped(struct pag
> */
> #define VM_FAULT_WRITE 0x10
>
> +/*
> + * Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
> + */
> +extern void pagefault_out_of_memory(void);
> +
> #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
>
> extern void show_free_areas(void);
> Index: linux-2.6/arch/um/kernel/trap.c
> ===================================================================
> --- linux-2.6.orig/arch/um/kernel/trap.c
> +++ linux-2.6/arch/um/kernel/trap.c
> @@ -75,7 +75,6 @@ good_area:
> goto out;
>
> do {
> -survive:
> switch (handle_mm_fault(mm, vma, address, is_write)){
> case VM_FAULT_MINOR:
> current->min_flt++;
> @@ -119,13 +118,13 @@ out_nosemaphore:
> * us unable to handle the page fault gracefully.
> */
> out_of_memory:
> - if (is_init(current)) {
> - up_read(&mm->mmap_sem);
> - yield();
> - down_read(&mm->mmap_sem);
> - goto survive;
> - }
> - goto out;
> + /*
> + * We ran out of memory, call the OOM killer, and return to userspace
> + * (the fault will be retried if we weren't killed)
> + */
> + up_read(&mm->mmap_sem);
> + pagefault_out_of_memory();
> + goto out_nosemaphore;
> }
>
> void segv_handler(int sig, union uml_pt_regs *regs)
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/