o fixes the init-gets-sigsegv during oom.
o avoids sending SIGKILL to iopl'ed tasks and sends them a SIGTERM
instead, so X won't screw up the console due to oom on IA32.
o makes the signal code react better to oom.
o updates a few places that don't know about the new handle_mm_fault
interface yet (btw, in ptrace David prefers to send the SIGKILL
due to oom to the traced `tsk` and not to the `current` tracer task;
I left it to him to send an incremental patch if he wants. I actually
prefer the way it is done in my patch because it is obviously safe).
o removes the deprecated oom function.
o prevents the swap algorithm from reassigning swap_cnt more than once
per try.
diff -urN 2.3.36pre5/arch/alpha/kernel/signal.c 2.3.36pre5-oom/arch/alpha/kernel/signal.c
--- 2.3.36pre5/arch/alpha/kernel/signal.c Wed Nov 24 18:22:03 1999
+++ 2.3.36pre5-oom/arch/alpha/kernel/signal.c Mon Jan 3 19:00:05 2000
@@ -437,6 +437,8 @@
err |= __copy_to_user(frame->extramask, &set->sig[1],
sizeof(frame->extramask));
}
+ if (err)
+ goto give_sigsegv;
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
@@ -499,6 +501,8 @@
err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, sw,
set->sig[0], oldsp);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+ if (err)
+ goto give_sigsegv;
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
diff -urN 2.3.36pre5/arch/alpha/mm/fault.c 2.3.36pre5-oom/arch/alpha/mm/fault.c
--- 2.3.36pre5/arch/alpha/mm/fault.c Wed Nov 24 18:22:03 1999
+++ 2.3.36pre5-oom/arch/alpha/mm/fault.c Mon Jan 3 19:00:05 2000
@@ -130,13 +130,13 @@
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
+survive:
fault = handle_mm_fault(current, vma, address, cause > 0);
- up(&mm->mmap_sem);
-
if (fault < 0)
goto out_of_memory;
if (fault == 0)
goto do_sigbus;
+ up(&mm->mmap_sem);
return;
@@ -177,13 +177,23 @@
* us unable to handle the page fault gracefully.
*/
out_of_memory:
- printk(KERN_ALERT "VM: killing process %s(%d)\n",
- current->comm, current->pid);
- if (!user_mode(regs))
- goto no_context;
- do_exit(SIGKILL);
+ if (current->pid == 1)
+ {
+ current->policy |= SCHED_YIELD;
+ schedule();
+ goto survive;
+ }
+ up(&mm->mmap_sem);
+ if (user_mode(regs))
+ {
+ printk(KERN_ALERT "VM: killing process %s(%d)\n",
+ current->comm, current->pid);
+ do_exit(SIGKILL);
+ }
+ goto no_context;
do_sigbus:
+ up(&mm->mmap_sem);
/*
* Send a sigbus, regardless of whether we were in kernel
* or user mode.
diff -urN 2.3.36pre5/arch/i386/kernel/signal.c 2.3.36pre5-oom/arch/i386/kernel/signal.c
--- 2.3.36pre5/arch/i386/kernel/signal.c Wed Nov 24 18:22:03 1999
+++ 2.3.36pre5-oom/arch/i386/kernel/signal.c Mon Jan 3 19:00:05 2000
@@ -419,13 +419,19 @@
? current->exec_domain->signal_invmap[sig]
: sig),
&frame->sig);
+ if (err)
+ goto give_sigsegv;
err |= setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]);
+ if (err)
+ goto give_sigsegv;
if (_NSIG_WORDS > 1) {
err |= __copy_to_user(frame->extramask, &set->sig[1],
sizeof(frame->extramask));
}
+ if (err)
+ goto give_sigsegv;
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
@@ -486,6 +492,8 @@
err |= __put_user(&frame->info, &frame->pinfo);
err |= __put_user(&frame->uc, &frame->puc);
err |= __copy_to_user(&frame->info, info, sizeof(*info));
+ if (err)
+ goto give_sigsegv;
/* Create the ucontext. */
err |= __put_user(0, &frame->uc.uc_flags);
@@ -497,6 +505,8 @@
err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+ if (err)
+ goto give_sigsegv;
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
diff -urN 2.3.36pre5/arch/i386/mm/fault.c 2.3.36pre5-oom/arch/i386/mm/fault.c
--- 2.3.36pre5/arch/i386/mm/fault.c Wed Nov 24 18:22:04 1999
+++ 2.3.36pre5-oom/arch/i386/mm/fault.c Mon Jan 3 19:00:05 2000
@@ -31,6 +31,7 @@
{
struct vm_area_struct * vma;
unsigned long start = (unsigned long) addr;
+ int fault;
if (!size)
return 1;
@@ -50,8 +51,12 @@
start &= PAGE_MASK;
for (;;) {
- if (handle_mm_fault(current, vma, start, 1) <= 0)
- goto bad_area;
+survive:
+ fault = handle_mm_fault(current, vma, start, 1);
+ if (!fault)
+ goto do_sigbus;
+ if (fault < 0)
+ goto out_of_memory;
if (!size)
break;
size--;
@@ -74,6 +79,19 @@
bad_area:
return 0;
+
+do_sigbus:
+ force_sig(SIGBUS, current);
+ goto bad_area;
+
+out_of_memory:
+ if (current->pid == 1)
+ {
+ current->policy |= SCHED_YIELD;
+ schedule();
+ goto survive;
+ }
+ goto bad_area;
}
static inline void handle_wp_test (void)
@@ -188,6 +206,7 @@
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
+survive:
{
int fault = handle_mm_fault(tsk, vma, address, write);
if (fault < 0)
@@ -280,10 +299,33 @@
* us unable to handle the page fault gracefully.
*/
out_of_memory:
+ if (tsk->pid == 1)
+ {
+ tsk->policy |= SCHED_YIELD;
+ schedule();
+ goto survive;
+ }
up(&mm->mmap_sem);
- printk("VM: killing process %s\n", tsk->comm);
if (error_code & 4)
- do_exit(SIGKILL);
+ {
+ if (!((regs->eflags >> 12) & 3))
+ {
+ printk(KERN_ALERT "VM: killing process %s\n",
+ tsk->comm);
+ do_exit(SIGKILL);
+ }
+ else
+ {
+ /*
+ * The task is running with privilegies and so we
+ * trust it and we give it a chance to die gracefully.
+ */
+ printk(KERN_ALERT "VM: terminating process %s\n",
+ tsk->comm);
+ force_sig(SIGTERM, current);
+ return;
+ }
+ }
goto no_context;
do_sigbus:
diff -urN 2.3.36pre5/fs/exec.c 2.3.36pre5-oom/fs/exec.c
--- 2.3.36pre5/fs/exec.c Mon Jan 3 18:56:24 2000
+++ 2.3.36pre5-oom/fs/exec.c Mon Jan 3 19:02:11 2000
@@ -277,13 +277,13 @@
pmd = pmd_alloc(pgd, address);
if (!pmd) {
__free_page(page);
- oom(tsk);
+ force_sig(SIGKILL, tsk);
return;
}
pte = pte_alloc(pmd, address);
if (!pte) {
__free_page(page);
- oom(tsk);
+ force_sig(SIGKILL, tsk);
return;
}
if (!pte_none(*pte)) {
diff -urN 2.3.36pre5/include/linux/mm.h 2.3.36pre5-oom/include/linux/mm.h
--- 2.3.36pre5/include/linux/mm.h Mon Jan 3 18:56:25 2000
+++ 2.3.36pre5-oom/include/linux/mm.h Mon Jan 3 19:00:05 2000
@@ -400,7 +400,6 @@
unsigned int * zones_size, unsigned long zone_start_paddr);
extern void mem_init(void);
extern void show_mem(void);
-extern void oom(struct task_struct * tsk);
extern void si_meminfo(struct sysinfo * val);
extern void swapin_readahead(swp_entry_t);
diff -urN 2.3.36pre5/kernel/ptrace.c 2.3.36pre5-oom/kernel/ptrace.c
--- 2.3.36pre5/kernel/ptrace.c Sun Nov 21 03:20:20 1999
+++ 2.3.36pre5-oom/kernel/ptrace.c Mon Jan 3 19:01:02 2000
@@ -26,6 +26,7 @@
unsigned long mapnr;
unsigned long maddr;
struct page *page;
+ int fault;
repeat:
pgdir = pgd_offset(vma->vm_mm, addr);
@@ -64,8 +65,12 @@
fault_in_page:
/* -1: out of memory. 0 - unmapped page */
- if (handle_mm_fault(tsk, vma, addr, write) > 0)
+ fault = handle_mm_fault(tsk, vma, addr, write);
+ if (fault > 0)
goto repeat;
+ if (fault < 0)
+ /* the out of memory is been triggered by the current task. */
+ force_sig(SIGKILL, current);
return 0;
bad_pgd:
diff -urN 2.3.36pre5/mm/memory.c 2.3.36pre5-oom/mm/memory.c
--- 2.3.36pre5/mm/memory.c Mon Jan 3 18:56:25 2000
+++ 2.3.36pre5-oom/mm/memory.c Mon Jan 3 19:00:05 2000
@@ -70,16 +70,6 @@
mem_map_t * mem_map = NULL;
/*
- * oom() prints a message (so that the user knows why the process died),
- * and gives the process an untrappable SIGKILL.
- */
-void oom(struct task_struct * task)
-{
- printk("\nOut of memory for %s.\n", task->comm);
- force_sig(SIGKILL, task);
-}
-
-/*
* Note: this doesn't free the actual pages themselves. That
* has been handled earlier when unmapping all the memory regions.
*/
diff -urN 2.3.36pre5/mm/vmscan.c 2.3.36pre5-oom/mm/vmscan.c
--- 2.3.36pre5/mm/vmscan.c Fri Dec 31 16:33:05 1999
+++ 2.3.36pre5-oom/mm/vmscan.c Mon Jan 3 19:00:05 2000
@@ -328,6 +328,7 @@
struct task_struct * p;
int counter;
int __ret = 0;
+ int assign = 0;
lock_kernel();
/*
@@ -347,12 +348,9 @@
counter = nr_threads / (priority+1);
if (counter < 1)
counter = 1;
- if (counter > nr_threads)
- counter = nr_threads;
for (; counter >= 0; counter--) {
- int assign = 0;
- int max_cnt = 0;
+ unsigned long max_cnt = 0;
struct mm_struct *best = NULL;
int pid = 0;
select:
@@ -365,7 +363,7 @@
if (mm->rss <= 0)
continue;
/* Refresh swap_cnt? */
- if (assign)
+ if (assign == 1)
mm->swap_cnt = mm->rss;
if (mm->swap_cnt > max_cnt) {
max_cnt = mm->swap_cnt;
@@ -374,6 +372,8 @@
}
}
read_unlock(&tasklist_lock);
+ if (assign == 1)
+ assign = 2;
if (!best) {
if (!assign) {
assign = 1;
Andrea
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/
This archive was generated by hypermail 2b29 : Sat Jan 15 2000 - 21:00:12 EST