[patch] oom send SIGBUS

Andrea Arcangeli (andrea@suse.de)
Thu, 10 Jun 1999 03:29:54 +0200 (CEST)


This patch is against clean 2.2.9. It no longer allows a malicious user to
kill the machine by trapping the SIGBUS that used to be raised while the
machine is OOM. It also makes sure to release/acquire the kernel lock where
needed in do_wp_page, plus a minor do_wp_page cleanup and the removal of
put_page(). We'll still get the SIGBUS in the shared-mapping case, and we'll
still stop the copy-user if we were OOM-faulting from kernel mode.
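
For the curious, this is the kind of abuse the fault.c change closes (a
hypothetical userspace sketch, not part of the patch; it relies on oom()
ending in an uncatchable SIGKILL, as it does in 2.2): before the change a
task could catch the SIGBUS raised on OOM and simply retry, surviving every
OOM condition it caused.

#include <setjmp.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>

static sigjmp_buf oom_env;

static void trap_sigbus(int sig)
{
	/* Swallow the kernel's OOM SIGBUS and resume the loop. */
	siglongjmp(oom_env, 1);
}

int main(void)
{
	signal(SIGBUS, trap_sigbus);
	for (;;) {
		char *p = malloc(1UL << 20);
		if (!p)
			continue;
		if (sigsetjmp(oom_env, 1) == 0)
			memset(p, 1, 1UL << 20);	/* fault the pages in */
	}
}

With the patch applied the fault path calls oom() instead, so the task above
gets SIGKILL and dies on the spot.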

Index: linux//arch/i386/mm/fault.c
===================================================================
RCS file: /var/cvs/linux/arch/i386/mm/fault.c,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 fault.c
--- linux//arch/i386/mm/fault.c 1999/01/18 01:28:56 1.1.1.1
+++ linux//arch/i386/mm/fault.c 1999/06/10 01:04:23
@@ -265,7 +265,6 @@
tsk->tss.cr2 = address;
tsk->tss.error_code = error_code;
tsk->tss.trap_no = 14;
- force_sig(SIGBUS, tsk);

/* Kernel mode? Handle exceptions or die */
if (!(error_code & 4))
Index: linux//mm/filemap.c
===================================================================
RCS file: /var/cvs/linux/mm/filemap.c,v
retrieving revision 1.1.1.12
diff -u -r1.1.1.12 filemap.c
--- linux//mm/filemap.c 1999/05/14 18:25:05 1.1.1.12
+++ linux//mm/filemap.c 1999/06/10 01:12:09
@@ -939,7 +939,7 @@
new_page = 0;
offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
- goto no_page;
+ goto do_sigbus;

/*
* Do we have something in the page cache already?
@@ -958,7 +958,7 @@
if (no_share && !new_page) {
new_page = page_cache_alloc();
if (!new_page)
- goto failure;
+ goto release_and_oom;
}

if (PageLocked(page))
@@ -1006,7 +1006,7 @@
if (!new_page)
new_page = page_cache_alloc();
if (!new_page)
- goto no_page;
+ goto oom;

/*
* During getting the above page we might have slept,
@@ -1058,7 +1058,16 @@
page_cache_release(page);
if (new_page)
page_cache_free(new_page);
-no_page:
+ return 0;
+
+do_sigbus:
+ force_sig(SIGBUS, current);
+ return 0;
+
+release_and_oom:
+ page_cache_release(page);
+oom:
+ oom(current);
return 0;
}

Index: linux//mm/memory.c
===================================================================
RCS file: /var/cvs/linux/mm/memory.c,v
retrieving revision 1.1.1.4
diff -u -r1.1.1.4 memory.c
--- linux//mm/memory.c 1999/04/17 14:27:30 1.1.1.4
+++ linux//mm/memory.c 1999/06/10 01:13:06
@@ -545,19 +545,6 @@
}

/*
- * sanity-check function..
- */
-static void put_page(pte_t * page_table, pte_t pte)
-{
- if (!pte_none(*page_table)) {
- free_page_and_swap_cache(pte_page(pte));
- return;
- }
-/* no need for flush_tlb */
- set_pte(page_table, pte);
-}
-
-/*
* This routine is used to map in a page into an address space: needed by
* execve() for the initial stack and environment pages.
*/
@@ -614,21 +601,16 @@
* and potentially makes it more efficient.
*/
static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
- unsigned long address, pte_t *page_table)
+ unsigned long address, pte_t *page_table, pte_t pte)
{
- pte_t pte;
unsigned long old_page, new_page;
struct page * page_map;

pte = *page_table;
new_page = __get_free_page(GFP_USER);
- /* Did someone else copy this page for us while we slept? */
+ /* Did swap_out() unmap the protected page while we slept? */
if (pte_val(*page_table) != pte_val(pte))
goto end_wp_page;
- if (!pte_present(pte))
- goto end_wp_page;
- if (pte_write(pte))
- goto end_wp_page;
old_page = pte_page(pte);
if (MAP_NR(old_page) >= max_mapnr)
goto bad_wp_page;
@@ -652,39 +634,48 @@
delete_from_swap_cache(page_map);
/* FallThrough */
case 1:
- /* We can release the kernel lock now.. */
- unlock_kernel();
-
flush_cache_page(vma, address);
set_pte(page_table, pte_mkdirty(pte_mkwrite(pte)));
flush_tlb_page(vma, address);
end_wp_page:
+ /*
+ * We can release the kernel lock now.. Now swap_out will see
+ * a dirty page and so won't get confused and flush_tlb_page
+ * won't SMP race.
+ */
+ unlock_kernel();
+
if (new_page)
free_page(new_page);
return 1;
}

- unlock_kernel();
if (!new_page)
- return 0;
+ goto no_new_page;

- if (PageReserved(mem_map + MAP_NR(old_page)))
+ if (PageReserved(page_map))
++vma->vm_mm->rss;
copy_cow_page(old_page,new_page);
flush_page_to_ram(old_page);
flush_page_to_ram(new_page);
flush_cache_page(vma, address);
set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
- free_page(old_page);
flush_tlb_page(vma, address);
+ unlock_kernel();
+ __free_page(page_map);
return 1;

bad_wp_page:
+ unlock_kernel();
printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
send_sig(SIGKILL, tsk, 1);
if (new_page)
free_page(new_page);
return 0;
+no_new_page:
+ unlock_kernel();
+ oom(tsk);
+ return 0;
}

/*
@@ -811,15 +802,18 @@
if (write_access) {
unsigned long page = __get_free_page(GFP_USER);
if (!page)
- return 0;
+ goto oom;
clear_page(page);
entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
vma->vm_mm->rss++;
tsk->min_flt++;
flush_page_to_ram(page);
}
- put_page(page_table, entry);
+ set_pte(page_table, entry);
return 1;
+ oom:
+ oom(tsk);
+ return 0;
}

/*
@@ -876,7 +870,7 @@
} else if (atomic_read(&mem_map[MAP_NR(page)].count) > 1 &&
!(vma->vm_flags & VM_SHARED))
entry = pte_wrprotect(entry);
- put_page(page_table, entry);
+ set_pte(page_table, entry);
/* no need to invalidate: a not-present page shouldn't be cached */
return 1;
}
@@ -910,7 +904,7 @@
flush_tlb_page(vma, address);
if (write_access) {
if (!pte_write(entry))
- return do_wp_page(tsk, vma, address, pte);
+ return do_wp_page(tsk, vma, address, pte, entry);

entry = pte_mkdirty(entry);
set_pte(pte, entry);
@@ -928,18 +922,23 @@
{
pgd_t *pgd;
pmd_t *pmd;
+ pte_t * pte;

pgd = pgd_offset(vma->vm_mm, address);
pmd = pmd_alloc(pgd, address);
- if (pmd) {
- pte_t * pte = pte_alloc(pmd, address);
- if (pte) {
- if (handle_pte_fault(tsk, vma, address, write_access, pte)) {
- update_mmu_cache(vma, address, *pte);
- return 1;
- }
- }
- }
+ if (!pmd)
+ goto oom;
+ pte = pte_alloc(pmd, address);
+ if (!pte)
+ goto oom;
+ if (!handle_pte_fault(tsk, vma, address, write_access, pte))
+ goto fail;
+ update_mmu_cache(vma, address, *pte);
+ return 1;
+
+ oom:
+ oom(tsk);
+ fail:
return 0;
}
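
BTW, the do_sigbus path added to filemap.c above is the one you hit from
userspace by touching a shared mapping on a page wholly beyond the end of
the file; a quick illustration (hypothetical, assumes the i386 4k page size
and skips error checking):

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/f", O_RDWR | O_CREAT | O_TRUNC, 0600);
	char *p;

	write(fd, "x", 1);	/* i_size = 1: only the first page is backed */
	p = mmap(0, 2 * 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	p[0] = 'y';		/* ok: page 0 overlaps i_size */
	p[4096] = 'y';		/* page entirely past i_size -> SIGBUS */
	return 0;
}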

Andrea Arcangeli
