[PATCH] swap 07/13 objrmap page_table_lock

From: Hugh Dickins (hugh@veritas.com)
Date: Tue Mar 25 2003 - 17:17:46 EST


A recent update removed the page_table_lock from objrmap. That was
simply wrong: it's necessary to guard mm->rss (in try_to_unmap), and
it's necessary to ensure the page tables don't get freed beneath us
(in which case any modification of the pte, by page_referenced or
by try_to_unmap, might be corrupting a freed and reused page).

Then restore the original SWAP_AGAIN semantics in try_to_unmap:
simplest for inner levels to return SWAP_AGAIN or SWAP_FAILED,
outer level to decide SWAP_SUCCESS if all pages were unmapped.
Stop searching when all have been unmapped.

page_convert_anon left for the moment with FIXMEs rather than
page_table_lock. I believe it can be done without page_table_lock
(which would be problematic there), but have noticed several other
bugs there, more patches will follow at a later date (though I'd
much rather anobjrmap, which handles all this so much more cleanly).

--- swap06/mm/rmap.c Tue Mar 25 20:43:51 2003
+++ swap07/mm/rmap.c Tue Mar 25 20:44:02 2003
@@ -143,9 +143,13 @@
 static int
 page_referenced_obj_one(struct vm_area_struct *vma, struct page *page)
 {
+ struct mm_struct *mm = vma->vm_mm;
         pte_t *pte;
         int referenced = 0;
 
+ if (!spin_trylock(&mm->page_table_lock))
+ return 1;
+
         pte = find_pte(vma, page, NULL);
         if (pte) {
                 if (ptep_test_and_clear_young(pte))
@@ -153,6 +157,7 @@
                 pte_unmap(pte);
         }
 
+ spin_unlock(&mm->page_table_lock);
         return referenced;
 }
 
@@ -489,7 +494,10 @@
         unsigned long address;
         pte_t *pte;
         pte_t pteval;
- int ret = SWAP_SUCCESS;
+ int ret = SWAP_AGAIN;
+
+ if (!spin_trylock(&mm->page_table_lock))
+ return ret;
 
         pte = find_pte(vma, page, &address);
         if (!pte)
@@ -518,6 +526,7 @@
         pte_unmap(pte);
 
 out:
+ spin_unlock(&mm->page_table_lock);
         return ret;
 }
 
@@ -538,7 +547,7 @@
 {
         struct address_space *mapping = page->mapping;
         struct vm_area_struct *vma;
- int ret = SWAP_SUCCESS;
+ int ret = SWAP_AGAIN;
 
         if (!mapping)
                 BUG();
@@ -547,23 +556,20 @@
                 BUG();
 
         if (down_trylock(&mapping->i_shared_sem))
- return SWAP_AGAIN;
+ return ret;
         
         list_for_each_entry(vma, &mapping->i_mmap, shared) {
                 ret = try_to_unmap_obj_one(vma, page);
- if (ret != SWAP_SUCCESS)
+ if (ret == SWAP_FAIL || !page->pte.mapcount)
                         goto out;
         }
 
         list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
                 ret = try_to_unmap_obj_one(vma, page);
- if (ret != SWAP_SUCCESS)
+ if (ret == SWAP_FAIL || !page->pte.mapcount)
                         goto out;
         }
 
- if (page->pte.mapcount)
- BUG();
-
 out:
         up(&mapping->i_shared_sem);
         return ret;
@@ -753,8 +759,10 @@
                 }
         }
 out:
- if (!page_mapped(page))
+ if (!page_mapped(page)) {
                 dec_page_state(nr_mapped);
+ ret = SWAP_SUCCESS;
+ }
         return ret;
 }
 
@@ -839,6 +847,7 @@
 
         index = NRPTE-1;
         list_for_each_entry(vma, &mapping->i_mmap, shared) {
+ /* FIXME: unsafe without page_table_lock */
                 pte = find_pte(vma, page, NULL);
                 if (pte) {
                         pte_paddr = ptep_to_paddr(pte);
@@ -855,6 +864,7 @@
                 }
         }
         list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
+ /* FIXME: unsafe without page_table_lock */
                 pte = find_pte(vma, page, NULL);
                 if (pte) {
                         pte_paddr = ptep_to_paddr(pte);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Mon Mar 31 2003 - 22:00:21 EST