[patch 11/16] swapcache bugfixes

From: Andrew Morton (akpm@zip.com.au)
Date: Sat Jun 01 2002 - 03:43:16 EST


Fixes a few lock ranking bugs (and deadlocks) related to
swap_list_lock(), swap_device_lock(), mapping->page_lock and
mapping->private_lock.

- Cannot call block_flushpage->try_to_free_buffers() inside
  mapping->page_lock. Because __set_page_dirty_buffers() takes
  ->page_lock inside ->private-lock.

- Cannot call swap_free->swap_list_lock/swap_device_lock inside
  mapping->page_lock because exclusive_swap_page() takes ->page_lock
  inside swap_info_get().

The patch also removes all the block_flushpage() calls from the swap
code in favour of a direct call to try_to_free_buffers().

The theory is that the page is locked, there is no I/O underway, nobody
else has access to the buffers so they MUST be freeable. A bunch of
BUG() checks have been added, and unless someone manages to trigger
one, the "block_flushpage() inside spinlock" problem is fixed.

=====================================

--- 2.5.19/mm/swap_state.c~mfsc_deadlock Sat Jun 1 01:18:11 2002
+++ 2.5.19-akpm/mm/swap_state.c Sat Jun 1 01:18:11 2002
@@ -14,7 +14,7 @@
 #include <linux/init.h>
 #include <linux/pagemap.h>
 #include <linux/smp_lock.h>
-#include <linux/buffer_head.h> /* for block_sync_page()/block_flushpage() */
+#include <linux/buffer_head.h> /* block_sync_page()/try_to_free_buffers() */
 
 #include <asm/pgtable.h>
 
@@ -150,11 +150,15 @@ void delete_from_swap_cache(struct page
 {
         swp_entry_t entry;
 
- if (!PageLocked(page))
+ /*
+ * I/O should have completed and nobody can have a ref against the
+ * page's buffers
+ */
+ BUG_ON(!PageLocked(page));
+ BUG_ON(PageWriteback(page));
+ if (page_has_buffers(page) && !try_to_free_buffers(page))
                 BUG();
-
- block_flushpage(page, 0);
-
+
         entry.val = page->index;
 
         write_lock(&swapper_space.page_lock);
@@ -219,7 +223,15 @@ int move_from_swap_cache(struct page *pa
         void **pslot;
         int err;
 
- if (!PageLocked(page))
+ /*
+ * Drop the buffers now, before taking the page_lock. Because
+ * mapping->private_lock nests outside mapping->page_lock.
+ * This "must" succeed. The page is locked and all I/O has completed
+ * and nobody else has a ref against its buffers.
+ */
+ BUG_ON(!PageLocked(page));
+ BUG_ON(PageWriteback(page));
+ if (page_has_buffers(page) && !try_to_free_buffers(page))
                 BUG();
 
         write_lock(&swapper_space.page_lock);
@@ -229,10 +241,8 @@ int move_from_swap_cache(struct page *pa
         if (!err) {
                 swp_entry_t entry;
 
- block_flushpage(page, 0);
                 entry.val = page->index;
                 __delete_from_swap_cache(page);
- swap_free(entry);
 
                 *pslot = page;
                 page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
@@ -248,11 +258,16 @@ int move_from_swap_cache(struct page *pa
                 /* fix that up */
                 list_del(&page->list);
                 list_add(&page->list, &mapping->dirty_pages);
+ write_unlock(&mapping->page_lock);
+ write_unlock(&swapper_space.page_lock);
+
+ /* Do this outside ->page_lock */
+ swap_free(entry);
+ return 0;
         }
 
         write_unlock(&mapping->page_lock);
         write_unlock(&swapper_space.page_lock);
-
         return err;
 }
 
--- 2.5.19/mm/swapfile.c~mfsc_deadlock Sat Jun 1 01:18:11 2002
+++ 2.5.19-akpm/mm/swapfile.c Sat Jun 1 01:18:12 2002
@@ -16,7 +16,7 @@
 #include <linux/namei.h>
 #include <linux/shm.h>
 #include <linux/blkdev.h>
-#include <linux/buffer_head.h> /* for block_flushpage() */
+#include <linux/buffer_head.h> /* for try_to_free_buffers() */
 
 #include <asm/pgtable.h>
 #include <linux/swapops.h>
@@ -326,7 +326,9 @@ int remove_exclusive_swap_page(struct pa
         swap_info_put(p);
 
         if (retval) {
- block_flushpage(page, 0);
+ BUG_ON(PageWriteback(page));
+ if (page_has_buffers(page) && !try_to_free_buffers(page))
+ BUG();
                 swap_free(entry);
                 page_cache_release(page);
         }
--- 2.5.19/mm/filemap.c~mfsc_deadlock Sat Jun 1 01:18:11 2002
+++ 2.5.19-akpm/mm/filemap.c Sat Jun 1 01:18:12 2002
@@ -53,7 +53,9 @@
  * pagemap_lru_lock
  * ->i_shared_lock (vmtruncate)
  * ->private_lock (__free_pte->__set_page_dirty_buffers)
- * ->mapping->page_lock
+ * ->swap_list_lock
+ * ->swap_device_lock (exclusive_swap_page, others)
+ * ->mapping->page_lock
  * ->inode_lock (__mark_inode_dirty)
  * ->sb_lock (fs/fs-writeback.c)
  */

-
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Fri Jun 07 2002 - 22:00:10 EST