[PATCH 2/3] Memory management livelock

From: Mikulas Patocka
Date: Wed Sep 24 2008 - 14:53:16 EST


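Avoid starvation when walking an address space. Each walker estimates how many pages it expects to process; if it processes significantly more than that, someone is creating pages under it, so it takes the AS_STARVATION bit, which makes writers in balance_dirty_pages() wait until the walk finishes.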

Signed-off-by: Mikulas Patocka <mpatocka@xxxxxxxxxx>

---
include/linux/pagemap.h | 1 +
mm/filemap.c | 20 ++++++++++++++++++++
mm/page-writeback.c | 37 ++++++++++++++++++++++++++++++++++++-
mm/truncate.c | 24 +++++++++++++++++++++++-
4 files changed, 80 insertions(+), 2 deletions(-)

Index: linux-2.6.27-rc7-devel/include/linux/pagemap.h
===================================================================
--- linux-2.6.27-rc7-devel.orig/include/linux/pagemap.h 2008-09-24 02:57:37.000000000 +0200
+++ linux-2.6.27-rc7-devel/include/linux/pagemap.h 2008-09-24 02:59:04.000000000 +0200
@@ -21,6 +21,7 @@
#define AS_EIO (__GFP_BITS_SHIFT + 0) /* IO error on async write */
#define AS_ENOSPC (__GFP_BITS_SHIFT + 1) /* ENOSPC on async write */
#define AS_MM_ALL_LOCKS (__GFP_BITS_SHIFT + 2) /* under mm_take_all_locks() */
+#define AS_STARVATION (__GFP_BITS_SHIFT + 3) /* an anti-starvation barrier */

static inline void mapping_set_error(struct address_space *mapping, int error)
{
Index: linux-2.6.27-rc7-devel/mm/filemap.c
===================================================================
--- linux-2.6.27-rc7-devel.orig/mm/filemap.c 2008-09-24 02:59:33.000000000 +0200
+++ linux-2.6.27-rc7-devel/mm/filemap.c 2008-09-24 03:13:47.000000000 +0200
@@ -269,10 +269,19 @@ int wait_on_page_writeback_range(struct
int nr_pages;
int ret = 0;
pgoff_t index;
+ long pages_to_process;

if (end < start)
return 0;

+ /*
+ * Estimate the number of pages to process. If we process significantly
+ * more than this, someone is making writeback pages under us.
+ * We must pull the anti-starvation plug.
+ */
+ pages_to_process = bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK);
+ pages_to_process += (pages_to_process >> 3) + 16;
+
pagevec_init(&pvec, 0);
index = start;
while ((index <= end) &&
@@ -288,6 +297,10 @@ int wait_on_page_writeback_range(struct
if (page->index > end)
continue;

+ if (pages_to_process >= 0)
+ if (!pages_to_process--)
+ wait_on_bit_lock(&mapping->flags, AS_STARVATION, wait_action_schedule, TASK_UNINTERRUPTIBLE);
+
wait_on_page_writeback(page);
if (PageError(page))
ret = -EIO;
@@ -296,6 +309,13 @@ int wait_on_page_writeback_range(struct
cond_resched();
}

+ if (pages_to_process < 0) {
+ smp_mb__before_clear_bit();
+ clear_bit(AS_STARVATION, &mapping->flags);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&mapping->flags, AS_STARVATION);
+ }
+
/* Check for outstanding write errors */
if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
ret = -ENOSPC;
Index: linux-2.6.27-rc7-devel/mm/page-writeback.c
===================================================================
--- linux-2.6.27-rc7-devel.orig/mm/page-writeback.c 2008-09-24 03:10:34.000000000 +0200
+++ linux-2.6.27-rc7-devel/mm/page-writeback.c 2008-09-24 03:20:24.000000000 +0200
@@ -435,6 +435,18 @@ static void balance_dirty_pages(struct a

struct backing_dev_info *bdi = mapping->backing_dev_info;

+ /*
+ * If there is a sync() starving on this address space, block
+ * writers until it finishes.
+ */
+ if (unlikely(test_bit(AS_STARVATION, &mapping->flags))) {
+ wait_on_bit_lock(&mapping->flags, AS_STARVATION, wait_action_schedule, TASK_UNINTERRUPTIBLE);
+ smp_mb__before_clear_bit();
+ clear_bit(AS_STARVATION, &mapping->flags);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&mapping->flags, AS_STARVATION);
+ }
+
for (;;) {
struct writeback_control wbc = {
.bdi = bdi,
@@ -876,12 +888,21 @@ int write_cache_pages(struct address_spa
pgoff_t end; /* Inclusive */
int scanned = 0;
int range_whole = 0;
+ long pages_to_process;

if (wbc->nonblocking && bdi_write_congested(bdi)) {
wbc->encountered_congestion = 1;
return 0;
}

+ /*
+ * Estimate the number of pages to process. If we process significantly
+ * more than this, someone is making dirty pages under us.
+ * Pull the anti-starvation plug to stop him.
+ */
+ pages_to_process = bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+ pages_to_process += (pages_to_process >> 3) + 16;
+
pagevec_init(&pvec, 0);
if (wbc->range_cyclic) {
index = mapping->writeback_index; /* Start from prev offset */
@@ -902,7 +923,13 @@ retry:

scanned = 1;
for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
+ struct page *page;
+
+ if (pages_to_process >= 0)
+ if (!pages_to_process--)
+ wait_on_bit_lock(&mapping->flags, AS_STARVATION, wait_action_schedule, TASK_UNINTERRUPTIBLE);
+
+ page = pvec.pages[i];

/*
* At this point we hold neither mapping->tree_lock nor
@@ -949,6 +976,14 @@ retry:
pagevec_release(&pvec);
cond_resched();
}
+
+ if (pages_to_process < 0) {
+ smp_mb__before_clear_bit();
+ clear_bit(AS_STARVATION, &mapping->flags);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&mapping->flags, AS_STARVATION);
+ }
+
if (!scanned && !done) {
/*
* We hit the last page and there is more work to be done: wrap
Index: linux-2.6.27-rc7-devel/mm/truncate.c
===================================================================
--- linux-2.6.27-rc7-devel.orig/mm/truncate.c 2008-09-24 03:16:15.000000000 +0200
+++ linux-2.6.27-rc7-devel/mm/truncate.c 2008-09-24 03:18:00.000000000 +0200
@@ -392,6 +392,14 @@ int invalidate_inode_pages2_range(struct
int ret2 = 0;
int did_range_unmap = 0;
int wrapped = 0;
+ long pages_to_process;
+
+ /*
+ * Estimate the number of pages to process. If we process more, someone
+ * is making pages under us.
+ */
+ pages_to_process = mapping->nrpages;
+ pages_to_process += (pages_to_process >> 3) + 16;

pagevec_init(&pvec, 0);
next = start;
@@ -399,9 +407,15 @@ int invalidate_inode_pages2_range(struct
pagevec_lookup(&pvec, mapping, next,
min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
+ struct page *page;
pgoff_t page_index;

+ if (pages_to_process >= 0)
+ if (!pages_to_process--)
+ wait_on_bit_lock(&mapping->flags, AS_STARVATION, wait_action_schedule, TASK_UNINTERRUPTIBLE);
+
+ page = pvec.pages[i];
+
lock_page(page);
if (page->mapping != mapping) {
unlock_page(page);
@@ -449,6 +463,14 @@ int invalidate_inode_pages2_range(struct
pagevec_release(&pvec);
cond_resched();
}
+
+ if (pages_to_process < 0) {
+ smp_mb__before_clear_bit();
+ clear_bit(AS_STARVATION, &mapping->flags);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&mapping->flags, AS_STARVATION);
+ }
+
return ret;
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/