[...]
I've added a small change to your patch. It simply allows growing
the buffers with GFP_BUFFER, but only if absolutely necessary. I've found this
solution experimentally by playing with some conditions and positions
for calling wakeup_bdflush(1) in refill_freelist. The testing was done by running
two `bonnie -s 200' and two `make clean; make -j zImage' in loops.
The patch is against pre-patch-2.0.31-7 plus Gadi's deadlock patch ...
it's one of the fastest kernels I've ever seen. ... But testers are
also needed for this patch _before_ the next pre-patch release
or the real 2.0.31 ... anybody out there?
Werner
-----------------------------------------------------------------------------
diff -urN linux-2.0.31-linus/fs/buffer.c linux/fs/buffer.c
--- linux-2.0.31-linus/fs/buffer.c Mon Aug 18 13:58:51 1997
+++ linux/fs/buffer.c Mon Aug 18 18:43:32 1997
@@ -662,6 +662,12 @@
}
goto repeat;
}
+
+ /* Dirty buffers should not overtake, wakeup_bdflush(1) calls
+ bdflush and sleeps, therefore kswapd does his important work. */
+ if ((nr_buffers_type[BUF_DIRTY] > nr_buffers * bdf_prm.b_un.nfract/100) ||
+ (nr_free_pages < (min_free_pages >> 1)))
+ wakeup_bdflush(1);
/* Too bad, that was not enough. Try a little harder to grow some. */
@@ -672,19 +678,8 @@
};
}
-#if 0
- /*
- * In order to protect our reserved pages,
- * return now if we got any buffers.
- */
- if (free_list[BUFSIZE_INDEX(size)])
- return;
-
/* and repeat until we find something good */
- if (!grow_buffers(GFP_ATOMIC, size))
- wakeup_bdflush(1);
-#endif
- wakeup_bdflush(1);
+ grow_buffers(GFP_ATOMIC, size);
/* decrease needed even if there is no success */
needed -= PAGE_SIZE;
@@ -1717,7 +1712,7 @@
* dirty buffers, then make the next write to a
* loop device to be a blocking write.
* This lets us block--which we _must_ do! */
- if (ndirty == 0 && nr_buffers_type[BUF_DIRTY] > 0) {
+ if (ndirty == 0 && nr_buffers_type[BUF_DIRTY] > 0 && wrta_cmd != WRITE) {
wrta_cmd = WRITE;
continue;
}
@@ -1725,7 +1720,7 @@
/* If there are still a lot of dirty buffers around, skip the sleep
and flush some more */
- if(nr_buffers_type[BUF_DIRTY] <= nr_buffers * bdf_prm.b_un.nfract/100) {
+ if(ndirty == 0 || nr_buffers_type[BUF_DIRTY] <= nr_buffers * bdf_prm.b_un.nfract/100) {
wake_up(&bdflush_done);
current->signal = 0;
interruptible_sleep_on(&bdflush_wait);