I'm writing this mail in rmail-mode of emacs on an X display, with a system
load between 9 and 29 due to a pair of `make -j' runs in two kernel trees
together with a `tar tfvz <66434131Kb_gzip_tar_on_NFS> > /dev/null'.
Sometimes it's a bit slow if the load is above 20 :-) ... but it works
quite well. It's only an i586/133 with 64Mb and 80Mb swap.
You will find the necessary (ohh ispell works :-) patch enclosed.
I've combined two patches by Bill Hawes <whawes@star.net>, some lines
from a patch by Benjamin C R LaHaise <blah@dot.superaje.com>, and a few
changes of my own:
* A few barrier() calls in try_to_free_page to avoid compiler kludges
when optimising (mm/vmscan.c) ... NO state fix (this would temporarily
freeze a system under high load due to swap I/O).
* return to the caller in refill_freelist() if nr_free_pages <= 5
or grow_buffers(GFP_ATOMIC, size) fails (fs/buffer.c).
* set the minimum for min_free_pages to 24 pages, fix a possible
NULL pointer when freeing a non-existent page in swap_in(), set the
initial age of swapped-in pages to twice PAGE_INITIAL_AGE in swap_in()
(both in mm/page_alloc.c) and the same in shm_swap_in() (ipc/shm.c).
These small changes give (inter)active tasks what they need :-)
* Some cosmetic changes made while looking at the code (found a
possible `small read ahead' configuration that is currently not used).
Werner
--------------------------------------------------------------------------
diff -urN linux-2.0.31-clean/fs/buffer.c linux/fs/buffer.c
--- linux-2.0.31-clean/fs/buffer.c Tue Jun 10 12:58:46 1997
+++ linux/fs/buffer.c Wed Jul 9 14:18:51 1997
@@ -548,7 +548,7 @@
if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
buffer_dirty(bh)) {
- refile_buffer(bh);
+ /* WSH: don't attempt to refile here! */
return 0;
}
@@ -669,12 +669,15 @@
};
}
- /* and repeat until we find something good */
- if (grow_buffers(GFP_ATOMIC, size))
- needed -= PAGE_SIZE;
- else
- wakeup_bdflush(1);
- goto repeat;
+ if (nr_free_pages > 5) {
+ /* and repeat until we find something good */
+ if (grow_buffers(GFP_ATOMIC, size)) {
+ needed -= PAGE_SIZE;
+ goto repeat;
+ };
+ }
+
+ wakeup_bdflush(1);
}
/*
@@ -922,6 +925,34 @@
wake_up(&buffer_wait);
}
+/*
+ * We can't put completed temporary IO buffer_heads directly onto the
+ * unused_list when they become unlocked, since the device driver
+ * end_request routines still expect access to the buffer_head's
+ * fields after the final unlock. So, the device driver puts them on
+ * the reuse_list instead once IO completes, and we recover these to
+ * the unused_list here.
+ *
+ * The reuse_list receives buffers from interrupt routines, so we need
+ * to be IRQ-safe here (but note that interrupts only _add_ to the
+ * reuse_list, never take away. So we don't need to worry about the
+ * reuse_list magically emptying).
+ */
+static inline void recover_reusable_buffer_heads(void)
+{
+ if (reuse_list) {
+ struct buffer_head *head;
+
+ head = xchg(&reuse_list, NULL);
+
+ do {
+ struct buffer_head *bh = head;
+ head = head->b_next_free;
+ put_unused_buffer_head(bh);
+ } while (head);
+ }
+}
+
static void get_more_buffer_heads(void)
{
struct buffer_head * bh;
@@ -949,38 +980,14 @@
*/
run_task_queue(&tq_disk);
sleep_on(&buffer_wait);
+ /*
+ * After we wake up, check for released async buffer heads.
+ */
+ recover_reusable_buffer_heads();
}
}
-/*
- * We can't put completed temporary IO buffer_heads directly onto the
- * unused_list when they become unlocked, since the device driver
- * end_request routines still expect access to the buffer_head's
- * fields after the final unlock. So, the device driver puts them on
- * the reuse_list instead once IO completes, and we recover these to
- * the unused_list here.
- *
- * The reuse_list receives buffers from interrupt routines, so we need
- * to be IRQ-safe here (but note that interrupts only _add_ to the
- * reuse_list, never take away. So we don't need to worry about the
- * reuse_list magically emptying).
- */
-static inline void recover_reusable_buffer_heads(void)
-{
- if (reuse_list) {
- struct buffer_head *head;
-
- head = xchg(&reuse_list, NULL);
-
- do {
- struct buffer_head *bh = head;
- head = head->b_next_free;
- put_unused_buffer_head(bh);
- } while (head);
- }
-}
-
static struct buffer_head * get_unused_buffer_head(void)
{
struct buffer_head * bh;
@@ -1161,6 +1168,7 @@
free_async_buffers(bh);
restore_flags(flags);
after_unlock_page(page);
+ wake_up(&buffer_wait);
}
++current->maj_flt;
return 0;
@@ -1534,6 +1542,7 @@
next->b_count--;
}
}
+ run_task_queue(&tq_disk);
#ifdef DEBUG
if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
printk("Wrote %d/%d buffers\n", nwritten, ndirty);
diff -urN linux-2.0.31-clean/ipc/shm.c linux/ipc/shm.c
--- linux-2.0.31-clean/ipc/shm.c Fri Nov 22 15:25:18 1996
+++ linux/ipc/shm.c Thu Jul 10 17:43:22 1997
@@ -13,6 +13,7 @@
#include <linux/stat.h>
#include <linux/malloc.h>
#include <linux/swap.h>
+#include <linux/swapctl.h>
#include <asm/segment.h>
#include <asm/pgtable.h>
@@ -656,6 +657,9 @@
oom(current);
return BAD_PAGE;
}
+ /* Give the physical reallocated page a bigger start */
+ mem_map[MAP_NR(page)].age = (2*PAGE_INITIAL_AGE);
+
pte_val(pte) = shp->shm_pages[idx];
if (pte_present(pte)) {
free_page (page); /* doesn't sleep */
diff -urN linux-2.0.31-clean/mm/filemap.c linux/mm/filemap.c
--- linux-2.0.31-clean/mm/filemap.c Tue Jun 10 12:58:48 1997
+++ linux/mm/filemap.c Thu Jul 10 16:50:40 1997
@@ -450,7 +450,7 @@
#define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK)
-#if 0 /* small readahead */
+#ifdef CONFIG_READA_SMALL /* small readahead */
#define MAX_READAHEAD PageAlignSize(4096*7)
#define MIN_READAHEAD PageAlignSize(4096*2)
#else /* large readahead */
diff -urN linux-2.0.31-clean/mm/mlock.c linux/mm/mlock.c
--- linux-2.0.31-clean/mm/mlock.c Wed Sep 11 16:57:19 1996
+++ linux/mm/mlock.c Wed Jul 9 14:18:51 1997
@@ -202,7 +202,7 @@
/* we may lock at most half of physical memory... */
/* (this check is pretty bogus, but doesn't hurt) */
- if (locked > MAP_NR(high_memory)/2)
+ if (locked > (MAP_NR(high_memory) >> 1))
return -ENOMEM;
return do_mlock(start, len, 1);
@@ -259,7 +259,7 @@
/* we may lock at most half of physical memory... */
/* (this check is pretty bogus, but doesn't hurt) */
- if (current->mm->total_vm > MAP_NR(high_memory)/2)
+ if (current->mm->total_vm > (MAP_NR(high_memory) >> 1))
return -ENOMEM;
return do_mlockall(flags);
diff -urN linux-2.0.31-clean/mm/page_alloc.c linux/mm/page_alloc.c
--- linux-2.0.31-clean/mm/page_alloc.c Sat Aug 17 20:19:29 1996
+++ linux/mm/page_alloc.c Thu Jul 10 17:42:58 1997
@@ -264,11 +264,11 @@
/*
* select nr of pages we try to keep free for important stuff
- * with a minimum of 16 pages. This is totally arbitrary
+ * with a minimum of 24 pages. This is totally arbitrary
*/
i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+7);
- if (i < 16)
- i = 16;
+ if (i < 24)
+ i = 24;
min_free_pages = i;
free_pages_low = i + (i>>1);
free_pages_high = i + i;
@@ -311,7 +311,8 @@
unsigned long page = __get_free_page(GFP_KERNEL);
if (pte_val(*page_table) != entry) {
- free_page(page);
+ if (page)
+ free_page(page);
return;
}
if (!page) {
@@ -320,6 +321,9 @@
oom(tsk);
return;
}
+ /* Give the physical reallocated page a bigger start */
+ mem_map[MAP_NR(page)].age = (2*PAGE_INITIAL_AGE);
+
read_swap_page(entry, (char *) page);
if (pte_val(*page_table) != entry) {
free_page(page);
diff -urN linux-2.0.31-clean/mm/vmscan.c linux/mm/vmscan.c
--- linux-2.0.31-clean/mm/vmscan.c Sat Dec 14 13:24:31 1996
+++ linux/mm/vmscan.c Wed Jul 9 14:30:56 1997
@@ -19,6 +19,7 @@
#include <linux/swap.h>
#include <linux/fs.h>
#include <linux/swapctl.h>
+#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <asm/dma.h>
@@ -32,6 +33,13 @@
*/
static int next_swap_jiffies = 0;
+/*
+ * Was the last kswapd wakeup caused by
+ * nr_free_pages < free_pages_low
+ */
+static int last_wakeup_low = 0;
+
+
/*
* How often do we do a pageout scan during normal conditions?
* Default is four times a second.
@@ -343,17 +351,23 @@
switch (state) {
do {
case 0:
+ barrier();
if (shrink_mmap(i, dma))
return 1;
state = 1;
+ barrier();
case 1:
+ barrier();
if (shm_swap(i, dma))
return 1;
state = 2;
+ barrier();
default:
+ barrier();
if (swap_out(i, dma, wait))
return 1;
state = 0;
+ barrier();
i--;
} while ((i - stop) >= 0);
}
@@ -402,6 +416,9 @@
printk ("Started kswapd v%.*s\n", i, s);
while (1) {
+ /* low on memory, we need to start swapping soon */
+ next_swap_jiffies = jiffies +
+ (last_wakeup_low ? swapout_interval >> 1 : swapout_interval);
kswapd_awake = 0;
current->signal = 0;
run_task_queue(&tq_disk);
@@ -410,7 +427,8 @@
swapstats.wakeups++;
/* Do the background pageout: */
for (i=0; i < kswapd_ctl.maxpages; i++)
- try_to_free_page(GFP_KERNEL, 0, 0);
+ try_to_free_page(GFP_KERNEL, 0,
+ (nr_free_pages < min_free_pages));
}
}
@@ -421,16 +439,15 @@
void swap_tick(void)
{
int want_wakeup = 0;
- static int last_wakeup_low = 0;
if ((nr_free_pages + nr_async_pages) < free_pages_low) {
if (last_wakeup_low)
- want_wakeup = jiffies >= next_swap_jiffies;
+ want_wakeup = (jiffies >= next_swap_jiffies);
else
last_wakeup_low = want_wakeup = 1;
}
else if (((nr_free_pages + nr_async_pages) < free_pages_high) &&
- jiffies >= next_swap_jiffies) {
+ (jiffies >= next_swap_jiffies)) {
last_wakeup_low = 0;
want_wakeup = 1;
}
@@ -440,7 +457,6 @@
wake_up(&kswapd_wait);
need_resched = 1;
}
- next_swap_jiffies = jiffies + swapout_interval;
}
timer_active |= (1<<SWAP_TIMER);
}