low memory buffer cache bug fix, need some testers...

David S. Miller (davem@jenolan.rutgers.edu)
Mon, 9 Jun 1997 02:11:34 -0400


This should apply to any recent 2.0.3X kernel. I believe it should
help with situations such as "my 4MB RAM machine hangs when you
try to make a 9GB ext2 partition with mke2fs" and that sort of thing.

Warning: I ran it through my tests, but I need others to be brave
enough to test it. I also want feedback from the people who can
reproduce it easily with existing kernels. I'd like to know if this
fixes it, or just makes it a little better, or makes it worse 8-)

--- linux/fs/buffer.c.~1~ Thu May 29 13:15:12 1997
+++ linux/fs/buffer.c Mon Jun 9 01:32:41 1997
@@ -670,7 +670,7 @@
}

/* and repeat until we find something good */
- if (grow_buffers(GFP_ATOMIC, size))
+ if (grow_buffers(GFP_BUFFER, size))
needed -= PAGE_SIZE;
else
wakeup_bdflush(1);
@@ -931,11 +931,16 @@
* This is critical. We can't swap out pages to get
* more buffer heads, because the swap-out may need
* more buffer-heads itself. Thus GFP_ATOMIC.
+ *
+ * This is no longer true, it is GFP_BUFFER again, the
+ * swapping code now knows not to perform I/O when that
+ * GFP level is specified... -DaveM
*/
+
/* we now use kmalloc() here instead of gfp as we want
to be able to easily release buffer heads - they
took up quite a bit of memory (tridge) */
- bh = (struct buffer_head *) kmalloc(sizeof(*bh),GFP_ATOMIC);
+ bh = (struct buffer_head *) kmalloc(sizeof(*bh),GFP_BUFFER);
if (bh) {
put_unused_buffer_head(bh);
nr_buffer_heads++;
--- linux/mm/vmscan.c.~1~ Mon Jun 2 17:25:12 1997
+++ linux/mm/vmscan.c Mon Jun 9 01:33:51 1997
@@ -68,7 +68,7 @@
* have died while we slept).
*/
static inline int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
- unsigned long address, pte_t * page_table, int dma, int wait)
+ unsigned long address, pte_t * page_table, int dma, int wait, int can_do_io)
{
pte_t pte;
unsigned long entry;
@@ -100,6 +100,8 @@
if (page_map->age)
return 0;
if (pte_dirty(pte)) {
+ if(!can_do_io)
+ return 0;
if (vma->vm_ops && vma->vm_ops->swapout) {
pid_t pid = tsk->pid;
vma->vm_mm->rss--;
@@ -157,7 +159,8 @@
*/

static inline int swap_out_pmd(struct task_struct * tsk, struct vm_area_struct * vma,
- pmd_t *dir, unsigned long address, unsigned long end, int dma, int wait)
+ pmd_t *dir, unsigned long address, unsigned long end, int dma, int wait,
+ int can_do_io)
{
pte_t * pte;
unsigned long pmd_end;
@@ -179,7 +182,7 @@
do {
int result;
tsk->swap_address = address + PAGE_SIZE;
- result = try_to_swap_out(tsk, vma, address, pte, dma, wait);
+ result = try_to_swap_out(tsk, vma, address, pte, dma, wait, can_do_io);
if (result)
return result;
address += PAGE_SIZE;
@@ -189,7 +192,8 @@
}

static inline int swap_out_pgd(struct task_struct * tsk, struct vm_area_struct * vma,
- pgd_t *dir, unsigned long address, unsigned long end, int dma, int wait)
+ pgd_t *dir, unsigned long address, unsigned long end, int dma, int wait,
+ int can_do_io)
{
pmd_t * pmd;
unsigned long pgd_end;
@@ -209,7 +213,7 @@
end = pgd_end;

do {
- int result = swap_out_pmd(tsk, vma, pmd, address, end, dma, wait);
+ int result = swap_out_pmd(tsk, vma, pmd, address, end, dma, wait, can_do_io);
if (result)
return result;
address = (address + PMD_SIZE) & PMD_MASK;
@@ -219,7 +223,7 @@
}

static int swap_out_vma(struct task_struct * tsk, struct vm_area_struct * vma,
- pgd_t *pgdir, unsigned long start, int dma, int wait)
+ pgd_t *pgdir, unsigned long start, int dma, int wait, int can_do_io)
{
unsigned long end;

@@ -230,7 +234,7 @@

end = vma->vm_end;
while (start < end) {
- int result = swap_out_pgd(tsk, vma, pgdir, start, end, dma, wait);
+ int result = swap_out_pgd(tsk, vma, pgdir, start, end, dma, wait, can_do_io);
if (result)
return result;
start = (start + PGDIR_SIZE) & PGDIR_MASK;
@@ -239,7 +243,7 @@
return 0;
}

-static int swap_out_process(struct task_struct * p, int dma, int wait)
+static int swap_out_process(struct task_struct * p, int dma, int wait, int can_do_io)
{
unsigned long address;
struct vm_area_struct* vma;
@@ -260,7 +264,7 @@
address = vma->vm_start;

for (;;) {
- int result = swap_out_vma(p, vma, pgd_offset(p->mm, address), address, dma, wait);
+ int result = swap_out_vma(p, vma, pgd_offset(p->mm, address), address, dma, wait, can_do_io);
if (result)
return result;
vma = vma->vm_next;
@@ -272,7 +276,7 @@
return 0;
}

-static int swap_out(unsigned int priority, int dma, int wait)
+static int swap_out(unsigned int priority, int dma, int wait, int gfp_level)
{
static int swap_task;
int loop, counter;
@@ -311,7 +315,7 @@
}
if (!--p->swap_cnt)
swap_task++;
- switch (swap_out_process(p, dma, wait)) {
+ switch (swap_out_process(p, dma, wait, gfp_level != GFP_BUFFER)) {
case 0:
if (p->swap_cnt)
swap_task++;
@@ -343,15 +347,22 @@
switch (state) {
do {
case 0:
+ /* Don't worry here for the GFP_BUFFER case, shrink_mmap never
+ * tries to write dirty things out...
+ */
if (shrink_mmap(i, dma))
return 1;
state = 1;
case 1:
- if (shm_swap(i, dma))
+ /* shm_swap must always perform some I/O if it succeeds
+ * in finding things to free up, so don't waste any time
+ * if we are trying to get some buffer heads...
+ */
+ if (priority != GFP_BUFFER && shm_swap(i, dma))
return 1;
state = 2;
default:
- if (swap_out(i, dma, wait))
+ if (swap_out(i, dma, wait, priority))
return 1;
state = 0;
i--;