Re: buffers vs. pages vs. kernel speed

Dr. Werner Fink (werner@suse.de)
Tue, 17 Jun 1997 14:36:17 +0200


A few hours ago I wrote:

> Hi Dave,
>
> I've added one small change to your patch in __get_free_pages
> (mm/page_alloc.c):
> Maybe it's better to handle GFP_BUFFER the same way as GFP_NFS, to
> avoid heavy swapping when a process needs buffers while still allowing
> swapping if necessary. This change makes the mouse much more usable.
>
> One thing I've removed from my previous patch is the skipping of
> buffer pages when shrink_mmap is called in order to get free
> buffer pages.
>
> This patch is against pre-patch-2.0.31-2 and includes your patch.
>
> Werner

[ ... patch deleted ...]

There is a typo in the patch above; please try the following one instead.

Werner

---------------------------------------------------------------------
diff -urN linux-2.0.31-clean/fs/buffer.c linux/fs/buffer.c
--- linux-2.0.31-clean/fs/buffer.c Tue Jun 10 12:58:46 1997
+++ linux/fs/buffer.c Tue Jun 17 12:14:06 1997
@@ -660,20 +660,15 @@
goto repeat;
}

- /* Too bad, that was not enough. Try a little harder to grow some. */
-
- if (nr_free_pages > min_free_pages + 5) {
- if (grow_buffers(GFP_BUFFER, size)) {
- needed -= PAGE_SIZE;
- goto repeat;
- };
- }
+ /* Too bad, that was not enough. Try a little harder to grow some.
+ * and repeat until we find something good
+ */

- /* and repeat until we find something good */
- if (grow_buffers(GFP_ATOMIC, size))
+ if (grow_buffers(GFP_BUFFER, size))
needed -= PAGE_SIZE;
else
wakeup_bdflush(1);
+
goto repeat;
}

@@ -931,11 +926,16 @@
* This is critical. We can't swap out pages to get
* more buffer heads, because the swap-out may need
* more buffer-heads itself. Thus GFP_ATOMIC.
+ *
+ * This is no longer true, it is GFP_BUFFER again, the
+ * swapping code now knows not to perform I/O when that
+ * GFP level is specified... -DaveM
*/
+
/* we now use kmalloc() here instead of gfp as we want
to be able to easily release buffer heads - they
took up quite a bit of memory (tridge) */
- bh = (struct buffer_head *) kmalloc(sizeof(*bh),GFP_ATOMIC);
+ bh = (struct buffer_head *) kmalloc(sizeof(*bh),GFP_BUFFER);
if (bh) {
put_unused_buffer_head(bh);
nr_buffer_heads++;
diff -urN linux-2.0.31-clean/include/linux/mm.h linux/include/linux/mm.h
--- linux-2.0.31-clean/include/linux/mm.h Sat Mar 29 01:08:17 1997
+++ linux/include/linux/mm.h Mon Jun 16 22:26:25 1997
@@ -295,7 +295,7 @@

/* filemap.c */
extern unsigned long page_unuse(unsigned long);
-extern int shrink_mmap(int, int);
+extern int shrink_mmap(int, int, int);
extern void truncate_inode_pages(struct inode *, unsigned long);

#define GFP_BUFFER 0x00
diff -urN linux-2.0.31-clean/mm/filemap.c linux/mm/filemap.c
--- linux-2.0.31-clean/mm/filemap.c Tue Jun 10 12:58:48 1997
+++ linux/mm/filemap.c Mon Jun 16 22:19:32 1997
@@ -127,7 +127,7 @@
}
}

-int shrink_mmap(int priority, int dma)
+int shrink_mmap(int priority, int dma, int gfp_level)
{
static int clock = 0;
struct page * page;
diff -urN linux-2.0.31-clean/mm/page_alloc.c linux/mm/page_alloc.c
--- linux-2.0.31-clean/mm/page_alloc.c Sat Aug 17 20:19:29 1996
+++ linux/mm/page_alloc.c Tue Jun 17 12:13:24 1997
@@ -202,9 +202,14 @@
priority = GFP_ATOMIC;
}
}
- reserved_pages = 5;
- if (priority != GFP_NFS)
- reserved_pages = min_free_pages;
+ switch (priority) {
+ case GFP_NFS:
+ case GFP_BUFFER:
+ reserved_pages = 5;
+ break;
+ default:
+ reserved_pages = min_free_pages;
+ }
save_flags(flags);
repeat:
cli();
@@ -214,7 +219,7 @@
return 0;
}
restore_flags(flags);
- if (priority != GFP_BUFFER && try_to_free_page(priority, dma, 1))
+ if (try_to_free_page(priority, dma, 1))
goto repeat;
return 0;
}
@@ -264,11 +269,11 @@

/*
* select nr of pages we try to keep free for important stuff
- * with a minimum of 16 pages. This is totally arbitrary
+ * with a minimum of 24 pages. This is totally arbitrary
*/
i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+7);
- if (i < 16)
- i = 16;
+ if (i < 24)
+ i = 24;
min_free_pages = i;
free_pages_low = i + (i>>1);
free_pages_high = i + i;
diff -urN linux-2.0.31-clean/mm/vmscan.c linux/mm/vmscan.c
--- linux-2.0.31-clean/mm/vmscan.c Sat Dec 14 13:24:31 1996
+++ linux/mm/vmscan.c Mon Jun 16 20:11:47 1997
@@ -68,7 +68,7 @@
* have died while we slept).
*/
static inline int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
- unsigned long address, pte_t * page_table, int dma, int wait)
+ unsigned long address, pte_t * page_table, int dma, int wait, int can_do_io)
{
pte_t pte;
unsigned long entry;
@@ -100,6 +100,8 @@
if (page_map->age)
return 0;
if (pte_dirty(pte)) {
+ if(!can_do_io)
+ return 0;
if (vma->vm_ops && vma->vm_ops->swapout) {
pid_t pid = tsk->pid;
vma->vm_mm->rss--;
@@ -157,7 +159,8 @@
*/

static inline int swap_out_pmd(struct task_struct * tsk, struct vm_area_struct * vma,
- pmd_t *dir, unsigned long address, unsigned long end, int dma, int wait)
+ pmd_t *dir, unsigned long address, unsigned long end, int dma, int wait,
+ int can_do_io)
{
pte_t * pte;
unsigned long pmd_end;
@@ -179,7 +182,7 @@
do {
int result;
tsk->swap_address = address + PAGE_SIZE;
- result = try_to_swap_out(tsk, vma, address, pte, dma, wait);
+ result = try_to_swap_out(tsk, vma, address, pte, dma, wait, can_do_io);
if (result)
return result;
address += PAGE_SIZE;
@@ -189,7 +192,8 @@
}

static inline int swap_out_pgd(struct task_struct * tsk, struct vm_area_struct * vma,
- pgd_t *dir, unsigned long address, unsigned long end, int dma, int wait)
+ pgd_t *dir, unsigned long address, unsigned long end, int dma, int wait,
+ int can_do_io)
{
pmd_t * pmd;
unsigned long pgd_end;
@@ -209,7 +213,7 @@
end = pgd_end;

do {
- int result = swap_out_pmd(tsk, vma, pmd, address, end, dma, wait);
+ int result = swap_out_pmd(tsk, vma, pmd, address, end, dma, wait, can_do_io);
if (result)
return result;
address = (address + PMD_SIZE) & PMD_MASK;
@@ -219,7 +223,7 @@
}

static int swap_out_vma(struct task_struct * tsk, struct vm_area_struct * vma,
- pgd_t *pgdir, unsigned long start, int dma, int wait)
+ pgd_t *pgdir, unsigned long start, int dma, int wait, int can_do_io)
{
unsigned long end;

@@ -230,7 +234,7 @@

end = vma->vm_end;
while (start < end) {
- int result = swap_out_pgd(tsk, vma, pgdir, start, end, dma, wait);
+ int result = swap_out_pgd(tsk, vma, pgdir, start, end, dma, wait, can_do_io);
if (result)
return result;
start = (start + PGDIR_SIZE) & PGDIR_MASK;
@@ -239,7 +243,7 @@
return 0;
}

-static int swap_out_process(struct task_struct * p, int dma, int wait)
+static int swap_out_process(struct task_struct * p, int dma, int wait, int can_do_io)
{
unsigned long address;
struct vm_area_struct* vma;
@@ -260,7 +264,7 @@
address = vma->vm_start;

for (;;) {
- int result = swap_out_vma(p, vma, pgd_offset(p->mm, address), address, dma, wait);
+ int result = swap_out_vma(p, vma, pgd_offset(p->mm, address), address, dma, wait, can_do_io);
if (result)
return result;
vma = vma->vm_next;
@@ -272,7 +276,7 @@
return 0;
}

-static int swap_out(unsigned int priority, int dma, int wait)
+static int swap_out(unsigned int priority, int dma, int wait, int gfp_level)
{
static int swap_task;
int loop, counter;
@@ -311,7 +315,7 @@
}
if (!--p->swap_cnt)
swap_task++;
- switch (swap_out_process(p, dma, wait)) {
+ switch (swap_out_process(p, dma, wait, gfp_level != GFP_BUFFER)) {
case 0:
if (p->swap_cnt)
swap_task++;
@@ -330,33 +334,61 @@
* to be. This works out OK, because we now do proper aging on page
* contents.
*/
+#define TRIED_SMMAP 0x1
+#define TRIED_SSWAP 0x2
+#define TRIED_SWOUT 0x4
int try_to_free_page(int priority, int dma, int wait)
{
static int state = 0;
int i=6;
- int stop;
+ int stop, tried_mask = 0;
+ int old_stop, old_state;

/* we don't try as hard if we're not waiting.. */
stop = 3;
if (wait)
stop = 0;
+ old_stop = stop;
+ old_state = state;
switch (state) {
do {
case 0:
- if (shrink_mmap(i, dma))
+ /* Don't worry here for the GFP_BUFFER case, shrink_mmap never
+ * tries to write dirty things out...
+ */
+ tried_mask |= TRIED_SMMAP;
+ if (shrink_mmap(i, dma, priority)) {
+ state = 1;
return 1;
- state = 1;
+ }
case 1:
- if (shm_swap(i, dma))
- return 1;
- state = 2;
+ /* shm_swap must always perform some I/O if it succeeds
+ * in finding things to free up, so don't waste any time
+ * if we are trying to get some buffer heads...
+ */
+ if (priority != GFP_BUFFER) {
+ tried_mask |= TRIED_SSWAP;
+ if(shm_swap(i, dma)) {
+ state = 2;
+ return 1;
+ }
+ }
default:
- if (swap_out(i, dma, wait))
+ tried_mask |= TRIED_SWOUT;
+ if (swap_out(i, dma, wait, priority)) {
+ state = 0;
return 1;
- state = 0;
+ }
i--;
} while ((i - stop) >= 0);
}
+ printk("try_to_free_page(%d,%d,%d): FAIL try(%s:%s:%s) state[o(%d):n(%d)] "
+ "stop[o(%d):n(%d)]\n", priority, dma, wait,
+ tried_mask & TRIED_SMMAP ? "shrink_mmap" : "",
+ tried_mask & TRIED_SSWAP ? "shm_swap" : "",
+ tried_mask & TRIED_SWOUT ? "swap_out" : "",
+ old_state, state, old_stop, stop);
+ state = 0;
return 0;
}