--- .orig/include/linux/page-flags.h	2006-02-05 10:00:48.000000000 -0500
+++ 01-vmscan-rotate-fix/include/linux/page-flags.h	2006-02-04 09:18:17.000000000 -0500
@@ -149,6 +149,12 @@
 
 	unsigned long pgrotated;	/* pages rotated to tail of the LRU */
 	unsigned long nr_bounce;	/* pages for bounce buffers */
+
+	unsigned long pgrotcalls;	/* page rotation stats */
+	unsigned long pgrotlocked;
+	unsigned long pgrotdirty;
+	unsigned long pgrotactive;
+	unsigned long pgrotnonlru;
 };
 
 extern void get_page_state(struct page_state *ret);
--- .orig/include/linux/swap.h	2006-02-05 10:00:49.000000000 -0500
+++ 01-vmscan-rotate-fix/include/linux/swap.h	2006-02-04 09:23:24.000000000 -0500
@@ -175,6 +175,7 @@
 extern int try_to_free_pages(struct zone **, gfp_t);
 extern int shrink_all_memory(int);
 extern int vm_swappiness;
+extern int vm_wb_put_lru;
 
 #ifdef CONFIG_NUMA
 extern int zone_reclaim_mode;
--- .orig/include/linux/sysctl.h	2006-02-05 10:00:49.000000000 -0500
+++ 01-vmscan-rotate-fix/include/linux/sysctl.h	2006-02-04 09:23:05.000000000 -0500
@@ -184,6 +184,7 @@
 	VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
 	VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
 	VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */
+	VM_WB_PUT_LRU=33,	/* add page to LRU before calling writepage() */
 };
 
 /* CTL_NET names: */
--- .orig/kernel/sysctl.c	2006-02-05 10:00:49.000000000 -0500
+++ 01-vmscan-rotate-fix/kernel/sysctl.c	2006-02-04 09:24:17.000000000 -0500
@@ -891,6 +891,16 @@
 		.strategy	= &sysctl_jiffies,
 	},
 #endif
+	{
+		.ctl_name	= VM_WB_PUT_LRU,
+		.procname	= "wb_put_lru",
+		.data		= &vm_wb_put_lru,
+		.maxlen		= sizeof(vm_wb_put_lru),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
 	{ .ctl_name = 0 }
 };
 
--- .orig/mm/page_alloc.c	2006-02-05 10:00:49.000000000 -0500
+++ 01-vmscan-rotate-fix/mm/page_alloc.c	2006-02-04 09:20:38.000000000 -0500
@@ -2360,6 +2360,12 @@
 
 	"pgrotated",
 	"nr_bounce",
+
+	"pgrotcalls",
+	"pgrotlocked",
+	"pgrotdirty",
+	"pgrotactive",
+	"pgrotnonlru",
 };
 
 static void *vmstat_start(struct seq_file *m, loff_t *pos)
--- .orig/mm/swap.c	2006-02-05 10:00:49.000000000 -0500
+++ 01-vmscan-rotate-fix/mm/swap.c	2006-02-04 09:19:14.000000000 -0500
@@ -71,14 +71,24 @@
 	struct zone *zone;
 	unsigned long flags;
 
-	if (PageLocked(page))
+	inc_page_state(pgrotcalls);
+
+	if (PageLocked(page)) {
+		inc_page_state(pgrotlocked);
 		return 1;
-	if (PageDirty(page))
+	}
+	if (PageDirty(page)) {
+		inc_page_state(pgrotdirty);
 		return 1;
-	if (PageActive(page))
+	}
+	if (PageActive(page)) {
+		inc_page_state(pgrotactive);
 		return 1;
-	if (!PageLRU(page))
+	}
+	if (!PageLRU(page)) {
+		inc_page_state(pgrotnonlru);
 		return 1;
+	}
 
 	zone = page_zone(page);
 	spin_lock_irqsave(&zone->lru_lock, flags);
--- .orig/mm/vmscan.c	2006-02-05 10:00:50.000000000 -0500
+++ 01-vmscan-rotate-fix/mm/vmscan.c	2006-02-04 11:33:00.000000000 -0500
@@ -126,6 +126,7 @@
  * From 0 .. 100.  Higher means more swappy.
  */
 int vm_swappiness = 60;
+int vm_wb_put_lru = 1;
 static long total_memory;
 
 static LIST_HEAD(shrinker_list);
@@ -308,7 +309,7 @@
 /*
  * pageout is called by shrink_list() for each dirty page. Calls ->writepage().
  */
-static pageout_t pageout(struct page *page, struct address_space *mapping)
+static pageout_t pageout(struct page *page, struct address_space *mapping, int *on_lru)
 {
 	/*
 	 * If the page is dirty, only perform writeback if that write
@@ -357,6 +358,27 @@
 			.for_reclaim = 1,
 		};
 
+		/*
+		 * Put page back on LRU before calling writepage
+		 * because that could result in a call to
+		 * rotate_reclaimable_page().  If the LRU flag
+		 * is clear, rotate_reclaimable_page() will fail
+		 * to move the page to the tail of the inactive list.
+		 */
+		if (on_lru && vm_wb_put_lru) {
+			struct zone *zone = page_zone(page);
+
+			*on_lru = 1;
+			spin_lock_irq(&zone->lru_lock);
+			if (likely(!TestSetPageLRU(page))) {
+				list_add(&page->lru, &zone->inactive_list);
+				zone->nr_inactive++;
+			} else {
+				BUG();
+			}
+			spin_unlock_irq(&zone->lru_lock);
+		}
+
 		SetPageReclaim(page);
 		res = mapping->a_ops->writepage(page, &wbc);
 		if (res < 0)
@@ -431,6 +453,7 @@
 		struct page *page;
 		int may_enter_fs;
 		int referenced;
+		int on_lru = 0;
 
 		cond_resched();
 
@@ -502,7 +525,7 @@
 			goto keep_locked;
 
 		/* Page is dirty, try to write it out here */
-		switch(pageout(page, mapping)) {
+		switch(pageout(page, mapping, &on_lru)) {
 		case PAGE_KEEP:
 			goto keep_locked;
 		case PAGE_ACTIVATE:
@@ -558,18 +581,30 @@
 free_it:
 		unlock_page(page);
 		reclaimed++;
-		if (!pagevec_add(&freed_pvec, page))
-			__pagevec_release_nonlru(&freed_pvec);
+		if (!on_lru) {
+			if (!pagevec_add(&freed_pvec, page))
+				__pagevec_release_nonlru(&freed_pvec);
+		} else {
+			page_cache_release(page);
+		}
 		continue;
 
 activate_locked:
-		SetPageActive(page);
-		pgactivate++;
+		if (!on_lru) {
+			SetPageActive(page);
+			pgactivate++;
+		} else {
+			activate_page(page);
+		}
 keep_locked:
 		unlock_page(page);
 keep:
-		list_add(&page->lru, &ret_pages);
-		BUG_ON(PageLRU(page));
+		if (!on_lru) {
+			list_add(&page->lru, &ret_pages);
+			BUG_ON(PageLRU(page));
+		} else {
+			page_cache_release(page);
+		}
 	}
 	list_splice(&ret_pages, page_list);
 	if (pagevec_count(&freed_pvec))
@@ -637,7 +672,7 @@
 
 	if (PageDirty(page)) {
 		/* Page is dirty, try to write it out here */
-		switch(pageout(page, mapping)) {
+		switch(pageout(page, mapping, NULL)) {
 		case PAGE_KEEP:
 		case PAGE_ACTIVATE:
 			goto unlock_retry;
@@ -936,7 +971,7 @@
 	 * Trigger writeout if page is dirty
 	 */
 	if (PageDirty(page)) {
-		switch (pageout(page, mapping)) {
+		switch (pageout(page, mapping, NULL)) {
 		case PAGE_KEEP:
 		case PAGE_ACTIVATE:
 			goto unlock_both;
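
Not part of the patch itself: below is a minimal user-space sketch for exercising the change on a kernel with this patch applied. It reads the pgrot* counters that the mm/page_alloc.c hunk exports through /proc/vmstat, and optionally writes a value to /proc/sys/vm/wb_put_lru (the procname registered in the kernel/sysctl.c hunk). Only those two proc paths and the counter names come from the patch; the program name and output format are made up for illustration.

/*
 * wb_put_lru-stat.c - illustrative test helper, not part of the patch.
 *
 * Usage:  ./wb_put_lru-stat        dump the rotation counters
 *         ./wb_put_lru-stat 0      disable vm.wb_put_lru, then dump
 *         ./wb_put_lru-stat 1      re-enable it, then dump
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Counter names exactly as added to vmstat_text in mm/page_alloc.c. */
static const char *counters[] = {
	"pgrotated", "pgrotcalls", "pgrotlocked",
	"pgrotdirty", "pgrotactive", "pgrotnonlru",
};

static void dump_counters(void)
{
	char name[64];
	unsigned long val;
	unsigned int i;
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f) {
		perror("/proc/vmstat");
		exit(1);
	}
	/* Each /proc/vmstat line is "name value"; print the ones we track. */
	while (fscanf(f, "%63s %lu", name, &val) == 2)
		for (i = 0; i < sizeof(counters) / sizeof(counters[0]); i++)
			if (!strcmp(name, counters[i]))
				printf("%-12s %lu\n", name, val);
	fclose(f);
}

int main(int argc, char **argv)
{
	if (argc > 1) {
		FILE *f = fopen("/proc/sys/vm/wb_put_lru", "w");

		if (!f) {
			perror("/proc/sys/vm/wb_put_lru");
			return 1;
		}
		fprintf(f, "%s\n", argv[1]);
		fclose(f);
	}
	dump_counters();
	return 0;
}

With the knob at its default of 1, sampling before and after a reclaim-heavy run should show pgrotcalls climbing while pgrotnonlru stays flat, since pageout() now re-adds the page to the inactive list before ->writepage(). Writing 0 restores the old ordering, so pgrotnonlru rising again would confirm the failure case described in the comment added to pageout().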