[PATCH] 3/5 VM changes: dont-rotate-active-list.patch

From: Nikita Danilov (Nikita@Namesys.COM)
Date: Wed Jul 09 2003 - 03:47:12 EST


Currently, if a zone is short on free pages, refill_inactive_zone() starts
moving pages from active_list to inactive_list, rotating active_list as it
goes: pages from the tail of active_list are transferred to its head. This
destroys LRU ordering exactly when we need it most, i.e. when the system is
low on free memory and page replacement has to be performed.
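
To make the problem concrete, here is a toy user-level sketch (plain C with
an invented node type; this is not kernel code): pulling entries off the
tail of an LRU list and re-adding the ones we decide to keep at its head
reorders the list, so age information is lost exactly while we are trying
to pick victims.

#include <stdio.h>

struct node { int id; struct node *prev, *next; };

/* circular doubly linked list: head->next is the most recently used
 * entry, head->prev the least recently used one */
static void add_head(struct node *h, struct node *n)
{
        n->next = h->next; n->prev = h;
        h->next->prev = n; h->next = n;
}

static void del(struct node *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
}

int main(void)
{
        struct node head = { .prev = &head, .next = &head }, pages[5];
        int i;

        for (i = 0; i < 5; i++) {       /* page 0 ends up at the tail (oldest) */
                pages[i].id = i;
                add_head(&head, &pages[i]);
        }

        /* "scan" two pages from the tail and keep both: rotating them to
         * the head is what refill_inactive_zone() does today */
        for (i = 0; i < 2; i++) {
                struct node *n = head.prev;     /* tail, least recently used */
                del(n);
                add_head(&head, n);
        }

        for (struct node *n = head.next; n != &head; n = n->next)
                printf("%d ", n->id);           /* prints: 1 0 4 3 2 */
        printf("\n");
        return 0;
}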

This patch modifies refill_inactive_zone() so that it scans active_list
without rotating it. To achieve this, a special dummy page, zone->scan_page,
is maintained for each zone. This page marks the place in active_list that
the scan has reached.

As an additional bonus, if memory pressure is not high enough to start
swapping mapped pages (reclaim_mapped == 0 in refill_inactive_zone()),
unreferenced mapped pages can be left behind zone->scan_page instead of
being moved to the head of active_list. When reclaim_mapped mode is
activated, zone->scan_page is reset back to the tail of active_list so that
these pages can be re-scanned.
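
The same kind of sketch (again user-level C with invented names, not the
kernel code) shows the scan_page idea itself: a dummy marker node embedded
in the list records how far the scan has got, so the scan walks from the
tail towards the head without rotating real entries; skipped mapped entries
are parked behind the marker, and moving the marker back to the tail makes
them eligible for scanning again.

#include <stdio.h>

struct node { int id, mapped; struct node *prev, *next; };

static void list_init(struct node *h) { h->prev = h->next = h; }

static void add_head(struct node *h, struct node *n)   /* insert after h */
{
        n->next = h->next; n->prev = h;
        h->next->prev = n; h->next = n;
}

static void del(struct node *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
}

static void add_tail(struct node *h, struct node *n)   /* n becomes new tail */
{
        n->next = h; n->prev = h->prev;
        h->prev->next = n; h->prev = n;
}

static void print_list(const char *tag, struct node *h)
{
        printf("%s:", tag);
        for (struct node *n = h->next; n != h; n = n->next)
                printf(" %c", n->id < 0 ? '*' : '0' + n->id);
        printf("\n");
}

int main(void)
{
        struct node head, marker = { .id = -1 }, pages[4];
        struct node *n;
        int i;

        list_init(&head);
        for (i = 0; i < 4; i++) {               /* page 0 ends up at the tail */
                pages[i].id = i;
                pages[i].mapped = (i == 1);     /* pretend page 1 is mapped */
                add_head(&head, &pages[i]);
        }
        add_tail(&head, &marker);               /* scan starts at the tail */

        /* scan the whole list once with mapped reclaim off */
        while ((n = marker.prev) != &head) {
                if (n->mapped) {
                        del(n);
                        add_head(&marker, n);   /* leave it behind the marker */
                } else {
                        del(n);                 /* "reclaim": in the kernel this
                                                 * would go to the inactive list */
                }
        }
        print_list("after scan ", &head);       /* prints: after scan : * 1 */

        /* pressure rises: reset the marker to the tail so the skipped
         * mapped entry is scanned again */
        del(&marker);
        add_tail(&head, &marker);
        print_list("after reset", &head);       /* prints: after reset: 1 * */
        return 0;
}

In the patch below, the marker is zone->scan_page, the "park behind the
marker" step is done by spill_on_spot() on the l_ignore list, and the reset
is the list_move_tail() performed at the top of refill_inactive_zone() when
reclaim_mapped mode is entered.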

diff -puN include/linux/mmzone.h~dont-rotate-active-list include/linux/mmzone.h
--- i386/include/linux/mmzone.h~dont-rotate-active-list Wed Jul 9 12:24:51 2003
+++ i386-god/include/linux/mmzone.h Wed Jul 9 12:24:51 2003
@@ -146,6 +146,12 @@ struct zone {
         /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
         unsigned long zone_start_pfn;
 
+ /*
+ * dummy page used as a placeholder during scanning of
+ * active_list in refill_inactive_zone()
+ */
+ struct page *scan_page;
+
         /*
          * rarely used fields:
          */
diff -puN mm/page_alloc.c~dont-rotate-active-list mm/page_alloc.c
--- i386/mm/page_alloc.c~dont-rotate-active-list Wed Jul 9 12:24:51 2003
+++ i386-god/mm/page_alloc.c Wed Jul 9 12:24:51 2003
@@ -1201,6 +1201,9 @@ void __init memmap_init_zone(struct page
         memmap_init_zone((start), (size), (nid), (zone), (start_pfn))
 #endif
 
+/* dummy pages used to scan active lists */
+static struct page scan_pages[MAX_NR_NODES][MAX_NR_ZONES];
+
 /*
  * Set up the zone data structures:
  * - mark all pages reserved
@@ -1223,6 +1226,7 @@ static void __init free_area_init_core(s
                 struct zone *zone = pgdat->node_zones + j;
                 unsigned long size, realsize;
                 unsigned long batch;
+ struct page *scan_page;
 
                 zone_table[nid * MAX_NR_ZONES + j] = zone;
                 realsize = size = zones_size[j];
@@ -1275,6 +1279,22 @@ static void __init free_area_init_core(s
                 atomic_set(&zone->refill_counter, 0);
                 zone->nr_active = 0;
                 zone->nr_inactive = 0;
+
+ /* initialize dummy page used for scanning */
+ scan_page = &scan_pages[nid][j];
+ zone->scan_page = scan_page;
+ memset(scan_page, 0, sizeof *scan_page);
+ scan_page->flags =
+ (1 << PG_locked) |
+ (1 << PG_error) |
+ (1 << PG_lru) |
+ (1 << PG_active) |
+ (1 << PG_reserved);
+ set_page_zone(scan_page, j);
+ page_cache_get(scan_page);
+ INIT_LIST_HEAD(&scan_page->list);
+ list_add(&scan_page->lru, &zone->active_list);
+
                 if (!size)
                         continue;
 
diff -puN mm/vmscan.c~dont-rotate-active-list mm/vmscan.c
--- i386/mm/vmscan.c~dont-rotate-active-list Wed Jul 9 12:24:51 2003
+++ i386-god/mm/vmscan.c Wed Jul 9 12:24:51 2003
@@ -49,14 +49,15 @@
 int vm_swappiness = 60;
 static long total_memory;
 
+#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
+
 #ifdef ARCH_HAS_PREFETCH
 #define prefetch_prev_lru_page(_page, _base, _field) \
         do { \
                 if ((_page)->lru.prev != _base) { \
                         struct page *prev; \
                                                                         \
- prev = list_entry(_page->lru.prev, \
- struct page, lru); \
+ prev = lru_to_page(&(_page)->lru); \
                         prefetch(&prev->_field); \
                 } \
         } while (0)
@@ -70,8 +71,7 @@ static long total_memory;
                 if ((_page)->lru.prev != _base) { \
                         struct page *prev; \
                                                                         \
- prev = list_entry(_page->lru.prev, \
- struct page, lru); \
+ prev = lru_to_page(&(_page)->lru); \
                         prefetchw(&prev->_field); \
                 } \
         } while (0)
@@ -350,7 +350,7 @@ shrink_list(struct list_head *page_list,
                 struct page *page;
                 int may_enter_fs;
 
- page = list_entry(page_list->prev, struct page, lru);
+ page = lru_to_page(page_list);
                 list_del(&page->lru);
 
                 if (TestSetPageLocked(page))
@@ -552,8 +552,7 @@ shrink_cache(const int nr_pages, struct
 
                 while (nr_scan++ < nr_to_process &&
                                 !list_empty(&zone->inactive_list)) {
- page = list_entry(zone->inactive_list.prev,
- struct page, lru);
+ page = lru_to_page(&zone->inactive_list);
 
                         prefetchw_prev_lru_page(page,
                                                 &zone->inactive_list, flags);
@@ -591,7 +590,7 @@ shrink_cache(const int nr_pages, struct
                  * Put back any unfreeable pages.
                  */
                 while (!list_empty(&page_list)) {
- page = list_entry(page_list.prev, struct page, lru);
+ page = lru_to_page(&page_list);
                         if (TestSetPageLRU(page))
                                 BUG();
                         list_del(&page->lru);
@@ -612,6 +611,39 @@ done:
         return ret;
 }
 
+
+/* Move pages from @page_list to @spot, which should be somewhere on
+ * @zone->active_list. */
+static int
+spill_on_spot(struct zone *zone,
+ struct list_head *page_list, struct list_head *spot,
+ struct pagevec *pvec)
+{
+ struct page *page;
+ int moved;
+
+ moved = 0;
+ while (!list_empty(page_list)) {
+ page = lru_to_page(page_list);
+ prefetchw_prev_lru_page(page, page_list, flags);
+ if (TestSetPageLRU(page))
+ BUG();
+ BUG_ON(!PageActive(page));
+ list_move(&page->lru, spot);
+ moved++;
+ if (!pagevec_add(pvec, page)) {
+ zone->nr_active += moved;
+ moved = 0;
+ spin_unlock_irq(&zone->lru_lock);
+ __pagevec_release(pvec);
+ spin_lock_irq(&zone->lru_lock);
+ }
+ }
+ return moved;
+}
+
+
+
 /*
  * This moves pages from the active list to the inactive list.
  *
@@ -638,37 +670,17 @@ refill_inactive_zone(struct zone *zone,
         int nr_pages = nr_pages_in;
         LIST_HEAD(l_hold); /* The pages which were snipped off */
         LIST_HEAD(l_inactive); /* Pages to go onto the inactive_list */
- LIST_HEAD(l_active); /* Pages to go onto the active_list */
+ LIST_HEAD(l_ignore); /* Pages to be returned to the active_list */
+ LIST_HEAD(l_active); /* Pages to go onto the head of the
+ * active_list */
         struct page *page;
+ struct page *scan;
         struct pagevec pvec;
         int reclaim_mapped = 0;
         long mapped_ratio;
         long distress;
         long swap_tendency;
 
- lru_add_drain();
- pgmoved = 0;
- spin_lock_irq(&zone->lru_lock);
- while (nr_pages && !list_empty(&zone->active_list)) {
- page = list_entry(zone->active_list.prev, struct page, lru);
- prefetchw_prev_lru_page(page, &zone->active_list, flags);
- if (!TestClearPageLRU(page))
- BUG();
- list_del(&page->lru);
- if (page_count(page) == 0) {
- /* It is currently in pagevec_release() */
- SetPageLRU(page);
- list_add(&page->lru, &zone->active_list);
- } else {
- page_cache_get(page);
- list_add(&page->lru, &l_hold);
- pgmoved++;
- }
- nr_pages--;
- }
- zone->nr_active -= pgmoved;
- spin_unlock_irq(&zone->lru_lock);
-
         /*
          * `distress' is a measure of how much trouble we're having reclaiming
          * pages. 0 -> no problems. 100 -> great trouble.
@@ -700,10 +712,53 @@ refill_inactive_zone(struct zone *zone,
         if (swap_tendency >= 100)
                 reclaim_mapped = 1;
 
+ scan = zone->scan_page;
+ lru_add_drain();
+ pgmoved = 0;
+ spin_lock_irq(&zone->lru_lock);
+ if (reclaim_mapped) {
+ /*
+ * When active_list is scanned with !reclaim_mapped, unreferenced
+ * mapped pages are left behind zone->scan_page. If the zone is
+ * switched to reclaim_mapped mode, reset zone->scan_page to the
+ * tail of active_list so that those mapped pages can be
+ * re-scanned.
+ */
+ list_move_tail(&scan->lru, &zone->active_list);
+ }
+ while (nr_pages && zone->active_list.prev != zone->active_list.next) {
+ /*
+ * if the head of the active list has been reached, wrap to the tail
+ */
+ if (scan->lru.prev == &zone->active_list)
+ list_move_tail(&scan->lru, &zone->active_list);
+ page = lru_to_page(&scan->lru);
+ prefetchw_prev_lru_page(page, &zone->active_list, flags);
+ if (!TestClearPageLRU(page))
+ BUG();
+ list_del(&page->lru);
+ if (page_count(page) == 0) {
+ /* It is currently in pagevec_release() */
+ SetPageLRU(page);
+ list_add(&page->lru, &zone->active_list);
+ } else {
+ page_cache_get(page);
+ list_add(&page->lru, &l_hold);
+ pgmoved++;
+ }
+ nr_pages--;
+ }
+ zone->nr_active -= pgmoved;
+ spin_unlock_irq(&zone->lru_lock);
+
         while (!list_empty(&l_hold)) {
- page = list_entry(l_hold.prev, struct page, lru);
+ page = lru_to_page(&l_hold);
                 list_del(&page->lru);
                 if (page_mapped(page)) {
+ /*
+ * It would probably be useful to transfer the dirty bit
+ * from the pte to @page here.
+ */
                         pte_chain_lock(page);
                         if (page_mapped(page) && page_referenced(page)) {
                                 pte_chain_unlock(page);
@@ -712,7 +767,7 @@ refill_inactive_zone(struct zone *zone,
                         }
                         pte_chain_unlock(page);
                         if (!reclaim_mapped) {
- list_add(&page->lru, &l_active);
+ list_add(&page->lru, &l_ignore);
                                 continue;
                         }
                 }
@@ -732,7 +787,7 @@ refill_inactive_zone(struct zone *zone,
         pgmoved = 0;
         spin_lock_irq(&zone->lru_lock);
         while (!list_empty(&l_inactive)) {
- page = list_entry(l_inactive.prev, struct page, lru);
+ page = lru_to_page(&l_inactive);
                 prefetchw_prev_lru_page(page, &l_inactive, flags);
                 if (TestSetPageLRU(page))
                         BUG();
@@ -760,23 +815,9 @@ refill_inactive_zone(struct zone *zone,
                 spin_lock_irq(&zone->lru_lock);
         }
 
- pgmoved = 0;
- while (!list_empty(&l_active)) {
- page = list_entry(l_active.prev, struct page, lru);
- prefetchw_prev_lru_page(page, &l_active, flags);
- if (TestSetPageLRU(page))
- BUG();
- BUG_ON(!PageActive(page));
- list_move(&page->lru, &zone->active_list);
- pgmoved++;
- if (!pagevec_add(&pvec, page)) {
- zone->nr_active += pgmoved;
- pgmoved = 0;
- spin_unlock_irq(&zone->lru_lock);
- __pagevec_release(&pvec);
- spin_lock_irq(&zone->lru_lock);
- }
- }
+ pgmoved = spill_on_spot(zone, &l_active, &zone->active_list, &pvec);
+ zone->nr_active += pgmoved;
+ pgmoved = spill_on_spot(zone, &l_ignore, &scan->lru, &pvec);
         zone->nr_active += pgmoved;
         spin_unlock_irq(&zone->lru_lock);
         pagevec_release(&pvec);

_