[PATCH v2 9/9] mm/swap: count a new anonymous page as a reclaim_stat's rotate
From: js1304
Date: Thu Feb 20 2020 - 00:12:35 EST
From: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>
reclaim_stat's rotate is used to control the ratio of pages scanned
between the file and anonymous LRUs. Before this patch series, all new
anonymous pages were counted as rotated, which protected anonymous pages
on the active LRU and made reclaim happen less often on the anonymous LRU
than on the file LRU.
Now the situation has changed: new anonymous pages are no longer added
to the active LRU, so the rotate count will be far lower than before.
As a result, reclaim will happen more often on the anonymous LRU, which
could hurt systems that are tuned for the previous behavior.
Therefore, this patch counts each new anonymous page as a rotate in
reclaim_stat. Although adding this count to reclaim_stat's rotate is not
logical in the current algorithm, reducing the regression is more
important.
I found this regression with a kernel-build test, where it shows up as a
roughly 2~5% performance degradation. With this workaround, performance
is completely restored.
v2: fix a bug where the rotate value from the previous page was reused
Reported-by: kernel test robot <lkp@xxxxxxxxx>
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>
---
mm/swap.c | 29 ++++++++++++++++++++++++++++-
1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/mm/swap.c b/mm/swap.c
index 18b2735..9001d81 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -187,6 +187,9 @@ int get_kernel_page(unsigned long start, int write, struct page **pages)
}
EXPORT_SYMBOL_GPL(get_kernel_page);
+static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
+ void *arg);
+
static void pagevec_lru_move_fn(struct pagevec *pvec,
void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg),
void *arg)
@@ -199,6 +202,7 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
for (i = 0; i < pagevec_count(pvec); i++) {
struct page *page = pvec->pages[i];
struct pglist_data *pagepgdat = page_pgdat(page);
+ void *arg_orig = arg;
if (pagepgdat != pgdat) {
if (pgdat)
@@ -207,8 +211,22 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
spin_lock_irqsave(&pgdat->lru_lock, flags);
}
+ if (move_fn == __pagevec_lru_add_fn) {
+ struct list_head *entry = &page->lru;
+ unsigned long next = (unsigned long)entry->next;
+ unsigned long rotate = next & 2;
+
+ if (rotate) {
+ VM_BUG_ON(arg);
+
+ next = next & ~2;
+ entry->next = (struct list_head *)next;
+ arg = (void *)rotate;
+ }
+ }
lruvec = mem_cgroup_page_lruvec(page, pgdat);
(*move_fn)(page, lruvec, arg);
+ arg = arg_orig;
}
if (pgdat)
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
@@ -475,6 +493,14 @@ void lru_cache_add_inactive_or_unevictable(struct page *page,
hpage_nr_pages(page));
count_vm_event(UNEVICTABLE_PGMLOCKED);
}
+
+ if (PageSwapBacked(page) && evictable) {
+ struct list_head *entry = &page->lru;
+ unsigned long next = (unsigned long)entry->next;
+
+ next = next | 2;
+ entry->next = (struct list_head *)next;
+ }
lru_cache_add(page);
}
@@ -927,6 +953,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
{
enum lru_list lru;
int was_unevictable = TestClearPageUnevictable(page);
+ unsigned long rotate = (unsigned long)arg;
VM_BUG_ON_PAGE(PageLRU(page), page);
@@ -962,7 +989,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
if (page_evictable(page)) {
lru = page_lru(page);
update_page_reclaim_stat(lruvec, page_is_file_cache(page),
- PageActive(page));
+ PageActive(page) | rotate);
if (was_unevictable)
count_vm_event(UNEVICTABLE_PGRESCUED);
} else {
--
2.7.4