[RFC PATCH v1 05/13] mm: add batching logic to add/delete/move API's

From: daniel . m . jordan
Date: Wed Jan 31 2018 - 18:15:06 EST


Change the add/delete/move LRU API's in mm_inline.h to account for LRU
batching. Now when a page is added to the front of the LRU, it's
assigned a batch number that's used to decide which spinlock in the
lru_batch_lock array to take when removing that page from the LRU. Each
newly-added page is also unconditionally made a sentinel page.

As more pages are added to the front of an LRU, the same batch number is
used for each until a threshold is reached, at which point a batch is
ready and the sentinel bits are unset in all but the first and last pages
of the batch. This allows those inner pages to be removed with a batch
lock rather than the heavier lru_lock.

Signed-off-by: Daniel Jordan <daniel.m.jordan@xxxxxxxxxx>
---
include/linux/mm_inline.h | 119 ++++++++++++++++++++++++++++++++++++++++++++--
include/linux/mmzone.h | 3 ++
mm/swap.c | 2 +-
mm/vmscan.c | 4 +-
4 files changed, 122 insertions(+), 6 deletions(-)

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index d7fc46ebc33b..ec8b966a1c76 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -3,6 +3,7 @@
#define LINUX_MM_INLINE_H

#include <linux/huge_mm.h>
+#include <linux/random.h>
#include <linux/swap.h>

/**
@@ -44,27 +45,139 @@ static __always_inline void update_lru_size(struct lruvec *lruvec,
#endif
}

+static __always_inline void __add_page_to_lru_list(struct page *page,
+ struct lruvec *lruvec, enum lru_list lru)
+{
+ int tag;
+ struct page *cur, *next, *second_page;
+ struct lru_list_head *head = &lruvec->lists[lru];
+
+ list_add(&page->lru, lru_head(head));
+ /* Set sentinel unconditionally until batch is full. */
+ page->lru_sentinel = true;
+
+ second_page = container_of(page->lru.next, struct page, lru);
+ VM_BUG_ON_PAGE(!second_page->lru_sentinel, second_page);
+
+ page->lru_batch = head->first_batch_tag;
+ ++head->first_batch_npages;
+
+ if (head->first_batch_npages < LRU_BATCH_MAX)
+ return;
+
+ tag = head->first_batch_tag;
+ if (likely(second_page->lru_batch == tag)) {
+ /* Unset sentinel bit in all non-sentinel nodes. */
+ cur = second_page;
+ list_for_each_entry_from(cur, lru_head(head), lru) {
+ next = list_next_entry(cur, lru);
+ if (next->lru_batch != tag)
+ break;
+ cur->lru_sentinel = false;
+ }
+ }
+
+ tag = prandom_u32_max(NUM_LRU_BATCH_LOCKS);
+ if (unlikely(tag == head->first_batch_tag))
+ tag = (tag + 1) % NUM_LRU_BATCH_LOCKS;
+ head->first_batch_tag = tag;
+ head->first_batch_npages = 0;
+}
+
static __always_inline void add_page_to_lru_list(struct page *page,
struct lruvec *lruvec, enum lru_list lru)
{
update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page));
- list_add(&page->lru, lru_head(&lruvec->lists[lru]));
+ __add_page_to_lru_list(page, lruvec, lru);
+}
+
+static __always_inline void __add_page_to_lru_list_tail(struct page *page,
+ struct lruvec *lruvec, enum lru_list lru)
+{
+ int tag;
+ struct page *cur, *prev, *second_page;
+ struct lru_list_head *head = &lruvec->lists[lru];
+
+ list_add_tail(&page->lru, lru_head(head));
+ /* Set sentinel unconditionally until batch is full. */
+ page->lru_sentinel = true;
+
+ second_page = container_of(page->lru.prev, struct page, lru);
+ VM_BUG_ON_PAGE(!second_page->lru_sentinel, second_page);
+
+ page->lru_batch = head->last_batch_tag;
+ ++head->last_batch_npages;
+
+ if (head->last_batch_npages < LRU_BATCH_MAX)
+ return;
+
+ tag = head->last_batch_tag;
+ if (likely(second_page->lru_batch == tag)) {
+ /* Unset sentinel bit in all non-sentinel nodes. */
+ cur = second_page;
+ list_for_each_entry_from_reverse(cur, lru_head(head), lru) {
+ prev = list_prev_entry(cur, lru);
+ if (prev->lru_batch != tag)
+ break;
+ cur->lru_sentinel = false;
+ }
+ }
+
+ tag = prandom_u32_max(NUM_LRU_BATCH_LOCKS);
+ if (unlikely(tag == head->last_batch_tag))
+ tag = (tag + 1) % NUM_LRU_BATCH_LOCKS;
+ head->last_batch_tag = tag;
+ head->last_batch_npages = 0;
}

static __always_inline void add_page_to_lru_list_tail(struct page *page,
struct lruvec *lruvec, enum lru_list lru)
{
+
update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page));
- list_add_tail(&page->lru, lru_head(&lruvec->lists[lru]));
+ __add_page_to_lru_list_tail(page, lruvec, lru);
}

-static __always_inline void del_page_from_lru_list(struct page *page,
+static __always_inline void __del_page_from_lru_list(struct page *page,
struct lruvec *lruvec, enum lru_list lru)
{
+ struct page *left, *right;
+
+ left = container_of(page->lru.prev, struct page, lru);
+ right = container_of(page->lru.next, struct page, lru);
+
+ if (page->lru_sentinel) {
+ VM_BUG_ON(!left->lru_sentinel && !right->lru_sentinel);
+ left->lru_sentinel = true;
+ right->lru_sentinel = true;
+ }
+
list_del(&page->lru);
+}
+
+static __always_inline void del_page_from_lru_list(struct page *page,
+ struct lruvec *lruvec, enum lru_list lru)
+{
+ __del_page_from_lru_list(page, lruvec, lru);
update_lru_size(lruvec, lru, page_zonenum(page), -hpage_nr_pages(page));
}

+static __always_inline void move_page_to_lru_list(struct page *page,
+ struct lruvec *lruvec,
+ enum lru_list lru)
+{
+ __del_page_from_lru_list(page, lruvec, lru);
+ __add_page_to_lru_list(page, lruvec, lru);
+}
+
+static __always_inline void move_page_to_lru_list_tail(struct page *page,
+ struct lruvec *lruvec,
+ enum lru_list lru)
+{
+ __del_page_from_lru_list(page, lruvec, lru);
+ __add_page_to_lru_list_tail(page, lruvec, lru);
+}
+
/**
* page_lru_base_type - which LRU list type should a page be on?
* @page: the page to test
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index feca75b8f492..492f86cdb346 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -19,6 +19,7 @@
#include <linux/page-flags-layout.h>
#include <linux/atomic.h>
#include <linux/mm_types.h>
+#include <linux/pagevec.h>
#include <asm/page.h>

/* Free memory management - zoned buddy allocator. */
@@ -260,6 +261,8 @@ struct lruvec {
#define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON))
#define LRU_ALL ((1 << NR_LRU_LISTS) - 1)

+#define LRU_BATCH_MAX PAGEVEC_SIZE
+
#define NUM_LRU_BATCH_LOCKS 32
struct lru_batch_lock {
spinlock_t lock;
diff --git a/mm/swap.c b/mm/swap.c
index 286636bb6a4f..67eb89fc9435 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -561,7 +561,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
* The page's writeback ends up during pagevec
* We moves tha page into tail of inactive.
*/
- list_move_tail(&page->lru, lru_head(&lruvec->lists[lru]));
+ move_page_to_lru_list_tail(page, lruvec, lru);
__count_vm_event(PGROTATED);
}

diff --git a/mm/vmscan.c b/mm/vmscan.c
index aa629c4720dd..b4c32a65a40f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1553,7 +1553,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,

case -EBUSY:
/* else it is being freed elsewhere */
- list_move(&page->lru, src);
+ move_page_to_lru_list(page, lruvec, lru);
continue;

default:
@@ -1943,7 +1943,7 @@ static unsigned move_active_pages_to_lru(struct lruvec *lruvec,

nr_pages = hpage_nr_pages(page);
update_lru_size(lruvec, lru, page_zonenum(page), nr_pages);
- list_move(&page->lru, lru_head(&lruvec->lists[lru]));
+ move_page_to_lru_list(page, lruvec, lru);

if (put_page_testzero(page)) {
__ClearPageLRU(page);
--
2.16.1