[PATCH -v4 6/9] mm, THP, swap: Support to add/delete THP to/from swap cache

From: Huang, Ying
Date: Thu Sep 29 2016 - 02:35:32 EST


From: Huang Ying <ying.huang@xxxxxxxxx>

With this patch, a THP (Transparent Huge Page) can be added/deleted
to/from the swap cache as a set of (HPAGE_PMD_NR) sub-pages.

This will be used for the THP (Transparent Huge Page) swap support.
Where one THP may be added/delted to/from the swap cache. This will
batch the swap cache operations to reduce the lock acquire/release times
for the THP swap too.

Cc: Hugh Dickins <hughd@xxxxxxxxxx>
Cc: Shaohua Li <shli@xxxxxxxxxx>
Cc: Minchan Kim <minchan@xxxxxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxx>
Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Cc: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
Signed-off-by: "Huang, Ying" <ying.huang@xxxxxxxxx>
---
include/linux/page-flags.h | 2 +-
mm/swap_state.c | 58 ++++++++++++++++++++++++++++++++--------------
2 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 74e4dda..f5bcbea 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -314,7 +314,7 @@ PAGEFLAG_FALSE(HighMem)
#endif

#ifdef CONFIG_SWAP
-PAGEFLAG(SwapCache, swapcache, PF_NO_COMPOUND)
+PAGEFLAG(SwapCache, swapcache, PF_NO_TAIL)
#else
PAGEFLAG_FALSE(SwapCache)
#endif
diff --git a/mm/swap_state.c b/mm/swap_state.c
index d3f047b..3115762 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -43,6 +43,7 @@ struct address_space swapper_spaces[MAX_SWAPFILES] = {
};

#define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0)
+#define ADD_CACHE_INFO(x, nr) do { swap_cache_info.x += (nr); } while (0)

static struct {
unsigned long add_total;
@@ -80,25 +81,33 @@ void show_swap_cache_info(void)
*/
int __add_to_swap_cache(struct page *page, swp_entry_t entry)
{
- int error;
+ int error, i, nr = hpage_nr_pages(page);
struct address_space *address_space;
+ struct page *cur_page;
+ swp_entry_t cur_entry;

VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageSwapCache(page), page);
VM_BUG_ON_PAGE(!PageSwapBacked(page), page);

- get_page(page);
+ page_ref_add(page, nr);
SetPageSwapCache(page);
- set_page_private(page, entry.val);

address_space = swap_address_space(entry);
+ cur_page = page;
+ cur_entry.val = entry.val;
spin_lock_irq(&address_space->tree_lock);
- error = radix_tree_insert(&address_space->page_tree,
- swp_offset(entry), page);
+ for (i = 0; i < nr; i++, cur_page++, cur_entry.val++) {
+ set_page_private(cur_page, cur_entry.val);
+ error = radix_tree_insert(&address_space->page_tree,
+ swp_offset(cur_entry), cur_page);
+ if (unlikely(error))
+ break;
+ }
if (likely(!error)) {
- address_space->nrpages++;
- __inc_node_page_state(page, NR_FILE_PAGES);
- INC_CACHE_INFO(add_total);
+ address_space->nrpages += nr;
+ __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
+ ADD_CACHE_INFO(add_total, nr);
}
spin_unlock_irq(&address_space->tree_lock);

@@ -109,9 +118,16 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry)
* So add_to_swap_cache() doesn't returns -EEXIST.
*/
VM_BUG_ON(error == -EEXIST);
- set_page_private(page, 0UL);
ClearPageSwapCache(page);
- put_page(page);
+ set_page_private(cur_page, 0UL);
+ while (i--) {
+ cur_page--;
+ cur_entry.val--;
+ set_page_private(cur_page, 0UL);
+ radix_tree_delete(&address_space->page_tree,
+ swp_offset(cur_entry));
+ }
+ page_ref_sub(page, nr);
}

return error;
@@ -122,7 +138,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
{
int error;

- error = radix_tree_maybe_preload(gfp_mask);
+ error = radix_tree_maybe_preload_order(gfp_mask, compound_order(page));
if (!error) {
error = __add_to_swap_cache(page, entry);
radix_tree_preload_end();
@@ -138,6 +154,7 @@ void __delete_from_swap_cache(struct page *page)
{
swp_entry_t entry;
struct address_space *address_space;
+ int i, nr = hpage_nr_pages(page);

VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(!PageSwapCache(page), page);
@@ -145,12 +162,17 @@ void __delete_from_swap_cache(struct page *page)

entry.val = page_private(page);
address_space = swap_address_space(entry);
- radix_tree_delete(&address_space->page_tree, swp_offset(entry));
- set_page_private(page, 0);
ClearPageSwapCache(page);
- address_space->nrpages--;
- __dec_node_page_state(page, NR_FILE_PAGES);
- INC_CACHE_INFO(del_total);
+ for (i = 0; i < nr; i++, entry.val++) {
+ struct page *cur_page = page + i;
+
+ radix_tree_delete(&address_space->page_tree,
+ swp_offset(entry));
+ set_page_private(cur_page, 0);
+ }
+ address_space->nrpages -= nr;
+ __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
+ ADD_CACHE_INFO(del_total, nr);
}

/**
@@ -227,8 +249,8 @@ void delete_from_swap_cache(struct page *page)
__delete_from_swap_cache(page);
spin_unlock_irq(&address_space->tree_lock);

- swapcache_free(entry);
- put_page(page);
+ __swapcache_free(entry, PageTransHuge(page));
+ page_ref_sub(page, hpage_nr_pages(page));
}

/*
--
2.9.3