[PATCH] mm, hugetlb: set PageLRU for in-use/active hugepages

From: Naoya Horiguchi
Date: Mon Feb 16 2015 - 22:27:11 EST


Currently we are not safe from concurrent calls of isolate_huge_page(),
which can leave the victim hugepage in an invalid state and result in BUG_ON().

The root problem is that struct page carries no easily accessible information
about the hugepage's activeness. Note that a hugepage's activeness means just
being linked to hstate->hugepage_activelist, which is not the same as a normal
page's activeness represented by the PageActive flag.

Normal pages are isolated by isolate_lru_page(), which prechecks PageLRU before
isolation, so let's do the same for hugetlb. PageLRU is unused on hugetlb,
so this change is mostly straightforward. One non-straightforward point is that
__put_compound_page() calls __page_cache_release() to do some LRU work,
but this is obviously meant for THPs and assumes that hugetlb pages are always
!PageLRU. This assumption no longer holds, so this patch simply adds
if (!PageHuge) to avoid calling __page_cache_release() for hugetlb.

Set/ClearPageLRU should be called under hugetlb_lock, but hugetlb_cow() and
hugetlb_no_page() don't do this. This is justified because in these functions
SetPageLRU is called right after the hugepage is allocated, and no other thread
tries to isolate it at that point.

Fixes: commit 31caf665e666 ("mm: migrate: make core migration code aware of hugepage")
Signed-off-by: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx>
Cc: <stable@xxxxxxxxxxxxxxx> [3.12+]
---
mm/hugetlb.c | 17 ++++++++++++++---
mm/swap.c | 4 +++-
2 files changed, 17 insertions(+), 4 deletions(-)

diff --git v3.19_with_hugemigration_fixes.orig/mm/hugetlb.c v3.19_with_hugemigration_fixes/mm/hugetlb.c
index a2bfd02e289f..e28489270d9a 100644
--- v3.19_with_hugemigration_fixes.orig/mm/hugetlb.c
+++ v3.19_with_hugemigration_fixes/mm/hugetlb.c
@@ -830,7 +830,7 @@ static void update_and_free_page(struct hstate *h, struct page *page)
page[i].flags &= ~(1 << PG_locked | 1 << PG_error |
1 << PG_referenced | 1 << PG_dirty |
1 << PG_active | 1 << PG_private |
- 1 << PG_writeback);
+ 1 << PG_writeback | 1 << PG_lru);
}
VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
set_compound_page_dtor(page, NULL);
@@ -875,6 +875,7 @@ void free_huge_page(struct page *page)
ClearPagePrivate(page);

spin_lock(&hugetlb_lock);
+ ClearPageLRU(page);
hugetlb_cgroup_uncharge_page(hstate_index(h),
pages_per_huge_page(h), page);
if (restore_reserve)
@@ -2889,6 +2890,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
copy_user_huge_page(new_page, old_page, address, vma,
pages_per_huge_page(h));
__SetPageUptodate(new_page);
+ SetPageLRU(new_page);

mmun_start = address & huge_page_mask(h);
mmun_end = mmun_start + huge_page_size(h);
@@ -3001,6 +3003,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
}
clear_huge_page(page, address, pages_per_huge_page(h));
__SetPageUptodate(page);
+ SetPageLRU(page);

if (vma->vm_flags & VM_MAYSHARE) {
int err;
@@ -3794,6 +3797,7 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage)
* so let it point to itself with list_del_init().
*/
list_del_init(&hpage->lru);
+ ClearPageLRU(hpage);
set_page_refcounted(hpage);
h->free_huge_pages--;
h->free_huge_pages_node[nid]--;
@@ -3806,11 +3810,17 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage)

bool isolate_huge_page(struct page *page, struct list_head *list)
{
+ bool ret = true;
+
VM_BUG_ON_PAGE(!PageHead(page), page);
- if (!get_page_unless_zero(page))
- return false;
spin_lock(&hugetlb_lock);
+ if (!PageLRU(page) || !get_page_unless_zero(page)) {
+ ret = false;
+ goto unlock;
+ }
+ ClearPageLRU(page);
list_move_tail(&page->lru, list);
+unlock:
spin_unlock(&hugetlb_lock);
- return true;
+ return ret;
}
@@ -3819,6 +3829,7 @@ void putback_active_hugepage(struct page *page)
{
VM_BUG_ON_PAGE(!PageHead(page), page);
spin_lock(&hugetlb_lock);
+ SetPageLRU(page);
list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist);
spin_unlock(&hugetlb_lock);
put_page(page);
diff --git v3.19_with_hugemigration_fixes.orig/mm/swap.c v3.19_with_hugemigration_fixes/mm/swap.c
index 8a12b33936b4..ea8fe72999a8 100644
--- v3.19_with_hugemigration_fixes.orig/mm/swap.c
+++ v3.19_with_hugemigration_fixes/mm/swap.c
@@ -31,6 +31,7 @@
#include <linux/memcontrol.h>
#include <linux/gfp.h>
#include <linux/uio.h>
+#include <linux/hugetlb.h>

#include "internal.h"

@@ -75,7 +76,8 @@ static void __put_compound_page(struct page *page)
{
compound_page_dtor *dtor;

- __page_cache_release(page);
+ if (!PageHuge(page))
+ __page_cache_release(page);
dtor = get_compound_page_dtor(page);
(*dtor)(page);
}
--
1.9.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/