[PATCH] mm/hugetlb: make hugetlb migration callback CMA aware

From: Joonsoo Kim
Date: Wed Jul 08 2020 - 01:39:26 EST


new_non_cma_page() in gup.c, which tries to allocate a migration target
page, needs to allocate a new page that is not in the CMA area.
new_non_cma_page() implements this by clearing the __GFP_MOVABLE flag.
This approach works well for THP and normal pages, but not for hugetlb
pages.
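
For reference, the pre-patch approach for the normal page and THP paths
looks roughly like this (a simplified sketch of the logic removed in the
diff below):

	gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN;

	/* Clearing __GFP_MOVABLE keeps the allocation out of CMA pageblocks */
	thp_gfpmask &= ~__GFP_MOVABLE;
	thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);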

The hugetlb page allocation process consists of two steps. The first is
dequeuing from the free page pool. The second is, if no page is available
in the pool, allocating from the page allocator.
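
The hugetlb allocation entry point used for migration,
alloc_huge_page_nodemask(), roughly has the following shape (a simplified
sketch; locking and reservation accounting omitted):

	/* Step 1: try to dequeue a free hugetlb page from the pool */
	page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask);
	if (page)
		return page;

	/* Step 2: no pooled page available, go to the page allocator */
	return alloc_migrate_huge_page(h, gfp_mask, preferred_nid, nmask);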

new_non_cma_page() can control allocation from the page allocator by
specifying the correct gfp flags. However, until now, the dequeue step
could not be controlled, so new_non_cma_page() skips dequeuing entirely.
This is suboptimal since new_non_cma_page() cannot utilize hugetlb pages
already sitting in the pool, so this patch fixes the situation.

This patch makes new_non_cma_page() use memalloc_nocma_{save,restore} to
exclude CMA memory rather than manually clearing __GFP_MOVABLE. It also
makes the hugetlb dequeue path CMA aware: in the dequeue function, CMA
pages are skipped if the PF_MEMALLOC_NOCMA flag has been set via
memalloc_nocma_save().
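
The resulting usage pattern in new_non_cma_page() is roughly (a sketch of
the pattern introduced in the diff below):

	unsigned int flags = memalloc_nocma_save();

	/* allocations in this scope, including the hugetlb dequeue,
	 * skip CMA pages because PF_MEMALLOC_NOCMA is set */
	new_page = alloc_huge_page_nodemask(h, nid, NULL, gfp_mask);

	memalloc_nocma_restore(flags);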

Acked-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>
---
include/linux/hugetlb.h | 2 --
mm/gup.c | 32 +++++++++++++++-----------------
mm/hugetlb.c | 11 +++++++++--
3 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index bb93e95..34a10e5 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -509,8 +509,6 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask);
struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
unsigned long address);
-struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
- int nid, nodemask_t *nmask);
int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
pgoff_t idx);

diff --git a/mm/gup.c b/mm/gup.c
index 5daadae..79142a9 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1623,6 +1623,8 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
* allocation memory.
*/
gfp_t gfp_mask = GFP_USER | __GFP_NOWARN;
+ unsigned int flags = memalloc_nocma_save();
+ struct page *new_page = NULL;

if (PageHighMem(page))
gfp_mask |= __GFP_HIGHMEM;
@@ -1630,33 +1632,29 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
#ifdef CONFIG_HUGETLB_PAGE
if (PageHuge(page)) {
struct hstate *h = page_hstate(page);
- /*
- * We don't want to dequeue from the pool because pool pages will
- * mostly be from the CMA region.
- */
- return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
+
+ new_page = alloc_huge_page_nodemask(h, nid, NULL, gfp_mask);
+ goto out;
}
#endif
+
if (PageTransHuge(page)) {
- struct page *thp;
/*
* ignore allocation failure warnings
*/
gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN;

- /*
- * Remove the movable mask so that we don't allocate from
- * CMA area again.
- */
- thp_gfpmask &= ~__GFP_MOVABLE;
- thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
- if (!thp)
- return NULL;
- prep_transhuge_page(thp);
- return thp;
+ new_page = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
+ if (new_page)
+ prep_transhuge_page(new_page);
+ goto out;
}

- return __alloc_pages_node(nid, gfp_mask, 0);
+ new_page = __alloc_pages_node(nid, gfp_mask, 0);
+
+out:
+ memalloc_nocma_restore(flags);
+ return new_page;
}

static long check_and_migrate_cma_pages(struct task_struct *tsk,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3245aa0..514e29c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -29,6 +29,7 @@
#include <linux/numa.h>
#include <linux/llist.h>
#include <linux/cma.h>
+#include <linux/sched/mm.h>

#include <asm/page.h>
#include <asm/tlb.h>
@@ -1036,10 +1037,16 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
{
struct page *page;
+ bool nocma = !!(READ_ONCE(current->flags) & PF_MEMALLOC_NOCMA);
+
+ list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
+ if (nocma && is_migrate_cma_page(page))
+ continue;

- list_for_each_entry(page, &h->hugepage_freelists[nid], lru)
if (!PageHWPoison(page))
break;
+ }
+
/*
* if 'non-isolated free hugepage' not found on the list,
* the allocation fails.
@@ -1928,7 +1935,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
return page;
}

-struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
+static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
int nid, nodemask_t *nmask)
{
struct page *page;
--
2.7.4