[PATCH 5.11 207/329] mm/gup: check for isolation errors

From: Greg Kroah-Hartman
Date: Mon May 17 2021 - 11:33:46 EST


From: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx>

[ Upstream commit 6e7f34ebb8d25d71ce7f4580ba3cbfc10b895580 ]

It is still possible to pin movable CMA pages: if isolating a page fails,
it is never added to cma_page_list, so the list can stay empty when we
check again and we return success with CMA pages still pinned.

Count isolation errors, and return success only when there were no
isolation errors and cma_page_list is empty after checking.

Because isolation errors are transient, we retry indefinitely.

Link: https://lkml.kernel.org/r/20210215161349.246722-5-pasha.tatashin@xxxxxxxxxx
Fixes: 9a4e9f3b2d73 ("mm: update get_user_pages_longterm to migrate pages allocated from CMA region")
Signed-off-by: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx>
Reviewed-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Cc: David Hildenbrand <david@xxxxxxxxxx>
Cc: David Rientjes <rientjes@xxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Ira Weiny <ira.weiny@xxxxxxxxx>
Cc: James Morris <jmorris@xxxxxxxxx>
Cc: Jason Gunthorpe <jgg@xxxxxxxx>
Cc: John Hubbard <jhubbard@xxxxxxxxxx>
Cc: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>
Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Cc: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
Cc: Oscar Salvador <osalvador@xxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Sasha Levin <sashal@xxxxxxxxxx>
Cc: Steven Rostedt (VMware) <rostedt@xxxxxxxxxxx>
Cc: Tyler Hicks <tyhicks@xxxxxxxxxxxxxxxxxxx>
Cc: Vlastimil Babka <vbabka@xxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
mm/gup.c | 60 ++++++++++++++++++++++++++++++++------------------------
1 file changed, 34 insertions(+), 26 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 2d7a567b4056..0cdb93e98d00 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1548,8 +1548,8 @@ static long check_and_migrate_cma_pages(struct mm_struct *mm,
struct vm_area_struct **vmas,
unsigned int gup_flags)
{
- unsigned long i;
- bool drain_allow = true;
+ unsigned long i, isolation_error_count;
+ bool drain_allow;
LIST_HEAD(cma_page_list);
long ret = nr_pages;
struct page *prev_head, *head;
@@ -1560,6 +1560,8 @@ static long check_and_migrate_cma_pages(struct mm_struct *mm,

check_again:
prev_head = NULL;
+ isolation_error_count = 0;
+ drain_allow = true;
for (i = 0; i < nr_pages; i++) {
head = compound_head(pages[i]);
if (head == prev_head)
@@ -1571,25 +1573,35 @@ check_again:
* of the CMA zone if possible.
*/
if (is_migrate_cma_page(head)) {
- if (PageHuge(head))
- isolate_huge_page(head, &cma_page_list);
- else {
+ if (PageHuge(head)) {
+ if (!isolate_huge_page(head, &cma_page_list))
+ isolation_error_count++;
+ } else {
if (!PageLRU(head) && drain_allow) {
lru_add_drain_all();
drain_allow = false;
}

- if (!isolate_lru_page(head)) {
- list_add_tail(&head->lru, &cma_page_list);
- mod_node_page_state(page_pgdat(head),
- NR_ISOLATED_ANON +
- page_is_file_lru(head),
- thp_nr_pages(head));
+ if (isolate_lru_page(head)) {
+ isolation_error_count++;
+ continue;
}
+ list_add_tail(&head->lru, &cma_page_list);
+ mod_node_page_state(page_pgdat(head),
+ NR_ISOLATED_ANON +
+ page_is_file_lru(head),
+ thp_nr_pages(head));
}
}
}

+ /*
+ * If list is empty, and no isolation errors, means that all pages are
+ * in the correct zone.
+ */
+ if (list_empty(&cma_page_list) && !isolation_error_count)
+ return ret;
+
if (!list_empty(&cma_page_list)) {
/*
* drop the above get_user_pages reference.
@@ -1609,23 +1621,19 @@ check_again:
return ret > 0 ? -ENOMEM : ret;
}

- /*
- * We did migrate all the pages, Try to get the page references
- * again migrating any new CMA pages which we failed to isolate
- * earlier.
- */
- ret = __get_user_pages_locked(mm, start, nr_pages,
- pages, vmas, NULL,
- gup_flags);
-
- if (ret > 0) {
- nr_pages = ret;
- drain_allow = true;
- goto check_again;
- }
+ /* We unpinned pages before migration, pin them again */
+ ret = __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
+ NULL, gup_flags);
+ if (ret <= 0)
+ return ret;
+ nr_pages = ret;
}

- return ret;
+ /*
+ * check again because pages were unpinned, and we also might have
+ * had isolation errors and need more pages to migrate.
+ */
+ goto check_again;
}
#else
static long check_and_migrate_cma_pages(struct mm_struct *mm,
--
2.30.2