[PATCH 30/46] hugetlb: add high-granularity migration support
From: James Houghton
Date: Thu Jan 05 2023 - 05:22:35 EST
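With high-granularity mappings, a single hugepage can be mapped by several
PTEs, so queue_pages_hugetlb() may be called more than once for the same
hugepage. To prevent queueing a hugepage for migration multiple times, we
use last_page to keep track of the last page we tried to queue in
queue_pages_hugetlb(), and if the page we're looking at is last_page, then
we skip it.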
For the non-hugetlb cases, last_page, although unused, is still updated
so that it has a consistent meaning with the hugetlb case.
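This commit also updates the handling of high-granularity migration PTEs in
hugetlb_fault(): the full hugetlb_pte is now passed to
migration_entry_wait_huge(), which waits using the page table lock recorded
in the hugetlb_pte (hpte->ptl) instead of deriving the lock from the hstate
size.
Finally, remove_migration_pte() now computes the subpage index of a HugeTLB
folio from the mapped address, adds the rmap on the head page, and reports
pvmw.pte_order in its tracepoint.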
Signed-off-by: James Houghton <jthoughton@xxxxxxxxxx>
---
include/linux/swapops.h | 8 ++++++--
mm/hugetlb.c | 2 +-
mm/mempolicy.c | 24 +++++++++++++++++++-----
mm/migrate.c | 18 ++++++++++--------
4 files changed, 36 insertions(+), 16 deletions(-)
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 3a451b7afcb3..6ef80763e629 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -68,6 +68,8 @@
static inline bool is_pfn_swap_entry(swp_entry_t entry);
+struct hugetlb_pte;
+
/* Clear all flags but only keep swp_entry_t related information */
static inline pte_t pte_swp_clear_flags(pte_t pte)
{
@@ -339,7 +341,8 @@ extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
#ifdef CONFIG_HUGETLB_PAGE
extern void __migration_entry_wait_huge(struct vm_area_struct *vma,
pte_t *ptep, spinlock_t *ptl);
-extern void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte);
+extern void migration_entry_wait_huge(struct vm_area_struct *vma,
+ struct hugetlb_pte *hpte);
#endif /* CONFIG_HUGETLB_PAGE */
#else /* CONFIG_MIGRATION */
static inline swp_entry_t make_readable_migration_entry(pgoff_t offset)
@@ -369,7 +372,8 @@ static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
#ifdef CONFIG_HUGETLB_PAGE
static inline void __migration_entry_wait_huge(struct vm_area_struct *vma,
pte_t *ptep, spinlock_t *ptl) { }
-static inline void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte) { }
+static inline void migration_entry_wait_huge(struct vm_area_struct *vma,
+ struct hugetlb_pte *hpte) { }
#endif /* CONFIG_HUGETLB_PAGE */
static inline int is_writable_migration_entry(swp_entry_t entry)
{
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8e690a22456a..2fb95ecafc63 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6269,7 +6269,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* be released there.
*/
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
- migration_entry_wait_huge(vma, hpte.ptep);
+ migration_entry_wait_huge(vma, &hpte);
return 0;
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
ret = VM_FAULT_HWPOISON_LARGE |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e5859ed34e90..6c4c3c923fa2 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -424,6 +424,7 @@ struct queue_pages {
unsigned long start;
unsigned long end;
struct vm_area_struct *first;
+ struct page *last_page;
};
/*
@@ -475,6 +476,7 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
flags = qp->flags;
/* go to thp migration */
if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+ qp->last_page = page;
if (!vma_migratable(walk->vma) ||
migrate_page_add(page, qp->pagelist, flags)) {
ret = 1;
@@ -532,6 +534,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
continue;
if (!queue_pages_required(page, qp))
continue;
+
if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
/* MPOL_MF_STRICT must be specified if we get here */
if (!vma_migratable(vma)) {
@@ -539,6 +542,8 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
break;
}
+ qp->last_page = page;
+
/*
* Do not abort immediately since there may be
* temporary off LRU pages in the range. Still
@@ -570,15 +575,22 @@ static int queue_pages_hugetlb(struct hugetlb_pte *hpte,
spinlock_t *ptl;
pte_t entry;
- /* We don't migrate high-granularity HugeTLB mappings for now. */
- if (hugetlb_hgm_enabled(walk->vma))
- return -EINVAL;
-
ptl = hugetlb_pte_lock(hpte);
entry = huge_ptep_get(hpte->ptep);
if (!pte_present(entry))
goto unlock;
- page = pte_page(entry);
+
+ if (!hugetlb_pte_present_leaf(hpte, entry)) {
+ ret = -EAGAIN;
+ goto unlock;
+ }
+
+ page = compound_head(pte_page(entry));
+
+ /* We already queued this page with another high-granularity PTE. */
+ if (page == qp->last_page)
+ goto unlock;
+
if (!queue_pages_required(page, qp))
goto unlock;
@@ -605,6 +617,7 @@ static int queue_pages_hugetlb(struct hugetlb_pte *hpte,
/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
if (flags & (MPOL_MF_MOVE_ALL) ||
(flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) {
+ qp->last_page = page;
if (isolate_hugetlb(page, qp->pagelist) &&
(flags & MPOL_MF_STRICT))
/*
@@ -739,6 +752,7 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
.start = start,
.end = end,
.first = NULL,
+ .last_page = NULL,
};
err = walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);
diff --git a/mm/migrate.c b/mm/migrate.c
index 0062689f4878..c30647b75459 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -195,6 +195,9 @@ static bool remove_migration_pte(struct folio *folio,
/* pgoff is invalid for ksm pages, but they are never large */
if (folio_test_large(folio) && !folio_test_hugetlb(folio))
idx = linear_page_index(vma, pvmw.address) - pvmw.pgoff;
+ else if (folio_test_hugetlb(folio))
+ idx = (pvmw.address & ~huge_page_mask(hstate_vma(vma)))/
+ PAGE_SIZE;
new = folio_page(folio, idx);
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
@@ -244,14 +247,15 @@ static bool remove_migration_pte(struct folio *folio,
#ifdef CONFIG_HUGETLB_PAGE
if (folio_test_hugetlb(folio)) {
+ struct page *hpage = folio_page(folio, 0);
unsigned int shift = pvmw.pte_order + PAGE_SHIFT;
pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
if (folio_test_anon(folio))
- hugepage_add_anon_rmap(new, vma, pvmw.address,
+ hugepage_add_anon_rmap(hpage, vma, pvmw.address,
rmap_flags);
else
- page_dup_file_rmap(new, true);
+ page_dup_file_rmap(hpage, true);
set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
} else
#endif
@@ -267,7 +271,7 @@ static bool remove_migration_pte(struct folio *folio,
mlock_page_drain_local();
trace_remove_migration_pte(pvmw.address, pte_val(pte),
- compound_order(new));
+ pvmw.pte_order);
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, pvmw.address, pvmw.pte);
@@ -358,12 +362,10 @@ void __migration_entry_wait_huge(struct vm_area_struct *vma,
}
}
-void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte)
+void migration_entry_wait_huge(struct vm_area_struct *vma,
+ struct hugetlb_pte *hpte)
{
- spinlock_t *ptl = huge_pte_lockptr(huge_page_shift(hstate_vma(vma)),
- vma->vm_mm, pte);
-
- __migration_entry_wait_huge(vma, pte, ptl);
+ __migration_entry_wait_huge(vma, hpte->ptep, hpte->ptl);
}
#endif
--
2.39.0.314.g84b9a713c41-goog