[RFC PATCH 2/2] mm/khugepaged: Remove compound_pagelist

From: Vishal Moola (Oracle)
Date: Fri Sep 22 2023 - 15:37:00 EST


Currently, khugepaged builds a compound_pagelist while scanning, which
is used to properly account for compound pages. We can now account
for a compound page as a single folio instead, so remove this list.

Large folios are guaranteed to have consecutive ptes and addresses, so
once the first pte of a large folio is found, skip over the rest.

This helps convert khugepaged to use folios. It removes 3 compound_head
calls in __collapse_huge_page_copy_succeeded(), and removes 980 bytes of
kernel text.

Signed-off-by: Vishal Moola (Oracle) <vishal.moola@xxxxxxxxx>
---
mm/khugepaged.c | 76 ++++++++++++-------------------------------------
1 file changed, 18 insertions(+), 58 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index f46a7a7c489f..b6c7d55a8231 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -498,10 +498,9 @@ static void release_pte_page(struct page *page)
release_pte_folio(page_folio(page));
}

-static void release_pte_pages(pte_t *pte, pte_t *_pte,
- struct list_head *compound_pagelist)
+static void release_pte_folios(pte_t *pte, pte_t *_pte)
{
- struct folio *folio, *tmp;
+ struct folio *folio;

while (--_pte >= pte) {
pte_t pteval = ptep_get(_pte);
@@ -514,12 +513,7 @@ static void release_pte_pages(pte_t *pte, pte_t *_pte,
continue;
folio = pfn_folio(pfn);
if (folio_test_large(folio))
- continue;
- release_pte_folio(folio);
- }
-
- list_for_each_entry_safe(folio, tmp, compound_pagelist, lru) {
- list_del(&folio->lru);
+ _pte -= folio_nr_pages(folio) - 1;
release_pte_folio(folio);
}
}
@@ -538,8 +532,7 @@ static bool is_refcount_suitable(struct page *page)
static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
unsigned long address,
pte_t *pte,
- struct collapse_control *cc,
- struct list_head *compound_pagelist)
+ struct collapse_control *cc)
{
struct folio *folio = NULL;
pte_t *_pte;
@@ -588,19 +581,6 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
}
}

- if (folio_test_large(folio)) {
- struct folio *f;
-
- /*
- * Check if we have dealt with the compound page
- * already
- */
- list_for_each_entry(f, compound_pagelist, lru) {
- if (folio == f)
- goto next;
- }
- }
-
/*
* We can do it before isolate_lru_page because the
* page can't be freed from under us. NOTE: PG_lock
@@ -644,9 +624,6 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);

- if (folio_test_large(folio))
- list_add_tail(&folio->lru, compound_pagelist);
-next:
/*
* If collapse was initiated by khugepaged, check that there is
* enough young pte to justify collapsing the page
@@ -660,6 +637,10 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
if (pte_write(pteval))
writable = true;

+ if (folio_test_large(folio)) {
+ _pte += folio_nr_pages(folio) - 1;
+ address += folio_size(folio) - PAGE_SIZE;
+ }
}

if (unlikely(!writable)) {
@@ -673,7 +654,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
return result;
}
out:
- release_pte_pages(pte, _pte, compound_pagelist);
+ release_pte_folios(pte, _pte);
trace_mm_collapse_huge_page_isolate(&folio->page, none_or_zero,
referenced, writable, result);
return result;
@@ -682,11 +663,9 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
static void __collapse_huge_page_copy_succeeded(pte_t *pte,
struct vm_area_struct *vma,
unsigned long address,
- spinlock_t *ptl,
- struct list_head *compound_pagelist)
+ spinlock_t *ptl)
{
struct page *src_page;
- struct page *tmp;
pte_t *_pte;
pte_t pteval;

@@ -706,8 +685,7 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte,
}
} else {
src_page = pte_page(pteval);
- if (!PageCompound(src_page))
- release_pte_page(src_page);
+ release_pte_page(src_page);
/*
* ptl mostly unnecessary, but preempt has to
* be disabled to update the per-cpu stats
@@ -720,23 +698,12 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte,
free_page_and_swap_cache(src_page);
}
}
-
- list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
- list_del(&src_page->lru);
- mod_node_page_state(page_pgdat(src_page),
- NR_ISOLATED_ANON + page_is_file_lru(src_page),
- -compound_nr(src_page));
- unlock_page(src_page);
- free_swap_cache(src_page);
- putback_lru_page(src_page);
- }
}

static void __collapse_huge_page_copy_failed(pte_t *pte,
pmd_t *pmd,
pmd_t orig_pmd,
- struct vm_area_struct *vma,
- struct list_head *compound_pagelist)
+ struct vm_area_struct *vma)
{
spinlock_t *pmd_ptl;

@@ -753,7 +720,7 @@ static void __collapse_huge_page_copy_failed(pte_t *pte,
* Release both raw and compound pages isolated
* in __collapse_huge_page_isolate.
*/
- release_pte_pages(pte, pte + HPAGE_PMD_NR, compound_pagelist);
+ release_pte_folios(pte, pte + HPAGE_PMD_NR);
}

/*
@@ -769,7 +736,6 @@ static void __collapse_huge_page_copy_failed(pte_t *pte,
* @vma: the original raw pages' virtual memory area
* @address: starting address to copy
* @ptl: lock on raw pages' PTEs
- * @compound_pagelist: list that stores compound pages
*/
static int __collapse_huge_page_copy(pte_t *pte,
struct page *page,
@@ -777,8 +743,7 @@ static int __collapse_huge_page_copy(pte_t *pte,
pmd_t orig_pmd,
struct vm_area_struct *vma,
unsigned long address,
- spinlock_t *ptl,
- struct list_head *compound_pagelist)
+ spinlock_t *ptl)
{
struct page *src_page;
pte_t *_pte;
@@ -804,11 +769,9 @@ static int __collapse_huge_page_copy(pte_t *pte,
}

if (likely(result == SCAN_SUCCEED))
- __collapse_huge_page_copy_succeeded(pte, vma, address, ptl,
- compound_pagelist);
+ __collapse_huge_page_copy_succeeded(pte, vma, address, ptl);
else
- __collapse_huge_page_copy_failed(pte, pmd, orig_pmd, vma,
- compound_pagelist);
+ __collapse_huge_page_copy_failed(pte, pmd, orig_pmd, vma);

return result;
}
@@ -1081,7 +1044,6 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
int referenced, int unmapped,
struct collapse_control *cc)
{
- LIST_HEAD(compound_pagelist);
pmd_t *pmd, _pmd;
pte_t *pte;
pgtable_t pgtable;
@@ -1168,8 +1130,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,

pte = pte_offset_map_lock(mm, &_pmd, address, &pte_ptl);
if (pte) {
- result = __collapse_huge_page_isolate(vma, address, pte, cc,
- &compound_pagelist);
+ result = __collapse_huge_page_isolate(vma, address, pte, cc);
spin_unlock(pte_ptl);
} else {
result = SCAN_PMD_NULL;
@@ -1198,8 +1159,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
anon_vma_unlock_write(vma->anon_vma);

result = __collapse_huge_page_copy(pte, hpage, pmd, _pmd,
- vma, address, pte_ptl,
- &compound_pagelist);
+ vma, address, pte_ptl);
pte_unmap(pte);
if (unlikely(result != SCAN_SUCCEED))
goto out_up_write;
--
2.40.1