Re: [RFC PATCH 05/12] khugepaged: Generalize __collapse_huge_page_isolate()

From: Ryan Roberts
Date: Tue Dec 17 2024 - 12:09:59 EST


On 16/12/2024 16:50, Dev Jain wrote:
> Scale down the scan range and the sysfs tunables according to the scan order,
> and isolate the folios.
>
> Signed-off-by: Dev Jain <dev.jain@xxxxxxx>
> ---
> mm/khugepaged.c | 19 +++++++++++--------
> 1 file changed, 11 insertions(+), 8 deletions(-)
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index f52dae7d5179..de044b1f83d4 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -564,15 +564,18 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
> unsigned long address,
> pte_t *pte,
> struct collapse_control *cc,
> - struct list_head *compound_pagelist)
> + struct list_head *compound_pagelist, int order)
> {
> - struct page *page = NULL;
> - struct folio *folio = NULL;
> - pte_t *_pte;
> + unsigned int max_ptes_shared = khugepaged_max_ptes_shared >> (HPAGE_PMD_ORDER - order);
> + unsigned int max_ptes_none = khugepaged_max_ptes_none >> (HPAGE_PMD_ORDER - order);

These right-shifts implicitly round the scaled tunables down. I think that's the right thing to do; when scaling max_ptes_none/max_ptes_shared to a smaller order it's better to be conservative.

> int none_or_zero = 0, shared = 0, result = SCAN_FAIL, referenced = 0;
> + struct folio *folio = NULL;
> + struct page *page = NULL;
> bool writable = false;
> + pte_t *_pte;
>
> - for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
> +
> + for (_pte = pte; _pte < pte + (1UL << order);
> _pte++, address += PAGE_SIZE) {
> pte_t pteval = ptep_get(_pte);
> if (pte_none(pteval) || (pte_present(pteval) &&
> @@ -580,7 +583,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
> ++none_or_zero;
> if (!userfaultfd_armed(vma) &&
> (!cc->is_khugepaged ||
> - none_or_zero <= khugepaged_max_ptes_none)) {
> + none_or_zero <= max_ptes_none)) {
> continue;
> } else {
> result = SCAN_EXCEED_NONE_PTE;
> @@ -609,7 +612,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
> if (folio_likely_mapped_shared(folio)) {
> ++shared;
> if (cc->is_khugepaged &&
> - shared > khugepaged_max_ptes_shared) {
> + shared > max_ptes_shared) {
> result = SCAN_EXCEED_SHARED_PTE;
> count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
> goto out;
> @@ -1200,7 +1203,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
> pte = pte_offset_map_lock(mm, &_pmd, address, &pte_ptl);
> if (pte) {
> result = __collapse_huge_page_isolate(vma, address, pte, cc,
> - &compound_pagelist);
> + &compound_pagelist, order);
> spin_unlock(pte_ptl);
> } else {
> result = SCAN_PMD_NULL;