[RFC PATCH 18/18] mm/pgtable: pass ptdesc in pte_free_defer
From: alexs
Date: Tue Jul 30 2024 - 03:25:06 EST
From: Alex Shi <alexs@xxxxxxxxxx>
pass ptdesc in pte_free_defer() and use ptdesc in collapse_huge_page().
This patch is immature, there is a issue from pmd_pgtable() conversion
in few archs. The problem need a fix.
Signed-off-by: Alex Shi <alexs@xxxxxxxxxx>
Cc: linux-mm@xxxxxxxxx
Cc: linux-s390@xxxxxxxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
Cc: linuxppc-dev@xxxxxxxxxxxxxxxx
Cc: Mike Rapoport <rppt@xxxxxxxxxx>
Cc: Barry Song <baohua@xxxxxxxxxx>
Cc: Peter Xu <peterx@xxxxxxxxxx>
Cc: Lance Yang <ioworker0@xxxxxxxxx>
Cc: Kemeng Shi <shikemeng@xxxxxxxxxxxxxxx>
Cc: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxx>
Cc: Qi Zheng <zhengqi.arch@xxxxxxxxxxxxx>
Cc: Sven Schnelle <svens@xxxxxxxxxxxxx>
Cc: Christian Borntraeger <borntraeger@xxxxxxxxxxxxx>
Cc: Vasily Gorbik <gor@xxxxxxxxxxxxx>
Cc: Heiko Carstens <hca@xxxxxxxxxxxxx>
Cc: Naveen N Rao <naveen@xxxxxxxxxx>
Cc: Nicholas Piggin <npiggin@xxxxxxxxx>
Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
Cc: Ryan Roberts <ryan.roberts@xxxxxxx>
Cc: David Hildenbrand <david@xxxxxxxxxx>
Cc: Vishal Moola <vishal.moola@xxxxxxxxx>
Cc: Hugh Dickins <hughd@xxxxxxxxxx>
Cc: Gerald Schaefer <gerald.schaefer@xxxxxxxxxxxxx>
Cc: Alexander Gordeev <agordeev@xxxxxxxxxxxxx>
Cc: Christophe Leroy <christophe.leroy@xxxxxxxxxx>
---
arch/powerpc/include/asm/pgalloc.h | 2 +-
arch/s390/include/asm/pgalloc.h | 2 +-
arch/s390/mm/pgalloc.c | 2 +-
include/linux/pgtable.h | 2 +-
mm/khugepaged.c | 10 +++++-----
mm/pgtable-generic.c | 4 +---
6 files changed, 10 insertions(+), 12 deletions(-)
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index 12520521163e..ca21b67c593f 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -47,7 +47,7 @@ static inline void pte_free(struct mm_struct *mm, struct ptdesc *ptepage)
/* arch use pte_free_defer() implementation in arch/powerpc/mm/pgtable-frag.c */
#define pte_free_defer pte_free_defer
-void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);
+void pte_free_defer(struct mm_struct *mm, struct ptdesc *pgtable);
/*
* Functions that deal with pagetables that could be at any level of
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 771494526f6e..a229cee11bbd 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -144,7 +144,7 @@ static inline void pmd_populate(struct mm_struct *mm,
/* arch use pte_free_defer() implementation in arch/s390/mm/pgalloc.c */
#define pte_free_defer pte_free_defer
-void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);
+void pte_free_defer(struct mm_struct *mm, struct ptdesc *pgtable);
void vmem_map_init(void);
void *vmem_crst_alloc(unsigned long val);
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index f691e0fb66a2..c7bb38d85d81 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -214,7 +214,7 @@ static void pte_free_now(struct rcu_head *head)
pagetable_pte_dtor_free(ptdesc);
}
-void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
+void pte_free_defer(struct mm_struct *mm, struct ptdesc *pgtable)
{
struct ptdesc *ptdesc = virt_to_ptdesc(pgtable);
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 9d256c548f5e..e7b018de1d0f 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -116,7 +116,7 @@ static inline void pte_unmap(pte_t *pte)
}
#endif
-void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);
+void pte_free_defer(struct mm_struct *mm, struct ptdesc *ptdesc);
/* Find an entry in the second-level page table.. */
#ifndef pmd_offset
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 5b466a1c2136..30cf61d02c1c 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1094,7 +1094,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
LIST_HEAD(compound_pagelist);
pmd_t *pmd, _pmd;
pte_t *pte;
- pgtable_t pgtable;
+ struct ptdesc *ptdesc;
struct folio *folio;
spinlock_t *pmd_ptl, *pte_ptl;
int result = SCAN_FAIL;
@@ -1223,7 +1223,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
* write.
*/
__folio_mark_uptodate(folio);
- pgtable = pmd_pgtable(_pmd);
+ ptdesc = pmd_ptdesc(&_pmd);
_pmd = mk_huge_pmd(&folio->page, vma->vm_page_prot);
_pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
@@ -1232,7 +1232,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
BUG_ON(!pmd_none(*pmd));
folio_add_new_anon_rmap(folio, vma, address, RMAP_EXCLUSIVE);
folio_add_lru_vma(folio, vma);
- pgtable_trans_huge_deposit(mm, pmd, page_ptdesc(pgtable));
+ pgtable_trans_huge_deposit(mm, pmd, ptdesc);
set_pmd_at(mm, address, pmd, _pmd);
update_mmu_cache_pmd(vma, address, pmd);
spin_unlock(pmd_ptl);
@@ -1664,7 +1664,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
mm_dec_nr_ptes(mm);
page_table_check_pte_clear_range(mm, haddr, pgt_pmd);
- pte_free_defer(mm, pmd_pgtable(pgt_pmd));
+ pte_free_defer(mm, pmd_ptdesc(&pgt_pmd));
maybe_install_pmd:
/* step 5: install pmd entry */
@@ -1777,7 +1777,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
if (retracted) {
mm_dec_nr_ptes(mm);
page_table_check_pte_clear_range(mm, addr, pgt_pmd);
- pte_free_defer(mm, pmd_pgtable(pgt_pmd));
+ pte_free_defer(mm, pmd_ptdesc(&pgt_pmd));
}
}
i_mmap_unlock_read(mapping);
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 5e763682941d..f3bc2b17893a 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -244,10 +244,8 @@ static void pte_free_now(struct rcu_head *head)
pte_free(NULL /* mm not passed and not used */, ptdesc);
}
-void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
+void pte_free_defer(struct mm_struct *mm, struct ptdesc *ptdesc)
{
- struct ptdesc *ptdesc = page_ptdesc(pgtable);
-
call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
}
#endif /* pte_free_defer */
--
2.43.0