[PATCH v4 23/45] powerpc/mm: Reduce hugepd size for 8M hugepages on 8xx

From: Christophe Leroy
Date: Tue May 19 2020 - 01:50:53 EST


Commit 55c8fc3f4930 ("powerpc/8xx: reintroduce 16K pages with HW
assistance") redefined pte_t as a struct of 4 pte_basic_t, because
in 16K pages mode there are four identical entries in the page table.
But hugepd entries for 8M pages require only one entry of size
pte_basic_t. So there is no point in creating a cache for 4 entries
page tables.

Calculate PTE_T_ORDER using the size of pte_basic_t instead of pte_t.

Define specific huge_pte helpers (set_huge_pte_at(), huge_pte_clear(),
huge_ptep_set_wrprotect()) to write the pte in a single entry instead
of using set_pte_at() which writes 4 identical entries in 16k pages
mode. Also make sure that __ptep_set_access_flags() properly handle
the huge_pte case.

Define set_pte_filter() inline otherwise GCC doesn't inline it anymore
because it is now used twice, and that gives a pretty suboptimal code
because of pte_t being a struct of 4 entries.

Those functions are also used for 512k pages which only require one
entry as well allthough replicating it four times was harmless as 512k
pages entries are spread every 128 bytes in the table.

Signed-off-by: Christophe Leroy <christophe.leroy@xxxxxxxxxx>
---
.../include/asm/nohash/32/hugetlb-8xx.h | 20 ++++++++++++++
arch/powerpc/include/asm/nohash/32/pgtable.h | 3 ++-
arch/powerpc/mm/hugetlbpage.c | 3 ++-
arch/powerpc/mm/pgtable.c | 26 ++++++++++++++++---
4 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
index a46616937d20..785437323576 100644
--- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
@@ -41,4 +41,24 @@ static inline int check_and_get_huge_psize(int shift)
return shift_to_mmu_psize(shift);
}

+#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte);
+
+#define __HAVE_ARCH_HUGE_PTE_CLEAR
+static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long sz)
+{
+ pte_update(mm, addr, ptep, ~0UL, 0, 1);
+}
+
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ unsigned long clr = ~pte_val(pte_wrprotect(__pte(~0)));
+ unsigned long set = pte_val(pte_wrprotect(__pte(0)));
+
+ pte_update(mm, addr, ptep, clr, set, 1);
+}
+
#endif /* _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 5fb3f6798e22..ff78bf25f832 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -314,8 +314,9 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
pte_t pte_clr = pte_mkyoung(pte_mkdirty(pte_mkwrite(pte_mkexec(__pte(~0)))));
unsigned long set = pte_val(entry) & pte_val(pte_set);
unsigned long clr = ~pte_val(entry) & ~pte_val(pte_clr);
+ int huge = psize > mmu_virtual_psize ? 1 : 0;

- pte_update(vma->vm_mm, address, ptep, clr, set, 0);
+ pte_update(vma->vm_mm, address, ptep, clr, set, huge);

flush_tlb_page(vma, address);
}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index d06efb946c7d..521929a371af 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -30,7 +30,8 @@ bool hugetlb_disabled = false;

#define hugepd_none(hpd) (hpd_val(hpd) == 0)

-#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_t)) - __builtin_ffs(sizeof(void *)))
+#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_basic_t)) - \
+ __builtin_ffs(sizeof(void *)))

pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index e3759b69f81b..214a5f4beb6c 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -100,7 +100,7 @@ static pte_t set_pte_filter_hash(pte_t pte) { return pte; }
* as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
* instead we "filter out" the exec permission for non clean pages.
*/
-static pte_t set_pte_filter(pte_t pte)
+static inline pte_t set_pte_filter(pte_t pte)
{
struct page *pg;

@@ -249,16 +249,34 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,

#else
/*
- * Not used on non book3s64 platforms. But 8xx
- * can possibly use tsize derived from hstate.
+ * Not used on non book3s64 platforms.
+ * 8xx compares it with mmu_virtual_psize to
+ * know if it is a huge page or not.
*/
- psize = 0;
+ psize = MMU_PAGE_COUNT;
#endif
__ptep_set_access_flags(vma, ptep, pte, addr, psize);
}
return changed;
#endif
}
+
+#if defined(CONFIG_PPC_8xx)
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
+{
+ /*
+ * Make sure hardware valid bit is not set. We don't do
+ * tlb flush for this update.
+ */
+ VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
+
+ pte = pte_mkpte(pte);
+
+ pte = set_pte_filter(pte);
+
+ ptep->pte = pte_val(pte);
+}
+#endif
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef CONFIG_DEBUG_VM
--
2.25.0