[PATCH v3 05/21] mm/hugetlb: Introduce pgtable allocation/freeing helpers

From: Muchun Song
Date: Sun Nov 08 2020 - 09:12:38 EST


On x86_64, vmemmap is always PMD mapped if the machine has hugepages
support and if we have 2MB contiguous, PMD-aligned pages. If we want
to free the unused vmemmap pages, we have to split the huge PMD first.
So we should pre-allocate page tables to split the PMD mapping into PTEs.

Signed-off-by: Muchun Song <songmuchun@xxxxxxxxxxxxx>
---
include/linux/hugetlb.h | 10 +++++
mm/hugetlb.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 121 insertions(+)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index eed3dd3bd626..d81c262418db 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -593,6 +593,16 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h)

#include <asm/hugetlb.h>

+#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
+#ifndef VMEMMAP_HPAGE_SHIFT
+#define VMEMMAP_HPAGE_SHIFT HPAGE_SHIFT
+#endif
+#define VMEMMAP_HPAGE_ORDER (VMEMMAP_HPAGE_SHIFT - PAGE_SHIFT)
+#define VMEMMAP_HPAGE_NR (1 << VMEMMAP_HPAGE_ORDER)
+#define VMEMMAP_HPAGE_SIZE ((1UL) << VMEMMAP_HPAGE_SHIFT)
+#define VMEMMAP_HPAGE_MASK (~(VMEMMAP_HPAGE_SIZE - 1))
+#endif /* CONFIG_HUGETLB_PAGE_FREE_VMEMMAP */
+
#ifndef is_hugepage_only_range
static inline int is_hugepage_only_range(struct mm_struct *mm,
unsigned long addr, unsigned long len)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a0007902fafb..5c7be2ee7e15 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1303,6 +1303,108 @@ static inline void destroy_compound_gigantic_page(struct page *page,
*/
#define RESERVE_VMEMMAP_NR 2U

+#define page_huge_pte(page) ((page)->pmd_huge_pte)
+
+static inline unsigned int free_vmemmap_pages_per_hpage(struct hstate *h)
+{
+ return h->nr_free_vmemmap_pages;
+}
+
+static inline unsigned int vmemmap_pages_per_hpage(struct hstate *h)
+{
+ return free_vmemmap_pages_per_hpage(h) + RESERVE_VMEMMAP_NR;
+}
+
+static inline unsigned long vmemmap_pages_size_per_hpage(struct hstate *h)
+{
+ return (unsigned long)vmemmap_pages_per_hpage(h) << PAGE_SHIFT;
+}
+
+static inline unsigned int pgtable_pages_to_prealloc_per_hpage(struct hstate *h)
+{
+ unsigned long vmemmap_size = vmemmap_pages_size_per_hpage(h);
+
+ /*
+ * No need to pre-allocate page tables when there are no vmemmap
+ * pages to free.
+ */
+ if (!free_vmemmap_pages_per_hpage(h))
+ return 0;
+
+ return ALIGN(vmemmap_size, VMEMMAP_HPAGE_SIZE) >> VMEMMAP_HPAGE_SHIFT;
+}
+
+static inline void vmemmap_pgtable_init(struct page *page)
+{
+ page_huge_pte(page) = NULL;
+}
+
+static void vmemmap_pgtable_deposit(struct page *page, pgtable_t pgtable)
+{
+ /* FIFO */
+ if (!page_huge_pte(page))
+ INIT_LIST_HEAD(&pgtable->lru);
+ else
+ list_add(&pgtable->lru, &page_huge_pte(page)->lru);
+ page_huge_pte(page) = pgtable;
+}
+
+static pgtable_t vmemmap_pgtable_withdraw(struct page *page)
+{
+ pgtable_t pgtable;
+
+ /* FIFO */
+ pgtable = page_huge_pte(page);
+ page_huge_pte(page) = list_first_entry_or_null(&pgtable->lru,
+ struct page, lru);
+ if (page_huge_pte(page))
+ list_del(&pgtable->lru);
+ return pgtable;
+}
+
+static int vmemmap_pgtable_prealloc(struct hstate *h, struct page *page)
+{
+ int i;
+ pgtable_t pgtable;
+ unsigned int nr = pgtable_pages_to_prealloc_per_hpage(h);
+
+ if (!nr)
+ return 0;
+
+ vmemmap_pgtable_init(page);
+
+ for (i = 0; i < nr; i++) {
+ pte_t *pte_p;
+
+ pte_p = pte_alloc_one_kernel(&init_mm);
+ if (!pte_p)
+ goto out;
+ vmemmap_pgtable_deposit(page, virt_to_page(pte_p));
+ }
+
+ return 0;
+out:
+ while (i-- && (pgtable = vmemmap_pgtable_withdraw(page)))
+ pte_free_kernel(&init_mm, page_to_virt(pgtable));
+ return -ENOMEM;
+}
+
+static void vmemmap_pgtable_free(struct hstate *h, struct page *page)
+{
+ pgtable_t pgtable;
+ unsigned int nr = pgtable_pages_to_prealloc_per_hpage(h);
+
+ if (!nr)
+ return;
+
+ pgtable = page_huge_pte(page);
+ if (!pgtable)
+ return;
+
+ while (nr-- && (pgtable = vmemmap_pgtable_withdraw(page)))
+ pte_free_kernel(&init_mm, page_to_virt(pgtable));
+}
+
static void __init hugetlb_vmemmap_init(struct hstate *h)
{
unsigned int order = huge_page_order(h);
@@ -1326,6 +1428,15 @@ static void __init hugetlb_vmemmap_init(struct hstate *h)
static inline void hugetlb_vmemmap_init(struct hstate *h)
{
}
+
+static inline int vmemmap_pgtable_prealloc(struct hstate *h, struct page *page)
+{
+ return 0;
+}
+
+static inline void vmemmap_pgtable_free(struct hstate *h, struct page *page)
+{
+}
#endif

static void update_and_free_page(struct hstate *h, struct page *page)
--
2.11.0