[PATCH v2 05/19] mm/hugetlb: Introduce pgtable allocation/freeing helpers
From: Muchun Song
Date: Mon Oct 26 2020 - 10:54:11 EST
On some architectures, the vmemmap areas use huge page mappings.
If we want to free the unused vmemmap pages, we have to split
the huge pmd first. So we should pre-allocate the page tables needed
to split the huge pmd.
Signed-off-by: Muchun Song <songmuchun@xxxxxxxxxxxxx>
---
arch/x86/include/asm/hugetlb.h | 5 ++
include/linux/hugetlb.h | 17 +++++
mm/hugetlb.c | 117 +++++++++++++++++++++++++++++++++
3 files changed, 139 insertions(+)
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index 1721b1aadeb1..f5e882f999cd 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -5,6 +5,11 @@
#include <asm/page.h>
#include <asm-generic/hugetlb.h>
+#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
+#define VMEMMAP_HPAGE_SHIFT PMD_SHIFT /* a huge vmemmap mapping covers one PMD */
+#define arch_vmemmap_support_huge_mapping() boot_cpu_has(X86_FEATURE_PSE) /* same test as hugepages_supported() */
+#endif
+
#define hugepages_supported() boot_cpu_has(X86_FEATURE_PSE)
#endif /* _ASM_X86_HUGETLB_H */
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index eed3dd3bd626..ace304a6196c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -593,6 +593,23 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h)
#include <asm/hugetlb.h>
+#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
+#ifndef arch_vmemmap_support_huge_mapping /* <asm/hugetlb.h> may provide this as a macro */
+static inline bool arch_vmemmap_support_huge_mapping(void)
+{
+ return false; /* generic fallback: report no huge vmemmap mapping */
+}
+#endif
+
+#ifndef VMEMMAP_HPAGE_SHIFT
+#define VMEMMAP_HPAGE_SHIFT PMD_SHIFT /* default when the arch header sets none */
+#endif
+#define VMEMMAP_HPAGE_ORDER (VMEMMAP_HPAGE_SHIFT - PAGE_SHIFT) /* log2(base pages per huge mapping) */
+#define VMEMMAP_HPAGE_NR (1 << VMEMMAP_HPAGE_ORDER) /* base pages per huge mapping */
+#define VMEMMAP_HPAGE_SIZE ((1UL) << VMEMMAP_HPAGE_SHIFT) /* bytes per huge mapping */
+#define VMEMMAP_HPAGE_MASK (~(VMEMMAP_HPAGE_SIZE - 1)) /* clears the offset within a huge mapping */
+#endif /* CONFIG_HUGETLB_PAGE_FREE_VMEMMAP */
+
#ifndef is_hugepage_only_range
static inline int is_hugepage_only_range(struct mm_struct *mm,
unsigned long addr, unsigned long len)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f1b2b733b49b..d6ae9b6876be 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1295,11 +1295,108 @@ static inline void destroy_compound_gigantic_page(struct page *page,
#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
#define RESERVE_VMEMMAP_NR 2U
+#define page_huge_pte(page) ((page)->pmd_huge_pte) /* head of the page-table list deposited on @page */
+
static inline unsigned int nr_free_vmemmap(struct hstate *h)
{
return h->nr_free_vmemmap_pages;
}
+/*
+ * Return nr_free_vmemmap(@h) plus the RESERVE_VMEMMAP_NR reserved pages.
+ */
+static inline unsigned int nr_vmemmap(struct hstate *h)
+{
+	unsigned int nr_free = nr_free_vmemmap(h);
+
+	return RESERVE_VMEMMAP_NR + nr_free;
+}
+
+/*
+ * Return nr_vmemmap(@h) expressed in bytes. The page count is widened to
+ * unsigned long before it is shifted by PAGE_SHIFT.
+ */
+static inline unsigned long nr_vmemmap_size(struct hstate *h)
+{
+	unsigned long nr_pages = nr_vmemmap(h);
+
+	return nr_pages << PAGE_SHIFT;
+}
+
+/*
+ * Return the number of page tables to pre-allocate for @h.
+ *
+ * The result is 0 when the architecture does not map the vmemmap with huge
+ * pages, or when @h has no vmemmap pages to free. Otherwise it is
+ * nr_vmemmap_size(@h) rounded up to whole VMEMMAP_HPAGE_SIZE units.
+ */
+static inline unsigned int nr_pgtable(struct hstate *h)
+{
+	unsigned long size;
+
+	/* Nothing to split, or nothing to free: no page tables are needed. */
+	if (!arch_vmemmap_support_huge_mapping() || !nr_free_vmemmap(h))
+		return 0;
+
+	size = ALIGN(nr_vmemmap_size(h), VMEMMAP_HPAGE_SIZE);
+
+	return size >> VMEMMAP_HPAGE_SHIFT;
+}
+
+/* Mark @page as having no deposited page tables. */
+static inline void vmemmap_pgtable_init(struct page *page)
+{
+	page_huge_pte(page) = NULL;
+}
+
+/*
+ * Deposit the page table at @pte_p on @page.
+ *
+ * Deposited tables are linked through the lru field of their struct page.
+ * The first deposit starts a new list; later ones are linked in right after
+ * the current head. Either way the newly deposited table becomes the head
+ * stored in page_huge_pte(@page).
+ */
+static void vmemmap_pgtable_deposit(struct page *page, pte_t *pte_p)
+{
+	pgtable_t new_head = virt_to_page(pte_p);
+	pgtable_t old_head = page_huge_pte(page);
+
+	if (old_head)
+		list_add(&new_head->lru, &old_head->lru);
+	else
+		INIT_LIST_HEAD(&new_head->lru);
+
+	page_huge_pte(page) = new_head;
+}
+
+/*
+ * Take the head page table off @page's deposit list.
+ *
+ * Returns the kernel virtual address of the withdrawn table, or NULL when
+ * nothing is deposited. The head's list successor (if any) becomes the new
+ * head; when the head was the only entry, page_huge_pte(@page) is reset to
+ * NULL.
+ */
+static pte_t *vmemmap_pgtable_withdraw(struct page *page)
+{
+	pgtable_t head = page_huge_pte(page);
+	pgtable_t next;
+
+	if (unlikely(!head))
+		return NULL;
+
+	next = list_first_entry_or_null(&head->lru, struct page, lru);
+	page_huge_pte(page) = next;
+	/* Only unlink when other entries remain on the list. */
+	if (next)
+		list_del(&head->lru);
+
+	return page_to_virt(head);
+}
+
+/*
+ * Pre-allocate the page tables reported by nr_pgtable(@h) and deposit them
+ * on @page.
+ *
+ * Returns 0 on success, including the case where no page tables are needed
+ * (in which case @page is not touched). Returns -ENOMEM if an allocation
+ * fails; every table deposited up to that point is withdrawn and freed
+ * before returning.
+ */
+static int vmemmap_pgtable_prealloc(struct hstate *h, struct page *page)
+{
+	/* Unsigned to match @nr and avoid a signed/unsigned comparison. */
+	unsigned int i;
+	unsigned int nr = nr_pgtable(h);
+	pte_t *pte_p;
+
+	if (!nr)
+		return 0;
+
+	vmemmap_pgtable_init(page);
+
+	for (i = 0; i < nr; i++) {
+		pte_p = pte_alloc_one_kernel(&init_mm);
+		if (!pte_p)
+			goto out;
+		vmemmap_pgtable_deposit(page, pte_p);
+	}
+
+	return 0;
+out:
+	/* @i tables were deposited before the failure; give them all back. */
+	while (i-- && (pte_p = vmemmap_pgtable_withdraw(page)))
+		pte_free_kernel(&init_mm, pte_p);
+	return -ENOMEM;
+}
+
+/*
+ * Withdraw and free every page table still deposited on @page.
+ *
+ * Does nothing when nr_pgtable(@h) is 0.
+ *
+ * NOTE(review): this reads page_huge_pte(@page) without initialising it, so
+ * it presumably relies on vmemmap_pgtable_prealloc() having run on @page
+ * first - confirm for every caller.
+ */
+static inline void vmemmap_pgtable_free(struct hstate *h, struct page *page)
+{
+	pte_t *pte_p;
+
+	if (nr_pgtable(h) == 0)
+		return;
+
+	for (pte_p = vmemmap_pgtable_withdraw(page); pte_p;
+	     pte_p = vmemmap_pgtable_withdraw(page))
+		pte_free_kernel(&init_mm, pte_p);
+}
+
static void __init hugetlb_vmemmap_init(struct hstate *h)
{
unsigned int order = huge_page_order(h);
@@ -1323,6 +1420,15 @@ static void __init hugetlb_vmemmap_init(struct hstate *h)
static inline void hugetlb_vmemmap_init(struct hstate *h)
{
}
+
+static inline int vmemmap_pgtable_prealloc(struct hstate *h, struct page *page)
+{
+ return 0; /* stub: allocates nothing and always reports success */
+}
+
+static inline void vmemmap_pgtable_free(struct hstate *h, struct page *page)
+{ /* stub: the matching prealloc stub deposits nothing, so there is nothing to free */
+}
#endif
static void update_and_free_page(struct hstate *h, struct page *page)
@@ -1531,6 +1637,9 @@ void free_huge_page(struct page *page)
static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
{
+ /* Must be called before the initialization of @page->lru */
+ vmemmap_pgtable_free(h, page);
+
INIT_LIST_HEAD(&page->lru);
set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
set_hugetlb_cgroup(page, NULL);
@@ -1783,6 +1892,14 @@ static struct page *alloc_fresh_huge_page(struct hstate *h,
if (!page)
return NULL;
+ if (vmemmap_pgtable_prealloc(h, page)) {
+ if (hstate_is_gigantic(h))
+ free_gigantic_page(page, huge_page_order(h));
+ else
+ put_page(page);
+ return NULL;
+ }
+
if (hstate_is_gigantic(h))
prep_compound_gigantic_page(page, huge_page_order(h));
prep_new_huge_page(h, page, page_to_nid(page));
--
2.20.1