[RFC PATCH v2 31/47] hugetlb: sort hstates in hugetlb_init_hstates
From: James Houghton
Date: Fri Oct 21 2022 - 12:39:57 EST
When using HugeTLB high-granularity mapping, we need to go through the
supported hugepage sizes in decreasing order so that we pick the largest
size that works. Consider the case where we're faulting in a 1G hugepage
for the first time: we want hugetlb_fault/hugetlb_no_page to map it with
a PUD. By going through the sizes in decreasing order, we will find that
PUD_SIZE works before finding out that PMD_SIZE or PAGE_SIZE work too.
This commit also changes bootmem hugepages from storing hstate pointers
directly to storing the hstate sizes. The hstate pointers used for
boot-time-allocated hugepages become invalid after we sort the hstates.
`gather_bootmem_prealloc`, called after the hstates have been sorted,
now converts the size to the correct hstate.
Because the hstates array is now sorted, the demote order of each hstate
is taken from its neighbor in the array instead of being found by
searching all hstates.
Signed-off-by: James Houghton <jthoughton@xxxxxxxxxx>
---
include/linux/hugetlb.h | 2 +-
mm/hugetlb.c | 49 ++++++++++++++++++++++++++++++++---------
2 files changed, 40 insertions(+), 11 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index d305742e9d44..e25f97cdd086 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -772,7 +772,7 @@ struct hstate {
struct huge_bootmem_page {
struct list_head list;
- struct hstate *hstate;
+ unsigned long hstate_sz;
};
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bb0005d57cab..d6f07968156c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -34,6 +34,7 @@
#include <linux/nospec.h>
#include <linux/delayacct.h>
#include <linux/memory.h>
+#include <linux/sort.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
@@ -49,6 +50,10 @@
int hugetlb_max_hstate __read_mostly;
unsigned int default_hstate_idx;
+/*
+ * After hugetlb_init_hstates is called, hstates will be sorted from largest
+ * to smallest.
+ */
struct hstate hstates[HUGE_MAX_HSTATE];
#ifdef CONFIG_CMA
@@ -3189,7 +3194,7 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
/* Put them into a private list first because mem_map is not up yet */
INIT_LIST_HEAD(&m->list);
list_add(&m->list, &huge_boot_pages);
- m->hstate = h;
+ m->hstate_sz = huge_page_size(h);
return 1;
}
@@ -3203,7 +3208,7 @@ static void __init gather_bootmem_prealloc(void)
list_for_each_entry(m, &huge_boot_pages, list) {
struct page *page = virt_to_page(m);
- struct hstate *h = m->hstate;
+ struct hstate *h = size_to_hstate(m->hstate_sz);
VM_BUG_ON(!hstate_is_gigantic(h));
WARN_ON(page_count(page) != 1);
@@ -3319,9 +3324,38 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
kfree(node_alloc_noretry);
}
+/* sort() comparator: orders hstates from largest to smallest page size. */
+static int compare_hstates_decreasing(const void *a, const void *b)
+{
+	const struct hstate *lhs = a;
+	const struct hstate *rhs = b;
+	unsigned long lhs_sz = huge_page_size(lhs);
+	unsigned long rhs_sz = huge_page_size(rhs);
+
+	/* Positive when a is smaller, so the larger size sorts first. */
+	return (lhs_sz < rhs_sz) - (lhs_sz > rhs_sz);
+}
+
+/* Boot-time only: order hstates[] from largest to smallest page size. */
+static void __init sort_hstates(void)
+{
+	/* Remember the default hstate by size; sorting may move it. */
+	unsigned long default_hstate_sz = huge_page_size(&default_hstate);
+
+	sort(hstates, hugetlb_max_hstate, sizeof(*hstates),
+	     compare_hstates_decreasing, NULL);
+
+	/*
+	 * The default hstate may have moved, so look it up again by size.
+	 */
+	default_hstate_idx = hstate_index(size_to_hstate(default_hstate_sz));
+}
+
static void __init hugetlb_init_hstates(void)
{
- struct hstate *h, *h2;
+ struct hstate *h;
+
+ sort_hstates();
for_each_hstate(h) {
/* oversize hugepages were init'ed in early boot */
@@ -3340,13 +3374,8 @@ static void __init hugetlb_init_hstates(void)
continue;
if (hugetlb_cma_size && h->order <= HUGETLB_PAGE_ORDER)
continue;
- for_each_hstate(h2) {
- if (h2 == h)
- continue;
- if (h2->order < h->order &&
- h2->order > h->demote_order)
- h->demote_order = h2->order;
- }
+		if (h + 1 < &hstates[hugetlb_max_hstate]) /* next smaller size */
+			h->demote_order = huge_page_order(h + 1);
}
}
--
2.38.0.135.g90850a2211-goog