[RFC PATCH 15/15] mm: make MAX_ORDER a kernel boot time parameter.

From: Zi Yan
Date: Thu Aug 05 2021 - 15:04:19 EST


From: Zi Yan <ziy@xxxxxxxxxx>

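With the new buddy_alloc_max_order boot parameter, users can specify a larger
MAX_ORDER than the one set in CONFIG_ARCH_FORCE_MAX_ORDER or
CONFIG_SET_MAX_ORDER. It can be set to any value >= CONFIG_ARCH_FORCE_MAX_ORDER
or CONFIG_SET_MAX_ORDER, but < 256 (limited by vmscan scan_control and the
per-cpu free page list). Note that the parser in this patch additionally
requires the value to be < S8_MAX; values outside the accepted range fall back
to the configured minimum.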

Signed-off-by: Zi Yan <ziy@xxxxxxxxxx>
Cc: Jonathan Corbet <corbet@xxxxxxx>
Cc: "Paul E. McKenney" <paulmck@xxxxxxxxxx>
Cc: Randy Dunlap <rdunlap@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Vlastimil Babka <vbabka@xxxxxxx>
Cc: linux-doc@xxxxxxxxxxxxxxx
Cc: linux-mm@xxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
---
.../admin-guide/kernel-parameters.txt | 5 +++
include/linux/mmzone.h | 23 +++++++++++--
mm/page_alloc.c | 34 ++++++++++++++++++-
mm/vmscan.c | 1 -
4 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 5c59a5fb17c3..a37141aa28ae 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -465,6 +465,11 @@
bttv.pll= See Documentation/admin-guide/media/bttv.rst
bttv.tuner=

+ buddy_alloc_max_order= [KNL] This parameter adjusts the size of the largest
+ pages that can be allocated from the kernel buddy allocator. The largest
+ page size is 2^(buddy_alloc_max_order - 1) * PAGE_SIZE.
+ Format: integer
+
bulk_remove=off [PPC] This parameter disables the use of the pSeries
firmware feature for flushing multiple hpte entries
at a time.
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 379dada82d4b..9ca4d59722a1 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -26,14 +26,25 @@
/* Free memory management - zoned buddy allocator. */
#ifndef CONFIG_ARCH_FORCE_MAX_ORDER
#ifdef CONFIG_SET_MAX_ORDER
-#define MAX_ORDER CONFIG_SET_MAX_ORDER
+/* Defined in mm/page_alloc.c */
+extern int buddy_alloc_max_order;
+
+#define MAX_ORDER buddy_alloc_max_order
#define MIN_MAX_ORDER CONFIG_SET_MAX_ORDER
#else
#define MAX_ORDER 11
#define MIN_MAX_ORDER MAX_ORDER
#endif /* CONFIG_SET_MAX_ORDER */
#else
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/* Defined in mm/page_alloc.c */
+extern int buddy_alloc_max_order;
+
+#define MAX_ORDER buddy_alloc_max_order
+#else
#define MAX_ORDER CONFIG_ARCH_FORCE_MAX_ORDER
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
#define MIN_MAX_ORDER CONFIG_ARCH_FORCE_MAX_ORDER
#endif /* CONFIG_ARCH_FORCE_MAX_ORDER */
#define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
@@ -1557,8 +1568,14 @@ void sparse_init(void);
* pfn_valid_within() should be used in this case; we optimise this away
* when we have no holes within a MAX_ORDER_NR_PAGES block.
*/
-#if ((MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS)
-#define pfn_valid_within(pfn) pfn_valid(pfn)
+#if defined(CONFIG_ARCH_FORCE_MAX_ORDER) || defined(CONFIG_SET_MAX_ORDER)
+static inline bool pfn_valid_within(unsigned long pfn)
+{
+	/* Only consult pfn_valid() when a MAX_ORDER block outgrows a section. */
+	const bool spans_sections = (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS;
+
+	return spans_sections ? pfn_valid(pfn) : true;
+}
#else
#define pfn_valid_within(pfn) (1)
#endif
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bfa6962f7615..ea6f8d85a4cf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1487,7 +1487,6 @@ static void free_pcppages_bulk(struct zone *zone, int count,
batch_free = count;

order = pindex_to_order(pindex);
- BUILD_BUG_ON(MAX_ORDER >= (1<<NR_PCP_ORDER_WIDTH));
do {
page = list_last_entry(list, struct page, lru);
/* must delete to avoid corrupting pcp list */
@@ -9508,3 +9507,36 @@ bool take_page_off_buddy(struct page *page)
return ret;
}
#endif
+
+#if (defined(CONFIG_ARCH_FORCE_MAX_ORDER) && defined(CONFIG_SPARSEMEM_VMEMMAP)) \
+ || defined(CONFIG_SET_MAX_ORDER)
+/* Runtime MAX_ORDER; never set below the build-time MIN_MAX_ORDER. */
+int buddy_alloc_max_order = MIN_MAX_ORDER;
+EXPORT_SYMBOL(buddy_alloc_max_order);
+
+/* Parse "buddy_alloc_max_order=<n>" from the kernel command line. */
+static int __init buddy_alloc_set(char *val)
+{
+	unsigned long max_order;
+
+	/* val is NULL when the option is given without "=<n>". */
+	if (!val || kstrtoul(val, 10, &max_order) < 0)
+		return -EINVAL;
+
+	/*
+	 * Upper bounds enforced here:
+	 * 1. scan_control in mm/vmscan.c stores order in an s8 field.
+	 * 2. free_pcppages_bulk needs order < (1 << NR_PCP_ORDER_WIDTH).
+	 * Anything outside these bounds falls back to MIN_MAX_ORDER.
+	 */
+	if (max_order > MIN_MAX_ORDER && max_order < S8_MAX &&
+	    max_order < (1<<NR_PCP_ORDER_WIDTH))
+		buddy_alloc_max_order = max_order;
+	else
+		buddy_alloc_max_order = MIN_MAX_ORDER;
+
+	return 0;
+}
+
+early_param("buddy_alloc_max_order", buddy_alloc_set);
+#endif
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 403a175a720f..9a3963c6166e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3610,7 +3610,6 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
* scan_control uses s8 fields for order, priority, and reclaim_idx.
* Confirm they are large enough for max values.
*/
- BUILD_BUG_ON(MAX_ORDER > S8_MAX);
BUILD_BUG_ON(DEF_PRIORITY > S8_MAX);
BUILD_BUG_ON(MAX_NR_ZONES > S8_MAX);

--
2.30.2