[PATCH] hugetlb: Support node specified when using cma for gigantic hugepages

From: Baolin Wang
Date: Sun Oct 10 2021 - 01:25:00 EST


Currently the size of the CMA area used for runtime allocation of
gigantic hugepages is balanced across all online nodes, but we may also
want to specify the CMA size per node, or reserve it on only one node
in some cases, similar to commit 86acc55c3d32 ("hugetlbfs: extend the
definition of hugepages parameter to support node allocation") [1].

Thus this patch adds a node format for the 'hugetlb_cma' parameter to
support specifying the size of the CMA area per node. For example:

hugetlb_cma=0:5G,2:5G

which reserves a 5G CMA area on node 0 and node 2, respectively.
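
Each successful per-node reservation is reported by the pr_info() in
hugetlb_cma_reserve(), so the example above should, assuming both nodes
have enough contiguous memory available, produce boot messages like:

hugetlb_cma: reserved 5120 MiB on node 0
hugetlb_cma: reserved 5120 MiB on node 2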

[1]
https://lkml.kernel.org/r/20211005054729.86457-1-yaozhenguo1@xxxxxxxxx

Signed-off-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
---
Documentation/admin-guide/kernel-parameters.txt | 6 +-
mm/hugetlb.c | 83 ++++++++++++++++++++++----
2 files changed, 77 insertions(+), 12 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 3ad8e9d0..a147faa5 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1587,8 +1587,10 @@
registers. Default set by CONFIG_HPET_MMAP_DEFAULT.

hugetlb_cma= [HW,CMA] The size of a CMA area used for allocation
- of gigantic hugepages.
- Format: nn[KMGTPE]
+ of gigantic hugepages. Alternatively, using the node
+ format, the size of a CMA area per node can be specified.
+ Format: nn[KMGTPE] or (node format)
+ <node>:nn[KMGTPE][,<node>:nn[KMGTPE]]

Reserve a CMA area of given size and allocate gigantic
hugepages using the CMA allocator. If enabled, the
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6d2f4c2..8b4e409 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -50,6 +50,7 @@

#ifdef CONFIG_CMA
static struct cma *hugetlb_cma[MAX_NUMNODES];
+static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
static bool hugetlb_cma_page(struct page *page, unsigned int order)
{
return cma_pages_valid(hugetlb_cma[page_to_nid(page)], page,
@@ -62,6 +63,7 @@ static bool hugetlb_cma_page(struct page *page, unsigned int order)
}
#endif
static unsigned long hugetlb_cma_size __initdata;
+static nodemask_t hugetlb_cma_nodes_allowed = NODE_MASK_NONE;

/*
* Minimum page order among possible hugepage sizes, set to a proper value
@@ -3497,9 +3499,15 @@ static ssize_t __nr_hugepages_store_common(bool obey_mempolicy,

if (nid == NUMA_NO_NODE) {
/*
+ * If the size of the CMA areas was specified per node,
+ * allocate from those nodes first.
+ */
+ if (hstate_is_gigantic(h) && !nodes_empty(hugetlb_cma_nodes_allowed))
+ n_mask = &hugetlb_cma_nodes_allowed;
+ /*
* global hstate attribute
*/
- if (!(obey_mempolicy &&
+ else if (!(obey_mempolicy &&
init_nodemask_of_mempolicy(&nodes_allowed)))
n_mask = &node_states[N_MEMORY];
else
@@ -6745,7 +6753,40 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)

static int __init cmdline_parse_hugetlb_cma(char *p)
{
- hugetlb_cma_size = memparse(p, &p);
+ int nid, count = 0;
+ unsigned long tmp;
+ char *s = p;
+
+ while (*s) {
+ if (sscanf(s, "%lu%n", &tmp, &count) != 1)
+ break;
+
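+ /* A ':' right after the leading number selects the <node>:size format. */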
+ if (s[count] == ':') {
+ nid = tmp;
+ if (nid < 0 || nid >= MAX_NUMNODES)
+ break;
+
+ s += count + 1;
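+ /* memparse() accepts the usual K/M/G/T/P/E size suffixes. */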
+ tmp = memparse(s, &s);
+ hugetlb_cma_size_in_node[nid] = tmp;
+ hugetlb_cma_size += tmp;
+
+ /*
+ * Skip the separator if there is one; otherwise
+ * stop parsing.
+ */
+ if (*s == ',')
+ s++;
+ else
+ break;
+ } else {
+ hugetlb_cma_size = memparse(p, &p);
+ break;
+ }
+ }
+
return 0;
}

@@ -6754,6 +6795,7 @@ static int __init cmdline_parse_hugetlb_cma(char *p)
void __init hugetlb_cma_reserve(int order)
{
unsigned long size, reserved, per_node;
+ bool node_specific_cma_alloc = false;
int nid;

cma_reserve_called = true;
@@ -6767,20 +6809,38 @@ void __init hugetlb_cma_reserve(int order)
return;
}

- /*
- * If 3 GB area is requested on a machine with 4 numa nodes,
- * let's allocate 1 GB on first three nodes and ignore the last one.
- */
- per_node = DIV_ROUND_UP(hugetlb_cma_size, nr_online_nodes);
- pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n",
- hugetlb_cma_size / SZ_1M, per_node / SZ_1M);
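+ /* Check whether any per-node CMA size was given on the command line. */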
+ for_each_node_state(nid, N_ONLINE) {
+ if (hugetlb_cma_size_in_node[nid] > 0) {
+ node_specific_cma_alloc = true;
+ break;
+ }
+ }
+
+ if (!node_specific_cma_alloc) {
+ /*
+ * If 3 GB area is requested on a machine with 4 numa nodes,
+ * let's allocate 1 GB on first three nodes and ignore the last one.
+ */
+ per_node = DIV_ROUND_UP(hugetlb_cma_size, nr_online_nodes);
+ pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n",
+ hugetlb_cma_size / SZ_1M, per_node / SZ_1M);
+ }

reserved = 0;
for_each_node_state(nid, N_ONLINE) {
int res;
char name[CMA_MAX_NAME];

- size = min(per_node, hugetlb_cma_size - reserved);
+ if (node_specific_cma_alloc) {
+ if (hugetlb_cma_size_in_node[nid] == 0)
+ continue;
+
+ size = hugetlb_cma_size_in_node[nid];
+ } else {
+ size = min(per_node, hugetlb_cma_size - reserved);
+ }
+
size = round_up(size, PAGE_SIZE << order);

snprintf(name, sizeof(name), "hugetlb%d", nid);
@@ -6799,6 +6859,9 @@ void __init hugetlb_cma_reserve(int order)
continue;
}

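+ /* Record nodes that got a CMA area so sysfs allocations can target them. */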
+ if (node_specific_cma_alloc)
+ node_set(nid, hugetlb_cma_nodes_allowed);
reserved += size;
pr_info("hugetlb_cma: reserved %lu MiB on node %d\n",
size / SZ_1M, nid);
--
1.8.3.1