[PATCH v3 2/3] arm64: Support page mapping percpu first chunk allocator

From: Kefeng Wang
Date: Mon Aug 09 2021 - 05:32:36 EST


Percpu embedded first chunk allocator is the firstly option, but it
could fails on ARM64, eg,
"percpu: max_distance=0x5fcfdc640000 too large for vmalloc space 0x781fefff0000"
"percpu: max_distance=0x600000540000 too large for vmalloc space 0x7dffb7ff0000"
"percpu: max_distance=0x5fff9adb0000 too large for vmalloc space 0x5dffb7ff0000"

then we could meet "WARNING: CPU: 15 PID: 461 at vmalloc.c:3087 pcpu_get_vm_areas+0x488/0x838",
even the system could not boot successfully.

Let's implement page mapping percpu first chunk allocator as a fallback
to the embedding allocator to increase the robustness of the system.

Signed-off-by: Kefeng Wang <wangkefeng.wang@xxxxxxxxxx>
---
arch/arm64/Kconfig | 4 ++
drivers/base/arch_numa.c | 82 +++++++++++++++++++++++++++++++++++-----
2 files changed, 76 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index fdcd54d39c1e..39f27e268c38 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1045,6 +1045,10 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK
def_bool y
depends on NUMA

+config NEED_PER_CPU_PAGE_FIRST_CHUNK
+ def_bool y
+ depends on NUMA
+
source "kernel/Kconfig.hz"

config ARCH_SPARSEMEM_ENABLE
diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
index 4cc4e117727d..563b2013b75a 100644
--- a/drivers/base/arch_numa.c
+++ b/drivers/base/arch_numa.c
@@ -14,6 +14,7 @@
#include <linux/of.h>

#include <asm/sections.h>
+#include <asm/pgalloc.h>

struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
@@ -168,22 +169,83 @@ static void __init pcpu_fc_free(void *ptr, size_t size)
memblock_free_early(__pa(ptr), size);
}

+#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+static void __init pcpu_populate_pte(unsigned long addr)
+{
+ pgd_t *pgd = pgd_offset_k(addr);
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d)) {
+ pud_t *new;
+
+ new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ if (!new)
+ goto err_alloc;
+ p4d_populate(&init_mm, p4d, new);
+ }
+
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud)) {
+ pmd_t *new;
+
+ new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ if (!new)
+ goto err_alloc;
+ pud_populate(&init_mm, pud, new);
+ }
+
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd)) {
+ pte_t *new;
+
+ new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ if (!new)
+ goto err_alloc;
+ pmd_populate_kernel(&init_mm, pmd, new);
+ }
+
+ return;
+
+err_alloc:
+ panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
+ __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+}
+#endif
+
void __init setup_per_cpu_areas(void)
{
unsigned long delta;
unsigned int cpu;
- int rc;
+ int rc = -EINVAL;
+
+ if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+ /*
+ * Always reserve area for module percpu variables. That's
+ * what the legacy allocator did.
+ */
+ rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
+ PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
+ pcpu_cpu_distance,
+ pcpu_fc_alloc, pcpu_fc_free);
+#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+ if (rc < 0)
+ pr_warn("PERCPU: %s allocator failed (%d), falling back to page size\n",
+ pcpu_fc_names[pcpu_chosen_fc], rc);
+#endif
+ }

- /*
- * Always reserve area for module percpu variables. That's
- * what the legacy allocator did.
- */
- rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
- PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
- pcpu_cpu_distance,
- pcpu_fc_alloc, pcpu_fc_free);
+#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+ if (rc < 0)
+ rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
+ pcpu_fc_alloc,
+ pcpu_fc_free,
+ pcpu_populate_pte);
+#endif
if (rc < 0)
- panic("Failed to initialize percpu areas.");
+ panic("Failed to initialize percpu areas (err=%d).", rc);

delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
for_each_possible_cpu(cpu)
--
2.26.2