Re: [PATCH v3 1/4] LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c

From: Huacai Chen
Date: Fri Apr 12 2024 - 00:16:42 EST


On Thu, Apr 11, 2024 at 9:05 AM Tiezhu Yang <yangtiezhu@xxxxxxxxxxx> wrote:
>
> Currently, if CONFIG_NUMA is not set but CONFIG_SMP is set, the
> arch-specific setup_per_cpu_areas() in arch/loongarch/kernel/numa.c
> will not be built and the generic setup_per_cpu_areas() in mm/percpu.c
> is actually used; this is not reasonable and does not work as intended.
Why is the generic version not reasonable? We only need a custom version
because it can place the percpu variables in the best node. If NUMA is
disabled, software can only see one node, so what is there to optimize?

Huacai

>
> Let us select HAVE_SETUP_PER_CPU_AREA unconditionally and then move
> CONFIG_HAVE_SETUP_PER_CPU_AREA related code from numa.c to smp.c to
> avoid this problem.
>
> Signed-off-by: Tiezhu Yang <yangtiezhu@xxxxxxxxxxx>
> ---
> arch/loongarch/Kconfig | 2 +-
> arch/loongarch/kernel/numa.c | 58 -----------------------------------
> arch/loongarch/kernel/smp.c | 59 ++++++++++++++++++++++++++++++++++++
> 3 files changed, 60 insertions(+), 59 deletions(-)
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index a5f300ec6f28..64052ae2c2af 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -151,7 +151,7 @@ config LOONGARCH
> select HAVE_RUST
> select HAVE_SAMPLE_FTRACE_DIRECT
> select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
> - select HAVE_SETUP_PER_CPU_AREA if NUMA
> + select HAVE_SETUP_PER_CPU_AREA
> select HAVE_STACK_VALIDATION if HAVE_OBJTOOL
> select HAVE_STACKPROTECTOR
> select HAVE_SYSCALL_TRACEPOINTS
> diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
> index 8fe21f868f72..49dc1d932ce2 100644
> --- a/arch/loongarch/kernel/numa.c
> +++ b/arch/loongarch/kernel/numa.c
> @@ -48,64 +48,6 @@ EXPORT_SYMBOL(__cpuid_to_node);
>
> nodemask_t numa_nodes_parsed __initdata;
>
> -#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
> -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
> -EXPORT_SYMBOL(__per_cpu_offset);
> -
> -static int __init pcpu_cpu_to_node(int cpu)
> -{
> - return early_cpu_to_node(cpu);
> -}
> -
> -static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
> -{
> - if (early_cpu_to_node(from) == early_cpu_to_node(to))
> - return LOCAL_DISTANCE;
> - else
> - return REMOTE_DISTANCE;
> -}
> -
> -void __init pcpu_populate_pte(unsigned long addr)
> -{
> - populate_kernel_pte(addr);
> -}
> -
> -void __init setup_per_cpu_areas(void)
> -{
> - unsigned long delta;
> - unsigned int cpu;
> - int rc = -EINVAL;
> -
> - if (pcpu_chosen_fc == PCPU_FC_AUTO) {
> - if (nr_node_ids >= 8)
> - pcpu_chosen_fc = PCPU_FC_PAGE;
> - else
> - pcpu_chosen_fc = PCPU_FC_EMBED;
> - }
> -
> - /*
> - * Always reserve area for module percpu variables. That's
> - * what the legacy allocator did.
> - */
> - if (pcpu_chosen_fc != PCPU_FC_PAGE) {
> - rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
> - PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
> - pcpu_cpu_distance, pcpu_cpu_to_node);
> - if (rc < 0)
> - pr_warn("%s allocator failed (%d), falling back to page size\n",
> - pcpu_fc_names[pcpu_chosen_fc], rc);
> - }
> - if (rc < 0)
> - rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_cpu_to_node);
> - if (rc < 0)
> - panic("cannot initialize percpu area (err=%d)", rc);
> -
> - delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
> - for_each_possible_cpu(cpu)
> - __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
> -}
> -#endif
> -
> /*
> * Get nodeid by logical cpu number.
> * __cpuid_to_node maps phyical cpu id to node, so we
> diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
> index aabee0b280fe..88b9c6b68d1e 100644
> --- a/arch/loongarch/kernel/smp.c
> +++ b/arch/loongarch/kernel/smp.c
> @@ -29,6 +29,7 @@
> #include <asm/loongson.h>
> #include <asm/mmu_context.h>
> #include <asm/numa.h>
> +#include <asm/pgalloc.h>
> #include <asm/processor.h>
> #include <asm/setup.h>
> #include <asm/time.h>
> @@ -717,3 +718,61 @@ void flush_tlb_one(unsigned long vaddr)
> on_each_cpu(flush_tlb_one_ipi, (void *)vaddr, 1);
> }
> EXPORT_SYMBOL(flush_tlb_one);
> +
> +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
> +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
> +EXPORT_SYMBOL(__per_cpu_offset);
> +
> +static int __init pcpu_cpu_to_node(int cpu)
> +{
> + return early_cpu_to_node(cpu);
> +}
> +
> +static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
> +{
> + if (early_cpu_to_node(from) == early_cpu_to_node(to))
> + return LOCAL_DISTANCE;
> + else
> + return REMOTE_DISTANCE;
> +}
> +
> +void __init pcpu_populate_pte(unsigned long addr)
> +{
> + populate_kernel_pte(addr);
> +}
> +
> +void __init setup_per_cpu_areas(void)
> +{
> + unsigned long delta;
> + unsigned int cpu;
> + int rc = -EINVAL;
> +
> + if (pcpu_chosen_fc == PCPU_FC_AUTO) {
> + if (nr_node_ids >= 8)
> + pcpu_chosen_fc = PCPU_FC_PAGE;
> + else
> + pcpu_chosen_fc = PCPU_FC_EMBED;
> + }
> +
> + /*
> + * Always reserve area for module percpu variables. That's
> + * what the legacy allocator did.
> + */
> + if (pcpu_chosen_fc != PCPU_FC_PAGE) {
> + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
> + PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
> + pcpu_cpu_distance, pcpu_cpu_to_node);
> + if (rc < 0)
> + pr_warn("%s allocator failed (%d), falling back to page size\n",
> + pcpu_fc_names[pcpu_chosen_fc], rc);
> + }
> + if (rc < 0)
> + rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_cpu_to_node);
> + if (rc < 0)
> + panic("cannot initialize percpu area (err=%d)", rc);
> +
> + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
> + for_each_possible_cpu(cpu)
> + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
> +}
> +#endif
> --
> 2.42.0
>
>