Re: [PATCH v11 3/4] arm64: topology: Support SMT control on ACPI based system
From: Yicong Yang
Date: Mon Mar 03 2025 - 09:43:00 EST
On 2025/2/25 14:08, Hanjun Guo wrote:
> On 2025/2/18 22:10, Yicong Yang wrote:
>> From: Yicong Yang <yangyicong@xxxxxxxxxxxxx>
>>
>> For ACPI we'll build the topology from PPTT and we cannot directly
>> get the SMT number of each core. Instead, use a temporary xarray
>> to record the heterogeneous information (from ACPI_PPTT_ACPI_IDENTICAL)
>> and the SMT information of the first core in its heterogeneous CPU cluster
>> when building the topology. Then we can know the largest SMT number
>> in the system. If a homogeneous system is using ACPI 6.2 or later,
>> all the CPUs should be under the root node of PPTT. There'll be
>> only one entry in the xarray and all the CPUs in the system will
>> be assumed identical.
>>
>> The core's SMT control provides two interfaces to the users [1]:
>> 1) enable/disable SMT by writing on/off
>> 2) enable/disable SMT by writing thread number 1/max_thread_number
>>
>> If a system has more than one SMT thread number, 2) may
>> not handle it well, since there are multiple thread numbers in the
>> system and 2) only accepts 1/max_thread_number. So issue a warning
>> to notify the users if such a system is detected.
>>
>> [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/ABI/testing/sysfs-devices-system-cpu#n542
>>
>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@xxxxxxxxxx>
>> Signed-off-by: Yicong Yang <yangyicong@xxxxxxxxxxxxx>
>> ---
>> arch/arm64/kernel/topology.c | 66 ++++++++++++++++++++++++++++++++++++
>> 1 file changed, 66 insertions(+)
>>
>> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
>> index 1a2c72f3e7f8..6eba1ac091ee 100644
>> --- a/arch/arm64/kernel/topology.c
>> +++ b/arch/arm64/kernel/topology.c
>> @@ -15,8 +15,10 @@
>> #include <linux/arch_topology.h>
>> #include <linux/cacheinfo.h>
>> #include <linux/cpufreq.h>
>> +#include <linux/cpu_smt.h>
>> #include <linux/init.h>
>> #include <linux/percpu.h>
>> +#include <linux/xarray.h>
>> #include <asm/cpu.h>
>> #include <asm/cputype.h>
>> @@ -37,17 +39,28 @@ static bool __init acpi_cpu_is_threaded(int cpu)
>> return !!is_threaded;
>> }
>> +struct cpu_smt_info {
>> + unsigned int thread_num;
>> + int core_id;
>> +};
>> +
>> /*
>> * Propagate the topology information of the processor_topology_node tree to the
>> * cpu_topology array.
>> */
>> int __init parse_acpi_topology(void)
>> {
>> + unsigned int max_smt_thread_num = 0;
>> + struct cpu_smt_info *entry;
>> + struct xarray hetero_cpu;
>> + unsigned long hetero_id;
>> int cpu, topology_id;
>> if (acpi_disabled)
>> return 0;
>> + xa_init(&hetero_cpu);
>> +
>> for_each_possible_cpu(cpu) {
>> topology_id = find_acpi_cpu_topology(cpu, 0);
>> if (topology_id < 0)
>> @@ -57,6 +70,34 @@ int __init parse_acpi_topology(void)
>> cpu_topology[cpu].thread_id = topology_id;
>> topology_id = find_acpi_cpu_topology(cpu, 1);
>> cpu_topology[cpu].core_id = topology_id;
>> +
>> + /*
>> + * In the PPTT, CPUs below a node with the 'identical
>> + * implementation' flag have the same number of threads.
>> + * Count the number of threads for only one CPU (i.e.
>> + * one core_id) among those with the same hetero_id.
>> + * See the comment of find_acpi_cpu_topology_hetero_id()
>> + * for more details.
>> + *
>> + * One entry is created for each node having:
>> + * - the 'identical implementation' flag
>> + * - its parent not having the flag
>> + */
>> + hetero_id = find_acpi_cpu_topology_hetero_id(cpu);
>> + entry = xa_load(&hetero_cpu, hetero_id);
>> + if (!entry) {
>> + entry = kzalloc(sizeof(*entry), GFP_KERNEL);
>> + WARN_ON_ONCE(!entry);
>> +
>> + if (entry) {
>> + entry->core_id = topology_id;
>> + entry->thread_num = 1;
>> + xa_store(&hetero_cpu, hetero_id,
>> + entry, GFP_KERNEL);
>> + }
>> + } else if (entry->core_id == topology_id) {
>> + entry->thread_num++;
>> + }
>> } else {
>> cpu_topology[cpu].thread_id = -1;
>> cpu_topology[cpu].core_id = topology_id;
>> @@ -67,6 +108,31 @@ int __init parse_acpi_topology(void)
>> cpu_topology[cpu].package_id = topology_id;
>> }
>> + /*
>> + * This should be a short loop depending on the number of heterogeneous
>> + * CPU clusters. Typically on a homogeneous system there's only one
>> + * entry in the XArray.
>> + */
>> + xa_for_each(&hetero_cpu, hetero_id, entry) {
>> + if (entry->thread_num != max_smt_thread_num && max_smt_thread_num)
>> + pr_warn_once("Heterogeneous SMT topology is partly supported by SMT control\n");
>> +
>> + max_smt_thread_num = max(max_smt_thread_num, entry->thread_num);
>> + xa_erase(&hetero_cpu, hetero_id);
>> + kfree(entry);
>> + }
>> +
>> + /*
>> + * Notify the CPU framework of the SMT support. Initialize the
>> + * max_smt_thread_num to 1 if no SMT support is detected. A thread
>> + * number of 1 can be handled by the framework so we don't need
>> + * to check max_smt_thread_num to see whether we support SMT or not.
>> + */
>> + if (!max_smt_thread_num)
>> + max_smt_thread_num = 1;
>> +
>> + cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num);
>> + xa_destroy(&hetero_cpu);
>> return 0;
>> }
>> #endif
>
> Looks good to me,
>
> Reviewed-by: Hanjun Guo <guohanjun@xxxxxxxxxx>
>
Thanks a lot for taking a look :)