[PATCH 2/3] powerpc/smp: Add support detecting thread-groups sharing L2 cache

From: Gautham R. Shenoy
Date: Thu Dec 03 2020 - 23:50:20 EST


From: "Gautham R. Shenoy" <ego@xxxxxxxxxxxxxxxxxx>

On POWER systems, groups of threads within a core sharing the L2-cache
can be indicated by the "ibm,thread-groups" property array with the
identifier "2".

This patch adds support for detecting this, and when present, populate
the populating the cpu_l2_cache_mask of every CPU to the core-siblings
which share L2 with the CPU as specified in the by the
"ibm,thread-groups" property array.

On a platform with the following "ibm,thread-group" configuration
00000001 00000002 00000004 00000000
00000002 00000004 00000006 00000001
00000003 00000005 00000007 00000002
00000002 00000004 00000000 00000002
00000004 00000006 00000001 00000003
00000005 00000007

Without this patch, the sched-domain hierarchy for CPUs 0,1 would be
CPU0 attaching sched-domain(s):
domain-0: span=0,2,4,6 level=SMT
domain-1: span=0-7 level=CACHE
domain-2: span=0-15,24-39,48-55 level=MC
domain-3: span=0-55 level=DIE

CPU1 attaching sched-domain(s):
domain-0: span=1,3,5,7 level=SMT
domain-1: span=0-7 level=CACHE
domain-2: span=0-15,24-39,48-55 level=MC
domain-3: span=0-55 level=DIE

The CACHE domain at 0-7 is incorrect since the ibm,thread-groups
sub-array
[00000002 00000002 00000004
00000000 00000002 00000004 00000006
00000001 00000003 00000005 00000007]
indicates that L2 (Property "2") is shared only between the threads of a single
group. There are "2" groups of threads where each group contains "4"
threads each. The groups being {0,2,4,6} and {1,3,5,7}.

With this patch, the sched-domain hierarchy for CPUs 0,1 would be
CPU0 attaching sched-domain(s):
domain-0: span=0,2,4,6 level=SMT
domain-1: span=0-15,24-39,48-55 level=MC
domain-2: span=0-55 level=DIE

CPU1 attaching sched-domain(s):
domain-0: span=1,3,5,7 level=SMT
domain-1: span=0-15,24-39,48-55 level=MC
domain-2: span=0-55 level=DIE

The CACHE domain with span=0,2,4,6 for CPU 0 (span=1,3,5,7 for CPU 1
resp.) gets degenerated into the SMT domain. Furthermore, the
last-level-cache domain gets correctly set to the SMT sched-domain.

Signed-off-by: Gautham R. Shenoy <ego@xxxxxxxxxxxxxxxxxx>
---
arch/powerpc/kernel/smp.c | 66 +++++++++++++++++++++++++++++++++++++++++------
1 file changed, 58 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 6a242a3..a116d2d 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -76,6 +76,7 @@
struct task_struct *secondary_current;
bool has_big_cores;
bool coregroup_enabled;
+bool thread_group_shares_l2;

DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
@@ -99,6 +100,7 @@ enum {

#define MAX_THREAD_LIST_SIZE 8
#define THREAD_GROUP_SHARE_L1 1
+#define THREAD_GROUP_SHARE_L2 2
struct thread_groups {
unsigned int property;
unsigned int nr_groups;
@@ -107,7 +109,7 @@ struct thread_groups {
};

/* Maximum number of properties that groups of threads within a core can share */
-#define MAX_THREAD_GROUP_PROPERTIES 1
+#define MAX_THREAD_GROUP_PROPERTIES 2

struct thread_groups_list {
unsigned int nr_properties;
@@ -121,6 +123,13 @@ struct thread_groups_list {
*/
DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map);

+/*
+ * On some big-cores system, thread_group_l2_cache_map for each CPU
+ * corresponds to the set its siblings within the core that share the
+ * L2-cache.
+ */
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
+
/* SMP operations for this machine */
struct smp_ops_t *smp_ops;

@@ -718,7 +727,9 @@ static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int),
*
* ibm,thread-groups[i + 0] tells us the property based on which the
* threads are being grouped together. If this value is 1, it implies
- * that the threads in the same group share L1, translation cache.
+ * that the threads in the same group share L1, translation cache. If
+ * the value is 2, it implies that the threads in the same group share
+ * the same L2 cache.
*
* ibm,thread-groups[i+1] tells us how many such thread groups exist for the
* property ibm,thread-groups[i]
@@ -745,10 +756,10 @@ static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int),
* 12}.
*
* b) there are 2 groups of 4 threads each, where each group of
- * threads share some property indicated by the first value 2. The
- * "ibm,ppc-interrupt-server#s" of the first group is {5,7,9,11}
- * and the "ibm,ppc-interrupt-server#s" of the second group is
- * {6,8,10,12} structure
+ * threads share some property indicated by the first value 2 (L2
+ * cache). The "ibm,ppc-interrupt-server#s" of the first group is
+ * {5,7,9,11} and the "ibm,ppc-interrupt-server#s" of the second
+ * group is {6,8,10,12} structure
*
* Returns 0 on success, -EINVAL if the property does not exist,
* -ENODATA if property does not have a value, and -EOVERFLOW if the
@@ -840,7 +851,8 @@ static int init_cpu_cache_map(int cpu, unsigned int cache_property)
if (!dn)
return -ENODATA;

- if (!(cache_property == THREAD_GROUP_SHARE_L1))
+ if (!(cache_property == THREAD_GROUP_SHARE_L1 ||
+ cache_property == THREAD_GROUP_SHARE_L2))
return -EINVAL;

if (!cpu_tgl->nr_properties) {
@@ -867,7 +879,10 @@ static int init_cpu_cache_map(int cpu, unsigned int cache_property)
goto out;
}

- mask = &per_cpu(cpu_l1_cache_map, cpu);
+ if (cache_property == THREAD_GROUP_SHARE_L1)
+ mask = &per_cpu(cpu_l1_cache_map, cpu);
+ else if (cache_property == THREAD_GROUP_SHARE_L2)
+ mask = &per_cpu(thread_group_l2_cache_map, cpu);

zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));

@@ -973,6 +988,16 @@ static int init_big_cores(void)
}

has_big_cores = true;
+
+ for_each_possible_cpu(cpu) {
+ int err = init_cpu_cache_map(cpu, THREAD_GROUP_SHARE_L2);
+
+ if (err)
+ return err;
+ }
+
+ thread_group_shares_l2 = true;
+ pr_info("Thread-groups in a core share L2-cache\n");
return 0;
}

@@ -1287,6 +1312,31 @@ static bool update_mask_by_l2(int cpu, cpumask_var_t *mask)
if (has_big_cores)
submask_fn = cpu_smallcore_mask;

+
+ /*
+ * If the threads in a thread-group share L2 cache, then then
+ * the L2-mask can be obtained from thread_group_l2_cache_map.
+ */
+ if (thread_group_shares_l2) {
+ /* Siblings that share L1 is a subset of siblings that share L2.*/
+ or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask);
+ if (*mask) {
+ cpumask_andnot(*mask,
+ per_cpu(thread_group_l2_cache_map, cpu),
+ cpu_l2_cache_mask(cpu));
+ } else {
+ mask = &per_cpu(thread_group_l2_cache_map, cpu);
+ }
+
+ for_each_cpu(i, *mask) {
+ if (!cpu_online(i))
+ continue;
+ set_cpus_related(i, cpu, cpu_l2_cache_mask);
+ }
+
+ return true;
+ }
+
l2_cache = cpu_to_l2cache(cpu);
if (!l2_cache || !*mask) {
/* Assume only core siblings share cache with this CPU */
--
1.9.4