[PATCH 2/2] s390/topology: add drawer scheduling domain level

From: Heiko Carstens
Date: Wed Jun 08 2016 - 05:11:48 EST


The z13 machine added a fourth level to the cpu topology
information. The new top level is called drawer.

A drawer contains two books, which used to be the top level.

Adding this additional scheduling domain did show performance
improvements for some workloads of up to 8%, while there don't
seem to be any workloads impacted in a negative way.

Signed-off-by: Heiko Carstens <heiko.carstens@xxxxxxxxxx>
---
arch/s390/Kconfig | 4 ++++
arch/s390/include/asm/topology.h | 4 ++++
arch/s390/kernel/topology.c | 33 +++++++++++++++++++++++++++------
arch/s390/numa/mode_emu.c | 25 ++++++++++++++++++++-----
4 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index a8c259059adf..9d35d6d084da 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -477,6 +477,9 @@ config SCHED_MC
config SCHED_BOOK
def_bool n

+config SCHED_DRAWER
+ def_bool n
+
config SCHED_TOPOLOGY
def_bool y
prompt "Topology scheduler support"
@@ -484,6 +487,7 @@ config SCHED_TOPOLOGY
select SCHED_SMT
select SCHED_MC
select SCHED_BOOK
+ select SCHED_DRAWER
help
Topology scheduler support improves the CPU scheduler's decision
making when dealing with machines that have multi-threading,
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index 6b53962e807e..f15f5571ca2b 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -14,10 +14,12 @@ struct cpu_topology_s390 {
unsigned short core_id;
unsigned short socket_id;
unsigned short book_id;
+ unsigned short drawer_id;
unsigned short node_id;
cpumask_t thread_mask;
cpumask_t core_mask;
cpumask_t book_mask;
+ cpumask_t drawer_mask;
};

DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology);
@@ -30,6 +32,8 @@ DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology);
#define topology_core_cpumask(cpu) (&per_cpu(cpu_topology, cpu).core_mask)
#define topology_book_id(cpu) (per_cpu(cpu_topology, cpu).book_id)
#define topology_book_cpumask(cpu) (&per_cpu(cpu_topology, cpu).book_mask)
+#define topology_drawer_id(cpu) (per_cpu(cpu_topology, cpu).drawer_id)
+#define topology_drawer_cpumask(cpu) (&per_cpu(cpu_topology, cpu).drawer_mask)

#define mc_capable() 1

diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 64298a867589..44745e751c3a 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -46,6 +46,7 @@ static DECLARE_WORK(topology_work, topology_work_fn);
*/
static struct mask_info socket_info;
static struct mask_info book_info;
+static struct mask_info drawer_info;

DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology);
EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology);
@@ -80,6 +81,7 @@ static cpumask_t cpu_thread_map(unsigned int cpu)
}

static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
+ struct mask_info *drawer,
struct mask_info *book,
struct mask_info *socket,
int one_socket_per_cpu)
@@ -97,9 +99,11 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
continue;
for (i = 0; i <= smp_cpu_mtid; i++) {
topo = &per_cpu(cpu_topology, lcpu + i);
+ topo->drawer_id = drawer->id;
topo->book_id = book->id;
topo->core_id = rcore;
topo->thread_id = lcpu + i;
+ cpumask_set_cpu(lcpu + i, &drawer->mask);
cpumask_set_cpu(lcpu + i, &book->mask);
cpumask_set_cpu(lcpu + i, &socket->mask);
if (one_socket_per_cpu)
@@ -128,6 +132,11 @@ static void clear_masks(void)
cpumask_clear(&info->mask);
info = info->next;
}
+ info = &drawer_info;
+ while (info) {
+ cpumask_clear(&info->mask);
+ info = info->next;
+ }
}

static union topology_entry *next_tle(union topology_entry *tle)
@@ -141,12 +150,17 @@ static void __tl_to_masks_generic(struct sysinfo_15_1_x *info)
{
struct mask_info *socket = &socket_info;
struct mask_info *book = &book_info;
+ struct mask_info *drawer = &drawer_info;
union topology_entry *tle, *end;

tle = info->tle;
end = (union topology_entry *)((unsigned long)info + info->length);
while (tle < end) {
switch (tle->nl) {
+ case 3:
+ drawer = drawer->next;
+ drawer->id = tle->container.id;
+ break;
case 2:
book = book->next;
book->id = tle->container.id;
@@ -156,7 +170,7 @@ static void __tl_to_masks_generic(struct sysinfo_15_1_x *info)
socket->id = tle->container.id;
break;
case 0:
- add_cpus_to_mask(&tle->cpu, book, socket, 0);
+ add_cpus_to_mask(&tle->cpu, drawer, book, socket, 0);
break;
default:
clear_masks();
@@ -170,6 +184,7 @@ static void __tl_to_masks_z10(struct sysinfo_15_1_x *info)
{
struct mask_info *socket = &socket_info;
struct mask_info *book = &book_info;
+ struct mask_info *drawer = &drawer_info;
union topology_entry *tle, *end;

tle = info->tle;
@@ -181,7 +196,7 @@ static void __tl_to_masks_z10(struct sysinfo_15_1_x *info)
book->id = tle->container.id;
break;
case 0:
- socket = add_cpus_to_mask(&tle->cpu, book, socket, 1);
+ socket = add_cpus_to_mask(&tle->cpu, drawer, book, socket, 1);
break;
default:
clear_masks();
@@ -257,11 +272,13 @@ static void update_cpu_masks(void)
topo->thread_mask = cpu_thread_map(cpu);
topo->core_mask = cpu_group_map(&socket_info, cpu);
topo->book_mask = cpu_group_map(&book_info, cpu);
+ topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
if (!MACHINE_HAS_TOPOLOGY) {
topo->thread_id = cpu;
topo->core_id = cpu;
topo->socket_id = cpu;
topo->book_id = cpu;
+ topo->drawer_id = cpu;
}
}
numa_update_cpu_topology();
@@ -269,10 +286,7 @@ static void update_cpu_masks(void)

void store_topology(struct sysinfo_15_1_x *info)
{
- if (topology_max_mnest >= 3)
- stsi(info, 15, 1, 3);
- else
- stsi(info, 15, 1, 2);
+ stsi(info, 15, 1, min(topology_max_mnest, 4));
}

int arch_update_cpu_topology(void)
@@ -442,6 +456,11 @@ static const struct cpumask *cpu_book_mask(int cpu)
return &per_cpu(cpu_topology, cpu).book_mask;
}

+static const struct cpumask *cpu_drawer_mask(int cpu)
+{
+ return &per_cpu(cpu_topology, cpu).drawer_mask;
+}
+
static int __init early_parse_topology(char *p)
{
return kstrtobool(p, &topology_enabled);
@@ -452,6 +471,7 @@ static struct sched_domain_topology_level s390_topology[] = {
{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
{ cpu_book_mask, SD_INIT_NAME(BOOK) },
+ { cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
{ NULL, },
};
@@ -487,6 +507,7 @@ static int __init s390_topology_init(void)
printk(KERN_CONT " / %d\n", info->mnest);
alloc_masks(info, &socket_info, 1);
alloc_masks(info, &book_info, 2);
+ alloc_masks(info, &drawer_info, 3);
set_sched_topology(s390_topology);
return 0;
}
diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c
index 828d0695d0d4..fbc394e16b2c 100644
--- a/arch/s390/numa/mode_emu.c
+++ b/arch/s390/numa/mode_emu.c
@@ -34,7 +34,8 @@
#define DIST_CORE 1
#define DIST_MC 2
#define DIST_BOOK 3
-#define DIST_MAX 4
+#define DIST_DRAWER 4
+#define DIST_MAX 5

/* Node distance reported to common code */
#define EMU_NODE_DIST 10
@@ -43,7 +44,7 @@
#define NODE_ID_FREE -1

/* Different levels of toptree */
-enum toptree_level {CORE, MC, BOOK, NODE, TOPOLOGY};
+enum toptree_level {CORE, MC, BOOK, DRAWER, NODE, TOPOLOGY};

/* The two toptree IDs */
enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA};
@@ -114,6 +115,14 @@ static int cores_free(struct toptree *tree)
*/
static struct toptree *core_node(struct toptree *core)
{
+ return core->parent->parent->parent->parent;
+}
+
+/*
+ * Return drawer of core
+ */
+static struct toptree *core_drawer(struct toptree *core)
+{
return core->parent->parent->parent;
}

@@ -138,6 +147,8 @@ static struct toptree *core_mc(struct toptree *core)
*/
static int dist_core_to_core(struct toptree *core1, struct toptree *core2)
{
+ if (core_drawer(core1)->id != core_drawer(core2)->id)
+ return DIST_DRAWER;
if (core_book(core1)->id != core_book(core2)->id)
return DIST_BOOK;
if (core_mc(core1)->id != core_mc(core2)->id)
@@ -262,6 +273,8 @@ static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys)
struct toptree *core;

/* Always try to move perfectly fitting structures first */
+ move_level_to_numa(numa, phys, DRAWER, true);
+ move_level_to_numa(numa, phys, DRAWER, false);
move_level_to_numa(numa, phys, BOOK, true);
move_level_to_numa(numa, phys, BOOK, false);
move_level_to_numa(numa, phys, MC, true);
@@ -335,7 +348,7 @@ static struct toptree *toptree_to_numa(struct toptree *phys)
*/
static struct toptree *toptree_from_topology(void)
{
- struct toptree *phys, *node, *book, *mc, *core;
+ struct toptree *phys, *node, *drawer, *book, *mc, *core;
struct cpu_topology_s390 *top;
int cpu;

@@ -344,10 +357,11 @@ static struct toptree *toptree_from_topology(void)
for_each_online_cpu(cpu) {
top = &per_cpu(cpu_topology, cpu);
node = toptree_get_child(phys, 0);
- book = toptree_get_child(node, top->book_id);
+ drawer = toptree_get_child(node, top->drawer_id);
+ book = toptree_get_child(drawer, top->book_id);
mc = toptree_get_child(book, top->socket_id);
core = toptree_get_child(mc, top->core_id);
- if (!book || !mc || !core)
+ if (!drawer || !book || !mc || !core)
panic("NUMA emulation could not allocate memory");
cpumask_set_cpu(cpu, &core->mask);
toptree_update_mask(mc);
@@ -368,6 +382,7 @@ static void topology_add_core(struct toptree *core)
cpumask_copy(&top->thread_mask, &core->mask);
cpumask_copy(&top->core_mask, &core_mc(core)->mask);
cpumask_copy(&top->book_mask, &core_book(core)->mask);
+ cpumask_copy(&top->drawer_mask, &core_drawer(core)->mask);
cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]);
top->node_id = core_node(core)->id;
}
--
2.6.6