[PATCH 01/19] smart: define and build per-core data structures

From: klamm
Date: Thu Sep 04 2014 - 12:36:46 EST


From: Roman Gushchin <klamm@xxxxxxxxxxxxxx>

First, this patch introduces the smart_core_data structure.
It contains the following fields:
cpu_core_id - per-cpu core id (the first SMT thread on this core)
core_next - id of the next core on the local node
core_node_sibling - id of the corresponding core on the next node
core_locked - per-core lock used to synchronize core selection
  (a usage sketch follows the accessor list below)

The following macros/functions are introduced to access smart data:
cpu_core_id(cpu) - returns the core id of a CPU
smart_data(cpu) - returns the per-core smart_core_data (macro)
next_core(cpu) - returns the id of the next core on the local node
core_node_sibling(cpu) - returns the id of the sibling core on the next node
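
core_locked is only declared here; its real users come later in the series.
As a rough, hypothetical sketch (the helpers below are not part of this
patch), core selection could synchronize on it like this:

  /* Illustration only: claim/release a core while selecting it. */
  static inline bool smart_try_lock_core(int cpu)
  {
          return atomic_cmpxchg(&smart_data(cpu).core_locked, 0, 1) == 0;
  }

  static inline void smart_unlock_core(int cpu)
  {
          atomic_set(&smart_data(cpu).core_locked, 0);
  }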

Also, this patch introduces the build_smart_topology() function,
which fills smart_core_data for each CPU.
Below is an illustration of how it should look on a 2-node machine
with 8 physical cores and 16 SMT threads (a traversal sketch follows
the diagram).

cpu     cpu_core_id
0,8     0
1,9     1
2,10    2
3,11    3
4,12    4
5,13    5
6,14    6
7,15    7

node 0                              node 1
----------------------------------  ----------------------------------
core 0            core 1            core 4            core 5
  core_next 1       core_next 2       core_next 5       core_next 6
  node_sibling 4    node_sibling 5    node_sibling 0    node_sibling 1

core 2            core 3            core 6            core 7
  core_next 3       core_next 0       core_next 7       core_next 4
  node_sibling 6    node_sibling 7    node_sibling 2    node_sibling 3
----------------------------------  ----------------------------------
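
As a sketch of how this topology is meant to be walked (the helper below is
hypothetical and not part of this patch, but it mirrors the ring checks done
by check_smart_data()), every core of the 2-node example above can be visited
by following next_core() around the local node and hopping across nodes via
core_node_sibling():

  /* Illustration only: visit all cores, local node first, then the other node. */
  static void smart_visit_all_cores(int cpu)
  {
          int start, core;

          /* Walk the ring of cores on the local node. */
          start = cpu_core_id(cpu);
          core = start;
          do {
                  pr_info("smart: local core %d\n", core);
                  core = next_core(core);
          } while (core != -1 && core != start);

          /* Jump to the corresponding core on the next node and walk its ring. */
          start = core_node_sibling(cpu);
          core = start;
          do {
                  pr_info("smart: remote core %d\n", core);
                  core = next_core(core);
          } while (core != -1 && core != start);
  }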

build_smart_topology() uses the sched_domains data and is called
each time the sched domains are rebuilt. If the smart topology is
built successfully (verified by check_smart_data()), the
__smart_initialized static key is enabled.
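
Scheduler paths that depend on the smart topology are expected to test this
key first. A minimal sketch of that pattern (smart_enabled() is a
hypothetical wrapper, not part of this patch):

  /* Illustration only: fast paths test the key before touching smart data. */
  static inline bool smart_enabled(void)
  {
          /* Nearly free while smart is disabled, thanks to jump labels. */
          return static_key_false(&__smart_initialized);
  }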

Signed-off-by: Roman Gushchin <klamm@xxxxxxxxxxxxxx>
---
kernel/sched/core.c | 3 +
kernel/sched/rt.c | 169 +++++++++++++++++++++++++++++++++++++++++++++++++++
kernel/sched/sched.h | 40 ++++++++++++
3 files changed, 212 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c771f25..14bcdd6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6671,6 +6671,7 @@ static int init_sched_domains(const struct cpumask *cpu_map)
doms_cur = &fallback_doms;
cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
err = build_sched_domains(doms_cur[0], NULL);
+ build_smart_topology();
register_sched_domain_sysctl();

return err;
@@ -6791,6 +6792,8 @@ match2:

register_sched_domain_sysctl();

+ build_smart_topology();
+
mutex_unlock(&sched_domains_mutex);
}

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 2dffc7b..fed3992 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -7,6 +7,15 @@

#include <linux/slab.h>

+#ifdef CONFIG_SMART
+#include <linux/jump_label.h>
+
+struct static_key __smart_initialized = STATIC_KEY_INIT_FALSE;
+DEFINE_MUTEX(smart_mutex);
+
+DEFINE_PER_CPU_SHARED_ALIGNED(struct smart_core_data, smart_core_data);
+#endif /* CONFIG_SMART */
+
int sched_rr_timeslice = RR_TIMESLICE;

static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
@@ -2114,3 +2123,163 @@ void print_rt_stats(struct seq_file *m, int cpu)
rcu_read_unlock();
}
#endif /* CONFIG_SCHED_DEBUG */
+
+#ifdef CONFIG_SMART
+int check_smart_data(void)
+{
+ int cpu, core;
+ int iterations;
+
+ for_each_online_cpu(cpu) {
+ if (cpu_core_id(cpu) == -1 || next_core(cpu) == -1 ||
+ core_node_sibling(cpu) == -1)
+ goto error;
+
+ if (!cpumask_test_cpu(cpu_core_id(cpu), cpu_online_mask))
+ goto error;
+
+ if (!cpumask_test_cpu(core_node_sibling(cpu), cpu_online_mask))
+ goto error;
+
+ iterations = 0;
+ core = cpu_core_id(cpu);
+ do {
+ if (core == -1)
+ goto error;
+ if (++iterations > NR_CPUS)
+ goto error;
+ } while (core = next_core(core), core != cpu_core_id(cpu));
+
+ iterations = 0;
+ core = core_node_sibling(cpu);
+ do {
+ if (core == -1)
+ goto error;
+ if (++iterations > NR_CPUS)
+ goto error;
+ } while (core = next_core(core), core != core_node_sibling(cpu));
+
+ }
+
+ return 0;
+
+error:
+ printk(KERN_INFO "smart: init error (cpu %d core %d next %d sibling %d)\n",
+ cpu, cpu_core_id(cpu), next_core(cpu), core_node_sibling(cpu));
+ return -1;
+}
+
+static int number_of_cpu(int cpu, cpumask_t *mask)
+{
+ int tmp;
+ int count = 0;
+
+ for_each_cpu(tmp, mask) {
+ if (tmp == cpu)
+ return count;
+ count++;
+ }
+
+ return -1;
+}
+
+static int cpu_with_number(int number, cpumask_t *mask)
+{
+ int tmp;
+ int count = 0;
+
+ for_each_cpu(tmp, mask) {
+ if (count == number)
+ return tmp;
+ count++;
+ }
+
+ return -1;
+}
+
+void build_smart_topology(void)
+{
+ int cpu;
+ int was_initialized;
+
+ mutex_lock(&smart_mutex);
+
+ was_initialized = static_key_enabled(&__smart_initialized);
+ if (was_initialized)
+ static_key_slow_dec(&__smart_initialized);
+ synchronize_rcu();
+
+ if (was_initialized)
+ printk(KERN_INFO "smart: disabled\n");
+
+ get_online_cpus();
+ for_each_online_cpu(cpu) {
+ /* __cpu_core_id */
+ per_cpu(smart_core_data, cpu).cpu_core_id =
+ cpumask_first(topology_thread_cpumask(cpu));
+ if (per_cpu(smart_core_data, cpu).cpu_core_id < 0 ||
+ per_cpu(smart_core_data, cpu).cpu_core_id >= nr_cpu_ids)
+ per_cpu(smart_core_data, cpu).cpu_core_id = cpu;
+
+ atomic_set(&per_cpu(smart_core_data, cpu).core_locked, 0);
+ }
+
+ rcu_read_lock();
+ for_each_online_cpu(cpu) {
+ struct sched_domain *sd;
+
+ /* core_node_sibling */
+ smart_data(cpu).core_node_sibling = -1;
+ for_each_domain(cpu, sd) {
+ struct sched_group *sg, *next_sg;
+ int number;
+
+ if (sd->flags & SD_SHARE_PKG_RESOURCES)
+ continue;
+
+ sg = sd->groups;
+ next_sg = sg->next;
+
+ if (sg == next_sg)
+ continue;
+
+ number = number_of_cpu(cpu, sched_group_cpus(sg));
+ if (number != -1) {
+ int sibling = cpu_with_number(number,
+ sched_group_cpus(next_sg));
+ if (sibling != -1)
+ smart_data(cpu).core_node_sibling = cpu_core_id(sibling);
+ }
+ }
+
+ /* local_core_list */
+ smart_data(cpu).core_next = -1;
+ for_each_domain(cpu, sd) {
+ if (sd->flags & SD_SHARE_CPUPOWER)
+ continue;
+
+ if (likely(sd->groups)) {
+ struct sched_group *sg = sd->groups->next;
+ int next = group_first_cpu(sg);
+
+ if (next < nr_cpu_ids)
+ smart_data(cpu).core_next = cpu_core_id(next);
+ }
+
+ break;
+ }
+ }
+
+ if (!check_smart_data()) {
+ printk(KERN_INFO "smart: enabled\n");
+ static_key_slow_inc(&__smart_initialized);
+ }
+
+ rcu_read_unlock();
+
+ put_online_cpus();
+
+ mutex_unlock(&smart_mutex);
+}
+
+#endif /* CONFIG_SMART */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index dfa31d5..357736b 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1378,3 +1378,43 @@ static inline u64 irq_time_read(int cpu)
}
#endif /* CONFIG_64BIT */
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+
+#ifdef CONFIG_SMART
+struct smart_core_data {
+ int cpu_core_id;
+
+ /* Per core data, use smart_data macro for access */
+ int core_next;
+ int core_node_sibling;
+ atomic_t core_locked;
+} ____cacheline_aligned_in_smp;
+
+extern struct static_key __smart_initialized;
+
+DECLARE_PER_CPU_SHARED_ALIGNED(struct smart_core_data, smart_core_data);
+
+static inline int cpu_core_id(int cpu)
+{
+ return per_cpu(smart_core_data, cpu).cpu_core_id;
+}
+
+#define smart_data(cpu) per_cpu(smart_core_data, cpu_core_id(cpu))
+
+static inline int core_node_sibling(int cpu)
+{
+ return smart_data(cpu).core_node_sibling;
+}
+
+static inline int next_core(int cpu)
+{
+ return smart_data(cpu).core_next;
+}
+
+void build_smart_topology(void);
+
+#else /* CONFIG_SMART */
+static inline void build_smart_topology(void)
+{
+}
+
+#endif /* CONFIG_SMART */
--
1.9.3
