[PATCH 4/7] x86/intel_rdt: Implement scheduling support for Intel RDT

From: Vikas Shivappa
Date: Thu Mar 12 2015 - 19:18:30 EST


Adds support for IA32_PQR_ASSOC MSR writes during task scheduling.

The high 32 bits of the per-processor IA32_PQR_ASSOC MSR represent the
CLOSid. During a context switch, the kernel programs this by writing the
CLOSid of the cgroup to which the task belongs to the CPU's
IA32_PQR_ASSOC MSR.

For Cache Allocation, this lets the task fill into the cache 'subset'
represented by the cgroup's cache bit mask (CBM).

Signed-off-by: Vikas Shivappa <vikas.shivappa@xxxxxxxxxxxxxxx>
---
arch/x86/include/asm/intel_rdt.h | 55 ++++++++++++++++++++++++++++++++++++++++
arch/x86/include/asm/switch_to.h | 3 +++
arch/x86/kernel/cpu/intel_rdt.c | 4 ++-
kernel/sched/core.c | 1 +
kernel/sched/sched.h | 3 +++
5 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 0ed28d9..6383a24 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -4,9 +4,13 @@
#ifdef CONFIG_CGROUP_RDT

#include <linux/cgroup.h>
+
+#define MSR_IA32_PQR_ASSOC 0xc8f
#define MAX_CBM_LENGTH 32
#define IA32_L3_CBM_BASE 0xc90
#define CBM_FROM_INDEX(x) (IA32_L3_CBM_BASE + x)
+DECLARE_PER_CPU(unsigned int, x86_cpu_clos);
+extern struct static_key rdt_enable_key;

struct rdt_subsys_info {
/* Clos Bitmap to keep track of available CLOSids.*/
@@ -24,6 +28,11 @@ struct clos_cbm_map {
unsigned int cgrp_count;
};

+static inline bool rdt_enabled(void)
+{
+ return static_key_false(&rdt_enable_key);
+}
+
/*
* Return rdt group corresponding to this container.
*/
@@ -37,5 +46,51 @@ static inline struct intel_rdt *parent_rdt(struct intel_rdt *ir)
return css_rdt(ir->css.parent);
}

+/*
+ * Return rdt group to which this task belongs.
+ */
+static inline struct intel_rdt *task_rdt(struct task_struct *task)
+{
+ return css_rdt(task_css(task, rdt_cgrp_id));
+}
+
+/*
+ * rdt_sched_in() - Writes the task's CLOSid to the IA32_PQR_ASSOC MSR
+ * if the CLOSid cached for this CPU differs from the new one.
+ */
+
+static inline void rdt_sched_in(struct task_struct *task)
+{
+ struct intel_rdt *ir;
+ unsigned int clos;
+
+ if (!rdt_enabled())
+ return;
+
+ /*
+ * FIXME: once the CQM code stabilizes, cache the whole PQR value
+ * instead of just the CLOSid. PQR holds the CLOSid in its high
+ * 32 bits and the CQM RMID in its low 10 bits; the wrmsr() below
+ * writes 0 to the low half and so would corrupt the RMID. The
+ * write must preserve the low 10 bits once CQM is in use.
+ */
+ clos = this_cpu_read(x86_cpu_clos);
+
+ rcu_read_lock();
+ ir = task_rdt(task);
+ if (ir->clos == clos) {
+ rcu_read_unlock();
+ return;
+ }
+
+ wrmsr(MSR_IA32_PQR_ASSOC, 0, ir->clos);
+ this_cpu_write(x86_cpu_clos, ir->clos);
+ rcu_read_unlock();
+}
+
+#else
+
+static inline void rdt_sched_in(struct task_struct *task) {}
+
#endif
#endif
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 751bf4b..82ef4b3 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -8,6 +8,9 @@ struct tss_struct;
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss);

+#include <asm/intel_rdt.h>
+#define post_arch_switch(current) rdt_sched_in(current)
+
#ifdef CONFIG_X86_32

#ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 495497a..0330791 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -32,6 +32,8 @@ static struct clos_cbm_map *ccmap;
static struct rdt_subsys_info rdtss_info;
static DEFINE_MUTEX(rdt_group_mutex);
struct intel_rdt rdt_root_group;
+struct static_key __read_mostly rdt_enable_key = STATIC_KEY_INIT_FALSE;
+DEFINE_PER_CPU(unsigned int, x86_cpu_clos);

#define rdt_for_each_child(pos_css, parent_ir) \
css_for_each_child((pos_css), &(parent_ir)->css)
@@ -76,7 +78,7 @@ static int __init rdt_late_init(void)
ccm = &ccmap[0];
ccm->cbm = (u32)((u64)(1 << cbm_len) - 1);
ccm->cgrp_count++;
-
+ static_key_slow_inc(&rdt_enable_key);
pr_info("cbmlength:%u,Closs: %u\n", cbm_len, maxid);
}

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f0f831e..93ff61b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2206,6 +2206,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
prev_state = prev->state;
vtime_task_switch(prev);
finish_arch_switch(prev);
+ post_arch_switch(current);
perf_event_task_sched_in(prev, current);
finish_lock_switch(rq, prev);
finish_arch_post_lock_switch();
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index dc0f435..0b3c191 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1026,6 +1026,9 @@ static inline int task_on_rq_migrating(struct task_struct *p)
#ifndef finish_arch_switch
# define finish_arch_switch(prev) do { } while (0)
#endif
+#ifndef post_arch_switch
+# define post_arch_switch(current) do { } while (0)
+#endif
#ifndef finish_arch_post_lock_switch
# define finish_arch_post_lock_switch() do { } while (0)
#endif
--
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/