[PATCH 5/7] x86/intel_rdt: Software Cache for IA32_PQR_MSR

From: Vikas Shivappa
Date: Mon May 11 2015 - 15:07:16 EST


This patch implements a common software cache for IA32_PQR_MSR (RMID in
bits 0:9, CLOSid in bits 32:63) to be used by both cache monitoring (CMT)
and cache allocation. CMT updates the RMID, whereas cache_alloc updates
the CLOSid in the software cache. During scheduling, when the new
RMID/CLOSid value is different from the cached values, IA32_PQR_MSR is
updated. Since the measured rdmsr latency for IA32_PQR_MSR is very high
(~250 cycles), this software cache is necessary to avoid reading the MSR
to compare the current CLOSid value. Caching reduces the frequency of MSR
writes during the scheduler hot path for cache allocation. During CPU
hotplug, the pqr cache is reset to zero.

Signed-off-by: Vikas Shivappa <vikas.shivappa@xxxxxxxxxxxxxxx>

Conflicts:
arch/x86/kernel/cpu/perf_event_intel_cqm.c
---
arch/x86/include/asm/intel_rdt.h | 9 ++++++---
arch/x86/include/asm/rdt_common.h | 13 +++++++++++++
arch/x86/kernel/cpu/intel_rdt.c | 30 +++++++++++++++++-------------
arch/x86/kernel/cpu/perf_event_intel_cqm.c | 20 +++++++-------------
4 files changed, 43 insertions(+), 29 deletions(-)
create mode 100644 arch/x86/include/asm/rdt_common.h

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 589394b..f4372d8 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -4,16 +4,16 @@
#ifdef CONFIG_CGROUP_RDT

#include <linux/cgroup.h>
+#include <asm/rdt_common.h>

-#define MSR_IA32_PQR_ASSOC 0xc8f
#define MAX_CBM_LENGTH 32
#define IA32_L3_CBM_BASE 0xc90
#define CBM_FROM_INDEX(x) (IA32_L3_CBM_BASE + x)
-DECLARE_PER_CPU(unsigned int, x86_cpu_clos);
+
+DECLARE_PER_CPU(struct intel_pqr_state, pqr_state);
extern struct static_key rdt_enable_key;
extern void __rdt_sched_in(void);

-
struct rdt_subsys_info {
/* Clos Bitmap to keep track of available CLOSids.*/
unsigned long *closmap;
@@ -67,6 +67,9 @@ static inline struct intel_rdt *task_rdt(struct task_struct *task)
* IA32_PQR_MSR writes until the user starts really using the feature
* ie creates a rdt cgroup directory and assigns a cache_mask thats
* different from the root cgroup's cache_mask.
+ * - Caches the per cpu CLOSid values and does the MSR write only
+ * when a task with a different CLOSid is scheduled in. That
+ * means the task belongs to a different cgroup.
* - Closids are allocated so that different cgroup directories
* with same cache_mask gets the same CLOSid. This minimizes CLOSids
* used and reduces MSR write frequency.
diff --git a/arch/x86/include/asm/rdt_common.h b/arch/x86/include/asm/rdt_common.h
new file mode 100644
index 0000000..33fd8ea
--- /dev/null
+++ b/arch/x86/include/asm/rdt_common.h
@@ -0,0 +1,13 @@
+#ifndef _X86_RDT_H_
+#define _X86_RDT_H_
+
+#define MSR_IA32_PQR_ASSOC 0x0c8f
+
+struct intel_pqr_state {
+ raw_spinlock_t lock;
+ int rmid;
+ int clos;
+ int cnt;
+};
+
+#endif
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index fe3ce4e..2415965 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -85,27 +85,28 @@ void __rdt_sched_in(void)
{
struct task_struct *task = current;
struct intel_rdt *ir;
- unsigned int clos;
-
- /*
- * This needs to be fixed
- * to cache the whole PQR instead of just CLOSid.
- * PQR has closid in high 32 bits and CQM-RMID in low 10 bits.
- * Should not write a 0 to the low 10 bits of PQR
- * and corrupt RMID.
- */
- clos = this_cpu_read(x86_cpu_clos);
+ struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+ unsigned long flags;

+ raw_spin_lock_irqsave(&state->lock, flags);
rcu_read_lock();
ir = task_rdt(task);
- if (ir->clos == clos) {
+ if (ir->clos == state->clos) {
rcu_read_unlock();
+ raw_spin_unlock_irqrestore(&state->lock, flags);
return;
}

- wrmsr(MSR_IA32_PQR_ASSOC, 0, ir->clos);
- this_cpu_write(x86_cpu_clos, ir->clos);
+ /*
+ * PQR has closid in high 32 bits and CQM-RMID
+ * in low 10 bits. Rewrite the existing rmid from
+ * software cache.
+ */
+ wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, ir->clos);
+ state->clos = ir->clos;
rcu_read_unlock();
+ raw_spin_unlock_irqrestore(&state->lock, flags);
+
}

static void __clos_get(unsigned int closid)
@@ -372,6 +373,9 @@ static inline bool intel_rdt_update_cpumask(int cpu)
*/
static inline void intel_rdt_cpu_start(int cpu)
{
+ struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
+
+ state->clos = 0;
mutex_lock(&rdt_group_mutex);
if (intel_rdt_update_cpumask(cpu))
cbm_update_msrs(cpu);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index e4d1b8b..fd039899 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -7,22 +7,16 @@
#include <linux/perf_event.h>
#include <linux/slab.h>
#include <asm/cpu_device_id.h>
+#include <asm/rdt_common.h>
#include "perf_event.h"

-#define MSR_IA32_PQR_ASSOC 0x0c8f
#define MSR_IA32_QM_CTR 0x0c8e
#define MSR_IA32_QM_EVTSEL 0x0c8d

static unsigned int cqm_max_rmid = -1;
static unsigned int cqm_l3_scale; /* supposedly cacheline size */

-struct intel_cqm_state {
- raw_spinlock_t lock;
- int rmid;
- int cnt;
-};
-
-static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state);
+DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);

/*
* Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
@@ -961,7 +955,7 @@ out:

static void intel_cqm_event_start(struct perf_event *event, int mode)
{
- struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
+ struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
unsigned int rmid = event->hw.cqm_rmid;
unsigned long flags;

@@ -978,14 +972,14 @@ static void intel_cqm_event_start(struct perf_event *event, int mode)
WARN_ON_ONCE(state->rmid);

state->rmid = rmid;
- wrmsrl(MSR_IA32_PQR_ASSOC, state->rmid);
+ wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, state->clos);

raw_spin_unlock_irqrestore(&state->lock, flags);
}

static void intel_cqm_event_stop(struct perf_event *event, int mode)
{
- struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
+ struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
unsigned long flags;

if (event->hw.cqm_state & PERF_HES_STOPPED)
@@ -998,7 +992,7 @@ static void intel_cqm_event_stop(struct perf_event *event, int mode)

if (!--state->cnt) {
state->rmid = 0;
- wrmsrl(MSR_IA32_PQR_ASSOC, 0);
+ wrmsr(MSR_IA32_PQR_ASSOC, 0, state->clos);
} else {
WARN_ON_ONCE(!state->rmid);
}
@@ -1243,7 +1237,7 @@ static inline void cqm_pick_event_reader(int cpu)

static void intel_cqm_cpu_prepare(unsigned int cpu)
{
- struct intel_cqm_state *state = &per_cpu(cqm_state, cpu);
+ struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
struct cpuinfo_x86 *c = &cpu_data(cpu);

raw_spin_lock_init(&state->lock);
--
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/