[PATCH 23/32] x86/intel_rdt.c: Extend RDT to per cache and per resources

From: Fenghua Yu
Date: Tue Jul 12 2016 - 18:03:55 EST


From: Fenghua Yu <fenghua.yu@xxxxxxxxx>

The QoS mask MSR array is per cache. We need to allocate CLOSIDs per
cache instead of from a single global CLOSID space.
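
As a rough sketch (using the names this patch introduces), the
allocator now walks a per-domain CLOSID bitmap rather than one global
bitmap:

	/* sketch of the per-domain allocation done in closid_alloc() */
	id = find_first_zero_bit(cconfig.closmap[domain], cconfig.max_closid);
	if (id == cconfig.max_closid)
		return -ENOSPC;	/* this domain is out of CLOSIDs */
	set_bit(id, cconfig.closmap[domain]);
	*closid = id;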

A few different resources can share the same QoS mask MSR array. For
example, an L2 cache can share QoS MSRs with its next-level L3 cache. A
domain number then represents the L2 cache, the L3 cache, the L2
cache's shared cpumask, and the L3 cache's shared cpumask.

cctable is extended to be indexed by domain number so that each cache
has its own control table.
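
For instance, programming the CBM for a CLOSID now indexes by L3 domain
first. A simplified sketch of what cbm_update_l3_msr() below does (the
real code goes through msr_cpu_update() so the verbose and simulation
options are honored):

	/* sketch: per-domain control table lookup */
	int l3_domain = per_cpu(cpu_l3_domain, smp_processor_id());
	int dindex = DCBM_TABLE_INDEX(closid);
	struct clos_cbm_table *cct = &l3_cctable[l3_domain][dindex];

	if (cct->clos_refcnt)
		wrmsrl(L3_CBM_FROM_INDEX(dindex), cct->cbm);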

shared_domain is introduced to cover multiple resources that share a
CLOSID.
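
On the context-switch path this means the CLOSID written into
IA32_PQR_ASSOC is chosen per shared domain. A simplified sketch of
__intel_rdt_sched_in() below, once the CPU's or the task's rdtgroup
(rdtgrp) has been found:

	/* sketch: resolve the CPU's shared domain, then its CLOSID */
	int domain = per_cpu(cpu_shared_domain, smp_processor_id());
	int closid = rdtgrp->resource.closid[domain];

	if (closid != state->closid) {
		state->closid = closid;
		wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, closid);
	}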

Signed-off-by: Fenghua Yu <fenghua.yu@xxxxxxxxx>
Reviewed-by: Tony Luck <tony.luck@xxxxxxxxx>
---
arch/x86/include/asm/intel_rdt.h | 1 +
arch/x86/kernel/cpu/intel_rdt.c | 738 +++++++++++++++++++++++++++++----------
2 files changed, 546 insertions(+), 193 deletions(-)

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 4c5e0ac..5aacc4a 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -77,6 +77,7 @@ extern inline void closid_get(u32 closid, int domain);
extern void closid_put(u32 closid, int domain);
extern void closid_free(u32 closid, int domain, int level);
extern int closid_alloc(u32 *closid, int domain);
+extern struct mutex rdtgroup_mutex;
extern bool cat_l3_enabled;
extern unsigned int get_domain_num(int level);
extern struct shared_domain *shared_domain;
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 057aef1..017c833 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -17,7 +17,7 @@
* more details.
*
* More information about RDT be found in the Intel (R) x86 Architecture
- * Software Developer Manual June 2015, volume 3, section 17.15.
+ * Software Developer Manual.
*/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -26,46 +26,49 @@
#include <linux/err.h>
#include <linux/cpu.h>
#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/cacheinfo.h>
#include <asm/pqr_common.h>
#include <asm/intel_rdt.h>
+#include <asm/intel_rdt_rdtgroup.h>

/*
- * During cache alloc mode cctable maintains 1:1 mapping between
- * CLOSid and l3_cbm.
- *
- * During CDP mode, the cctable maintains a 1:2 mapping between the closid
- * and (dcache_cbm, icache_cbm) pair.
- * index of a dcache_cbm for CLOSid 'n' = n << 1.
- * index of a icache_cbm for CLOSid 'n' = n << 1 + 1
+ * cctable maintains 1:1 mapping between CLOSid and cache bitmask.
*/
-static struct clos_cbm_table *cctable;
+struct clos_cbm_table **l3_cctable;
+
/*
* Minimum bits required in Cache bitmask.
*/
-static unsigned int min_bitmask_len = 1;
+unsigned int min_bitmask_len = 1;
+
/*
* Mask of CPUs for writing CBM values. We only need one CPU per-socket.
*/
-static cpumask_t rdt_cpumask;
-/*
- * Temporary cpumask used during hot cpu notificaiton handling. The usage
- * is serialized by hot cpu locks.
- */
-static cpumask_t tmp_cpumask;
-static DEFINE_MUTEX(rdt_group_mutex);
+cpumask_t rdt_l3_cpumask;
+
+bool cat_l3_enabled;
+
struct static_key __read_mostly rdt_enable_key = STATIC_KEY_INIT_FALSE;
-static struct clos_config cconfig;
-static bool cdp_enabled;
+struct clos_config cconfig;
+bool cdp_enabled;

-#define __DCBM_TABLE_INDEX(x) (x << 1)
-#define __ICBM_TABLE_INDEX(x) ((x << 1) + 1)
-#define __DCBM_MSR_INDEX(x) \
- CBM_FROM_INDEX(__DCBM_TABLE_INDEX(x))
-#define __ICBM_MSR_INDEX(x) \
- CBM_FROM_INDEX(__ICBM_TABLE_INDEX(x))
+#define __DCBM_TABLE_INDEX(x) (x << 1)
+#define __ICBM_TABLE_INDEX(x) ((x << 1) + 1)
+#define __ICBM_MSR_INDEX(x) \
+ L3_CBM_FROM_INDEX(__ICBM_TABLE_INDEX(x))

-#define DCBM_TABLE_INDEX(x) (x << cdp_enabled)
-#define ICBM_TABLE_INDEX(x) ((x << cdp_enabled) + cdp_enabled)
+#define DCBM_TABLE_INDEX(x) (x << cdp_enabled)
+#define ICBM_TABLE_INDEX(x) ((x << cdp_enabled) + cdp_enabled)
+
+inline int get_dcbm_table_index(int x)
+{
+ return DCBM_TABLE_INDEX(x);
+}
+inline int get_icbm_table_index(int x)
+{
+ return ICBM_TABLE_INDEX(x);
+}

struct rdt_remote_data {
int msr;
@@ -101,14 +104,76 @@ static inline bool cache_alloc_hsw_probe(void)

wrmsr_safe(MSR_IA32_PQR_ASSOC, l, h_old);

- boot_cpu_data.x86_cache_max_closid = 4;
- boot_cpu_data.x86_cache_max_cbm_len = 20;
+ boot_cpu_data.x86_l3_max_closid = 4;
+ boot_cpu_data.x86_l3_max_cbm_len = 20;
min_bitmask_len = 2;

return true;
}

-static inline bool cache_alloc_supported(struct cpuinfo_x86 *c)
+u32 max_cbm_len(int level)
+{
+ switch (level) {
+ case CACHE_LEVEL3:
+ return boot_cpu_data.x86_l3_max_cbm_len;
+ default:
+ break;
+ }
+
+ return (u32)~0;
+}
+
+u64 max_cbm(int level)
+{
+ switch (level) {
+ case CACHE_LEVEL3:
+ return (1ULL << boot_cpu_data.x86_l3_max_cbm_len) - 1;
+ default:
+ break;
+ }
+
+ return (u64)~0;
+}
+
+static u32 hw_max_closid(int level)
+{
+ switch (level) {
+ case CACHE_LEVEL3:
+ return boot_cpu_data.x86_l3_max_closid;
+ default:
+ break;
+ }
+
+ WARN(1, "invalid level\n");
+ return 0;
+}
+
+static int cbm_from_index(u32 i, int level)
+{
+ switch (level) {
+ case CACHE_LEVEL3:
+ return L3_CBM_FROM_INDEX(i);
+ default:
+ break;
+ }
+
+ WARN(1, "invalid level\n");
+ return 0;
+}
+
+bool cat_enabled(int level)
+{
+ switch (level) {
+ case CACHE_LEVEL3:
+ return cat_l3_enabled;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+static inline bool cat_l3_supported(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_CAT_L3))
return true;
@@ -122,230 +187,286 @@ static inline bool cache_alloc_supported(struct cpuinfo_x86 *c)
return false;
}

+DEFINE_MUTEX(rdtgroup_mutex);
+
+DEFINE_PER_CPU_READ_MOSTLY(int, cpu_l3_domain) = -1;
+DEFINE_PER_CPU_READ_MOSTLY(int, cpu_shared_domain) = -1;
+DEFINE_PER_CPU_READ_MOSTLY(struct rdtgroup *, cpu_rdtgroup) = 0;
+
void __intel_rdt_sched_in(void *dummy)
{
struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+ struct rdtgroup *rdtgrp;
+ int closid;
+ int cpu = smp_processor_id();
+ int domain;
+
+ /* Don't write PQR register if rscctrl is not mounted. */
+ if (!rdtgroup_mounted)
+ return;

/*
- * Currently closid is always 0. When user interface is added,
- * closid will come from user interface.
+ * First find rdtgroup for this cpu.
+ * If no rdtgroup is found for this cpu, find the task's rdtgroup.
*/
- if (state->closid == 0)
+ rdtgrp = per_cpu(cpu_rdtgroup, cpu);
+ if (!rdtgrp) {
+ rdtgrp = current->rdtgroup;
+
+ if (!rdtgrp)
+ return;
+ }
+
+ domain = per_cpu(cpu_shared_domain, cpu);
+ closid = rdtgrp->resource.closid[domain];
+
+ if (closid == state->closid)
return;

- wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, 0);
- state->closid = 0;
-}
+ state->closid = closid;
+ /* Don't really write PQR register in simulation mode. */
+ if (unlikely(rdt_opts.simulate_cat_l3))
+ return;

-/*
- * Synchronize the IA32_PQR_ASSOC MSR of all currently running tasks.
- */
-static inline void closid_tasks_sync(void)
-{
- on_each_cpu_mask(cpu_online_mask, __intel_rdt_sched_in, NULL, 1);
+ wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, closid);
}

/*
* When cdp mode is enabled, refcnt is maintained in the dcache_cbm entry.
*/
-static inline void closid_get(u32 closid)
+inline void closid_get(u32 closid, int domain)
{
- struct clos_cbm_table *cct = &cctable[DCBM_TABLE_INDEX(closid)];
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ if (cat_l3_enabled) {
+ int l3_domain;
+ int dindex;

- lockdep_assert_held(&rdt_group_mutex);
+ l3_domain = shared_domain[domain].l3_domain;
+ dindex = DCBM_TABLE_INDEX(closid);
+ l3_cctable[l3_domain][dindex].clos_refcnt++;
+ if (cdp_enabled) {
+ int iindex = ICBM_TABLE_INDEX(closid);

- cct->clos_refcnt++;
+ l3_cctable[l3_domain][iindex].clos_refcnt++;
+ }
+ }
}

-static int closid_alloc(u32 *closid)
+int closid_alloc(u32 *closid, int domain)
{
u32 maxid;
u32 id;

- lockdep_assert_held(&rdt_group_mutex);
+ lockdep_assert_held(&rdtgroup_mutex);

maxid = cconfig.max_closid;
- id = find_first_zero_bit(cconfig.closmap, maxid);
+ id = find_first_zero_bit((unsigned long *)cconfig.closmap[domain],
+ maxid);
+
if (id == maxid)
return -ENOSPC;

- set_bit(id, cconfig.closmap);
- closid_get(id);
+ set_bit(id, (unsigned long *)cconfig.closmap[domain]);
+ closid_get(id, domain);
*closid = id;
- cconfig.closids_used++;

return 0;
}

-static inline void closid_free(u32 closid)
+unsigned int get_domain_num(int level)
{
- clear_bit(closid, cconfig.closmap);
- cctable[DCBM_TABLE_INDEX(closid)].l3_cbm = 0;
-
- if (WARN_ON(!cconfig.closids_used))
- return;
+ if (level == CACHE_LEVEL3)
+ return cpumask_weight(&rdt_l3_cpumask);
+ else
+ return -EINVAL;
+}

- cconfig.closids_used--;
+int level_to_leaf(int level)
+{
+ switch (level) {
+ case CACHE_LEVEL3:
+ return 3;
+ default:
+ return -EINVAL;
+ }
}

-static void closid_put(u32 closid)
+void closid_free(u32 closid, int domain, int level)
{
- struct clos_cbm_table *cct = &cctable[DCBM_TABLE_INDEX(closid)];
+ struct clos_cbm_table **cctable;
+ int leaf;
+ struct cpumask *mask;
+ int cpu;
+
+ if (level == CACHE_LEVEL3)
+ cctable = l3_cctable;
+
+ clear_bit(closid, (unsigned long *)cconfig.closmap[domain]);
+
+ if (level == CACHE_LEVEL3) {
+ cctable[domain][closid].cbm = max_cbm(level);
+ leaf = level_to_leaf(level);
+ mask = &cache_domains[leaf].shared_cpu_map[domain];
+ cpu = cpumask_first(mask);
+ smp_call_function_single(cpu, cbm_update_l3_msr, &closid, 1);
+ }
+}

- lockdep_assert_held(&rdt_group_mutex);
+static void _closid_put(u32 closid, struct clos_cbm_table *cct,
+ int domain, int level)
+{
+ lockdep_assert_held(&rdtgroup_mutex);
if (WARN_ON(!cct->clos_refcnt))
return;

if (!--cct->clos_refcnt)
- closid_free(closid);
+ closid_free(closid, domain, level);
}

-static bool cbm_validate(unsigned long var)
+void closid_put(u32 closid, int domain)
{
- u32 max_cbm_len = boot_cpu_data.x86_cache_max_cbm_len;
- unsigned long first_bit, zero_bit;
- u64 max_cbm;
-
- if (bitmap_weight(&var, max_cbm_len) < min_bitmask_len)
- return false;
+ struct clos_cbm_table *cct;

- max_cbm = (1ULL << max_cbm_len) - 1;
- if (var & ~max_cbm)
- return false;
-
- first_bit = find_first_bit(&var, max_cbm_len);
- zero_bit = find_next_zero_bit(&var, max_cbm_len, first_bit);
-
- if (find_next_bit(&var, max_cbm_len, zero_bit) < max_cbm_len)
- return false;
+ if (cat_l3_enabled) {
+ int l3_domain = shared_domain[domain].l3_domain;

- return true;
+ cct = &l3_cctable[l3_domain][DCBM_TABLE_INDEX(closid)];
+ _closid_put(closid, cct, l3_domain, CACHE_LEVEL3);
+ if (cdp_enabled) {
+ cct = &l3_cctable[l3_domain][ICBM_TABLE_INDEX(closid)];
+ _closid_put(closid, cct, l3_domain, CACHE_LEVEL3);
+ }
+ }
}

-static int clos_cbm_table_read(u32 index, unsigned long *l3_cbm)
+void msr_cpu_update(void *arg)
{
- u32 orig_maxid = boot_cpu_data.x86_cache_max_closid;
+ struct rdt_remote_data *info = arg;

- lockdep_assert_held(&rdt_group_mutex);
+ if (unlikely(rdt_opts.verbose))
+ pr_info("Write %lx to msr %x on cpu%d\n",
+ (unsigned long)info->val, info->msr,
+ smp_processor_id());

- if (index >= orig_maxid)
- return -EINVAL;
+ if (unlikely(rdt_opts.simulate_cat_l3))
+ return;

- *l3_cbm = cctable[index].l3_cbm;
+ wrmsrl(info->msr, info->val);
+}

- return 0;
+static struct cpumask *rdt_cache_cpumask(int level)
+{
+ return &rdt_l3_cpumask;
}

/*
- * clos_cbm_table_update() - Update a clos cbm table entry.
- * @index: index of the table entry whose cbm needs to be updated
- * @cbm: the new cbm value that has to be updated
- *
- * This assumes the cbm is validated as per the interface requirements
- * and the cache allocation requirements(through the cbm_validate).
+ * msr_update_all() - Update the msr for all packages.
*/
-static int clos_cbm_table_update(u32 index, unsigned long cbm)
+static inline void msr_update_all(int msr, u64 val, int level)
{
- u32 orig_maxid = boot_cpu_data.x86_cache_max_closid;
-
- lockdep_assert_held(&rdt_group_mutex);
-
- if (index >= orig_maxid)
- return -EINVAL;
-
- cctable[index].l3_cbm = cbm;
+ struct rdt_remote_data info;

- return 0;
+ info.msr = msr;
+ info.val = val;
+ on_each_cpu_mask(rdt_cache_cpumask(level), msr_cpu_update, &info, 1);
}

-static bool cbm_search(unsigned long cbm, u32 *closid)
+static void init_qos_msrs(int level)
{
- u32 maxid = cconfig.max_closid;
- u32 i;
+ if (cat_enabled(level)) {
+ u32 maxcbm;
+ u32 i;

- for (i = 0; i < maxid; i++) {
- if (cctable[i].clos_refcnt &&
- bitmap_equal(&cbm, &cctable[i].l3_cbm, MAX_CBM_LENGTH)) {
- *closid = i;
- return true;
- }
+ maxcbm = max_cbm(level);
+ for (i = 0; i < hw_max_closid(level); i++)
+ msr_update_all(cbm_from_index(i, level), maxcbm, level);
}
-
- return false;
}

-static void closcbm_map_dump(void)
+/*
+ * Initialize QOS_MASK_n registers to all 1's.
+ *
+ * Initialize L3_QOS_CFG register to enable or disable CDP.
+ */
+void init_msrs(bool cdpenabled)
{
- u32 i;
-
- pr_debug("CBMMAP\n");
- for (i = 0; i < boot_cpu_data.x86_cache_max_closid; i++) {
- pr_debug("l3_cbm: 0x%x,clos_refcnt: %u\n",
- (unsigned int)cctable[i].l3_cbm, cctable[i].clos_refcnt);
+ if (cat_enabled(CACHE_LEVEL3)) {
+ init_qos_msrs(CACHE_LEVEL3);
+ msr_update_all(MSR_IA32_L3_QOS_CFG, cdpenabled, CACHE_LEVEL3);
}
-}
-
-static void msr_cpu_update(void *arg)
-{
- struct rdt_remote_data *info = arg;

- wrmsrl(info->msr, info->val);
}

-/*
- * msr_update_all() - Update the msr for all packages.
- */
-static inline void msr_update_all(int msr, u64 val)
+int get_cache_leaf(int level, int cpu)
{
- struct rdt_remote_data info;
+ int index;
+ struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+ struct cacheinfo *this_leaf;
+ int num_leaves = this_cpu_ci->num_leaves;
+
+ for (index = 0; index < num_leaves; index++) {
+ this_leaf = this_cpu_ci->info_list + index;
+ if (this_leaf->level == level)
+ return index;
+ }

- info.msr = msr;
- info.val = val;
- on_each_cpu_mask(&rdt_cpumask, msr_cpu_update, &info, 1);
+ return -EINVAL;
}

-static bool code_data_mask_equal(void)
+static struct cpumask *get_shared_cpu_map(int cpu, int level)
{
- int i, dindex, iindex;
+ int index;
+ struct cacheinfo *leaf;
+ struct cpu_cacheinfo *cpu_ci = get_cpu_cacheinfo(cpu);

- for (i = 0; i < cconfig.max_closid; i++) {
- dindex = __DCBM_TABLE_INDEX(i);
- iindex = __ICBM_TABLE_INDEX(i);
- if (cctable[dindex].clos_refcnt &&
- (cctable[dindex].l3_cbm != cctable[iindex].l3_cbm))
- return false;
- }
+ index = get_cache_leaf(level, cpu);
+ if (index < 0)
+ return 0;

- return true;
+ leaf = cpu_ci->info_list + index;
+
+ return &leaf->shared_cpu_map;
}

-static inline bool rdt_cpumask_update(int cpu)
+inline bool rdt_cpumask_update(struct cpumask *cpumask, int cpu, int level)
{
- cpumask_and(&tmp_cpumask, &rdt_cpumask, topology_core_cpumask(cpu));
+ struct cpumask *shared_cpu_map;
+ cpumask_t tmp_cpumask;
+
+ shared_cpu_map = get_shared_cpu_map(cpu, level);
+ if (!shared_cpu_map)
+ return false;
+
+ cpumask_and(&tmp_cpumask, cpumask, shared_cpu_map);
if (cpumask_empty(&tmp_cpumask)) {
- cpumask_set_cpu(cpu, &rdt_cpumask);
+ cpumask_set_cpu(cpu, cpumask);
return true;
}

return false;
}

-static void cbm_update_msr(u32 index)
+void cbm_update_l3_msr(void *pindex)
{
- struct rdt_remote_data info;
+ int index;
int dindex;
+ int l3_domain;
+ struct clos_cbm_table *pl3_cctable;
+ struct rdt_remote_data info;

+ index = *(int *)pindex;
dindex = DCBM_TABLE_INDEX(index);
- if (cctable[dindex].clos_refcnt) {
-
- info.msr = CBM_FROM_INDEX(dindex);
- info.val = cctable[dindex].l3_cbm;
- msr_cpu_update((void *) &info);
-
+ l3_domain = per_cpu(cpu_l3_domain, smp_processor_id());
+ pl3_cctable = &l3_cctable[l3_domain][dindex];
+ if (pl3_cctable->clos_refcnt) {
+ info.msr = L3_CBM_FROM_INDEX(dindex);
+ info.val = pl3_cctable->cbm;
+ msr_cpu_update(&info);
if (cdp_enabled) {
info.msr = __ICBM_MSR_INDEX(index);
- info.val = cctable[dindex + 1].l3_cbm;
- msr_cpu_update((void *) &info);
+ info.val = l3_cctable[l3_domain][dindex+1].cbm;
+ msr_cpu_update(&info);
}
}
}
@@ -356,11 +477,13 @@ static void cbm_update_msr(u32 index)
*/
static void cbm_update_msrs(void *dummy)
{
- int maxid = cconfig.max_closid;
- unsigned int i;
+ int maxid;
+ int index;

- for (i = 0; i < maxid; i++) {
- cbm_update_msr(i);
+ maxid = cconfig.max_closid;
+ if (cat_l3_enabled) {
+ for (index = 0; index < maxid; index++)
+ cbm_update_l3_msr(&index);
}
}

@@ -369,19 +492,44 @@ static inline void intel_rdt_cpu_start(int cpu)
struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);

state->closid = 0;
- mutex_lock(&rdt_group_mutex);
- if (rdt_cpumask_update(cpu))
+ mutex_lock(&rdtgroup_mutex);
+ if (rdt_cpumask_update(&rdt_l3_cpumask, cpu, CACHE_LEVEL3))
smp_call_function_single(cpu, cbm_update_msrs, NULL, 1);
- mutex_unlock(&rdt_group_mutex);
+ mutex_unlock(&rdtgroup_mutex);
}

static void intel_rdt_cpu_exit(unsigned int cpu)
{
+ cpumask_t tmp_cpumask;
+ struct cpumask *shared_cpu_map;
+ int new_cpu;
int i;
+ int l3_domain;
+ int level;
+ int leaf;
+
+ mutex_lock(&rdtgroup_mutex);

- mutex_lock(&rdt_group_mutex);
- if (!cpumask_test_and_clear_cpu(cpu, &rdt_cpumask)) {
- mutex_unlock(&rdt_group_mutex);
+ level = CACHE_LEVEL3;
+
+ l3_domain = per_cpu(cpu_l3_domain, cpu);
+ leaf = level_to_leaf(level);
+ shared_cpu_map = &cache_domains[leaf].shared_cpu_map[l3_domain];
+
+ cpumask_clear_cpu(cpu, &rdt_l3_cpumask);
+ cpumask_clear_cpu(cpu, shared_cpu_map);
+ if (cpumask_empty(shared_cpu_map))
+ goto out;
+
+ new_cpu = cpumask_first(shared_cpu_map);
+ rdt_cpumask_update(&rdt_l3_cpumask, new_cpu, level);
+
+out:
+ mutex_unlock(&rdtgroup_mutex);
+ return;
+
+ if (cpumask_test_and_clear_cpu(cpu, &rdt_l3_cpumask)) {
+ mutex_unlock(&rdtgroup_mutex);
return;
}

@@ -390,8 +538,8 @@ static void intel_rdt_cpu_exit(unsigned int cpu)
i = cpumask_any(&tmp_cpumask);

if (i < nr_cpu_ids)
- cpumask_set_cpu(i, &rdt_cpumask);
- mutex_unlock(&rdt_group_mutex);
+ cpumask_set_cpu(i, &rdt_l3_cpumask);
+ mutex_unlock(&rdtgroup_mutex);
}

static int intel_rdt_cpu_notifier(struct notifier_block *nb,
@@ -414,37 +562,242 @@ static int intel_rdt_cpu_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}

+/*
+ * Initialize per-cpu cpu_l3_domain.
+ *
+ * cpu_l3_domain numbers are consecutive integers starting from 0.
+ * Sets up 1:1 mapping of cpu id and cpu_l3_domain.
+ */
+static int __init cpu_cache_domain_init(int level)
+{
+ int i, j;
+ int max_cpu_cache_domain = 0;
+ int index;
+ struct cacheinfo *leaf;
+ int *domain;
+ struct cpu_cacheinfo *cpu_ci;
+
+ for_each_online_cpu(i) {
+ domain = &per_cpu(cpu_l3_domain, i);
+ if (*domain == -1) {
+ index = get_cache_leaf(level, i);
+ if (index < 0)
+ return -EINVAL;
+
+ cpu_ci = get_cpu_cacheinfo(i);
+ leaf = cpu_ci->info_list + index;
+ if (cpumask_empty(&leaf->shared_cpu_map)) {
+ WARN(1, "no shared cpu for cache level %d\n", level);
+ return -EINVAL;
+ }
+
+ for_each_cpu(j, &leaf->shared_cpu_map) {
+ domain = &per_cpu(cpu_l3_domain, j);
+ *domain = max_cpu_cache_domain;
+ }
+ max_cpu_cache_domain++;
+ }
+ }
+
+ return 0;
+}
+
+struct rdt_opts rdt_opts = {
+ .cdp_enabled = false,
+ .verbose = false,
+ .simulate_cat_l3 = false,
+};
+
+static bool disable_cat_l3 __initdata;
+
+static int __init rdt_setup(char *str)
+{
+ char *tok;
+
+ while ((tok = strsep(&str, ",")) != NULL) {
+ if (!*tok)
+ return -EINVAL;
+
+ if (strcmp(tok, "simulate_cat_l3") == 0) {
+ pr_info("Simulate CAT L3\n");
+ rdt_opts.simulate_cat_l3 = true;
+ } else if (strcmp(tok, "disable_cat_l3") == 0) {
+ pr_info("CAT L3 is disabled\n");
+ disable_cat_l3 = true;
+ } else {
+ pr_info("Invalid rdt option\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+__setup("rscctrl=", rdt_setup);
+
+static inline bool resource_alloc_enabled(void)
+{
+ return cat_l3_enabled;
+}
+
+struct shared_domain *shared_domain;
+int shared_domain_num;
+
+static int shared_domain_init(void)
+{
+ int l3_domain_num = get_domain_num(CACHE_LEVEL3);
+ int size;
+ int domain;
+ struct cpumask *cpumask;
+ struct cpumask *shared_cpu_map;
+ int cpu;
+
+ if (cat_l3_enabled) {
+ shared_domain_num = l3_domain_num;
+ cpumask = &rdt_l3_cpumask;
+ } else
+ return -EINVAL;
+
+ size = shared_domain_num * sizeof(struct shared_domain);
+ shared_domain = kzalloc(size, GFP_KERNEL);
+ if (!shared_domain)
+ return -EINVAL;
+
+ domain = 0;
+ for_each_cpu(cpu, cpumask) {
+ if (cat_l3_enabled)
+ shared_domain[domain].l3_domain =
+ per_cpu(cpu_l3_domain, cpu);
+ else
+ shared_domain[domain].l3_domain = -1;
+
+ shared_cpu_map = get_shared_cpu_map(cpu, CACHE_LEVEL3);
+
+ cpumask_copy(&shared_domain[domain].cpumask, shared_cpu_map);
+
+ domain++;
+ }
+ for_each_online_cpu(cpu) {
+ if (cat_l3_enabled)
+ per_cpu(cpu_shared_domain, cpu) =
+ per_cpu(cpu_l3_domain, cpu);
+ }
+
+ return 0;
+}
+
+static int cconfig_init(int maxid)
+{
+ int num;
+ int domain;
+ unsigned long *closmap_block;
+ int maxid_size;
+
+ maxid_size = BITS_TO_LONGS(maxid);
+ num = maxid_size * shared_domain_num;
+ cconfig.closmap = kcalloc(maxid, sizeof(unsigned long *), GFP_KERNEL);
+ if (!cconfig.closmap)
+ goto out_free;
+
+ closmap_block = kcalloc(num, sizeof(unsigned long), GFP_KERNEL);
+ if (!closmap_block)
+ goto out_free;
+
+ for (domain = 0; domain < shared_domain_num; domain++)
+ cconfig.closmap[domain] = (unsigned long *)closmap_block +
+ domain * maxid_size;
+
+ cconfig.max_closid = maxid;
+
+ return 0;
+out_free:
+ kfree(cconfig.closmap);
+ kfree(closmap_block);
+ return -ENOMEM;
+}
+
+static int __init cat_cache_init(int level, int maxid,
+ struct clos_cbm_table ***cctable)
+{
+ int domain_num;
+ int domain;
+ int size;
+ int ret = 0;
+ struct clos_cbm_table *p;
+
+ domain_num = get_domain_num(level);
+ size = domain_num * sizeof(struct clos_cbm_table *);
+ *cctable = kzalloc(size, GFP_KERNEL);
+ if (!*cctable) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ size = maxid * domain_num * sizeof(struct clos_cbm_table);
+ p = kzalloc(size, GFP_KERNEL);
+ if (!p) {
+ kfree(*cctable);
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (domain = 0; domain < domain_num; domain++)
+ (*cctable)[domain] = p + domain * maxid;
+
+ ret = cpu_cache_domain_init(level);
+ if (ret) {
+ kfree(*cctable);
+ kfree(p);
+ }
+out:
+ return ret;
+}
static int __init intel_rdt_late_init(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
- u32 maxid, max_cbm_len;
- int err = 0, size, i;
-
- if (!cache_alloc_supported(c))
+ u32 maxid;
+ int i;
+ int ret;
+
+ if (unlikely(disable_cat_l3))
+ cat_l3_enabled = false;
+ else if (cat_l3_supported(c))
+ cat_l3_enabled = true;
+ else if (rdt_opts.simulate_cat_l3 &&
+ get_cache_leaf(CACHE_LEVEL3, 0) >= 0)
+ cat_l3_enabled = true;
+ else
+ cat_l3_enabled = false;
+
+ if (!resource_alloc_enabled())
return -ENODEV;

- maxid = c->x86_cache_max_closid;
- max_cbm_len = c->x86_cache_max_cbm_len;
-
- size = maxid * sizeof(struct clos_cbm_table);
- cctable = kzalloc(size, GFP_KERNEL);
- if (!cctable) {
- err = -ENOMEM;
- goto out_err;
+ if (rdt_opts.simulate_cat_l3) {
+ boot_cpu_data.x86_l3_max_closid = 16;
+ boot_cpu_data.x86_l3_max_cbm_len = 20;
+ }
+ for_each_online_cpu(i) {
+ rdt_cpumask_update(&rdt_l3_cpumask, i, CACHE_LEVEL3);
}

- size = BITS_TO_LONGS(maxid) * sizeof(long);
- cconfig.closmap = kzalloc(size, GFP_KERNEL);
- if (!cconfig.closmap) {
- kfree(cctable);
- err = -ENOMEM;
- goto out_err;
+ maxid = 0;
+ if (cat_l3_enabled) {
+ maxid = boot_cpu_data.x86_l3_max_closid;
+ ret = cat_cache_init(CACHE_LEVEL3, maxid, &l3_cctable);
+ if (ret)
+ cat_l3_enabled = false;
}

- cpu_notifier_register_begin();
+ if (!cat_l3_enabled)
+ return -ENOSPC;
+
+ ret = shared_domain_init();
+ if (ret)
+ return -ENODEV;

- for_each_online_cpu(i)
- rdt_cpumask_update(i);
+ ret = cconfig_init(maxid);
+ if (ret)
+ return ret;
+
+ cpu_notifier_register_begin();

__hotcpu_notifier(intel_rdt_cpu_notifier, 0);

@@ -454,9 +807,8 @@ static int __init intel_rdt_late_init(void)
pr_info("Intel cache allocation enabled\n");
if (cpu_has(c, X86_FEATURE_CDP_L3))
pr_info("Intel code data prioritization detected\n");
-out_err:

- return err;
+ return 0;
}

late_initcall(intel_rdt_late_init);
--
2.5.0