[PATCH 4/6] x86/intel_rdt: Adds support to enable CDP
From: Vikas Shivappa
Date: Sun Aug 23 2015 - 18:47:05 EST
At any time the intel_rdt cgroup operates in one of 2 modes: legacy cache
allocation mode (the default) or code data prioritization (CDP) mode.
When CDP is enabled the number of available CLOSids is halved, and it is
doubled again when CDP is disabled. When CDP is enabled each CLOSid
maps to a data cache mask and an instruction cache mask.
CDP can be enabled or disabled dynamically; the enabling itself is done
by writing to the IA32_PQOS_CFG MSR.
On every mode change all the bit mask MSRs are reset to all 1s or fully
open. This implies that all the tasks now need to be moved to the
root cgroup, since the root cgroup represents the cgroup with a mask of all
1s. To achieve this we destroy all the child cgroups in the hierarchy.
The enabling and disabling is done during mount/umount and, depending on
the mode switched into, only the corresponding files are displayed
in the cgroup directory. Each cgroup
exposes its dcache (data cache) and icache (instruction cache) masks when
CDP is enabled, and exposes cache_mask (the common cache mask) when
operating in cache allocation mode.
---
arch/x86/include/asm/intel_rdt.h | 11 +-
arch/x86/kernel/cpu/intel_rdt.c | 253 ++++++++++++++++++++++++++++++++-------
include/linux/cgroup.h | 1 +
kernel/cgroup.c | 32 ++++-
4 files changed, 247 insertions(+), 50 deletions(-)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 68f220e..55c496a 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -9,6 +9,8 @@
#define MAX_CBM_LENGTH 32
#define IA32_L3_CBM_BASE 0xc90
#define CBM_FROM_INDEX(x) (IA32_L3_CBM_BASE + x)
+#define DCACHE_MASK_INDEX(x) CBM_FROM_INDEX(((x) << 1))
+#define ICACHE_MASK_INDEX(x) CBM_FROM_INDEX((((x) << 1) + 1))
DECLARE_PER_CPU(struct intel_pqr_state, pqr_state);
extern struct static_key rdt_enable_key;
@@ -17,6 +19,7 @@ extern void __intel_rdt_sched_in(void);
struct rdt_subsys_info {
unsigned long *closmap;
bool cdp_supported;
+ bool cdp_enable;
};
struct intel_rdt {
@@ -24,11 +27,17 @@ struct intel_rdt {
u32 closid;
};
-struct clos_cbm_map {
+struct cat_clos_mask_map {
unsigned long cache_mask;
unsigned int clos_refcnt;
};
+struct cdp_clos_mask_map {
+ unsigned long icache_mask;
+ unsigned long dcache_mask;
+ unsigned int clos_refcnt;
+};
+
/*
* Return rdt group corresponding to this container.
*/
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index b8dcb30..155ac51 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -29,9 +29,17 @@
#include <asm/intel_rdt.h>
/*
- * ccmap maintains 1:1 mapping between CLOSid and cache bitmask.
+ * cat_cm_map maintains 1:1 mapping between CLOSid and cache bitmask.
+ * Used for cache allocation.
*/
-static struct clos_cbm_map *ccmap;
+static struct cat_clos_mask_map *cat_cm_map;
+
+/*
+ * cdp_cm_map maintains 1:2 mapping between Closid and the icache,dcache mask.
+ */
+static struct cdp_clos_mask_map *cdp_cm_map;
+
+static struct cftype rdt_files[];
static struct rdt_subsys_info rdtss_info;
static DEFINE_MUTEX(rdt_group_mutex);
struct intel_rdt rdt_root_group;
@@ -47,6 +55,11 @@ static unsigned int min_bitmask_len = 1;
*/
static cpumask_t rdt_cpumask;
+struct rdt_remote_data {
+ int msr;
+ u64 val;
+};
+
#define rdt_for_each_child(pos_css, parent_ir) \
css_for_each_child((pos_css), &(parent_ir)->css)
@@ -116,9 +129,168 @@ static inline bool cdp_supported(struct cpuinfo_x86 *c)
return false;
}
+static void cbm_cpu_update(void *info)
+{
+ u32 closid = (u32)(unsigned long) info;
+
+ wrmsrl(CBM_FROM_INDEX(closid), cat_cm_map[closid].cache_mask);
+}
+
+static void msr_cpu_update(void *arg)
+{
+ struct rdt_remote_data *info = arg;
+
+ wrmsrl(info->msr, info->val);
+}
+
+/*
+ * msr_update_all() - Update the msr for all packages.
+ */
+static inline void msr_update_all(int msr, u64 val)
+{
+ struct rdt_remote_data info;
+
+ info.msr = msr;
+ info.val = val;
+ on_each_cpu_mask(&rdt_cpumask, msr_cpu_update, &info, 1);
+}
+
+static void closcbm_map_dump(void)
+{
+ u32 i;
+
+ pr_debug("CBMMAP\n");
+ for (i = 0; i < boot_cpu_data.x86_cache_max_closid; i++) {
+ pr_debug("cache_mask: 0x%x,clos_refcnt: %u\n",
+ (unsigned int)cat_cm_map[i].cache_mask, cat_cm_map[i].clos_refcnt);
+ }
+}
+
+static void cdp_cm_map_reset(int maxid, unsigned long max_cbm_mask)
+{
+ size_t sizeb;
+
+ sizeb = (maxid - 1) * sizeof(struct cdp_clos_mask_map);
+ memset(&cdp_cm_map[1], 0, sizeb);
+
+ cdp_cm_map[0].clos_refcnt = 1;
+}
+
+static void cat_cm_map_reset(int maxid, unsigned long max_cbm_mask)
+{
+ size_t sizeb;
+
+ sizeb = (maxid - 1) * sizeof(struct cat_clos_mask_map);
+ memset(&cat_cm_map[1], 0, sizeb);
+
+ cat_cm_map[0].clos_refcnt = 1;
+}
+
+static void cdp_enable(void)
+{
+ int max_cbm_len = boot_cpu_data.x86_cache_max_cbm_len;
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+ unsigned long max_cbm_mask;
+ unsigned int i;
+
+ max_cbm_mask = (1ULL << max_cbm_len) - 1;
+ c->x86_cache_max_closid = c->x86_cache_max_closid >> 1;
+
+ for (i = 0; i < c->x86_cache_max_closid; i++) {
+ msr_update_all(DCACHE_MASK_INDEX(i), max_cbm_mask);
+ msr_update_all(ICACHE_MASK_INDEX(i), max_cbm_mask);
+ }
+ cdp_cm_map_reset(c->x86_cache_max_closid, max_cbm_mask);
+ msr_update_all(MSR_IA32_PQOS_CFG, 0x1U);
+}
+
+static void cdp_disable(void)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+ int max_cbm_len = boot_cpu_data.x86_cache_max_cbm_len;
+ unsigned int i;
+ unsigned long max_cbm_mask;
+
+ max_cbm_mask = (1ULL << max_cbm_len) - 1;
+ c->x86_cache_max_closid = c->x86_cache_max_closid << 1;
+
+ for (i = 0; i < c->x86_cache_max_closid; i++) {
+ msr_update_all(CBM_FROM_INDEX(i), max_cbm_mask);
+ }
+ cat_cm_map_reset(c->x86_cache_max_closid, max_cbm_mask);
+ msr_update_all(MSR_IA32_PQOS_CFG, 0x0U);
+}
+
+static void cdp_fileinfo_set(bool cdp_enable)
+{
+ unsigned tmp_flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_NOT_ON_ROOT;
+
+ lockdep_assert_held(&rdt_group_mutex);
+
+ if (cdp_enable) {
+ rdt_files[0].flags |= tmp_flags;
+ rdt_files[1].flags &= ~tmp_flags;
+ rdt_files[2].flags &= ~tmp_flags;
+ } else {
+ rdt_files[0].flags &= ~tmp_flags;
+ rdt_files[1].flags |= tmp_flags;
+ rdt_files[2].flags |= tmp_flags;
+ }
+}
+
+static void rdt_css_umount(void)
+{
+ struct cgroup *cgrp = rdt_root_group.css.cgroup;
+
+ if (!rdtss_info.cdp_supported)
+ return;
+
+ if (css_has_online_children(&cgrp->self))
+ cgroup_destroy_children(&rdt_root_group.css);
+
+ return;
+}
+
+static void rdt_css_mount(void* info)
+{
+ bool enable_cdp = (bool) info;
+
+ if (!rdtss_info.cdp_supported || rdtss_info.cdp_enable == enable_cdp)
+ return;
+
+ mutex_lock(&rdt_group_mutex);
+ cdp_fileinfo_set(enable_cdp);
+
+ if (enable_cdp)
+ cdp_enable();
+ else
+ cdp_disable();
+
+ rdtss_info.cdp_enable = enable_cdp;
+ mutex_unlock(&rdt_group_mutex);
+}
+
+static inline void rdt_cdp_init(int cdp_maxid, unsigned long max_cbm_mask)
+{
+ size_t sizeb;
+
+ sizeb = cdp_maxid * sizeof(struct cdp_clos_mask_map);
+ cdp_cm_map = kzalloc(sizeb, GFP_KERNEL);
+ if (!cdp_cm_map) {
+ pr_err("cdp enable failure. No Memory\n");
+ return;
+ }
+
+ cdp_cm_map[0].icache_mask = max_cbm_mask;
+ cdp_cm_map[0].dcache_mask = max_cbm_mask;
+ cdp_cm_map[0].clos_refcnt = 1;
+
+ rdtss_info.cdp_supported = true;
+}
+
static inline void closid_get(u32 closid)
{
- struct clos_cbm_map *ccm = &ccmap[closid];
+ struct cat_clos_mask_map *ccm = &cat_cm_map[closid];
lockdep_assert_held(&rdt_group_mutex);
@@ -147,12 +319,12 @@ static int closid_alloc(struct intel_rdt *ir)
static inline void closid_free(u32 closid)
{
clear_bit(closid, rdtss_info.closmap);
- ccmap[closid].cache_mask = 0;
+ cat_cm_map[closid].cache_mask = 0;
}
static inline void closid_put(u32 closid)
{
- struct clos_cbm_map *ccm = &ccmap[closid];
+ struct cat_clos_mask_map *ccm = &cat_cm_map[closid];
lockdep_assert_held(&rdt_group_mutex);
if (WARN_ON(!ccm->clos_refcnt))
@@ -216,7 +388,7 @@ static int intel_cache_alloc_cbm_read(struct seq_file *m, void *v)
{
struct intel_rdt *ir = css_rdt(seq_css(m));
- seq_printf(m, "%08lx\n", ccmap[ir->closid].cache_mask);
+ seq_printf(m, "%08lx\n", cat_cm_map[ir->closid].cache_mask);
return 0;
}
@@ -253,7 +425,7 @@ static int cbm_validate(struct intel_rdt *ir, unsigned long cbmvalue)
}
par = parent_rdt(ir);
- cbm_tmp = &ccmap[par->closid].cache_mask;
+ cbm_tmp = &cat_cm_map[par->closid].cache_mask;
if (!bitmap_subset(&cbmvalue, cbm_tmp, MAX_CBM_LENGTH)) {
err = -EINVAL;
goto out_err;
@@ -262,7 +434,7 @@ static int cbm_validate(struct intel_rdt *ir, unsigned long cbmvalue)
rcu_read_lock();
rdt_for_each_child(css, ir) {
c = css_rdt(css);
- cbm_tmp = &ccmap[c->closid].cache_mask;
+ cbm_tmp = &cat_cm_map[c->closid].cache_mask;
if (!bitmap_subset(cbm_tmp, &cbmvalue, MAX_CBM_LENGTH)) {
rcu_read_unlock();
pr_err("Children's mask not a subset\n");
@@ -282,7 +454,7 @@ static bool cbm_search(unsigned long cbm, u32 *closid)
u32 i;
for (i = 0; i < maxid; i++) {
- if (bitmap_equal(&cbm, &ccmap[i].cache_mask, MAX_CBM_LENGTH)) {
+ if (bitmap_equal(&cbm, &cat_cm_map[i].cache_mask, MAX_CBM_LENGTH)) {
*closid = i;
return true;
}
@@ -291,36 +463,10 @@ static bool cbm_search(unsigned long cbm, u32 *closid)
return false;
}
-static void closcbm_map_dump(void)
-{
- u32 i;
-
- pr_debug("CBMMAP\n");
- for (i = 0; i < boot_cpu_data.x86_cache_max_closid; i++) {
- pr_debug("cache_mask: 0x%x,clos_refcnt: %u\n",
- (unsigned int)ccmap[i].cache_mask, ccmap[i].clos_refcnt);
- }
-}
-
-static void cbm_cpu_update(void *info)
-{
- u32 closid = (u32) info;
-
- wrmsrl(CBM_FROM_INDEX(closid), ccmap[closid].cache_mask);
-}
-
-/*
- * cbm_update_all() - Update the cache bit mask for all packages.
- */
-static inline void cbm_update_all(u32 closid)
-{
- on_each_cpu_mask(&rdt_cpumask, cbm_cpu_update, (void *)closid, 1);
-}
-
/*
* intel_cache_alloc_cbm_write() - Validates and writes the
* cache bit mask(cbm) to the IA32_L3_MASK_n
- * and also store the same in the ccmap.
+ * and also store the same in the cat_cm_map.
*
* CLOSids are reused for cgroups which have same bitmask.
* This helps to use the scant CLOSids optimally. This also
@@ -350,7 +496,7 @@ static int intel_cache_alloc_cbm_write(struct cgroup_subsys_state *css,
goto out;
}
- if (cbmvalue == ccmap[ir->closid].cache_mask)
+ if (cbmvalue == cat_cm_map[ir->closid].cache_mask)
goto out;
err = cbm_validate(ir, cbmvalue);
@@ -376,8 +522,8 @@ static int intel_cache_alloc_cbm_write(struct cgroup_subsys_state *css,
goto out;
}
- ccmap[ir->closid].cache_mask = cbmvalue;
- cbm_update_all(ir->closid);
+ cat_cm_map[ir->closid].cache_mask = cbmvalue;
+ msr_update_all(CBM_FROM_INDEX(ir->closid), cbmvalue);
}
closcbm_map_dump();
out:
@@ -415,7 +561,7 @@ static inline void cbm_update_msrs(void)
* whose cache mask is all 1s always.
*/
for (i = 1; i < maxid; i++) {
- if (ccmap[i].clos_refcnt)
+ if (cat_cm_map[i].clos_refcnt)
cbm_cpu_update((void *)i);
}
}
@@ -469,7 +615,7 @@ static int intel_rdt_cpu_notifier(struct notifier_block *nb,
static int __init intel_rdt_late_init(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
- static struct clos_cbm_map *ccm;
+ static struct cat_clos_mask_map *ccm;
u32 maxid, max_cbm_len;
int err = 0, i;
size_t sizeb;
@@ -488,9 +634,9 @@ static int __init intel_rdt_late_init(void)
goto out_err;
}
- sizeb = maxid * sizeof(struct clos_cbm_map);
- ccmap = kzalloc(sizeb, GFP_KERNEL);
- if (!ccmap) {
+ sizeb = maxid * sizeof(struct cat_clos_mask_map);
+ cat_cm_map = kzalloc(sizeb, GFP_KERNEL);
+ if (!cat_cm_map) {
kfree(rdtss_info.closmap);
err = -ENOMEM;
goto out_err;
@@ -498,7 +644,7 @@ static int __init intel_rdt_late_init(void)
set_bit(0, rdtss_info.closmap);
rdt_root_group.closid = 0;
- ccm = &ccmap[0];
+ ccm = &cat_cm_map[0];
ccm->cache_mask = (1ULL << max_cbm_len) - 1;
ccm->clos_refcnt = 1;
@@ -515,8 +661,13 @@ static int __init intel_rdt_late_init(void)
static_key_slow_inc(&rdt_enable_key);
pr_info("Intel cache allocation enabled\n");
+
+ /*
+ * Test for Code data prioritization support.
+ * Failure is not fatal as CAT can still work.
+ */
if (cdp_supported(c)) {
- rdtss_info.cdp_supported = true;
+ rdt_cdp_init(maxid / 2, cat_cm_map[0].cache_mask);
pr_info("Intel code data prioritization enabled\n");
}
out_err:
@@ -533,12 +684,22 @@ static struct cftype rdt_files[] = {
.write_u64 = intel_cache_alloc_cbm_write,
.mode = 0666,
},
+ {
+ .name = "icache_mask",
+ .flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_NOT_ON_ROOT,
+ },
+ {
+ .name = "dcache_mask",
+ .flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_NOT_ON_ROOT,
+ },
{ } /* terminate */
};
struct cgroup_subsys intel_rdt_cgrp_subsys = {
.css_alloc = intel_rdt_css_alloc,
.css_free = intel_rdt_css_free,
+ .css_mount = rdt_css_mount,
+ .css_umount = rdt_css_umount,
.legacy_cftypes = rdt_files,
.early_init = 0,
};
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 3ded186..48f106c 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -195,6 +195,7 @@ enum {
* specified at mount time and thus is implemented here.
*/
CGRP_CPUSET_CLONE_CHILDREN,
+ CGRP_RDT_DESTROY_CHILDREN,
};
struct cgroup {
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e139504..38b4de5 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1346,6 +1346,8 @@ struct cgroup_sb_opts {
/* User explicitly requested empty subsystem */
bool none;
void *mount_info;
+ bool rdt_cdp_enable;
+ bool rdt_mount;
};
static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
@@ -1392,6 +1394,13 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
opts->cpuset_clone_children = true;
continue;
}
+ if (!strcmp(token, "cdp_enable")) {
+ opts->rdt_cdp_enable = true;
+ continue;
+ }
+ if (!strcmp(token, "intel_rdt")) {
+ opts->rdt_mount = true;
+ }
if (!strcmp(token, "xattr")) {
opts->flags |= CGRP_ROOT_XATTR;
continue;
@@ -1640,6 +1649,8 @@ static void init_cgroup_root(struct cgroup_root *root,
strcpy(root->name, opts->name);
if (opts->cpuset_clone_children)
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
+ if (opts->rdt_mount)
+ set_bit(CGRP_RDT_DESTROY_CHILDREN, &root->cgrp.flags);
}
static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
@@ -1789,8 +1800,11 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
goto out_unlock;
for_each_subsys(ss, i) {
- if ((opts.subsys_mask && (1U << i)) && ss->css_mount)
+ if ((opts.subsys_mask & (1U << i)) && ss->css_mount) {
+ if (opts.rdt_mount)
+ opts.mount_info = (void *)opts.rdt_cdp_enable;
ss->css_mount(opts.mount_info);
+ }
}
/* look for a matching existing root */
@@ -1934,12 +1948,24 @@ static void cgroup_kill_sb(struct super_block *sb)
{
struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
struct cgroup_root *root = cgroup_root_from_kf(kf_root);
+ bool root_has_no_children = false;
+ struct cgroup *cgrp = &root->cgrp;
struct cgroup_subsys *ss;
int i;
+ root_has_no_children = list_empty(&root->cgrp.self.children);
+
+ /*
+ * Call the subsystem specific umount API.
+ */
for_each_subsys(ss, i) {
- if ((root->subsys_mask && (1U << i)) && ss->css_umount)
+ if ((root->subsys_mask & (1U << i)) && ss->css_umount) {
+ mutex_lock(&cgroup_mutex);
ss->css_umount();
+ if (!css_has_online_children(&cgrp->self))
+ root_has_no_children = true;
+ mutex_unlock(&cgroup_mutex);
+ }
}
/*
* If @root doesn't have any mounts or children, start killing it.
@@ -1948,7 +1974,7 @@ static void cgroup_kill_sb(struct super_block *sb)
*
* And don't kill the default root.
*/
- if (!list_empty(&root->cgrp.self.children) ||
+ if (!root_has_no_children ||
root == &cgrp_dfl_root)
cgroup_put(&root->cgrp);
else
--
1.9.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/