[PATCH v2 30/33] x86/intel_rdt_rdtgroup.c: Process schemata input from resctrl interface
From: Fenghua Yu
Date: Thu Sep 08 2016 - 02:58:39 EST
From: Fenghua Yu <fenghua.yu@xxxxxxxxx>
There is one "schemata" file in each rdtgroup directory. User can input
schemata in the file to control how to allocate resources.
The input schemata first needs to pass validation. If there is no syntax
issue, the kernel digests the input schemata and finds a CLOSID for each
domain of each resource.
A shared domain covers a few different resource domains which share
the same CLOSID. Kernel will find a CLOSID in each shared domain. If
an existing CLOSID and its CBMs match input schemata, the CLOSID is
shared by this rdtgroup. Otherwise, the kernel tries to allocate a new
CLOSID for this rdtgroup. If a new CLOSID is available, the QoS MASK MSRs
are updated. If no more CLOSIDs are available, the kernel reports ENODEV to the user.
A shared domain is in preparation for multiple resources (like L2)
that will be added very soon.
User can read the schemata saved in the file.
Signed-off-by: Fenghua Yu <fenghua.yu@xxxxxxxxx>
Reviewed-by: Tony Luck <tony.luck@xxxxxxxxx>
---
arch/x86/include/asm/intel_rdt_rdtgroup.h | 6 +
arch/x86/kernel/cpu/intel_rdt_schemata.c | 674 ++++++++++++++++++++++++++++++
2 files changed, 680 insertions(+)
create mode 100644 arch/x86/kernel/cpu/intel_rdt_schemata.c
diff --git a/arch/x86/include/asm/intel_rdt_rdtgroup.h b/arch/x86/include/asm/intel_rdt_rdtgroup.h
index 43a3b83..782513e 100644
--- a/arch/x86/include/asm/intel_rdt_rdtgroup.h
+++ b/arch/x86/include/asm/intel_rdt_rdtgroup.h
@@ -17,6 +17,12 @@ extern struct list_head rdtgroup_lists;
extern struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
extern void rdtgroup_kn_unlock(struct kernfs_node *kn);
+/* Defined in intel_rdt_schemata.c. */
+extern int get_default_resources(struct rdtgroup *rdtgrp);
+extern ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off);
+extern int rdtgroup_schemata_show(struct seq_file *s, void *v);
+
/* cftype->flags */
enum {
RFTYPE_WORLD_WRITABLE = (1 << 4),/* (DON'T USE FOR NEW FILES) S_IWUGO */
diff --git a/arch/x86/kernel/cpu/intel_rdt_schemata.c b/arch/x86/kernel/cpu/intel_rdt_schemata.c
new file mode 100644
index 0000000..4e624f0
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_rdt_schemata.c
@@ -0,0 +1,674 @@
+#include <linux/slab.h>
+#include <asm/intel_rdt_rdtgroup.h>
+
+/*
+ * Set of per-resource schemata parsed from one schemata input.
+ * Only L3 is carried here; get_resources() sets l3 to NULL when L3 CAT
+ * is not enabled.
+ */
+struct resources {
+ struct cache_resource *l3;
+};
+
+/*
+ * Consume the resource-type prefix (the text before the first ':') from
+ * *res and translate it into a resource_type.
+ *
+ * strsep() advances *res past the ':' (or sets it to NULL when the string
+ * holds no ':'), so on return *res refers to the remainder of the line.
+ *
+ * Return: 0 with *res_type set when the prefix is "L3", -EINVAL otherwise.
+ */
+static int get_res_type(char **res, enum resource_type *res_type)
+{
+	char *prefix = strsep(res, ":");
+
+	if (!prefix || strcmp(prefix, "L3") != 0)
+		return -EINVAL;
+
+	*res_type = RESOURCE_L3;
+	return 0;
+}
+
+/*
+ * Split the raw schemata text into one string per resource type.
+ *
+ * Only a single line of the form "L3:<per-domain list>" is accepted. The
+ * part after the "L3:" prefix is copied into resources[RESOURCE_L3]; the
+ * caller must have sized that buffer to hold at least strlen(buf) + 1 bytes.
+ *
+ * Note that strsep() overwrites the newlines in @buf with NULs.
+ *
+ * Return: 0 on success, -ENOMEM if the scratch copy cannot be allocated,
+ * -EINVAL on malformed input or when the named resource is not enabled.
+ */
+static int divide_resources(char *buf, char *resources[RESOURCE_NUM])
+{
+	enum resource_type res_type;
+	unsigned int resource_num = 0;
+	char *res_block;
+	char *res;
+	char *tok;
+	int ret;
+
+	/* Scratch copy of the line; it can never be longer than the input. */
+	res = kzalloc(strlen(buf) + 1, GFP_KERNEL);
+	if (!res)
+		return -ENOMEM;
+
+	while ((tok = strsep(&buf, "\n")) != NULL) {
+		/* An empty line terminates the input. */
+		if (strlen(tok) == 0)
+			break;
+		if (resource_num++ >= 1) {
+			pr_info("More than one line of resource input!\n");
+			ret = -EINVAL;
+			goto out;
+		}
+		strcpy(res, tok);
+	}
+
+	res_block = res;
+	ret = get_res_type(&res_block, &res_type);
+	if (ret) {
+		pr_info("Unknown resource type!\n");
+		goto out;
+	}
+
+	/* get_res_type() leaves NULL here when the line had no ':'. */
+	if (!res_block) {
+		pr_info("Invalid resource value!\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * A recognised but disabled resource must be reported as an error;
+	 * falling through with ret == 0 would hand the caller an unfilled
+	 * resources[] entry.
+	 */
+	if (res_type != RESOURCE_L3 || !cat_enabled(CACHE_LEVEL3)) {
+		pr_info("Invalid resource type!\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	strcpy(resources[RESOURCE_L3], res_block);
+	ret = 0;
+
+out:
+	kfree(res);
+	return ret;
+}
+
+/*
+ * Check that a capacity bitmask is acceptable for the given cache level:
+ * at least min_bitmask_len bits set within the first max_cbm_len(level)
+ * bits, no bits outside max_cbm(level), and all set bits forming a single
+ * contiguous run.
+ */
+static bool cbm_validate(unsigned long var, int level)
+{
+	u32 nbits = max_cbm_len(level);
+	unsigned long run_start, run_end;
+
+	if (bitmap_weight(&var, nbits) < min_bitmask_len)
+		return false;
+
+	if (var & ~max_cbm(level))
+		return false;
+
+	/* Locate the first run of 1s; any 1 after its end is a second run. */
+	run_start = find_first_bit(&var, nbits);
+	run_end = find_next_zero_bit(&var, nbits, run_start);
+
+	return find_next_bit(&var, nbits, run_end) >= nbits;
+}
+
+/*
+ * Parse one hexadecimal capacity bitmask and validate it for @level.
+ *
+ * The value is parsed into a local unsigned long and only handed back once
+ * it has passed cbm_validate(), so kstrtoul() never writes through a cast
+ * pointer into storage that may not be an unsigned long.
+ *
+ * Return: 0 with *cbm set, the kstrtoul() error code, or -EINVAL when the
+ * mask fails validation.
+ */
+static int parse_one_cbm(const char *str, int level, unsigned long *cbm)
+{
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(str, 16, &val);
+	if (ret)
+		return ret;
+
+	if (!cbm_validate(val, level))
+		return -EINVAL;
+
+	*cbm = val;
+	return 0;
+}
+
+/*
+ * Parse the cbm field(s) of one cache domain.
+ *
+ * @tok:              text after "<cache id>=": "xxxx" when CDP is disabled,
+ *                    "xxxx,xxxx" when CDP is enabled. May be NULL if the
+ *                    field had no '='.
+ * @l:                cache resource whose cbm[] (and, with CDP, cbm2[])
+ *                    entry is filled in.
+ * @input_domain_num: index of the entry to fill.
+ * @level:            cache level used for validation.
+ *
+ * Nothing is stored in @l unless every mask in the field is valid.
+ *
+ * Return: 0 on success, -EINVAL or the kstrtoul() error code on bad input.
+ */
+static int get_input_cbm(char *tok, struct cache_resource *l,
+			 int input_domain_num, int level)
+{
+	unsigned long cbm, cbm2;
+	char *first;
+	int ret;
+
+	if (!cdp_enabled) {
+		if (!tok)
+			return -EINVAL;
+
+		ret = parse_one_cbm(tok, level, &cbm);
+		if (ret)
+			return ret;
+
+		l->cbm[input_domain_num] = cbm;
+		return 0;
+	}
+
+	/* CDP: two comma-separated masks; after strsep() tok is the second. */
+	first = strsep(&tok, ",");
+	if (!first || !tok)
+		return -EINVAL;
+
+	ret = parse_one_cbm(first, level, &cbm);
+	if (ret)
+		return ret;
+
+	ret = parse_one_cbm(tok, level, &cbm2);
+	if (ret)
+		return ret;
+
+	l->cbm[input_domain_num] = cbm;
+	l->cbm2[input_domain_num] = cbm2;
+
+	return 0;
+}
+
+/*
+ * Parse the per-domain part of one cache schema line into @l.
+ *
+ * @buf:    text after the "<cache prefix>:" part; modified by strsep().
+ * @l:      cache resource whose cbm[]/cbm2[] arrays receive the masks.
+ * @level:  cache level being parsed.
+ * @rdtgrp: not used by this function.
+ *
+ * Every field must name, in order, the cache id of the corresponding
+ * domain, and exactly get_domain_num(level) fields must be given. Cache
+ * ids are parsed as decimal, matching the "%d" gen_cache_buf() uses when
+ * it prints them.
+ *
+ * Return: 0 on success, -EINVAL on any malformed or out-of-range input.
+ */
+static int get_cache_schema(char *buf, struct cache_resource *l, int level,
+			    struct rdtgroup *rdtgrp)
+{
+	char *tok, *tok_cache_id;
+	int ret;
+	int domain_num;
+	int input_domain_num;
+	unsigned int input_cache_id;
+	unsigned int cid;
+	unsigned int leaf;
+
+	if (!cat_enabled(level) && strcmp(buf, ";")) {
+		pr_info("Disabled resource should have empty schema\n");
+		return -EINVAL;
+	}
+
+	domain_num = get_domain_num(level);
+	leaf = level_to_leaf(level);
+
+	/*
+	 * Translate cache id based cbm from one line string with format
+	 * "<cache prefix>:<cache id0>=xxxx;<cache id1>=xxxx;..." for
+	 * disabled cdp.
+	 * Or
+	 * "<cache prefix>:<cache id0>=xxxxx,xxxxx;<cache id1>=xxxxx,xxxxx;..."
+	 * for enabled cdp.
+	 */
+	input_domain_num = 0;
+	while ((tok = strsep(&buf, ";")) != NULL) {
+		/*
+		 * Reject surplus fields before the index is used: it selects
+		 * entries in shared_cache_id[] and in l->cbm[]/l->cbm2[].
+		 */
+		if (input_domain_num >= domain_num) {
+			pr_info("domain number is more than max %d\n",
+				domain_num);
+			return -EINVAL;
+		}
+
+		tok_cache_id = strsep(&tok, "=");
+		if (tok_cache_id == NULL)
+			goto cache_id_err;
+
+		ret = kstrtouint(tok_cache_id, 10, &input_cache_id);
+		if (ret)
+			goto cache_id_err;
+
+		cid = cache_domains[leaf].shared_cache_id[input_domain_num];
+		if (input_cache_id != cid)
+			goto cache_id_err;
+
+		ret = get_input_cbm(tok, l, input_domain_num, level);
+		if (ret)
+			goto cbm_err;
+
+		input_domain_num++;
+	}
+
+	if (domain_num != input_domain_num) {
+		pr_info("%s input domain number %d doesn't match domain number %d\n",
+			"l3",
+			input_domain_num, domain_num);
+
+		return -EINVAL;
+	}
+
+	return 0;
+
+cache_id_err:
+	pr_info("Invalid cache id in field %d for L%1d\n", input_domain_num,
+		level);
+	return -EINVAL;
+
+cbm_err:
+	pr_info("Invalid cbm in field %d for cache L%d\n",
+		input_domain_num, level);
+	return -EINVAL;
+}
+
+/*
+ * Tell whether the cbm(s) requested in @l for a shared domain equal the
+ * cbm(s) recorded in the L3 cctable for the closid that rdtgroup @r uses
+ * in that domain. With CDP enabled both the data (cbm) and instruction
+ * (cbm2) masks must match.
+ *
+ * Return: true on a match; false on mismatch or for any level other than
+ * CACHE_LEVEL3.
+ */
+static bool cbm_found(struct cache_resource *l, struct rdtgroup *r,
+		      int domain, int level)
+{
+	int closid = r->resource.closid[domain];
+	u64 want_d, have_d;
+	u64 want_i, have_i;
+	int l3_dom;
+
+	if (level != CACHE_LEVEL3)
+		return false;
+
+	l3_dom = shared_domain[domain].l3_domain;
+
+	want_d = l->cbm[l3_dom];
+	have_d = l3_cctable[l3_dom][get_dcbm_table_index(closid)].cbm;
+	if (!cdp_enabled)
+		return want_d == have_d;
+
+	want_i = l->cbm2[l3_dom];
+	have_i = l3_cctable[l3_dom][get_icbm_table_index(closid)].cbm;
+
+	return want_d == have_d && want_i == have_i;
+}
+
+/*
+ * How the closid for one shared domain was chosen by
+ * get_rdtgroup_resources().
+ */
+enum {
+ /* rdtgrp's own closid already holds the requested cbms; nothing changes */
+ CURRENT_CLOSID,
+ /* rdtgrp is the only user of its closid; keep it and rewrite its cbms */
+ REUSED_OWN_CLOSID,
+ /* another rdtgroup's closid has matching cbms; share it */
+ REUSED_OTHER_CLOSID,
+ /* no match anywhere; closid obtained from closid_alloc() */
+ NEW_CLOSID,
+};
+
+/*
+ * Return true when, in the given shared domain, the L3 cctable entry for
+ * rdtgrp's closid has a reference count of exactly one (and, with CDP
+ * enabled, so does the instruction entry). Always true when L3 CAT is
+ * not enabled.
+ */
+static bool one_refcnt(struct rdtgroup *rdtgrp, int domain)
+{
+	int closid = rdtgrp->resource.closid[domain];
+	int l3_dom;
+	int idx;
+
+	if (!cat_l3_enabled)
+		return true;
+
+	l3_dom = shared_domain[domain].l3_domain;
+
+	idx = get_dcbm_table_index(closid);
+	if (l3_cctable[l3_dom][idx].clos_refcnt != 1)
+		return false;
+
+	if (!cdp_enabled)
+		return true;
+
+	idx = get_icbm_table_index(closid);
+	return l3_cctable[l3_dom][idx].clos_refcnt == 1;
+}
+
+/*
+ * Go through all shared domains. Check if there is an existing closid
+ * in all rdtgroups that matches l3 cbms in the shared
+ * domain. If one is found, reuse the closid. Otherwise, allocate a new one.
+ *
+ * The function works in two passes: the first pass only decides, per
+ * domain, which closid to use and records how it was obtained; the second
+ * pass commits the decisions to rdtgrp, adjusts reference counts and
+ * updates the cctable and MSRs. If an allocation fails during the first
+ * pass, closids already allocated in that pass are released and rdtgrp is
+ * left untouched.
+ *
+ * Return: 0 on success, or the error returned by closid_alloc().
+ */
+static int get_rdtgroup_resources(struct resources *resources_set,
+				  struct rdtgroup *rdtgrp)
+{
+	struct cache_resource *l3;
+	bool l3_cbm_found;
+	struct list_head *l;
+	struct rdtgroup *r;
+	u64 cbm;
+	int rdt_closid[MAX_CACHE_DOMAINS];
+	int rdt_closid_type[MAX_CACHE_DOMAINS];
+	int domain;
+	int closid;
+	int ret;
+
+	l3 = resources_set->l3;
+	memcpy(rdt_closid, rdtgrp->resource.closid,
+	       shared_domain_num * sizeof(int));
+
+	/*
+	 * Start every domain as "nothing to do". The error path and the
+	 * commit loop below read this array for all domains, including ones
+	 * the decision loop never reached or never classified; without this
+	 * they would act on uninitialised stack contents.
+	 */
+	for (domain = 0; domain < MAX_CACHE_DOMAINS; domain++)
+		rdt_closid_type[domain] = CURRENT_CLOSID;
+
+	for (domain = 0; domain < shared_domain_num; domain++) {
+		if (rdtgrp->resource.valid) {
+			/*
+			 * If current rdtgrp is the only user of cbms in
+			 * this domain, will replace the cbms with the input
+			 * cbms and reuse its own closid.
+			 */
+			if (one_refcnt(rdtgrp, domain)) {
+				closid = rdtgrp->resource.closid[domain];
+				rdt_closid[domain] = closid;
+				rdt_closid_type[domain] = REUSED_OWN_CLOSID;
+				continue;
+			}
+
+			l3_cbm_found = true;
+
+			if (cat_l3_enabled)
+				l3_cbm_found = cbm_found(l3, rdtgrp, domain,
+							 CACHE_LEVEL3);
+
+			/*
+			 * If the cbms in this shared domain are already
+			 * existing in current rdtgrp, record the closid
+			 * and its type.
+			 */
+			if (l3_cbm_found) {
+				closid = rdtgrp->resource.closid[domain];
+				rdt_closid[domain] = closid;
+				rdt_closid_type[domain] = CURRENT_CLOSID;
+				continue;
+			}
+		}
+
+		/*
+		 * If the cbms are not found in this rdtgrp, search other
+		 * rdtgroups and see if there are matched cbms.
+		 */
+		l3_cbm_found = cat_l3_enabled ? false : true;
+		list_for_each(l, &rdtgroup_lists) {
+			r = list_entry(l, struct rdtgroup, rdtgroup_list);
+			if (r == rdtgrp || !r->resource.valid)
+				continue;
+
+			if (cat_l3_enabled)
+				l3_cbm_found = cbm_found(l3, r, domain,
+							 CACHE_LEVEL3);
+
+			if (l3_cbm_found) {
+				/* Get the closid that matches l3 cbms.*/
+				closid = r->resource.closid[domain];
+				rdt_closid[domain] = closid;
+				rdt_closid_type[domain] = REUSED_OTHER_CLOSID;
+				break;
+			}
+		}
+
+		if (!l3_cbm_found) {
+			/*
+			 * If no existing closid is found, allocate
+			 * a new one.
+			 */
+			ret = closid_alloc(&closid, domain);
+			if (ret)
+				goto err;
+			rdt_closid[domain] = closid;
+			rdt_closid_type[domain] = NEW_CLOSID;
+		}
+	}
+
+	/*
+	 * Now all closid are ready in rdt_closid. Update rdtgrp's closid.
+	 */
+	for_each_cache_domain(domain, 0, shared_domain_num) {
+		/*
+		 * Nothing is changed if the same closid and same cbms were
+		 * found in this rdtgrp's domain.
+		 */
+		if (rdt_closid_type[domain] == CURRENT_CLOSID)
+			continue;
+
+		/*
+		 * Put rdtgroup closid. No need to put the closid if we
+		 * just change cbms and keep the closid (REUSED_OWN_CLOSID).
+		 */
+		if (rdtgrp->resource.valid &&
+		    rdt_closid_type[domain] != REUSED_OWN_CLOSID) {
+			/* Put old closid in this rdtgrp's domain if valid. */
+			closid = rdtgrp->resource.closid[domain];
+			closid_put(closid, domain);
+		}
+
+		/*
+		 * Replace the closid in this rdtgrp's domain with saved
+		 * closid that was newly allocated (NEW_CLOSID), or found in
+		 * another rdtgroup's domains (REUSED_OTHER_CLOSID), or found
+		 * in this rdtgrp (REUSED_OWN_CLOSID).
+		 */
+		closid = rdt_closid[domain];
+		rdtgrp->resource.closid[domain] = closid;
+
+		/*
+		 * Get the reused other rdtgroup's closid. No need to get the
+		 * closid newly allocated (NEW_CLOSID) because it's been
+		 * already got in closid_alloc(). And no need to get the closid
+		 * for reused own closid (REUSED_OWN_CLOSID).
+		 */
+		if (rdt_closid_type[domain] == REUSED_OTHER_CLOSID)
+			closid_get(closid, domain);
+
+		/*
+		 * If the closid comes from a newly allocated closid
+		 * (NEW_CLOSID), or found in this rdtgrp (REUSED_OWN_CLOSID),
+		 * cbms for this closid will be updated in MSRs.
+		 */
+		if (rdt_closid_type[domain] == NEW_CLOSID ||
+		    rdt_closid_type[domain] == REUSED_OWN_CLOSID) {
+			/*
+			 * Update cbm in cctable with the newly allocated
+			 * closid.
+			 */
+			if (cat_l3_enabled) {
+				int cpu;
+				struct cpumask *mask;
+				int dindex;
+				int l3_domain = shared_domain[domain].l3_domain;
+				int leaf = level_to_leaf(CACHE_LEVEL3);
+
+				cbm = l3->cbm[l3_domain];
+				dindex = get_dcbm_table_index(closid);
+				l3_cctable[l3_domain][dindex].cbm = cbm;
+				if (cdp_enabled) {
+					int iindex;
+
+					cbm = l3->cbm2[l3_domain];
+					iindex = get_icbm_table_index(closid);
+					l3_cctable[l3_domain][iindex].cbm = cbm;
+				}
+
+				mask =
+				&cache_domains[leaf].shared_cpu_map[l3_domain];
+
+				/*
+				 * Run the MSR update on the first CPU in the
+				 * domain's shared_cpu_map. The call waits
+				 * (last argument 1), so passing the address
+				 * of the local closid is safe.
+				 */
+				cpu = cpumask_first(mask);
+				smp_call_function_single(cpu, cbm_update_l3_msr,
+							 &closid, 1);
+			}
+		}
+	}
+
+	rdtgrp->resource.valid = true;
+
+	return 0;
+err:
+	/* Free previously allocated closid. */
+	for_each_cache_domain(domain, 0, shared_domain_num) {
+		if (rdt_closid_type[domain] != NEW_CLOSID)
+			continue;
+
+		closid_put(rdt_closid[domain], domain);
+
+	}
+
+	return ret;
+}
+
+/*
+ * Put a cache_resource into the "nothing allocated" state so that
+ * free_cache_resource() can be called on it unconditionally.
+ */
+static void init_cache_resource(struct cache_resource *l)
+{
+	l->refcnt = NULL;
+	l->closid = NULL;
+	l->cbm2 = NULL;
+	l->cbm = NULL;
+}
+
+/*
+ * Release the arrays owned by a cache_resource. Members that were never
+ * allocated are NULL, which kfree() accepts.
+ */
+static void free_cache_resource(struct cache_resource *l)
+{
+	kfree(l->refcnt);
+	kfree(l->closid);
+	kfree(l->cbm2);
+	kfree(l->cbm);
+}
+
+/*
+ * Allocate the zero-filled arrays of a cache_resource, one element per
+ * domain of the given cache level.
+ *
+ * On failure the arrays that were successfully allocated stay attached to
+ * @l; the caller releases them with free_cache_resource().
+ *
+ * Return: 0 on success, -ENOMEM if any allocation failed.
+ */
+static int alloc_cache_resource(struct cache_resource *l, int level)
+{
+	int n = get_domain_num(level);
+
+	l->cbm = kcalloc(n, sizeof(*l->cbm), GFP_KERNEL);
+	l->cbm2 = kcalloc(n, sizeof(*l->cbm2), GFP_KERNEL);
+	l->closid = kcalloc(n, sizeof(*l->closid), GFP_KERNEL);
+	l->refcnt = kcalloc(n, sizeof(*l->refcnt), GFP_KERNEL);
+
+	if (!l->cbm || !l->cbm2 || !l->closid || !l->refcnt)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Digest the schemata given in a text buffer. When the text is well formed
+ * and enough closids are available, the schemata are applied to rdtgrp and
+ * the resource cctables are updated.
+ *
+ * Inputs:
+ *	buf:	string buffer containing schemata; it is modified while
+ *		being parsed.
+ *	rdtgrp:	current rdtgroup holding schemata.
+ *
+ * Return:
+ *	0 on success or error code. Any failure of divide_resources() is
+ *	reported as -EINVAL.
+ */
+static int get_resources(char *buf, struct rdtgroup *rdtgrp)
+{
+	struct resources resources_set;
+	struct cache_resource l3;
+	char *resources[RESOURCE_NUM];
+	char *resources_block;
+	int size = strlen(buf) + 1;
+	int ret;
+	int i;
+
+	/* One slot of strlen(buf) + 1 bytes per resource type. */
+	resources_block = kcalloc(RESOURCE_NUM, size, GFP_KERNEL);
+	if (!resources_block)
+		return -ENOMEM;
+
+	for (i = 0; i < RESOURCE_NUM; i++)
+		resources[i] = &resources_block[i * size];
+
+	/* Initialised up front so every exit can share the cleanup below. */
+	init_cache_resource(&l3);
+
+	if (divide_resources(buf, resources)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	resources_set.l3 = NULL;
+	if (cat_l3_enabled) {
+		ret = alloc_cache_resource(&l3, CACHE_LEVEL3);
+		if (ret)
+			goto out;
+
+		ret = get_cache_schema(resources[RESOURCE_L3], &l3,
+				       CACHE_LEVEL3, rdtgrp);
+		if (ret)
+			goto out;
+
+		resources_set.l3 = &l3;
+	}
+
+	ret = get_rdtgroup_resources(&resources_set, rdtgrp);
+
+out:
+	kfree(resources_block);
+	free_cache_resource(&l3);
+
+	return ret;
+}
+
+/*
+ * Write the schema line prefix for a cache level into buf: "L3:" for
+ * CACHE_LEVEL3, "L2:" for any other level.
+ */
+static void gen_cache_prefix(char *buf, int level)
+{
+	int digit = 2;
+
+	if (level == CACHE_LEVEL3)
+		digit = 3;
+
+	sprintf(buf, "L%1d:", digit);
+}
+
+/* Look up the cache id recorded for a domain of the given cache level. */
+static int get_cache_id(int domain, int level)
+{
+	int leaf = level_to_leaf(level);
+
+	return cache_domains[leaf].shared_cache_id[domain];
+}
+
+/*
+ * Build the default schema line for a cache level in buf:
+ * "<prefix><id0>=<mask>;<id1>=<mask>;...\n", or "<id>=<mask>,<mask>" per
+ * domain when CDP is enabled. <mask> is max_cbm(level) in hex and the
+ * cache ids are printed in decimal.
+ *
+ * NOTE(review): buf is written without a length limit; the caller must
+ * supply a buffer large enough for every domain.
+ */
+static void gen_cache_buf(char *buf, int level)
+{
+	unsigned long mask;
+	int ndomains;
+	char *pos;
+	int d;
+
+	gen_cache_prefix(buf, level);
+	pos = buf + strlen(buf);
+
+	ndomains = get_domain_num(level);
+	mask = (unsigned long)max_cbm(level);
+
+	for (d = 0; d < ndomains; d++) {
+		pos += sprintf(pos, "%d=%lx", get_cache_id(d, level), mask);
+		if (cdp_enabled)
+			pos += sprintf(pos, ",%lx", mask);
+
+		/* ';' between domains, newline after the last one. */
+		*pos++ = (d < ndomains - 1) ? ';' : '\n';
+		*pos = '\0';
+	}
+}
+
+/*
+ * Set up default schemata in a rdtgroup. All schemata in all resources are
+ * default values (all 1's) for all domains.
+ *
+ * The default line is generated twice: get_resources() modifies the text
+ * it parses, so a scratch copy is parsed and a fresh copy is then stored
+ * in rdtgrp->schema.
+ *
+ * Input: rdtgroup.
+ * Return: 0: successful (also when L3 CAT is not enabled)
+ *	   non-0: error code
+ */
+int get_default_resources(struct rdtgroup *rdtgrp)
+{
+	char schema[1024];
+	int ret;
+
+	if (!cat_enabled(CACHE_LEVEL3))
+		return 0;
+
+	gen_cache_buf(schema, CACHE_LEVEL3);
+	if (schema[0] != '\0') {
+		ret = get_resources(schema, rdtgrp);
+		if (ret)
+			return ret;
+	}
+
+	gen_cache_buf(rdtgrp->schema, CACHE_LEVEL3);
+
+	return 0;
+}
+
+/*
+ * Write handler for the "schemata" file of an rdtgroup.
+ *
+ * The input is applied with get_resources(); only when that succeeds is
+ * the text, as the user wrote it, saved in rdtgrp->schema. A pristine copy
+ * is taken first because get_resources() modifies the buffer it parses.
+ *
+ * NOTE(review): the copy into rdtgrp->schema is not bounded by the size of
+ * that buffer, which is not visible here -- confirm a long write cannot
+ * overflow it.
+ *
+ * Return: nbytes on success, -ENODEV if the rdtgroup is gone, -ENOMEM, or
+ * the error from get_resources().
+ */
+ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
+{
+	struct rdtgroup *rdtgrp;
+	char *schema;
+	size_t len;
+	int ret;
+
+	rdtgrp = rdtgroup_kn_lock_live(of->kn);
+	if (!rdtgrp)
+		return -ENODEV;
+
+	len = strlen(buf) + 1;
+	schema = kzalloc(len, GFP_KERNEL);
+	if (!schema) {
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
+	memcpy(schema, buf, len);
+
+	ret = get_resources(buf, rdtgrp);
+	if (!ret)
+		memcpy(rdtgrp->schema, schema, len);
+
+	kfree(schema);
+
+out_unlock:
+	rdtgroup_kn_unlock(of->kn);
+	return ret ? ret : nbytes;
+}
+
+/*
+ * seq_file show handler for the "schemata" file: print the schema text
+ * saved in the rdtgroup.
+ *
+ * rdtgroup_kn_lock_live() can return NULL (the write handler in this file
+ * checks for it), so the result must not be dereferenced blindly. The
+ * failure path mirrors the write handler: return -ENODEV without calling
+ * rdtgroup_kn_unlock().
+ * NOTE(review): this assumes a failed rdtgroup_kn_lock_live() leaves
+ * nothing locked -- confirm against its definition.
+ *
+ * Return: 0 on success, -ENODEV if the rdtgroup is no longer live.
+ */
+int rdtgroup_schemata_show(struct seq_file *s, void *v)
+{
+	struct kernfs_open_file *of = s->private;
+	struct rdtgroup *rdtgrp;
+
+	rdtgrp = rdtgroup_kn_lock_live(of->kn);
+	if (!rdtgrp)
+		return -ENODEV;
+
+	seq_printf(s, "%s", rdtgrp->schema);
+	rdtgroup_kn_unlock(of->kn);
+	return 0;
+}
--
2.5.0