[PATCH 14/21] x86/intel_rdt/cqm: Add mon_data

From: Vikas Shivappa
Date: Mon Jun 26 2017 - 14:56:39 EST


Add a mon_data directory for the root rdtgroup and all other rdtgroups.
The directory holds all of the monitored data for all domains and events
of all resources being monitored.

The mon_data itself has a list of directories in the format
mon_<domain_name>_<domain_id>. Each of these subdirectories contain one
file per event in the mode "0444". Reading the file displays a snapshot
of the monitored data for the event the file represents.

For ex, on a 2 socket Broadwell with llc_occupancy being
monitored the mon_data contents look as below:

$ ls /sys/fs/resctrl/p1/mon_data/
mon_L3_00
mon_L3_01

Each domain directory has one file per event:
$ ls /sys/fs/resctrl/p1/mon_data/mon_L3_00/
llc_occupancy

To read current llc_occupancy of ctrl_mon group p1
$ cat /sys/fs/resctrl/p1/mon_data/mon_L3_00/llc_occupancy
33789096

[This patch idea is based on Tony's sample patches to organise data in a
per domain directory and have one file per event (and use the fp->priv to
store mon data bits)]

Signed-off-by: Vikas Shivappa <vikas.shivappa@xxxxxxxxxxxxxxx>
---
arch/x86/kernel/cpu/Makefile | 2 +-
arch/x86/kernel/cpu/intel_rdt.c | 4 +-
arch/x86/kernel/cpu/intel_rdt.h | 27 +++
arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c | 332 ++++++++++++++++++++++++++++
arch/x86/kernel/cpu/intel_rdt_monitor.c | 42 ++++
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 155 +++++++++++++
arch/x86/kernel/cpu/intel_rdt_schemata.c | 286 ------------------------
7 files changed, 559 insertions(+), 289 deletions(-)
create mode 100644 arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
delete mode 100644 arch/x86/kernel/cpu/intel_rdt_schemata.c

diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 81b0060..1245f98 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -32,7 +32,7 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o

-obj-$(CONFIG_INTEL_RDT) += intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_schemata.o intel_rdt_monitor.o
+obj-$(CONFIG_INTEL_RDT) += intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_monitor.o intel_rdt_ctrlmondata.o

obj-$(CONFIG_X86_MCE) += mcheck/
obj-$(CONFIG_MTRR) += mtrr/
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index b0f8c35..63bfb47c 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -344,8 +344,8 @@ void rdt_ctrl_update(void *arg)
* caller, return the first domain whose id is bigger than the input id.
* The domain list is sorted by id in ascending order.
*/
-static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
- struct list_head **pos)
+struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
+ struct list_head **pos)
{
struct rdt_domain *d;
struct list_head *l;
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index fec8ba9..631d58e 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -33,6 +33,27 @@ struct mon_evt {
struct list_head list;
};

+/**
+ * struct mon_data_bits - Monitoring details for each event file
+ * @rid: Resource id associated with the event file.
+ * @evtid: Event id associated with the event file
+ * @domid: The domain to which the event file belongs
+ */
+union mon_data_bits {
+ void *priv;
+ struct {
+ unsigned int rid : 10;
+ unsigned int evtid : 8;
+ unsigned int domid : 14;
+ } u;
+};
+
+struct rmid_read {
+ struct rdtgroup *rgrp;
+ int evtid;
+ u64 val;
+};
+
extern unsigned int intel_cqm_threshold;
extern bool rdt_alloc_enabled;
extern int rdt_mon_features;
@@ -48,6 +69,7 @@ enum rdt_group_type {
/**
* struct rdtgroup - store rdtgroup's data in resctrl file system.
* @kn: kernfs node
+ * @mon_data_kn kernlfs node for the mon_data directory
* @rdtgroup_list: linked list for all rdtgroups
* @parent: parent rdtgrp
* @crdtgrp_list: child rdtgroup node list
@@ -62,6 +84,7 @@ enum rdt_group_type {
*/
struct rdtgroup {
struct kernfs_node *kn;
+ struct kernfs_node *mon_data_kn;
struct list_head rdtgroup_list;
struct rdtgroup *parent;
struct list_head crdtgrp_list;
@@ -311,6 +334,8 @@ enum {
void rdt_ctrl_update(void *arg);
struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
void rdtgroup_kn_unlock(struct kernfs_node *kn);
+struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
+ struct list_head **pos);
ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off);
int rdtgroup_schemata_show(struct kernfs_open_file *of,
@@ -318,5 +343,7 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
int alloc_rmid(void);
void free_rmid(u32 rmid);
void rdt_get_mon_l3_config(struct rdt_resource *r);
+void mon_event_count(void *info);
+int rdtgroup_mondata_show(struct seq_file *m, void *arg);

#endif /* _ASM_X86_INTEL_RDT_H */
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
new file mode 100644
index 0000000..0c8bca0
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -0,0 +1,332 @@
+/*
+ * Resource Director Technology(RDT)
+ * - Cache Allocation code.
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * Authors:
+ * Fenghua Yu <fenghua.yu@xxxxxxxxx>
+ * Tony Luck <tony.luck@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * More information about RDT be found in the Intel (R) x86 Architecture
+ * Software Developer Manual June 2016, volume 3, section 17.17.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernfs.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include "intel_rdt.h"
+
+/*
+ * Check whether MBA bandwidth percentage value is correct. The value is
+ * checked against the minimum and max bandwidth values specified by the
+ * hardware. The allocated bandwidth percentage is rounded to the next
+ * control step available on the hardware.
+ */
+static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
+{
+ unsigned long bw;
+ int ret;
+
+ /*
+ * Only linear delay values is supported for current Intel SKUs.
+ */
+ if (!r->membw.delay_linear)
+ return false;
+
+ ret = kstrtoul(buf, 10, &bw);
+ if (ret)
+ return false;
+
+ if (bw < r->membw.min_bw || bw > r->default_ctrl)
+ return false;
+
+ *data = roundup(bw, (unsigned long)r->membw.bw_gran);
+ return true;
+}
+
+int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d)
+{
+ unsigned long data;
+
+ if (d->have_new_ctrl)
+ return -EINVAL;
+
+ if (!bw_validate(buf, &data, r))
+ return -EINVAL;
+ d->new_ctrl = data;
+ d->have_new_ctrl = true;
+
+ return 0;
+}
+
+/*
+ * Check whether a cache bit mask is valid. The SDM says:
+ * Please note that all (and only) contiguous '1' combinations
+ * are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
+ * Additionally Haswell requires at least two bits set.
+ */
+static bool cbm_validate(char *buf, unsigned long *data, struct rdt_resource *r)
+{
+ unsigned long first_bit, zero_bit, val;
+ unsigned int cbm_len = r->cache.cbm_len;
+ int ret;
+
+ ret = kstrtoul(buf, 16, &val);
+ if (ret)
+ return false;
+
+ if (val == 0 || val > r->default_ctrl)
+ return false;
+
+ first_bit = find_first_bit(&val, cbm_len);
+ zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
+
+ if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)
+ return false;
+
+ if ((zero_bit - first_bit) < r->cache.min_cbm_bits)
+ return false;
+
+ *data = val;
+ return true;
+}
+
+/*
+ * Read one cache bit mask (hex). Check that it is valid for the current
+ * resource type.
+ */
+int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
+{
+ unsigned long data;
+
+ if (d->have_new_ctrl)
+ return -EINVAL;
+
+ if (!cbm_validate(buf, &data, r))
+ return -EINVAL;
+ d->new_ctrl = data;
+ d->have_new_ctrl = true;
+
+ return 0;
+}
+
+/*
+ * For each domain in this resource we expect to find a series of:
+ * id=mask
+ * separated by ";". The "id" is in decimal, and must match one of
+ * the "id"s for this resource.
+ */
+static int parse_line(char *line, struct rdt_resource *r)
+{
+ char *dom = NULL, *id;
+ struct rdt_domain *d;
+ unsigned long dom_id;
+
+next:
+ if (!line || line[0] == '\0')
+ return 0;
+ dom = strsep(&line, ";");
+ id = strsep(&dom, "=");
+ if (!dom || kstrtoul(id, 10, &dom_id))
+ return -EINVAL;
+ dom = strim(dom);
+ list_for_each_entry(d, &r->domains, list) {
+ if (d->id == dom_id) {
+ if (r->parse_ctrlval(dom, r, d))
+ return -EINVAL;
+ goto next;
+ }
+ }
+ return -EINVAL;
+}
+
+static int update_domains(struct rdt_resource *r, int closid)
+{
+ struct msr_param msr_param;
+ cpumask_var_t cpu_mask;
+ struct rdt_domain *d;
+ int cpu;
+
+ if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ msr_param.low = closid;
+ msr_param.high = msr_param.low + 1;
+ msr_param.res = r;
+
+ list_for_each_entry(d, &r->domains, list) {
+ if (d->have_new_ctrl && d->new_ctrl != d->ctrl_val[closid]) {
+ cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
+ d->ctrl_val[closid] = d->new_ctrl;
+ }
+ }
+ if (cpumask_empty(cpu_mask))
+ goto done;
+ cpu = get_cpu();
+ /* Update CBM on this cpu if it's in cpu_mask. */
+ if (cpumask_test_cpu(cpu, cpu_mask))
+ rdt_ctrl_update(&msr_param);
+ /* Update CBM on other cpus. */
+ smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1);
+ put_cpu();
+
+done:
+ free_cpumask_var(cpu_mask);
+
+ return 0;
+}
+
+static int rdtgroup_parse_resource(char *resname, char *tok, int closid)
+{
+ struct rdt_resource *r;
+
+ for_each_alloc_enabled_rdt_resource(r) {
+ if (!strcmp(resname, r->name) && closid < r->num_closid)
+ return parse_line(tok, r);
+ }
+ return -EINVAL;
+}
+
+ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct rdtgroup *rdtgrp;
+ struct rdt_domain *dom;
+ struct rdt_resource *r;
+ char *tok, *resname;
+ int closid, ret = 0;
+
+ /* Valid input requires a trailing newline */
+ if (nbytes == 0 || buf[nbytes - 1] != '\n')
+ return -EINVAL;
+ buf[nbytes - 1] = '\0';
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (!rdtgrp) {
+ rdtgroup_kn_unlock(of->kn);
+ return -ENOENT;
+ }
+
+ closid = rdtgrp->closid;
+
+ for_each_alloc_enabled_rdt_resource(r) {
+ list_for_each_entry(dom, &r->domains, list)
+ dom->have_new_ctrl = false;
+ }
+
+ while ((tok = strsep(&buf, "\n")) != NULL) {
+ resname = strim(strsep(&tok, ":"));
+ if (!tok) {
+ ret = -EINVAL;
+ goto out;
+ }
+ ret = rdtgroup_parse_resource(resname, tok, closid);
+ if (ret)
+ goto out;
+ }
+
+ for_each_alloc_enabled_rdt_resource(r) {
+ ret = update_domains(r, closid);
+ if (ret)
+ goto out;
+ }
+
+out:
+ rdtgroup_kn_unlock(of->kn);
+ return ret ?: nbytes;
+}
+
+static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid)
+{
+ struct rdt_domain *dom;
+ bool sep = false;
+
+ seq_printf(s, "%*s:", max_name_width, r->name);
+ list_for_each_entry(dom, &r->domains, list) {
+ if (sep)
+ seq_puts(s, ";");
+ seq_printf(s, r->format_str, dom->id, max_data_width,
+ dom->ctrl_val[closid]);
+ sep = true;
+ }
+ seq_puts(s, "\n");
+}
+
+int rdtgroup_schemata_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ struct rdtgroup *rdtgrp;
+ struct rdt_resource *r;
+ int closid, ret = 0;
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (rdtgrp) {
+ closid = rdtgrp->closid;
+ for_each_alloc_enabled_rdt_resource(r) {
+ if (closid < r->num_closid)
+ show_doms(s, r, closid);
+ }
+ } else {
+ ret = -ENOENT;
+ }
+ rdtgroup_kn_unlock(of->kn);
+ return ret;
+}
+
+int rdtgroup_mondata_show(struct seq_file *m, void *arg)
+{
+ struct kernfs_open_file *of = m->private;
+ u32 resid, evtid, domid;
+ struct rdtgroup *rdtgrp;
+ struct rdt_resource *r;
+ union mon_data_bits md;
+ struct rdt_domain *d;
+ struct rmid_read rr;
+ int ret = 0;
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+
+ md.priv = of->kn->priv;
+ resid = md.u.rid;
+ domid = md.u.domid;
+ evtid = md.u.evtid;
+
+ r = &rdt_resources_all[resid];
+ d = rdt_find_domain(r, domid, NULL);
+ if (!d) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ /*
+ * setup the parameters to send to the IPI to read the data.
+ */
+ rr.rgrp = rdtgrp;
+ rr.evtid = evtid;
+ rr.val = 0;
+
+ smp_call_function_any(&d->cpu_mask, mon_event_count, &rr, 1);
+
+ if (rr.val & RMID_VAL_ERROR)
+ seq_puts(m, "Error\n");
+ else if (rr.val & RMID_VAL_UNAVAIL)
+ seq_puts(m, "Unavailable\n");
+ else
+ seq_printf(m, "%llu\n", rr.val * r->mon_scale);
+
+out:
+ rdtgroup_kn_unlock(of->kn);
+ return ret;
+}
diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/intel_rdt_monitor.c
index 624a0aa..cc252eb 100644
--- a/arch/x86/kernel/cpu/intel_rdt_monitor.c
+++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c
@@ -204,6 +204,48 @@ void free_rmid(u32 rmid)
list_add_tail(&entry->list, &rmid_free_lru);
}

+static bool __mon_event_count(u32 rmid, struct rmid_read *rr)
+{
+ u64 tval;
+
+ tval = __rmid_read(rmid, rr->evtid);
+ if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) {
+ rr->val = tval;
+ return false;
+ }
+ switch (rr->evtid) {
+ case QOS_L3_OCCUP_EVENT_ID:
+ rr->val += tval;
+ return true;
+ default:
+ return false;
+ }
+}
+
+void mon_event_count(void *info)
+{
+ struct rdtgroup *rdtgrp, *entry;
+ struct rmid_read *rr = info;
+ struct list_head *llist;
+
+ rdtgrp = rr->rgrp;
+
+ if (!__mon_event_count(rdtgrp->rmid, rr))
+ return;
+
+ /*
+ * For Ctrl groups read data from child monitor groups.
+ */
+ llist = &rdtgrp->crdtgrp_list;
+
+ if (rdtgrp->type == RDTCTRL_GROUP) {
+ list_for_each_entry(entry, llist, crdtgrp_list) {
+ if (!__mon_event_count(entry->rmid, rr))
+ return;
+ }
+ }
+}
+
static int dom_data_init(struct rdt_resource *r)
{
struct rmid_entry *entry = NULL;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index d32b781..9377bcd 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -152,6 +152,11 @@ static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
.seq_show = rdtgroup_seqfile_show,
};

+static struct kernfs_ops kf_mondata_ops = {
+ .atomic_write_len = PAGE_SIZE,
+ .seq_show = rdtgroup_mondata_show,
+};
+
static bool is_cpu_list(struct kernfs_open_file *of)
{
struct rftype *rft = of->kn->priv;
@@ -1251,6 +1256,152 @@ static void rdt_kill_sb(struct super_block *sb)
.kill_sb = rdt_kill_sb,
};

+static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
+ void *priv)
+{
+ struct kernfs_node *kn;
+ int ret = 0;
+
+ kn = __kernfs_create_file(parent_kn, name, 0444, 0,
+ &kf_mondata_ops, priv, NULL, NULL);
+ if (IS_ERR(kn))
+ return PTR_ERR(kn);
+
+ ret = rdtgroup_kn_set_ugid(kn);
+ if (ret) {
+ kernfs_remove(kn);
+ return ret;
+ }
+
+ return ret;
+}
+
+static int get_rdt_resourceid(struct rdt_resource *r)
+{
+ if (r > (rdt_resources_all + RDT_NUM_RESOURCES - 1) ||
+ r < rdt_resources_all ||
+ ((r - rdt_resources_all) % sizeof(struct rdt_resource)))
+ return -EINVAL;
+
+ return ((r - rdt_resources_all) / sizeof(struct rdt_resource));
+}
+
+static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, int domid,
+ struct rdt_resource *r, struct rdtgroup *pr)
+{
+ union mon_data_bits priv;
+ struct kernfs_node *kn;
+ struct mon_evt *mevt;
+ char name[32];
+ int ret, rid;
+
+ rid = get_rdt_resourceid(r);
+ if (rid < 0)
+ return -EINVAL;
+
+ sprintf(name, "mon_%s_%02d", r->name, domid);
+ /* create the directory */
+ kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, pr);
+ if (IS_ERR(kn))
+ return PTR_ERR(kn);
+
+ /*
+ * This extra ref will be put in kernfs_remove() and guarantees
+ * that @rdtgrp->kn is always accessible.
+ */
+ kernfs_get(kn);
+ ret = rdtgroup_kn_set_ugid(kn);
+ if (ret)
+ goto out_destroy;
+
+ if (WARN_ON(list_empty(&r->evt_list))) {
+ ret = -EPERM;
+ goto out_destroy;
+ }
+
+ priv.u.rid = rid;
+ priv.u.domid = domid;
+ list_for_each_entry(mevt, &r->evt_list, list) {
+ priv.u.evtid = mevt->evtid;
+ ret = mon_addfile(kn, mevt->name, priv.priv);
+ if (ret)
+ goto out_destroy;
+ }
+ kernfs_activate(kn);
+ return 0;
+
+out_destroy:
+ kernfs_remove(kn);
+ return ret;
+}
+
+static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
+ struct rdt_resource *r,
+ struct rdtgroup *pr)
+{
+ struct rdt_domain *dom;
+ int ret;
+
+ list_for_each_entry(dom, &r->domains, list) {
+ ret = mkdir_mondata_subdir(parent_kn, dom->id, r, pr);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * This creates a directory mon_data which holds one subdirectory
+ * per domain which contains the monitored data.
+ *
+ * mon_data has one directory for each domain whic are named
+ * mon_<domain_name>_<domain_id>. For ex: A mon_data with
+ * with L3 domain looks as below:
+ * ./mon_data:
+ * mon_L3_00
+ * mon_L3_01
+ * mon_L3_02
+ * ...
+ *
+ * Each domain directory has one file per event:
+ * ./mon_L3_00/:
+ * llc_occupancy
+ *
+ */
+static int mkdir_mondata_all(struct kernfs_node *parent_kn, struct rdtgroup *pr,
+ struct kernfs_node **dest_kn)
+{
+ struct rdt_resource *r;
+ struct kernfs_node *kn;
+ int ret;
+
+ /*
+ * Create the mon_data directory first.
+ */
+ ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn);
+ if (ret)
+ return ret;
+
+ if (dest_kn)
+ *dest_kn = kn;
+
+ /*
+ * Create the subdirectories for each domain. Note that all events
+ * in a domain like L3 are grouped into a resource whose domain is L3
+ */
+ for_each_mon_enabled_rdt_resource(r) {
+ ret = mkdir_mondata_subdir_alldom(kn, r, pr);
+ if (ret)
+ goto out_destroy;
+ }
+
+ return 0;
+
+out_destroy:
+ kernfs_remove(kn);
+ return ret;
+}
/*
* Common code for ctrl_mon and monitor group mkdir.
* The caller needs to unlock the global mutex upon success.
@@ -1307,6 +1458,10 @@ static int mkdir_rdt_common(struct kernfs_node *pkn, struct kernfs_node *prkn,
goto out_destroy;

if (rdt_mon_features) {
+ ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon_data_kn);
+ if (ret)
+ goto out_destroy;
+
ret = alloc_rmid();
if (ret < 0)
return ret;
diff --git a/arch/x86/kernel/cpu/intel_rdt_schemata.c b/arch/x86/kernel/cpu/intel_rdt_schemata.c
deleted file mode 100644
index 952156c..0000000
--- a/arch/x86/kernel/cpu/intel_rdt_schemata.c
+++ /dev/null
@@ -1,286 +0,0 @@
-/*
- * Resource Director Technology(RDT)
- * - Cache Allocation code.
- *
- * Copyright (C) 2016 Intel Corporation
- *
- * Authors:
- * Fenghua Yu <fenghua.yu@xxxxxxxxx>
- * Tony Luck <tony.luck@xxxxxxxxx>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * More information about RDT be found in the Intel (R) x86 Architecture
- * Software Developer Manual June 2016, volume 3, section 17.17.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/kernfs.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include "intel_rdt.h"
-
-/*
- * Check whether MBA bandwidth percentage value is correct. The value is
- * checked against the minimum and max bandwidth values specified by the
- * hardware. The allocated bandwidth percentage is rounded to the next
- * control step available on the hardware.
- */
-static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
-{
- unsigned long bw;
- int ret;
-
- /*
- * Only linear delay values is supported for current Intel SKUs.
- */
- if (!r->membw.delay_linear)
- return false;
-
- ret = kstrtoul(buf, 10, &bw);
- if (ret)
- return false;
-
- if (bw < r->membw.min_bw || bw > r->default_ctrl)
- return false;
-
- *data = roundup(bw, (unsigned long)r->membw.bw_gran);
- return true;
-}
-
-int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d)
-{
- unsigned long data;
-
- if (d->have_new_ctrl)
- return -EINVAL;
-
- if (!bw_validate(buf, &data, r))
- return -EINVAL;
- d->new_ctrl = data;
- d->have_new_ctrl = true;
-
- return 0;
-}
-
-/*
- * Check whether a cache bit mask is valid. The SDM says:
- * Please note that all (and only) contiguous '1' combinations
- * are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
- * Additionally Haswell requires at least two bits set.
- */
-static bool cbm_validate(char *buf, unsigned long *data, struct rdt_resource *r)
-{
- unsigned long first_bit, zero_bit, val;
- unsigned int cbm_len = r->cache.cbm_len;
- int ret;
-
- ret = kstrtoul(buf, 16, &val);
- if (ret)
- return false;
-
- if (val == 0 || val > r->default_ctrl)
- return false;
-
- first_bit = find_first_bit(&val, cbm_len);
- zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
-
- if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)
- return false;
-
- if ((zero_bit - first_bit) < r->cache.min_cbm_bits)
- return false;
-
- *data = val;
- return true;
-}
-
-/*
- * Read one cache bit mask (hex). Check that it is valid for the current
- * resource type.
- */
-int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
-{
- unsigned long data;
-
- if (d->have_new_ctrl)
- return -EINVAL;
-
- if(!cbm_validate(buf, &data, r))
- return -EINVAL;
- d->new_ctrl = data;
- d->have_new_ctrl = true;
-
- return 0;
-}
-
-/*
- * For each domain in this resource we expect to find a series of:
- * id=mask
- * separated by ";". The "id" is in decimal, and must match one of
- * the "id"s for this resource.
- */
-static int parse_line(char *line, struct rdt_resource *r)
-{
- char *dom = NULL, *id;
- struct rdt_domain *d;
- unsigned long dom_id;
-
-next:
- if (!line || line[0] == '\0')
- return 0;
- dom = strsep(&line, ";");
- id = strsep(&dom, "=");
- if (!dom || kstrtoul(id, 10, &dom_id))
- return -EINVAL;
- dom = strim(dom);
- list_for_each_entry(d, &r->domains, list) {
- if (d->id == dom_id) {
- if (r->parse_ctrlval(dom, r, d))
- return -EINVAL;
- goto next;
- }
- }
- return -EINVAL;
-}
-
-static int update_domains(struct rdt_resource *r, int closid)
-{
- struct msr_param msr_param;
- cpumask_var_t cpu_mask;
- struct rdt_domain *d;
- int cpu;
-
- if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
- return -ENOMEM;
-
- msr_param.low = closid;
- msr_param.high = msr_param.low + 1;
- msr_param.res = r;
-
- list_for_each_entry(d, &r->domains, list) {
- if (d->have_new_ctrl && d->new_ctrl != d->ctrl_val[closid]) {
- cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
- d->ctrl_val[closid] = d->new_ctrl;
- }
- }
- if (cpumask_empty(cpu_mask))
- goto done;
- cpu = get_cpu();
- /* Update CBM on this cpu if it's in cpu_mask. */
- if (cpumask_test_cpu(cpu, cpu_mask))
- rdt_ctrl_update(&msr_param);
- /* Update CBM on other cpus. */
- smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1);
- put_cpu();
-
-done:
- free_cpumask_var(cpu_mask);
-
- return 0;
-}
-
-static int rdtgroup_parse_resource(char *resname, char *tok, int closid)
-{
- struct rdt_resource *r;
-
- for_each_alloc_enabled_rdt_resource(r) {
- if (!strcmp(resname, r->name) && closid < r->num_closid)
- return parse_line(tok, r);
- }
- return -EINVAL;
-}
-
-ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off)
-{
- struct rdtgroup *rdtgrp;
- struct rdt_domain *dom;
- struct rdt_resource *r;
- char *tok, *resname;
- int closid, ret = 0;
-
- /* Valid input requires a trailing newline */
- if (nbytes == 0 || buf[nbytes - 1] != '\n')
- return -EINVAL;
- buf[nbytes - 1] = '\0';
-
- rdtgrp = rdtgroup_kn_lock_live(of->kn);
- if (!rdtgrp) {
- rdtgroup_kn_unlock(of->kn);
- return -ENOENT;
- }
-
- closid = rdtgrp->closid;
-
- for_each_alloc_enabled_rdt_resource(r) {
- list_for_each_entry(dom, &r->domains, list)
- dom->have_new_ctrl = false;
- }
-
- while ((tok = strsep(&buf, "\n")) != NULL) {
- resname = strim(strsep(&tok, ":"));
- if (!tok) {
- ret = -EINVAL;
- goto out;
- }
- ret = rdtgroup_parse_resource(resname, tok, closid);
- if (ret)
- goto out;
- }
-
- for_each_alloc_enabled_rdt_resource(r) {
- ret = update_domains(r, closid);
- if (ret)
- goto out;
- }
-
-out:
- rdtgroup_kn_unlock(of->kn);
- return ret ?: nbytes;
-}
-
-static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid)
-{
- struct rdt_domain *dom;
- bool sep = false;
-
- seq_printf(s, "%*s:", max_name_width, r->name);
- list_for_each_entry(dom, &r->domains, list) {
- if (sep)
- seq_puts(s, ";");
- seq_printf(s, r->format_str, dom->id, max_data_width,
- dom->ctrl_val[closid]);
- sep = true;
- }
- seq_puts(s, "\n");
-}
-
-int rdtgroup_schemata_show(struct kernfs_open_file *of,
- struct seq_file *s, void *v)
-{
- struct rdtgroup *rdtgrp;
- struct rdt_resource *r;
- int closid, ret = 0;
-
- rdtgrp = rdtgroup_kn_lock_live(of->kn);
- if (rdtgrp) {
- closid = rdtgrp->closid;
- for_each_alloc_enabled_rdt_resource(r) {
- if (closid < r->num_closid)
- show_doms(s, r, closid);
- }
- } else {
- ret = -ENOENT;
- }
- rdtgroup_kn_unlock(of->kn);
- return ret;
-}
--
1.9.1