[PATCH 13/21] x86/intel_rdt/cqm: Add cpus file support

From: Vikas Shivappa
Date: Mon Jun 26 2017 - 14:56:54 EST


The cpus file is extended to support resource monitoring. This is used
to over-ride the RMID of the default group when running on specific
CPUs. It works similar to the resource control. The "cpus" and
"cpus_list" file is present in default group, ctrl_mon groups and
monitor groups.

Each "cpus" file or cpu_list file reads a cpumask or list showing which
CPUs belong to the resource group. By default all online cpus belong to
the default root group. A CPU can be present in one "ctrl_mon" and one
"monitor" group simultaneously. They can be added to a resource group by
writing the CPU to the file. When a CPU is added to a ctrl_mon group it
is automatically removed from the previous ctrl_mon group. A CPU can be
added to a monitor group only if it is present in the parent ctrl_mon
group and when a CPU is added to a monitor group, it is automatically
removed from the previous monitor group. When CPUs go offline, they are
automatically removed from the ctrl_mon and monitor groups.

Signed-off-by: Vikas Shivappa <vikas.shivappa@xxxxxxxxxxxxxxx>
---
arch/x86/kernel/cpu/intel_rdt.c | 15 ++-
arch/x86/kernel/cpu/intel_rdt.h | 2 +
arch/x86/kernel/cpu/intel_rdt_monitor.c | 1 +
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 183 +++++++++++++++++++++++++------
4 files changed, 169 insertions(+), 32 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index e96b3f0..b0f8c35 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -483,6 +483,17 @@ static int intel_rdt_online_cpu(unsigned int cpu)
return 0;
}

+static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
+{
+ struct rdtgroup *cr;
+
+ list_for_each_entry(cr, &r->crdtgrp_list, crdtgrp_list) {
+ if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask)) {
+ break;
+ }
+ }
+}
+
static int intel_rdt_offline_cpu(unsigned int cpu)
{
struct rdtgroup *rdtgrp;
@@ -492,8 +503,10 @@ static int intel_rdt_offline_cpu(unsigned int cpu)
for_each_alloc_capable_rdt_resource(r)
domain_remove_cpu(cpu, r);
list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
- if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask))
+ if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
+ clear_childcpus(rdtgrp, cpu);
break;
+ }
}
clear_closid(cpu);
mutex_unlock(&rdtgroup_mutex);
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index fdf3654..fec8ba9 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -37,6 +37,8 @@ struct mon_evt {
extern bool rdt_alloc_enabled;
extern int rdt_mon_features;

+DECLARE_PER_CPU_READ_MOSTLY(int, cpu_rmid);
+
enum rdt_group_type {
RDTCTRL_GROUP = 0,
RDTMON_GROUP,
diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/intel_rdt_monitor.c
index 4f4221a..624a0aa 100644
--- a/arch/x86/kernel/cpu/intel_rdt_monitor.c
+++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c
@@ -75,6 +75,7 @@ struct rmid_entry {
*/
unsigned int intel_cqm_threshold;

+DEFINE_PER_CPU_READ_MOSTLY(int, cpu_rmid);
static inline struct rmid_entry *__rmid_entry(u32 rmid)
{
struct rmid_entry *entry;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 8fd0757..d32b781 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -181,13 +181,18 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
/*
* This is safe against intel_rdt_sched_in() called from __switch_to()
* because __switch_to() is executed with interrupts disabled. A local call
- * from rdt_update_closid() is proteced against __switch_to() because
+ * from rdt_update_closidrmid() is proteced against __switch_to() because
* preemption is disabled.
*/
-static void rdt_update_cpu_closid(void *closid)
+static void update_cpu_closid_rmid(void *info)
{
- if (closid)
- this_cpu_write(cpu_closid, *(int *)closid);
+ struct rdtgroup *r = info;
+
+ if (r) {
+ this_cpu_write(cpu_closid, r->closid);
+ this_cpu_write(cpu_rmid, r->rmid);
+ }
+
/*
* We cannot unconditionally write the MSR because the current
* executing task might have its own closid selected. Just reuse
@@ -199,33 +204,30 @@ static void rdt_update_cpu_closid(void *closid)
/*
* Update the PGR_ASSOC MSR on all cpus in @cpu_mask,
*
- * Per task closids must have been set up before calling this function.
+ * Per task closids/rmids must have been set up before calling this function.
*
- * The per cpu closids are updated with the smp function call, when @closid
- * is not NULL. If @closid is NULL then all affected percpu closids must
- * have been set up before calling this function.
+ * The per cpu closids and rmids are updated with the smp function call
*/
static void
-rdt_update_closid(const struct cpumask *cpu_mask, int *closid)
+update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
{
int cpu = get_cpu();

if (cpumask_test_cpu(cpu, cpu_mask))
- rdt_update_cpu_closid(closid);
- smp_call_function_many(cpu_mask, rdt_update_cpu_closid, closid, 1);
+ update_cpu_closid_rmid(r);
+ smp_call_function_many(cpu_mask, update_cpu_closid_rmid, r, 1);
put_cpu();
}

-static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off)
+static ssize_t cpus_mon_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes,
+ struct rdtgroup *rdtgrp)
{
+ struct rdtgroup *pr = rdtgrp->parent, *cr;
cpumask_var_t tmpmask, newmask;
- struct rdtgroup *rdtgrp, *r;
+ struct list_head *llist;
int ret;

- if (!buf)
- return -EINVAL;
-
if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
return -ENOMEM;
if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
@@ -233,10 +235,89 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
return -ENOMEM;
}

- rdtgrp = rdtgroup_kn_lock_live(of->kn);
- if (!rdtgrp) {
- ret = -ENOENT;
- goto unlock;
+ if (is_cpu_list(of))
+ ret = cpulist_parse(buf, newmask);
+ else
+ ret = cpumask_parse(buf, newmask);
+
+ if (ret)
+ goto out;
+
+ /* check that user didn't specify any offline cpus */
+ cpumask_andnot(tmpmask, newmask, cpu_online_mask);
+ if (cpumask_weight(tmpmask)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Check whether cpus belong to parent ctrl group */
+ cpumask_andnot(tmpmask, newmask, &pr->cpu_mask);
+ if (cpumask_weight(tmpmask)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Check whether cpus are dropped from this group */
+ cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
+ if (cpumask_weight(tmpmask)) {
+ /* Give any dropped cpus to parent rdtgroup */
+ cpumask_or(&pr->cpu_mask, &pr->cpu_mask, tmpmask);
+ update_closid_rmid(tmpmask, pr);
+ }
+
+ /*
+ * If we added cpus, remove them from previous group that owned them
+ * and update per-cpu rmid
+ */
+ cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
+ if (cpumask_weight(tmpmask)) {
+ llist = &pr->crdtgrp_list;
+ list_for_each_entry(cr, llist, crdtgrp_list) {
+ if (cr == rdtgrp)
+ continue;
+ cpumask_andnot(&cr->cpu_mask, &cr->cpu_mask, tmpmask);
+ }
+ update_closid_rmid(tmpmask, rdtgrp);
+ }
+
+ /* Done pushing/pulling - update this group with new mask */
+ cpumask_copy(&rdtgrp->cpu_mask, newmask);
+
+out:
+ free_cpumask_var(tmpmask);
+ free_cpumask_var(newmask);
+
+ return ret ?: nbytes;
+}
+
+static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
+{
+ struct rdtgroup *cr;
+
+ cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
+ /* update the child mon group masks as well*/
+ list_for_each_entry(cr, &r->crdtgrp_list, crdtgrp_list)
+ cpumask_and(&cr->cpu_mask, &r->cpu_mask, &cr->cpu_mask);
+}
+
+static ssize_t cpus_ctrl_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes,
+ struct rdtgroup *rdtgrp)
+{
+ cpumask_var_t tmpmask, newmask, tmpmask1;
+ struct rdtgroup *r, *cr;
+ int ret;
+
+ if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+ return -ENOMEM;
+ if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
+ free_cpumask_var(tmpmask);
+ return -ENOMEM;
+ }
+ if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
+ free_cpumask_var(tmpmask);
+ free_cpumask_var(newmask);
+ return -ENOMEM;
}

if (is_cpu_list(of))
@@ -245,13 +326,13 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
ret = cpumask_parse(buf, newmask);

if (ret)
- goto unlock;
+ goto out;

/* check that user didn't specify any offline cpus */
cpumask_andnot(tmpmask, newmask, cpu_online_mask);
if (cpumask_weight(tmpmask)) {
ret = -EINVAL;
- goto unlock;
+ goto out;
}

/* Check whether cpus are dropped from this group */
@@ -260,12 +341,13 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
/* Can't drop from default group */
if (rdtgrp == &rdtgroup_default) {
ret = -EINVAL;
- goto unlock;
+ goto out;
}
+
/* Give any dropped cpus to rdtgroup_default */
cpumask_or(&rdtgroup_default.cpu_mask,
&rdtgroup_default.cpu_mask, tmpmask);
- rdt_update_closid(tmpmask, &rdtgroup_default.closid);
+ update_closid_rmid(tmpmask, &rdtgroup_default);
}

/*
@@ -277,22 +359,61 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
if (r == rdtgrp)
continue;
- cpumask_andnot(&r->cpu_mask, &r->cpu_mask, tmpmask);
+ cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
+ if (cpumask_weight(tmpmask1))
+ cpumask_rdtgrp_clear(r, tmpmask1);
}
- rdt_update_closid(tmpmask, &rdtgrp->closid);
+ update_closid_rmid(tmpmask, rdtgrp);
}

/* Done pushing/pulling - update this group with new mask */
cpumask_copy(&rdtgrp->cpu_mask, newmask);

-unlock:
- rdtgroup_kn_unlock(of->kn);
+ /*
+ * Update the child mon group masks as well. The child groups
+ * would always have the subset of parent, but any new cpus
+ * to the parent need to be removed from the children.
+ */
+ list_for_each_entry(cr, &rdtgrp->crdtgrp_list, crdtgrp_list) {
+ cpumask_and(tmpmask, &rdtgrp->cpu_mask, &cr->cpu_mask);
+ cpumask_andnot(&cr->cpu_mask, tmpmask, newmask);
+ }
+out:
+ free_cpumask_var(tmpmask1);
free_cpumask_var(tmpmask);
free_cpumask_var(newmask);

return ret ?: nbytes;
}

+static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct rdtgroup *rdtgrp;
+ int ret;
+
+ if (!buf)
+ return -EINVAL;
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (!rdtgrp) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+
+ if (rdtgrp->type == RDTCTRL_GROUP)
+ ret = cpus_ctrl_write(of, buf, nbytes, rdtgrp);
+ else if (rdtgrp->type == RDTMON_GROUP)
+ ret = cpus_mon_write(of, buf, nbytes, rdtgrp);
+ else
+ ret = -EINVAL;
+
+unlock:
+ rdtgroup_kn_unlock(of->kn);
+
+ return ret ?: nbytes;
+}
+
struct task_move_callback {
struct callback_head work;
struct rdtgroup *rdtgrp;
@@ -1102,7 +1223,7 @@ static void rmdir_all_sub(void)
}
/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
get_online_cpus();
- rdt_update_closid(cpu_online_mask, &rdtgroup_default.closid);
+ update_closid_rmid(cpu_online_mask, &rdtgroup_default);
put_online_cpus();

kernfs_remove(kn_info);
@@ -1342,7 +1463,7 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
* task running on them.
*/
cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
- rdt_update_closid(tmpmask, NULL);
+ update_closid_rmid(tmpmask, NULL);

rdtgrp->flags = RDT_DELETED;
closid_free(rdtgrp->closid);
--
1.9.1