[PATCH 08/10] x86/resctrl: Support pseudo-lock regions spanning resources

From: Reinette Chatre
Date: Wed Jun 26 2019 - 13:51:32 EST


Currently cache pseudo-locked regions only consider one cache level but
cache pseudo-locked regions may span multiple cache levels.

In preparation for support of pseudo-locked regions spanning multiple
cache levels pseudo-lock 'portions' are introduced. A 'portion' of a
pseudo-locked region is the portion of a pseudo-locked region that
belongs to a specific resource. Each pseudo-locked portion is identified
with the resource (for example, L2 or L3 cache), the domain (the
specific cache instance), and the capacity bitmask that specifies which
region of the cache is used by the pseudo-locked region.

In support of pseudo-locked regions spanning multiple cache levels a
pseudo-locked region could have multiple 'portions' but in this
introduction only single portions are allowed.

Signed-off-by: Reinette Chatre <reinette.chatre@xxxxxxxxx>
---
arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 26 +++-
arch/x86/kernel/cpu/resctrl/internal.h | 32 ++--
arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 180 ++++++++++++++++------
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 44 ++++--
4 files changed, 211 insertions(+), 71 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index a0383ff80afe..a60fb38a4d20 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -207,7 +207,7 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
* hierarchy.
*/
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
- rdtgroup_pseudo_locked_in_hierarchy(d)) {
+ rdtgroup_pseudo_locked_in_hierarchy(rdtgrp, d)) {
rdt_last_cmd_puts("Pseudo-locked region in hierarchy\n");
return -EINVAL;
}
@@ -282,6 +282,7 @@ static int parse_line(char *line, struct rdt_resource *r,
if (r->parse_ctrlval(&data, r, d))
return -EINVAL;
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+ struct pseudo_lock_portion *p;
/*
* In pseudo-locking setup mode and just
* parsed a valid CBM that should be
@@ -290,9 +291,15 @@ static int parse_line(char *line, struct rdt_resource *r,
* the required initialization for single
* region and return.
*/
- rdtgrp->plr->r = r;
- rdtgrp->plr->d_id = d->id;
- rdtgrp->plr->cbm = d->new_ctrl;
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ rdt_last_cmd_puts("Unable to allocate memory for pseudo-lock portion\n");
+ return -ENOMEM;
+ }
+ p->r = r;
+ p->d_id = d->id;
+ p->cbm = d->new_ctrl;
+ list_add(&p->list, &rdtgrp->plr->portions);
return 0;
}
goto next;
@@ -410,8 +417,11 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
goto out;
}
ret = rdtgroup_parse_resource(resname, tok, rdtgrp);
- if (ret)
+ if (ret) {
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP)
+ pseudo_lock_region_clear(rdtgrp->plr);
goto out;
+ }
}

for_each_alloc_enabled_rdt_resource(r) {
@@ -459,6 +469,7 @@ static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid)
int rdtgroup_schemata_show(struct kernfs_open_file *of,
struct seq_file *s, void *v)
{
+ struct pseudo_lock_portion *p;
struct rdtgroup *rdtgrp;
struct rdt_resource *r;
int ret = 0;
@@ -470,8 +481,9 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
for_each_alloc_enabled_rdt_resource(r)
seq_printf(s, "%s:uninitialized\n", r->name);
} else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
- seq_printf(s, "%s:%d=%x\n", rdtgrp->plr->r->name,
- rdtgrp->plr->d_id, rdtgrp->plr->cbm);
+ list_for_each_entry(p, &rdtgrp->plr->portions, list)
+ seq_printf(s, "%s:%d=%x\n", p->r->name, p->d_id,
+ p->cbm);
} else {
closid = rdtgrp->closid;
for_each_alloc_enabled_rdt_resource(r) {
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 892f38899dda..b041029d4de1 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -145,13 +145,27 @@ struct mongroup {
u32 rmid;
};

+/**
+ * struct pseudo_lock_portion - portion of a pseudo-lock region on one resource
+ * @r: RDT resource to which this pseudo-locked portion
+ * belongs
+ * @d_id: ID of cache instance to which this pseudo-locked portion
+ * belongs
+ * @cbm: bitmask of the pseudo-locked portion
+ * @list: Entry in the list of pseudo-locked portion
+ * belonging to the pseudo-locked region
+ */
+struct pseudo_lock_portion {
+ struct rdt_resource *r;
+ int d_id;
+ u32 cbm;
+ struct list_head list;
+};
+
/**
* struct pseudo_lock_region - pseudo-lock region information
- * @r: RDT resource to which this pseudo-locked region
- * belongs
- * @d_id: ID of cache instance to which this pseudo-locked region
- * belongs
- * @cbm: bitmask of the pseudo-locked region
+ * @portions: list of portions across different resources that
+ * are associated with this pseudo-locked region
* @lock_thread_wq: waitqueue used to wait on the pseudo-locking thread
* completion
* @thread_done: variable used by waitqueue to test if pseudo-locking
@@ -168,9 +182,7 @@ struct mongroup {
* @pm_reqs: Power management QoS requests related to this region
*/
struct pseudo_lock_region {
- struct rdt_resource *r;
- int d_id;
- u32 cbm;
+ struct list_head portions;
wait_queue_head_t lock_thread_wq;
int thread_done;
int cpu;
@@ -569,11 +581,13 @@ bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_resource *r,
struct rdt_domain *d,
unsigned long cbm);
u32 rdtgroup_pseudo_locked_bits(struct rdt_resource *r, struct rdt_domain *d);
-bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d);
+bool rdtgroup_pseudo_locked_in_hierarchy(struct rdtgroup *selfgrp,
+ struct rdt_domain *d);
int rdt_pseudo_lock_init(void);
void rdt_pseudo_lock_release(void);
int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
+void pseudo_lock_region_clear(struct pseudo_lock_region *plr);
struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
int update_domains(struct rdt_resource *r, int closid);
int closids_supported(void);
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 8f20af017f7b..a7fe53447a7e 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -270,28 +270,85 @@ static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr,
*
* Return: void
*/
-static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
+void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
{
+ struct pseudo_lock_portion *p, *tmp;
+
plr->size = 0;
plr->line_size = 0;
kfree(plr->kmem);
plr->kmem = NULL;
- plr->r = NULL;
- plr->d_id = -1;
- plr->cbm = 0;
pseudo_lock_cstates_relax(plr);
+ if (!list_empty(&plr->portions)) {
+ list_for_each_entry_safe(p, tmp, &plr->portions, list) {
+ list_del(&p->list);
+ kfree(p);
+ }
+ }
plr->debugfs_dir = NULL;
}

+/**
+ * pseudo_lock_single_portion_valid - Verify properties of pseudo-lock region
+ * @plr: the main pseudo-lock region
+ * @p: the single portion that makes up the pseudo-locked region
+ *
+ * Verify and initialize properties of the pseudo-locked region.
+ *
+ * Return: -1 if portion of cache unable to be used for pseudo-locking
+ * 0 if portion of cache can be used for pseudo-locking, in
+ * addition the CPU on which pseudo-locking will be performed will
+ * be initialized as well as the size and cache line size of the region
+ */
+static int pseudo_lock_single_portion_valid(struct pseudo_lock_region *plr,
+ struct pseudo_lock_portion *p)
+{
+ struct rdt_domain *d;
+
+ d = rdt_find_domain(p->r, p->d_id, NULL);
+ if (IS_ERR_OR_NULL(d)) {
+ rdt_last_cmd_puts("Cannot find cache domain\n");
+ return -1;
+ }
+
+ plr->cpu = cpumask_first(&d->cpu_mask);
+ if (!cpu_online(plr->cpu)) {
+ rdt_last_cmd_printf("CPU %u not online\n", plr->cpu);
+ goto err_cpu;
+ }
+
+ plr->line_size = get_cache_line_size(plr->cpu, p->r->cache_level);
+ if (plr->line_size == 0) {
+ rdt_last_cmd_puts("Unable to compute cache line length\n");
+ goto err_cpu;
+ }
+
+ if (pseudo_lock_cstates_constrain(plr, &d->cpu_mask)) {
+ rdt_last_cmd_puts("Cannot limit C-states\n");
+ goto err_line;
+ }
+
+ plr->size = rdtgroup_cbm_to_size(p->r, d, p->cbm);
+
+ return 0;
+
+err_line:
+ plr->line_size = 0;
+err_cpu:
+ plr->cpu = 0;
+ return -1;
+}
+
/**
* pseudo_lock_region_init - Initialize pseudo-lock region information
* @plr: pseudo-lock region
*
* Called after user provided a schemata to be pseudo-locked. From the
* schemata the &struct pseudo_lock_region is on entry already initialized
- * with the resource, domain, and capacity bitmask. Here the information
- * required for pseudo-locking is deduced from this data and &struct
- * pseudo_lock_region initialized further. This information includes:
+ * with the resource, domain, and capacity bitmask. Here the
+ * provided data is validated and information required for pseudo-locking
+ * deduced, and &struct pseudo_lock_region initialized further. This
+ * information includes:
* - size in bytes of the region to be pseudo-locked
* - cache line size to know the stride with which data needs to be accessed
* to be pseudo-locked
@@ -303,44 +360,50 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
*/
static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
{
- struct rdt_domain *d;
+ struct rdt_resource *l3_resource = &rdt_resources_all[RDT_RESOURCE_L3];
+ struct pseudo_lock_portion *p;
int ret;

- /* Pick the first cpu we find that is associated with the cache. */
- d = rdt_find_domain(plr->r, plr->d_id, NULL);
- if (IS_ERR_OR_NULL(d)) {
- rdt_last_cmd_puts("Cache domain offline\n");
- ret = -ENODEV;
+ if (list_empty(&plr->portions)) {
+ rdt_last_cmd_puts("No pseudo-lock portions provided\n");
goto out_region;
}

- plr->cpu = cpumask_first(&d->cpu_mask);
-
- if (!cpu_online(plr->cpu)) {
- rdt_last_cmd_printf("CPU %u associated with cache not online\n",
- plr->cpu);
- ret = -ENODEV;
- goto out_region;
+ /* Cache Pseudo-Locking only supported on L2 and L3 resources */
+ list_for_each_entry(p, &plr->portions, list) {
+ if (p->r->rid != RDT_RESOURCE_L2 &&
+ p->r->rid != RDT_RESOURCE_L3) {
+ rdt_last_cmd_puts("Unsupported resource\n");
+ goto out_region;
+ }
}

- plr->line_size = get_cache_line_size(plr->cpu, plr->r->cache_level);
- if (plr->line_size == 0) {
- rdt_last_cmd_puts("Unable to determine cache line size\n");
- ret = -1;
- goto out_region;
+ /*
+ * If only one resource requested to be pseudo-locked then:
+ * - Just a L3 cache portion is valid
+ * - Just a L2 cache portion on system without L3 cache is valid
+ */
+ if (list_is_singular(&plr->portions)) {
+ p = list_first_entry(&plr->portions, struct pseudo_lock_portion,
+ list);
+ if (p->r->rid == RDT_RESOURCE_L3 ||
+ (p->r->rid == RDT_RESOURCE_L2 &&
+ !l3_resource->alloc_capable)) {
+ ret = pseudo_lock_single_portion_valid(plr, p);
+ if (ret < 0)
+ goto out_region;
+ return 0;
+ } else {
+ rdt_last_cmd_puts("Invalid resource or just L2 provided when L3 is required\n");
+ goto out_region;
+ }
+ } else {
+ rdt_last_cmd_puts("Multiple pseudo-lock portions unsupported\n");
}

- plr->size = rdtgroup_cbm_to_size(plr->r, d, plr->cbm);
-
- ret = pseudo_lock_cstates_constrain(plr, &d->cpu_mask);
- if (ret < 0)
- goto out_region;
-
- return 0;
-
out_region:
pseudo_lock_region_clear(plr);
- return ret;
+ return -1;
}

/**
@@ -362,9 +425,9 @@ static int pseudo_lock_init(struct rdtgroup *rdtgrp)
if (!plr)
return -ENOMEM;

+ INIT_LIST_HEAD(&plr->portions);
init_waitqueue_head(&plr->lock_thread_wq);
INIT_LIST_HEAD(&plr->pm_reqs);
- plr->d_id = -1;
rdtgrp->plr = plr;
return 0;
}
@@ -849,6 +912,7 @@ bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_resource *r,

/**
* rdtgroup_pseudo_locked_in_hierarchy - Pseudo-locked region in cache hierarchy
+ * @selfgrp: current resource group testing for overlap
* @d: RDT domain under test
*
* The setup of a pseudo-locked region affects all cache instances within
@@ -860,8 +924,10 @@ bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_resource *r,
* if it is not possible to test due to memory allocation or other
* failure, false otherwise.
*/
-bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
+bool rdtgroup_pseudo_locked_in_hierarchy(struct rdtgroup *selfgrp,
+ struct rdt_domain *d)
{
+ struct pseudo_lock_portion *p;
cpumask_var_t cpu_with_psl;
struct rdtgroup *rdtgrp;
struct rdt_domain *d_i;
@@ -875,15 +941,15 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
* associated with them.
*/
list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
- if (rdtgrp->plr && rdtgrp->plr->d_id >= 0) {
- d_i = rdt_find_domain(rdtgrp->plr->r, rdtgrp->plr->d_id,
- NULL);
+ if (!rdtgrp->plr || rdtgrp == selfgrp)
+ continue;
+ list_for_each_entry(p, &rdtgrp->plr->portions, list) {
+ d_i = rdt_find_domain(p->r, p->d_id, NULL);
if (IS_ERR_OR_NULL(d_i)) {
ret = true;
goto out;
}
- cpumask_or(cpu_with_psl, cpu_with_psl,
- &d_i->cpu_mask);
+ cpumask_or(cpu_with_psl, cpu_with_psl, &d_i->cpu_mask);
}
}

@@ -1200,6 +1266,7 @@ static int measure_l3_residency(void *_plr)
static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
{
struct pseudo_lock_region *plr = rdtgrp->plr;
+ struct pseudo_lock_portion *p;
struct task_struct *thread;
struct rdt_domain *d;
unsigned int cpu;
@@ -1213,7 +1280,16 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
goto out;
}

- d = rdt_find_domain(plr->r, plr->d_id, NULL);
+ /*
+ * Ensure test is run on CPU associated with the pseudo-locked
+ * region. If pseudo-locked region spans L2 and L3, L2 portion
+ * would be first in the list and all CPUs associated with the
+ * L2 cache instance would be associated with pseudo-locked
+ * region.
+ */
+ p = list_first_entry(&plr->portions, struct pseudo_lock_portion, list);
+
+ d = rdt_find_domain(p->r, p->d_id, NULL);
if (IS_ERR_OR_NULL(d)) {
ret = -ENODEV;
goto out;
@@ -1515,6 +1591,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
unsigned long vsize = vma->vm_end - vma->vm_start;
unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
struct pseudo_lock_region *plr;
+ struct pseudo_lock_portion *p;
struct rdtgroup *rdtgrp;
unsigned long physical;
struct rdt_domain *d;
@@ -1531,7 +1608,16 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)

plr = rdtgrp->plr;

- d = rdt_find_domain(plr->r, plr->d_id, NULL);
+ /*
+ * If pseudo-locked region spans one cache level, there will be
+ * only one portion to consider. If pseudo-locked region spans two
+ * cache levels then the L2 cache portion will be the first entry
+ * and a CPU associated with it is what a task is required to be run
+ * on.
+ */
+ p = list_first_entry(&plr->portions, struct pseudo_lock_portion, list);
+
+ d = rdt_find_domain(p->r, p->d_id, NULL);
if (IS_ERR_OR_NULL(d)) {
mutex_unlock(&rdtgroup_mutex);
return -ENODEV;
@@ -1640,15 +1726,17 @@ void rdt_pseudo_lock_release(void)
*/
u32 rdtgroup_pseudo_locked_bits(struct rdt_resource *r, struct rdt_domain *d)
{
+ struct pseudo_lock_portion *p;
struct rdtgroup *rdtgrp;
u32 pseudo_locked = 0;

list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
if (!rdtgrp->plr)
continue;
- if (rdtgrp->plr->r && rdtgrp->plr->r->rid == r->rid &&
- rdtgrp->plr->d_id == d->id)
- pseudo_locked |= rdtgrp->plr->cbm;
+ list_for_each_entry(p, &rdtgrp->plr->portions, list) {
+ if (p->r->rid == r->rid && p->d_id == d->id)
+ pseudo_locked |= p->cbm;
+ }
}

return pseudo_locked;
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index c9070cb4b6a5..6bea91d87883 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -269,17 +269,31 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,

if (rdtgrp) {
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
- d = rdt_find_domain(rdtgrp->plr->r, rdtgrp->plr->d_id,
- NULL);
+ struct pseudo_lock_portion *p;
+
+ /*
+ * User space needs to know all CPUs associated
+ * with the pseudo-locked region. When the
+ * pseudo-locked region spans multiple resources it
+ * is possible that not all CPUs are associated with
+ * all portions of the pseudo-locked region.
+ * Only display CPUs that are associated with _all_
+ * portions of the region. The first portion in the
+ * list will be the L2 cache if the region spans
+ * multiple resource, it is thus only needed to
+ * print CPUs associated with the first portion.
+ */
+ p = list_first_entry(&rdtgrp->plr->portions,
+ struct pseudo_lock_portion, list);
+ d = rdt_find_domain(p->r, p->d_id, NULL);
if (IS_ERR_OR_NULL(d)) {
rdt_last_cmd_clear();
rdt_last_cmd_puts("Cache domain offline\n");
ret = -ENODEV;
- } else {
- seq_printf(s, is_cpu_list(of) ?
- "%*pbl\n" : "%*pb\n",
- cpumask_pr_args(&d->cpu_mask));
+ goto out;
}
+ seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
+ cpumask_pr_args(&d->cpu_mask));
} else {
seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
cpumask_pr_args(&rdtgrp->cpu_mask));
@@ -287,8 +301,9 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
} else {
ret = -ENOENT;
}
- rdtgroup_kn_unlock(of->kn);

+out:
+ rdtgroup_kn_unlock(of->kn);
return ret;
}

@@ -1304,8 +1319,19 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
}

if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
- seq_printf(s, "%*s:", max_name_width, rdtgrp->plr->r->name);
- seq_printf(s, "%d=%u\n", rdtgrp->plr->d_id, rdtgrp->plr->size);
+ struct pseudo_lock_portion *portion;
+
+ /*
+ * While the portions of the L2 and L3 caches allocated for a
+ * pseudo-locked region may be different, the size used for
+ * the pseudo-locked region is the same.
+ */
+ list_for_each_entry(portion, &rdtgrp->plr->portions, list) {
+ seq_printf(s, "%*s:", max_name_width,
+ portion->r->name);
+ seq_printf(s, "%d=%u\n", portion->d_id,
+ rdtgrp->plr->size);
+ }
goto out;
}

--
2.17.2