[PATCH 02/18] x86/resctrl: Access per-rmid structures by index
From: James Morse
Date: Fri Oct 21 2022 - 09:12:57 EST
Because of the differences between Intel RDT/AMD QoS and Arm's MPAM
monitors, RMID values on arm64 are not unique unless the CLOSID is
also included. Bitmaps like rmid_busy_llc need to be sized by the
number of unique entries for this resource.
Add helpers to encode/decode the CLOSID and RMID to an index. The
domain's busy_rmid_llc and the rmid_ptrs[] array are then sized by
index. On x86, this is always just the RMID. This gives resctrl a
unique value it can use to store monitor values, and allows MPAM to
decode the closid when reading the hardware counters.
Signed-off-by: James Morse <james.morse@xxxxxxx>
---
arch/x86/include/asm/resctrl.h | 17 ++++++
arch/x86/kernel/cpu/resctrl/internal.h | 2 +
arch/x86/kernel/cpu/resctrl/monitor.c | 75 +++++++++++++++++---------
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 7 +--
4 files changed, 72 insertions(+), 29 deletions(-)
diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index d24b04ebf950..523eabfa3193 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -96,6 +96,23 @@ static inline void resctrl_sched_in(void)
__resctrl_sched_in();
}
+static inline u32 resctrl_arch_system_num_rmid_idx(void)
+{
+ /* RMID are independent numbers for x86. num_rmid_idx==num_rmid */
+ return boot_cpu_data.x86_cache_max_rmid + 1;
+}
+
+static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
+{
+ *rmid = idx;
+ *closid = ~0;
+}
+
+static inline u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid)
+{
+ return rmid;
+}
+
void resctrl_cpu_detect(struct cpuinfo_x86 *c);
#else
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 4b243ba88882..cb94c3e3fe36 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -8,6 +8,8 @@
#include <linux/fs_context.h>
#include <linux/jump_label.h>
+#include <asm/resctrl.h>
+
#define MSR_IA32_L3_QOS_CFG 0xc81
#define MSR_IA32_L2_QOS_CFG 0xc82
#define MSR_IA32_L3_CBM_BASE 0xc90
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index f1f66c9942a5..c95d259476d4 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -137,11 +137,24 @@ static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
return val;
}
-static inline struct rmid_entry *__rmid_entry(u32 closid, u32 rmid)
+/*
+ * x86 and arm64 differ in their handling of monitoring.
+ * x86's RMID are an independent number, there is one RMID '1'.
+ * arm64's PMG extend the PARTID/CLOSID space, there is one RMID '1' for each
+ * CLOSID. The RMID is no longer unique.
+ * To account for this, resctrl uses an index. On x86 this is just the RMID,
+ * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
+ *
+ * The domain's rmid_busy_llc and rmid_ptrs are sized by index. The arch code
+ * must accept an attempt to read every index.
+ */
+static inline struct rmid_entry *__rmid_entry(u32 idx)
{
struct rmid_entry *entry;
+ u32 closid, rmid;
- entry = &rmid_ptrs[rmid];
+ entry = &rmid_ptrs[idx];
+ resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
WARN_ON(entry->rmid != rmid);
return entry;
@@ -238,8 +251,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
void __check_limbo(struct rdt_domain *d, bool force_free)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
struct rmid_entry *entry;
- u32 crmid = 1, nrmid;
+ u32 idx, cur_idx = 1;
bool rmid_dirty;
u64 val = 0;
@@ -250,12 +264,11 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
* RMID and move it to the free list when the counter reaches 0.
*/
for (;;) {
- nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid, crmid);
- if (nrmid >= r->num_rmid)
+ idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
+ if (idx >= idx_limit)
break;
- entry = __rmid_entry(~0, nrmid); // temporary
-
+ entry = __rmid_entry(idx);
if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
QOS_L3_OCCUP_EVENT_ID, &val)) {
rmid_dirty = true;
@@ -264,19 +277,21 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
}
if (force_free || !rmid_dirty) {
- clear_bit(entry->rmid, d->rmid_busy_llc);
+ clear_bit(idx, d->rmid_busy_llc);
if (!--entry->busy) {
rmid_limbo_count--;
list_add_tail(&entry->list, &rmid_free_lru);
}
}
- crmid = nrmid + 1;
+ cur_idx = idx + 1;
}
}
bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
{
- return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
+
+ return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
}
/*
@@ -306,6 +321,9 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
struct rdt_domain *d;
int cpu, err;
u64 val = 0;
+ u32 idx;
+
+ idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
entry->busy = 0;
cpu = get_cpu();
@@ -325,7 +343,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
*/
if (!has_busy_rmid(r, d))
cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
- set_bit(entry->rmid, d->rmid_busy_llc);
+ set_bit(idx, d->rmid_busy_llc);
entry->busy++;
}
put_cpu();
@@ -338,14 +356,16 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
void free_rmid(u32 closid, u32 rmid)
{
+ u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
struct rmid_entry *entry;
- if (!rmid)
- return;
-
lockdep_assert_held(&rdtgroup_mutex);
- entry = __rmid_entry(closid, rmid);
+ /* do not allow the default rmid to be free'd */
+ if (!idx)
+ return;
+
+ entry = __rmid_entry(idx);
if (is_llc_occupancy_enabled())
add_rmid_to_limbo(entry);
@@ -355,6 +375,7 @@ void free_rmid(u32 closid, u32 rmid)
static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
+ u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
struct mbm_state *m;
u64 tval = 0;
@@ -371,10 +392,10 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
rr->val += tval;
return 0;
case QOS_L3_MBM_TOTAL_EVENT_ID:
- m = &rr->d->mbm_total[rmid];
+ m = &rr->d->mbm_total[idx];
break;
case QOS_L3_MBM_LOCAL_EVENT_ID:
- m = &rr->d->mbm_local[rmid];
+ m = &rr->d->mbm_local[idx];
break;
default:
/*
@@ -407,7 +428,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
*/
static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
- struct mbm_state *m = &rr->d->mbm_local[rmid];
+ u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+ struct mbm_state *m = &rr->d->mbm_local[idx];
u64 cur_bw, bytes, cur_bytes;
cur_bytes = rr->val;
@@ -497,7 +519,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
{
u32 closid, rmid, cur_msr_val, new_msr_val;
struct mbm_state *pmbm_data, *cmbm_data;
- u32 cur_bw, delta_bw, user_bw;
+ u32 cur_bw, delta_bw, user_bw, idx;
struct rdt_resource *r_mba;
struct rdt_domain *dom_mba;
struct list_head *head;
@@ -510,7 +532,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
closid = rgrp->closid;
rmid = rgrp->mon.rmid;
- pmbm_data = &dom_mbm->mbm_local[rmid];
+ idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+ pmbm_data = &dom_mbm->mbm_local[idx];
dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
if (!dom_mba) {
@@ -693,19 +716,19 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
static int dom_data_init(struct rdt_resource *r)
{
+ u32 nr_idx = resctrl_arch_system_num_rmid_idx();
struct rmid_entry *entry = NULL;
- int i, nr_rmids;
+ int i;
- nr_rmids = r->num_rmid;
- rmid_ptrs = kcalloc(nr_rmids, sizeof(struct rmid_entry), GFP_KERNEL);
+ rmid_ptrs = kcalloc(nr_idx, sizeof(struct rmid_entry), GFP_KERNEL);
if (!rmid_ptrs)
return -ENOMEM;
- for (i = 0; i < nr_rmids; i++) {
+ for (i = 0; i < nr_idx; i++) {
entry = &rmid_ptrs[i];
INIT_LIST_HEAD(&entry->list);
- entry->rmid = i;
+ resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
list_add_tail(&entry->list, &rmid_free_lru);
}
@@ -714,7 +737,7 @@ static int dom_data_init(struct rdt_resource *r)
* default_rdtgroup control group, which will be setup later. See
* rdtgroup_setup_root().
*/
- entry = __rmid_entry(0, 0);
+ entry = __rmid_entry(resctrl_arch_rmid_idx_encode(0, 0));
list_del(&entry->list);
return 0;
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index f3b739c52e42..9ce4746778f4 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -3320,16 +3320,17 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
{
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
size_t tsize;
if (is_llc_occupancy_enabled()) {
- d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL);
+ d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
if (!d->rmid_busy_llc)
return -ENOMEM;
}
if (is_mbm_total_enabled()) {
tsize = sizeof(*d->mbm_total);
- d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
+ d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
if (!d->mbm_total) {
bitmap_free(d->rmid_busy_llc);
return -ENOMEM;
@@ -3337,7 +3338,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
}
if (is_mbm_local_enabled()) {
tsize = sizeof(*d->mbm_local);
- d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
+ d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
if (!d->mbm_local) {
bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
--
2.30.2