[PATCH v2 02/18] x86/resctrl: Access per-rmid structures by index

From: James Morse
Date: Fri Jan 13 2023 - 13:02:19 EST


Because of the differences between Intel RDT/AMD QoS and Arm's MPAM
monitors, RMID values on arm64 are not unique unless the CLOSID is
also included. Bitmaps like rmid_busy_llc need to be sized by the
number of unique entries for this resource.

Add helpers to encode/decode the CLOSID and RMID to an index. The
domain's busy_rmid_llc and the rmid_ptrs[] array are then sized by
index. On x86, this is always just the RMID. This gives resctrl a
unique value it can use to store monitor values, and allows MPAM to
decode the closid when reading the hardware counters.

Tested-by: Shaopeng Tan <tan.shaopeng@xxxxxxxxxxx>
Signed-off-by: James Morse <james.morse@xxxxxxx>
---
Changes since v1:
* Added X86_BAD_CLOSID macro to make it clear what this value means
* Added second WARN_ON() for closid checking, and made both _ONCE()
---
arch/x86/include/asm/resctrl.h | 24 ++++++++
arch/x86/kernel/cpu/resctrl/internal.h | 2 +
arch/x86/kernel/cpu/resctrl/monitor.c | 79 +++++++++++++++++---------
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 7 ++-
4 files changed, 82 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index 52788f79786f..44d568f3577c 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -7,6 +7,13 @@
#include <linux/sched.h>
#include <linux/jump_label.h>

+/*
+ * This value can never be a valid CLOSID, and is used when mapping a
+ * (closid, rmid) pair to an index and back. On x86 only the RMID is
+ * needed.
+ */
+#define X86_RESCTRL_BAD_CLOSID ~0
+
/**
* struct resctrl_pqr_state - State cache for the PQR MSR
* @cur_rmid: The cached Resource Monitoring ID
@@ -94,6 +101,23 @@ static inline void resctrl_sched_in(void)
__resctrl_sched_in();
}

+static inline u32 resctrl_arch_system_num_rmid_idx(void)
+{
+ /* RMID are independent numbers for x86. num_rmid_idx==num_rmid */
+ return boot_cpu_data.x86_cache_max_rmid + 1;
+}
+
+static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
+{
+ *rmid = idx;
+ *closid = X86_RESCTRL_BAD_CLOSID;
+}
+
+static inline u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid)
+{
+ return rmid;
+}
+
void resctrl_cpu_detect(struct cpuinfo_x86 *c);

#else
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 5dbff3035723..af71401c57e2 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -8,6 +8,8 @@
#include <linux/fs_context.h>
#include <linux/jump_label.h>

+#include <asm/resctrl.h>
+
#define L3_QOS_CDP_ENABLE 0x01ULL

#define L2_QOS_CDP_ENABLE 0x01ULL
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 13673cab175a..dbae380e3d1c 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -142,12 +142,27 @@ static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
return val;
}

-static inline struct rmid_entry *__rmid_entry(u32 closid, u32 rmid)
+/*
+ * x86 and arm64 differ in their handling of monitoring.
+ * x86's RMID are an independent number, there is one RMID '1'.
+ * arm64's PMG extend the PARTID/CLOSID space, there is one RMID '1' for each
+ * CLOSID. The RMID is no longer unique.
+ * To account for this, resctrl uses an index. On x86 this is just the RMID,
+ * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
+ *
+ * The domain's rmid_busy_llc and rmid_ptrs are sized by index. The arch code
+ * must accept an attempt to read every index.
+ */
+static inline struct rmid_entry *__rmid_entry(u32 idx)
{
struct rmid_entry *entry;
+ u32 closid, rmid;

- entry = &rmid_ptrs[rmid];
- WARN_ON(entry->rmid != rmid);
+ entry = &rmid_ptrs[idx];
+ resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
+
+ WARN_ON_ONCE(entry->closid != closid);
+ WARN_ON_ONCE(entry->rmid != rmid);

return entry;
}
@@ -243,8 +258,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
void __check_limbo(struct rdt_domain *d, bool force_free)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
struct rmid_entry *entry;
- u32 crmid = 1, nrmid;
+ u32 idx, cur_idx = 1;
bool rmid_dirty;
u64 val = 0;

@@ -255,12 +271,11 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
* RMID and move it to the free list when the counter reaches 0.
*/
for (;;) {
- nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid, crmid);
- if (nrmid >= r->num_rmid)
+ idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
+ if (idx >= idx_limit)
break;

- entry = __rmid_entry(~0, nrmid); // temporary
-
+ entry = __rmid_entry(idx);
if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
QOS_L3_OCCUP_EVENT_ID, &val)) {
rmid_dirty = true;
@@ -269,19 +284,21 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
}

if (force_free || !rmid_dirty) {
- clear_bit(entry->rmid, d->rmid_busy_llc);
+ clear_bit(idx, d->rmid_busy_llc);
if (!--entry->busy) {
rmid_limbo_count--;
list_add_tail(&entry->list, &rmid_free_lru);
}
}
- crmid = nrmid + 1;
+ cur_idx = idx + 1;
}
}

bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
{
- return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
+
+ return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
}

/*
@@ -311,6 +328,9 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
struct rdt_domain *d;
int cpu, err;
u64 val = 0;
+ u32 idx;
+
+ idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);

entry->busy = 0;
cpu = get_cpu();
@@ -330,7 +350,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
*/
if (!has_busy_rmid(r, d))
cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
- set_bit(entry->rmid, d->rmid_busy_llc);
+ set_bit(idx, d->rmid_busy_llc);
entry->busy++;
}
put_cpu();
@@ -343,14 +363,16 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)

void free_rmid(u32 closid, u32 rmid)
{
+ u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
struct rmid_entry *entry;

- if (!rmid)
- return;
-
lockdep_assert_held(&rdtgroup_mutex);

- entry = __rmid_entry(closid, rmid);
+ /* do not allow the default rmid to be free'd */
+ if (!idx)
+ return;
+
+ entry = __rmid_entry(idx);

if (is_llc_occupancy_enabled())
add_rmid_to_limbo(entry);
@@ -360,6 +382,7 @@ void free_rmid(u32 closid, u32 rmid)

static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
+ u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
struct mbm_state *m;
u64 tval = 0;

@@ -376,10 +399,10 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
rr->val += tval;
return 0;
case QOS_L3_MBM_TOTAL_EVENT_ID:
- m = &rr->d->mbm_total[rmid];
+ m = &rr->d->mbm_total[idx];
break;
case QOS_L3_MBM_LOCAL_EVENT_ID:
- m = &rr->d->mbm_local[rmid];
+ m = &rr->d->mbm_local[idx];
break;
default:
/*
@@ -412,7 +435,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
*/
static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
- struct mbm_state *m = &rr->d->mbm_local[rmid];
+ u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+ struct mbm_state *m = &rr->d->mbm_local[idx];
u64 cur_bw, bytes, cur_bytes;

cur_bytes = rr->val;
@@ -502,7 +526,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
{
u32 closid, rmid, cur_msr_val, new_msr_val;
struct mbm_state *pmbm_data, *cmbm_data;
- u32 cur_bw, delta_bw, user_bw;
+ u32 cur_bw, delta_bw, user_bw, idx;
struct rdt_resource *r_mba;
struct rdt_domain *dom_mba;
struct list_head *head;
@@ -515,7 +539,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)

closid = rgrp->closid;
rmid = rgrp->mon.rmid;
- pmbm_data = &dom_mbm->mbm_local[rmid];
+ idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+ pmbm_data = &dom_mbm->mbm_local[idx];

dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
if (!dom_mba) {
@@ -698,19 +723,19 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)

static int dom_data_init(struct rdt_resource *r)
{
+ u32 nr_idx = resctrl_arch_system_num_rmid_idx();
struct rmid_entry *entry = NULL;
- int i, nr_rmids;
+ int i;

- nr_rmids = r->num_rmid;
- rmid_ptrs = kcalloc(nr_rmids, sizeof(struct rmid_entry), GFP_KERNEL);
+ rmid_ptrs = kcalloc(nr_idx, sizeof(struct rmid_entry), GFP_KERNEL);
if (!rmid_ptrs)
return -ENOMEM;

- for (i = 0; i < nr_rmids; i++) {
+ for (i = 0; i < nr_idx; i++) {
entry = &rmid_ptrs[i];
INIT_LIST_HEAD(&entry->list);

- entry->rmid = i;
+ resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
list_add_tail(&entry->list, &rmid_free_lru);
}

@@ -719,7 +744,7 @@ static int dom_data_init(struct rdt_resource *r)
* default_rdtgroup control group, which will be setup later. See
* rdtgroup_setup_root().
*/
- entry = __rmid_entry(0, 0);
+ entry = __rmid_entry(resctrl_arch_rmid_idx_encode(0, 0));
list_del(&entry->list);

return 0;
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index f3b739c52e42..9ce4746778f4 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -3320,16 +3320,17 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)

static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
{
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
size_t tsize;

if (is_llc_occupancy_enabled()) {
- d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL);
+ d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
if (!d->rmid_busy_llc)
return -ENOMEM;
}
if (is_mbm_total_enabled()) {
tsize = sizeof(*d->mbm_total);
- d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
+ d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
if (!d->mbm_total) {
bitmap_free(d->rmid_busy_llc);
return -ENOMEM;
@@ -3337,7 +3338,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
}
if (is_mbm_local_enabled()) {
tsize = sizeof(*d->mbm_local);
- d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
+ d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
if (!d->mbm_local) {
bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
--
2.30.2