[PATCH 13/32] perf/x86/intel/cqm: add polled update of RMID's llc_occupancy
From: David Carrillo-Cisneros
Date: Fri Apr 29 2016 - 00:49:04 EST
To avoid IPIs from IRQ-disabled contexts, the occupancy for a RMID in a
remote package (a package other than the one the current cpu belongs to) is
obtained from a cache that is periodically updated.
This removes the need for an IPI when reading occupancy for a task event,
which was the reason to add the problematic pmu::count and dummy
perf_event_read() in the previous CQM version.
The occupancy of all active prmids is updated every
__rmid_timed_update_period ms.
To avoid holding raw_spin_locks on the prmid hierarchy for too long, the
raw rmids to be read are copied to a temporary array list. The array list
is then consumed to perform the wrmsrl and rdmsrl that each RMID requires to
read its llc_occupancy.
This decoupling of traversing the RMID hierarchy and reading occupancy is
especially useful due to the high latency of the wrmsrl and rdmsrl for the
llc_occupancy event (thousands of cycles in my test machine).
To avoid unnecessary memory allocations, the objects used to temporarily
store RMIDs are pooled in a per-package list and allocated on demand.
The infrastructure introduced in this patch will be used in future patches
in this series to perform reads on subtrees of a prmid hierarchy.
Reviewed-by: Stephane Eranian <eranian@xxxxxxxxxx>
Signed-off-by: David Carrillo-Cisneros <davidcc@xxxxxxxxxx>
---
arch/x86/events/intel/cqm.c | 251 +++++++++++++++++++++++++++++++++++++++++++-
arch/x86/events/intel/cqm.h | 36 +++++++
2 files changed, 286 insertions(+), 1 deletion(-)
diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 31f0fd6..904f2d3 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -189,6 +189,8 @@ static inline bool __valid_pkg_id(u16 pkg_id)
return pkg_id < PQR_MAX_NR_PKGS;
}
+static int anode_pool__alloc_one(u16 pkg_id);
+
/* Init cqm pkg_data for @cpu 's package. */
static int pkg_data_init_cpu(int cpu)
{
@@ -241,11 +243,19 @@ static int pkg_data_init_cpu(int cpu)
mutex_init(&pkg_data->pkg_data_mutex);
raw_spin_lock_init(&pkg_data->pkg_data_lock);
+ INIT_LIST_HEAD(&pkg_data->anode_pool_head);
+ raw_spin_lock_init(&pkg_data->anode_pool_lock);
+
INIT_DELAYED_WORK(
&pkg_data->rotation_work, intel_cqm_rmid_rotation_work);
/* XXX: Chose randomly*/
pkg_data->rotation_cpu = cpu;
+ INIT_DELAYED_WORK(
+ &pkg_data->timed_update_work, intel_cqm_timed_update_work);
+ /* XXX: Chose randomly*/
+ pkg_data->timed_update_cpu = cpu;
+
cqm_pkgs_data[pkg_id] = pkg_data;
return 0;
}
@@ -744,6 +754,189 @@ static void monr_dealloc(struct monr *monr)
}
/*
+ * Logic for reading sets of rmids into per-package lists.
+ * These package lists can be used to update occupancies without
+ * holding locks in the hierarchies of pmonrs.
+ *
+ * struct astack: stack of rmids stored in a chain of anodes.
+ * @pool: free anodes held by this astack, refilled from the package pool.
+ * @items: anodes holding pushed entries; the tail node is the one in use.
+ * @top_idx: slot of the last pushed entry in the tail node of @items,
+ *           -1 when nothing has been pushed.
+ * @max_idx: highest slot index usable in an anode.
+ * @pkg_id: package whose anode pool backs this astack.
+ */
+struct astack {
+ struct list_head pool;
+ struct list_head items;
+ int top_idx;
+ int max_idx;
+ u16 pkg_id;
+};
+
+/*
+ * Set up @astack as an empty stack backed by the anode pool of @pkg_id.
+ * @max_idx is the highest slot index that may be used inside one anode.
+ */
+static void astack__init(struct astack *astack, int max_idx, u16 pkg_id)
+{
+	astack->pkg_id = pkg_id;
+	astack->max_idx = max_idx;
+	/* -1 means no entry has been pushed yet. */
+	astack->top_idx = -1;
+	INIT_LIST_HEAD(&astack->pool);
+	INIT_LIST_HEAD(&astack->items);
+}
+
+/*
+ * Move one anode from the pool of astack's package into astack->pool.
+ * Returns 0 when a node was moved, -1 when the package pool is empty.
+ */
+static int astack__try_add_pool(struct astack *astack)
+{
+	struct pkg_data *pd = cqm_pkgs_data[astack->pkg_id];
+	struct list_head *src = &pd->anode_pool_head;
+	unsigned long flags;
+	int ret;
+
+	raw_spin_lock_irqsave(&pd->anode_pool_lock, flags);
+	if (list_empty(src)) {
+		ret = -1;
+	} else {
+		/* Take the node at the tail of the package pool. */
+		list_move_tail(src->prev, &astack->pool);
+		ret = 0;
+	}
+	raw_spin_unlock_irqrestore(&pd->anode_pool_lock, flags);
+
+	return ret;
+}
+
+/*
+ * Reserve the next slot of @astack; the caller then fills it through
+ * __astack__top(). A fresh anode is taken from astack->pool (refilled from
+ * the package pool if empty) when there is no tail node or it is full.
+ * Returns 0 on success, -1 when no anode is available; on failure the
+ * astack is left untouched.
+ */
+static int astack__push(struct astack *astack)
+{
+	bool need_node = list_empty(&astack->items) ||
+			 astack->top_idx >= astack->max_idx;
+
+	if (!need_node) {
+		astack->top_idx++;
+		return 0;
+	}
+
+	if (list_empty(&astack->pool) && astack__try_add_pool(astack))
+		return -1;
+
+	/* Start filling a new tail node from its first slot. */
+	list_move_tail(astack->pool.prev, &astack->items);
+	astack->top_idx = 0;
+	return 0;
+}
+
+/*
+ * Lvalue for slot top_idx of array @member_ in the tail anode of
+ * @astack_->items. The astack must be non-empty, i.e. at least one
+ * astack__push() must have succeeded since the last clear.
+ */
+# define __astack__top(astack_, member_) \
+ list_last_entry(&(astack_)->items, \
+ struct anode, entry)->member_[(astack_)->top_idx]
+
+/* Drop all pushed entries: every used anode goes back to astack->pool. */
+static void astack__clear(struct astack *astack)
+{
+	astack->top_idx = -1;
+	list_splice_tail_init(&astack->items, &astack->pool);
+}
+
+/*
+ * Empty @astack and return all of its anodes to the anode pool of its
+ * package, under anode_pool_lock.
+ */
+static void astack__release(struct astack *astack)
+{
+	struct pkg_data *pd = cqm_pkgs_data[astack->pkg_id];
+	unsigned long flags;
+
+	/* Gather used nodes into astack->pool so one splice returns them all. */
+	astack__clear(astack);
+
+	raw_spin_lock_irqsave(&pd->anode_pool_lock, flags);
+	list_splice_tail_init(&astack->pool, &pd->anode_pool_head);
+	raw_spin_unlock_irqrestore(&pd->anode_pool_lock, flags);
+}
+
+/*
+ * Allocate one anode with GFP_KERNEL on the memory node of the package's
+ * rotation_cpu and append it to the anode pool of @pkg_id.
+ * Returns 0 on success, -ENOMEM if the allocation fails.
+ */
+static int anode_pool__alloc_one(u16 pkg_id)
+{
+	struct pkg_data *pd = cqm_pkgs_data[pkg_id];
+	int node = cpu_to_node(pd->rotation_cpu);
+	struct anode *new_node;
+	unsigned long flags;
+
+	new_node = kmalloc_node(sizeof(*new_node), GFP_KERNEL, node);
+	if (!new_node)
+		return -ENOMEM;
+
+	raw_spin_lock_irqsave(&pd->anode_pool_lock, flags);
+	list_add_tail(&new_node->entry, &pd->anode_pool_head);
+	raw_spin_unlock_irqrestore(&pd->anode_pool_lock, flags);
+
+	return 0;
+}
+
+/*
+ * Non-zero when slot @idx of @anode lies past the last pushed entry, i.e.
+ * @anode is the tail node of astack->items and @idx is above top_idx.
+ */
+static int astack__end(struct astack *astack, struct anode *anode, int idx)
+{
+	/* Nodes before the tail are not limited by top_idx. */
+	if (!list_is_last(&anode->entry, &astack->items))
+		return 0;
+
+	return idx > astack->top_idx;
+}
+
+/*
+ * Callback for astack__rmids_sum_apply(): update @prmid through
+ * cqm_prmid_update() and, unless that returned a negative value, store
+ * prmid->last_read_value in @val. Returns cqm_prmid_update()'s result.
+ */
+static int __rmid_fn__cqm_prmid_update(struct prmid *prmid, u64 *val)
+{
+	int ret = cqm_prmid_update(prmid);
+
+	if (ret < 0)
+		return ret;
+
+	*val = atomic64_read(&prmid->last_read_value);
+	return ret;
+}
+
+/*
+ * Apply @fn to the prmid of every rmid stored in @astack and sum the results.
+ * @astack: stack holding the raw rmids to visit.
+ * @pkg_id: package whose prmids_by_rmid table maps each rmid to its prmid.
+ * @fn:     callback run on each prmid; a negative return is an error,
+ *          otherwise the value it stored through its second argument is
+ *          added to @total.
+ * @total:  output, sum of the values reported by the successful calls.
+ *
+ * A failing element does not stop the walk. Returns the first error seen,
+ * zero otherwise.
+ */
+static int astack__rmids_sum_apply(
+	struct astack *astack,
+	u16 pkg_id, int (*fn)(struct prmid *, u64 *), u64 *total)
+{
+	struct prmid *prmid;
+	struct anode *anode;
+	u32 rmid;
+	int i, ret, first_error = 0;
+	u64 count;
+
+	*total = 0;
+
+	list_for_each_entry(anode, &astack->items, entry) {
+		for (i = 0; i <= astack->max_idx; i++) {
+			/* The tail node is only filled up to slot top_idx. */
+			if (astack__end(astack, anode, i))
+				break;
+			rmid = anode->rmids[i];
+			prmid = cqm_pkgs_data[pkg_id]->prmids_by_rmid[rmid];
+			/*
+			 * A missing prmid is a bug: warn, record an error and
+			 * skip it rather than hand a NULL pointer to @fn.
+			 */
+			if (WARN_ON_ONCE(!prmid)) {
+				if (!first_error)
+					first_error = -EINVAL;
+				continue;
+			}
+			ret = fn(prmid, &count);
+			if (ret < 0) {
+				if (!first_error)
+					first_error = ret;
+				continue;
+			}
+			*total += count;
+		}
+	}
+	return first_error;
+}
+
+/*
+ * Refresh the cached occupancy of every prmid in @pkg_id's
+ * astate_pmonrs_lru.
+ *
+ * The rmids are first copied into an astack while holding pkg_data_lock and
+ * are read only after the lock is dropped. If the astack runs out of anodes
+ * during the copy, the lock is released, one more anode is allocated and
+ * the copy restarts from scratch.
+ *
+ * The original note says this does not need the mutex, since traversing
+ * astate_pmonrs_lru is protected by locks and prmids are updated
+ * atomically. NOTE(review): the caller in this patch takes pkg_data_mutex
+ * anyway.
+ *
+ * Returns 0 on success, the error from anode_pool__alloc_one() if the
+ * astack cannot be grown, or the first error reported while reading.
+ */
+static int update_rmids_in_astate_pmonrs_lru(u16 pkg_id)
+{
+	struct astack astack;
+	struct pkg_data *pkg_data;
+	struct pmonr *pmonr;
+	int ret = 0;
+	unsigned long flags;
+	u64 count;
+
+	astack__init(&astack, NR_RMIDS_PER_NODE - 1, pkg_id);
+	pkg_data = cqm_pkgs_data[pkg_id];
+
+retry:
+	if (ret) {
+		/*
+		 * Give up when the pool cannot grow; retrying without a new
+		 * anode would fail the same way forever.
+		 */
+		ret = anode_pool__alloc_one(pkg_id);
+		if (ret)
+			goto out;
+	}
+	raw_spin_lock_irqsave_nested(&pkg_data->pkg_data_lock, flags, pkg_id);
+	list_for_each_entry(pmonr,
+			    &pkg_data->astate_pmonrs_lru, rotation_entry) {
+		ret = astack__push(&astack);
+		if (ret)
+			break;
+		__astack__top(&astack, rmids) = pmonr->prmid->rmid;
+	}
+	raw_spin_unlock_irqrestore(&pkg_data->pkg_data_lock, flags);
+	if (ret) {
+		/* Partial copy: discard it and start over with a larger pool. */
+		astack__clear(&astack);
+		goto retry;
+	}
+	/* count is not used. */
+	ret = astack__rmids_sum_apply(&astack, pkg_id,
+				      &__rmid_fn__cqm_prmid_update, &count);
+out:
+	astack__release(&astack);
+	return ret;
+}
+
+/*
* Wrappers for monr manipulation in events.
*
*/
@@ -1532,6 +1725,17 @@ exit:
mutex_unlock(&pkg_data->pkg_data_mutex);
}
+/*
+ * Run one occupancy refresh for @pkg_id while holding its pkg_data_mutex.
+ * A non-zero result from the refresh triggers a one-time warning.
+ */
+static void
+__intel_cqm_timed_update(u16 pkg_id)
+{
+	struct mutex *lock = &cqm_pkgs_data[pkg_id]->pkg_data_mutex;
+	int err;
+
+	mutex_lock_nested(lock, pkg_id);
+	err = update_rmids_in_astate_pmonrs_lru(pkg_id);
+	mutex_unlock(lock);
+
+	WARN_ON_ONCE(err);
+}
+
static struct pmu intel_cqm_pmu;
/* Rotation only needs to be run when there is any pmonr in (I)state. */
@@ -1554,6 +1758,22 @@ static bool intel_cqm_need_rotation(u16 pkg_id)
return need_rot;
}
+/*
+ * Tell whether @pkg_id still needs periodic updates: true when its
+ * active_prmids_pool is non-empty. Checked under pkg_data_mutex.
+ */
+static bool intel_cqm_need_timed_update(u16 pkg_id)
+{
+	struct pkg_data *pd = cqm_pkgs_data[pkg_id];
+	bool active;
+
+	mutex_lock_nested(&pd->pkg_data_mutex, pkg_id);
+	/* An update is needed if there is any active prmid. */
+	active = !list_empty(&pd->active_prmids_pool);
+	mutex_unlock(&pd->pkg_data_mutex);
+
+	return active;
+}
+
/*
* Schedule rotation in one package.
*/
@@ -1568,6 +1788,19 @@ static void __intel_cqm_schedule_rotation_for_pkg(u16 pkg_id)
pkg_data->rotation_cpu, &pkg_data->rotation_work, delay);
}
+/*
+ * Queue @pkg_id's timed_update_work on its timed_update_cpu, to run after
+ * __rmid_timed_update_period milliseconds.
+ */
+static void __intel_cqm_schedule_timed_update_for_pkg(u16 pkg_id)
+{
+	unsigned long delay = msecs_to_jiffies(__rmid_timed_update_period);
+	struct pkg_data *pd = cqm_pkgs_data[pkg_id];
+
+	schedule_delayed_work_on(pd->timed_update_cpu,
+				 &pd->timed_update_work, delay);
+}
+
+
/*
* Schedule rotation and rmid's timed update in all packages.
* Reescheduling will stop when no longer needed.
@@ -1576,8 +1809,10 @@ static void intel_cqm_schedule_work_all_pkgs(void)
{
int pkg_id;
- cqm_pkg_id_for_each_online(pkg_id)
+ cqm_pkg_id_for_each_online(pkg_id) {
__intel_cqm_schedule_rotation_for_pkg(pkg_id);
+ __intel_cqm_schedule_timed_update_for_pkg(pkg_id);
+ }
}
static void intel_cqm_rmid_rotation_work(struct work_struct *work)
@@ -1598,6 +1833,20 @@ static void intel_cqm_rmid_rotation_work(struct work_struct *work)
__intel_cqm_schedule_rotation_for_pkg(pkg_id);
}
+/*
+ * Delayed-work handler for the periodic occupancy update of one package.
+ * The package is found from the pkg_data embedding @work; the work
+ * re-queues itself for as long as intel_cqm_need_timed_update() is true.
+ */
+static void intel_cqm_timed_update_work(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct pkg_data *pd =
+		container_of(dwork, struct pkg_data, timed_update_work);
+	u16 pkg_id = topology_physical_package_id(pd->timed_update_cpu);
+
+	/* timed_update_cpu is expected to belong to this pkg_data's package. */
+	WARN_ON_ONCE(pd != cqm_pkgs_data[pkg_id]);
+
+	__intel_cqm_timed_update(pkg_id);
+
+	if (!intel_cqm_need_timed_update(pkg_id))
+		return;
+
+	__intel_cqm_schedule_timed_update_for_pkg(pkg_id);
+}
+
/*
* Find a group and setup RMID.
*
diff --git a/arch/x86/events/intel/cqm.h b/arch/x86/events/intel/cqm.h
index b0e1698..25646a2 100644
--- a/arch/x86/events/intel/cqm.h
+++ b/arch/x86/events/intel/cqm.h
@@ -45,6 +45,10 @@ static unsigned int __rmid_min_update_time = RMID_DEFAULT_MIN_UPDATE_TIME;
static inline int cqm_prmid_update(struct prmid *prmid);
+/*
+ * Period, in ms, between two timed updates of the occupancy of active
+ * prmids; converted with msecs_to_jiffies() when the update work is queued.
+ */
+#define RMID_DEFAULT_TIMED_UPDATE_PERIOD 100 /* ms */
+static unsigned int __rmid_timed_update_period =
+ RMID_DEFAULT_TIMED_UPDATE_PERIOD;
+
/*
* union prmid_summary: Machine-size summary of a pmonr's prmid state.
* @value: One word accesor.
@@ -211,6 +215,21 @@ struct pmonr {
atomic64_t prmid_summary_atomic;
};
+/* Store all RMIDs that can fit in an anode while keeping sizeof(struct anode)
+ * within one cache line (for performance).
+ *
+ * NR_TYPE_PER_NODE(__type): number of @__type elements that fit in
+ * SMP_CACHE_BYTES once the space of one struct list_head is set aside.
+ * NR_RMIDS_PER_NODE: that count for u32 rmids.
+ */
+#define NR_TYPE_PER_NODE(__type) ((SMP_CACHE_BYTES - (int)sizeof(struct list_head)) / \
+ (int)sizeof(__type))
+
+#define NR_RMIDS_PER_NODE NR_TYPE_PER_NODE(u32)
+
+/* struct anode: Node of an array list used to temporarily store RMIDs.
+ * @rmids: slots for raw RMIDs, NR_RMIDS_PER_NODE per node.
+ * @entry: links the node into an anode pool or into an astack's items list.
+ */
+struct anode {
+ /* Last valid RMID is RMID_INVALID */
+ /* NOTE(review): the astack helpers in cqm.c bound reads with top_idx and
+  * store no sentinel here; confirm whether the comment above still holds.
+  */
+ u32 rmids[NR_RMIDS_PER_NODE];
+ struct list_head entry;
+};
+
/*
* struct pkg_data: Per-package CQM data.
* @max_rmid: Max rmid valid for cpus in this package.
@@ -239,6 +258,14 @@ struct pmonr {
* @rotation_cpu: CPU to run @rotation_work on, it must be in the
* package associated to this instance of pkg_data.
* @rotation_work: Task that performs rotation of prmids.
+ * @timed_update_work: Task that performs periodic updates of values
+ * for active rmids. These values are used when
+ * inter-package event read is not available due to
+ * irqs disabled contexts.
+ * @timed_update_cpu: CPU to run @timed_update_work on, it must be a
+ * cpu in this package.
+ * @anode_pool_head: Pool of unused anodes.
+ * @anode_pool_lock: Protect @anode_pool_head.
*/
struct pkg_data {
u32 max_rmid;
@@ -268,6 +295,13 @@ struct pkg_data {
struct delayed_work rotation_work;
int rotation_cpu;
+
+ struct delayed_work timed_update_work;
+ int timed_update_cpu;
+
+ /* Pool of unused anodes and its lock */
+ struct list_head anode_pool_head;
+ raw_spinlock_t anode_pool_lock;
};
/*
@@ -438,6 +472,8 @@ static inline int monr_hrchy_count_held_raw_spin_locks(void)
*/
static void intel_cqm_rmid_rotation_work(struct work_struct *work);
+static void intel_cqm_timed_update_work(struct work_struct *work);
+
/*
* Service Level Objectives (SLO) for the rotation logic.
*
--
2.8.0.rc3.226.g39d4020