[PATCH v2 10/32] perf/x86/intel/cqm: introduce (I)state and limbo prmids

From: David Carrillo-Cisneros
Date: Wed May 11 2016 - 19:09:38 EST


CQM defines a dirty threshold as the minimum number of dirty cache lines
that a prmid to be eligible to reuse.
This threshold is zero unless there exist significant contention of prmids
(more on this on the patch that introduces rotation of RMIDs).

A limbo prmid is a prmid that is no longer utilized by any pmonr, yet, its
occupancy exceeds the dirty threshold. This is a consequence of the
hardware design that do not provide a mechanism to flush cache lines
associated with a RMID.

If no pmonr schedules a limbo prmid, it's expected that it's occupancy
will eventually drop below the dirty threshold. Nevertheless, the cache
lines tagged to a limbo prmid still hold valid occupancy for the previous
owner of the prmid. This creates a difference in the way pmonrs are
handled depending on whether its former prmid has been reused or not.

This patch introduce the (I)state mentioned in previous changelog.
The (I)state is a superstate conformed by two substates:
- (IL)state: (I)state with limbo prmid, this pmonr held a prmid in
(A)state before its transition to (I)state.
- (IN)state: (I)state without limbo prmid, this pmonr has not held a
prmid or its previous prmid has been reused.

A pmonr in (IL)state keeps the reference to its former prmid in the field
limbo_prmid, this occupancy is counted towards the occupancy of the
ancestors of the pmonr, reducing the error caused by stealing of prmids
during RMID rotation.

In future patches (rotation logic), the occupancy of limbo_prmids is
polled periodically and (IL)state pmonrs with limbo prmids that had become
"clean" will be transitioned to (IN)state and their prmid reused.

Reviewed-by: Stephane Eranian <eranian@xxxxxxxxxx>
Signed-off-by: David Carrillo-Cisneros <davidcc@xxxxxxxxxx>
---
arch/x86/events/intel/cqm.c | 201 ++++++++++++++++++++++++++++++++++++++++++--
arch/x86/events/intel/cqm.h | 84 ++++++++++++++++--
2 files changed, 274 insertions(+), 11 deletions(-)

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 5f2969b..62fc7b1 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -92,13 +92,31 @@ struct pkg_data **cqm_pkgs_data;
static inline bool __pmonr__in_astate(struct pmonr *pmonr)
{
lockdep_assert_held(&__pkg_data(pmonr, pkg_data_lock));
- return pmonr->prmid;
+ return pmonr->prmid && !pmonr->ancestor_pmonr;
}

static inline bool __pmonr__in_ustate(struct pmonr *pmonr)
{
lockdep_assert_held(&__pkg_data(pmonr, pkg_data_lock));
- return !pmonr->prmid;
+ return !pmonr->prmid && !pmonr->ancestor_pmonr;
+}
+
+static inline bool __pmonr__in_istate(struct pmonr *pmonr)
+{
+ lockdep_assert_held(&__pkg_data(pmonr, pkg_data_lock));
+ return pmonr->ancestor_pmonr;
+}
+
+static inline bool __pmonr__in_ilstate(struct pmonr *pmonr)
+{
+ lockdep_assert_held(&__pkg_data(pmonr, pkg_data_lock));
+ return __pmonr__in_istate(pmonr) && pmonr->limbo_prmid;
+}
+
+static inline bool __pmonr__in_instate(struct pmonr *pmonr)
+{
+ lockdep_assert_held(&__pkg_data(pmonr, pkg_data_lock));
+ return __pmonr__in_istate(pmonr) && !__pmonr__in_ilstate(pmonr);
}

static inline bool monr__is_root(struct monr *monr)
@@ -191,9 +209,12 @@ static int pkg_data_init_cpu(int cpu)

INIT_LIST_HEAD(&pkg_data->free_prmids_pool);
INIT_LIST_HEAD(&pkg_data->active_prmids_pool);
+ INIT_LIST_HEAD(&pkg_data->pmonr_limbo_prmids_pool);
INIT_LIST_HEAD(&pkg_data->nopmonr_limbo_prmids_pool);

INIT_LIST_HEAD(&pkg_data->astate_pmonrs_lru);
+ INIT_LIST_HEAD(&pkg_data->istate_pmonrs_lru);
+ INIT_LIST_HEAD(&pkg_data->ilstate_pmonrs_lru);

mutex_init(&pkg_data->pkg_data_mutex);
raw_spin_lock_init(&pkg_data->pkg_data_lock);
@@ -242,7 +263,15 @@ static struct pmonr *pmonr_alloc(int cpu)
if (!pmonr)
return ERR_PTR(-ENOMEM);

+ pmonr->ancestor_pmonr = NULL;
+
+ /*
+ * Since (A)state and (I)state have union in members,
+ * initialize one of them only.
+ */
+ INIT_LIST_HEAD(&pmonr->pmonr_deps_head);
pmonr->prmid = NULL;
+ INIT_LIST_HEAD(&pmonr->limbo_rotation_entry);

pmonr->monr = NULL;
INIT_LIST_HEAD(&pmonr->rotation_entry);
@@ -308,6 +337,44 @@ __pmonr__finish_to_astate(struct pmonr *pmonr, struct prmid *prmid)
atomic64_set(&pmonr->prmid_summary_atomic, summary.value);
}

+/*
+ * Transition to (A)state from (IN)state, given a valid prmid.
+ * Cannot fail. Updates ancestor dependants to use this pmonr as new ancestor.
+ */
+static inline void
+__pmonr__instate_to_astate(struct pmonr *pmonr, struct prmid *prmid)
+{
+ struct pmonr *pos, *tmp, *ancestor;
+ union prmid_summary old_summary, summary;
+
+ lockdep_assert_held(&__pkg_data(pmonr, pkg_data_lock));
+
+ /* If in (I) state, cannot have limbo_prmid, otherwise prmid
+ * in function's argument is superfluous.
+ */
+ WARN_ON_ONCE(pmonr->limbo_prmid);
+
+ /* Do not depend on ancestor_pmonr anymore. Make it (A)state. */
+ ancestor = pmonr->ancestor_pmonr;
+ list_del_init(&pmonr->pmonr_deps_entry);
+ pmonr->ancestor_pmonr = NULL;
+ __pmonr__finish_to_astate(pmonr, prmid);
+
+ /* Update ex ancestor's dependants that are pmonr descendants. */
+ list_for_each_entry_safe(pos, tmp, &ancestor->pmonr_deps_head,
+ pmonr_deps_entry) {
+ if (!__monr_hrchy_is_ancestor(monr_hrchy_root,
+ pmonr->monr, pos->monr))
+ continue;
+ list_move_tail(&pos->pmonr_deps_entry, &pmonr->pmonr_deps_head);
+ pos->ancestor_pmonr = pmonr;
+ old_summary.value = atomic64_read(&pos->prmid_summary_atomic);
+ summary.sched_rmid = prmid->rmid;
+ summary.read_rmid = old_summary.read_rmid;
+ atomic64_set(&pos->prmid_summary_atomic, summary.value);
+ }
+}
+
static inline void
__pmonr__ustate_to_astate(struct pmonr *pmonr, struct prmid *prmid)
{
@@ -315,9 +382,59 @@ __pmonr__ustate_to_astate(struct pmonr *pmonr, struct prmid *prmid)
__pmonr__finish_to_astate(pmonr, prmid);
}

+/*
+ * Find lowest active ancestor.
+ * Always successful since monr_hrchy_root is always in (A)state.
+ */
+static struct monr *
+__monr_hrchy__find_laa(struct monr *monr, u16 pkg_id)
+{
+ lockdep_assert_held(&cqm_pkgs_data[pkg_id]->pkg_data_lock);
+
+ while ((monr = monr->parent)) {
+ if (__pmonr__in_astate(monr->pmonrs[pkg_id]))
+ return monr;
+ }
+ /* Should have hitted monr_hrchy_root */
+ WARN_ON_ONCE(true);
+ return NULL;
+}
+
+/*
+ * __pmnor__move_dependants: Move dependants from one ancestor to another.
+ * @old: Old ancestor.
+ * @new: New ancestor.
+ *
+ * To be called on valid pmonrs. @new must be ancestor of @old.
+ */
+static inline void
+__pmonr__move_dependants(struct pmonr *old, struct pmonr *new)
+{
+ struct pmonr *dep;
+ union prmid_summary old_summary, summary;
+
+ WARN_ON_ONCE(old->pkg_id != new->pkg_id);
+ lockdep_assert_held(&__pkg_data(old, pkg_data_lock));
+
+ /* Update this pmonr dependencies to use new ancestor. */
+ list_for_each_entry(dep, &old->pmonr_deps_head, pmonr_deps_entry) {
+ /* Set next summary for dependent pmonrs. */
+ dep->ancestor_pmonr = new;
+
+ old_summary.value = atomic64_read(&dep->prmid_summary_atomic);
+ summary.sched_rmid = new->prmid->rmid;
+ summary.read_rmid = old_summary.read_rmid;
+ atomic64_set(&dep->prmid_summary_atomic, summary.value);
+ }
+ list_splice_tail_init(&old->pmonr_deps_head,
+ &new->pmonr_deps_head);
+}
+
static inline void
__pmonr__to_ustate(struct pmonr *pmonr)
{
+ struct pmonr *ancestor;
+ u16 pkg_id = pmonr->pkg_id;
union prmid_summary summary;

lockdep_assert_held(&__pkg_data(pmonr, pkg_data_lock));
@@ -331,9 +448,27 @@ __pmonr__to_ustate(struct pmonr *pmonr)
if (__pmonr__in_astate(pmonr)) {
WARN_ON_ONCE(!pmonr->prmid);

+ ancestor = __monr_hrchy__find_laa(
+ pmonr->monr, pkg_id)->pmonrs[pkg_id];
+ WARN_ON_ONCE(!ancestor);
+ __pmonr__move_dependants(pmonr, ancestor);
list_move_tail(&pmonr->prmid->pool_entry,
&__pkg_data(pmonr, nopmonr_limbo_prmids_pool));
pmonr->prmid = NULL;
+ } else if (__pmonr__in_istate(pmonr)) {
+ list_del_init(&pmonr->pmonr_deps_entry);
+ /* limbo_prmid is already in limbo pool */
+ if (__pmonr__in_ilstate(pmonr)) {
+ WARN_ON(!pmonr->limbo_prmid);
+ list_move_tail(
+ &pmonr->limbo_prmid->pool_entry,
+ &__pkg_data(pmonr, nopmonr_limbo_prmids_pool));
+
+ pmonr->limbo_prmid = NULL;
+ list_del_init(&pmonr->limbo_rotation_entry);
+ } else {
+ }
+ pmonr->ancestor_pmonr = NULL;
} else {
WARN_ON_ONCE(true);
return;
@@ -348,6 +483,62 @@ __pmonr__to_ustate(struct pmonr *pmonr)
WARN_ON_ONCE(!__pmonr__in_ustate(pmonr));
}

+static inline void __pmonr__set_istate_summary(struct pmonr *pmonr)
+{
+ union prmid_summary summary;
+
+ summary.sched_rmid = pmonr->ancestor_pmonr->prmid->rmid;
+ summary.read_rmid =
+ pmonr->limbo_prmid ? pmonr->limbo_prmid->rmid : INVALID_RMID;
+ atomic64_set(
+ &pmonr->prmid_summary_atomic, summary.value);
+}
+
+/*
+ * Transition to (I)state from no (I)state..
+ * Finds a valid ancestor transversing monr_hrchy. Cannot fail.
+ */
+static inline void
+__pmonr__to_istate(struct pmonr *pmonr)
+{
+ struct pmonr *ancestor;
+ u16 pkg_id = pmonr->pkg_id;
+
+ lockdep_assert_held(&__pkg_data(pmonr, pkg_data_lock));
+
+ if (!(__pmonr__in_ustate(pmonr) || __pmonr__in_astate(pmonr))) {
+ /* Invalid initial state. */
+ WARN_ON_ONCE(true);
+ return;
+ }
+
+ ancestor = __monr_hrchy__find_laa(pmonr->monr, pkg_id)->pmonrs[pkg_id];
+ WARN_ON_ONCE(!ancestor);
+
+ if (__pmonr__in_astate(pmonr)) {
+ /* Active pmonr->prmid becomes limbo in transition to (I)state.
+ * Note that pmonr->prmid and pmonr->limbo_prmid are in an
+ * union, so no need to copy.
+ */
+ __pmonr__move_dependants(pmonr, ancestor);
+ list_move_tail(&pmonr->limbo_prmid->pool_entry,
+ &__pkg_data(pmonr, pmonr_limbo_prmids_pool));
+ }
+
+ pmonr->ancestor_pmonr = ancestor;
+ list_add_tail(&pmonr->pmonr_deps_entry, &ancestor->pmonr_deps_head);
+
+ list_move_tail(
+ &pmonr->rotation_entry, &__pkg_data(pmonr, istate_pmonrs_lru));
+
+ if (pmonr->limbo_prmid)
+ list_move_tail(&pmonr->limbo_rotation_entry,
+ &__pkg_data(pmonr, ilstate_pmonrs_lru));
+
+ __pmonr__set_istate_summary(pmonr);
+
+}
+
static int intel_cqm_setup_pkg_prmid_pools(u16 pkg_id)
{
int r;
@@ -527,9 +718,9 @@ monr_hrchy_get_next_prmid_summary(struct pmonr *pmonr)

if (list_empty(&__pkg_data(pmonr, free_prmids_pool))) {
/* Failed to obtain an valid rmid in this package for this
- * monr. In next patches it will transition to (I)state.
- * For now, stay in (U)state (do nothing).
+ * monr. Use an inherited one.
*/
+ __pmonr__to_istate(pmonr);
} else {
/* Transition to (A)state using free prmid. */
__pmonr__ustate_to_astate(
@@ -777,7 +968,7 @@ static int intel_cqm_event_add(struct perf_event *event, int mode)
__intel_cqm_event_start(event, summary);

/* (I)state pmonrs cannot report occupancy for themselves. */
- return 0;
+ return prmid_summary__is_istate(summary) ? -1 : 0;
}

static inline bool cqm_group_leader(struct perf_event *event)
diff --git a/arch/x86/events/intel/cqm.h b/arch/x86/events/intel/cqm.h
index 81c7af1..28011a8 100644
--- a/arch/x86/events/intel/cqm.h
+++ b/arch/x86/events/intel/cqm.h
@@ -64,11 +64,11 @@ static inline int cqm_prmid_update(struct prmid *prmid);
* The combination of values in sched_rmid and read_rmid indicate the state of
* the associated pmonr (see pmonr comments) as follows:
* pmonr state
- * | (A)state (U)state
+ * | (A)state (IN)state (IL)state (U)state
* ----------------------------------------------------------------------------
- * sched_rmid | pmonr.prmid INVALID_RMID
- * read_rmid | pmonr.prmid INVALID_RMID
- * (or 0)
+ * sched_rmid | pmonr.prmid ancestor.prmid ancestor.prmid INVALID_RMID
+ * read_rmid | pmonr.prmid INVALID_RMID pmonr.limbo_prmid INVALID_RMID
+ * (or 0)
*
* The combination sched_rmid == INVALID_RMID and read_rmid == 0 for (U)state
* denotes that the flag MONR_MON_ACTIVE is set in the monr associated with
@@ -90,6 +90,13 @@ inline bool prmid_summary__is_ustate(union prmid_summary summ)
return summ.sched_rmid == INVALID_RMID;
}

+/* A pmonr in (I)state (either (IN)state or (IL)state. */
+inline bool prmid_summary__is_istate(union prmid_summary summ)
+{
+ return summ.sched_rmid != INVALID_RMID &&
+ summ.sched_rmid != summ.read_rmid;
+}
+
inline bool prmid_summary__is_mon_active(union prmid_summary summ)
{
/* If not in (U)state, then MONR_MON_ACTIVE must be set. */
@@ -100,7 +107,24 @@ inline bool prmid_summary__is_mon_active(union prmid_summary summ)
struct monr;

/* struct pmonr: Node of per-package hierarchy of MONitored Resources.
+ * @ancestor_pmonr: lowest active pmonr whose monr is ancestor of
+ * this pmonr's monr.
+ * @pmonr_deps_head: List of pmonrs without prmid that use
+ * this pmonr's prmid -when in (A)state-.
* @prmid: The prmid of this pmonr -when in (A)state-.
+ * @pmonr_deps_entry: Entry into ancestor's @pmonr_deps_head
+ * -when inheriting, (I)state-.
+ * @limbo_prmid: A prmid previously used by this pmonr and that
+ * has not been reused yet and therefore contain
+ * occupancy that should be counted towards this
+ * pmonr's occupancy.
+ * The limbo_prmid can be reused in the same pmonr
+ * in the next transition to (A) state, even if
+ * the occupancy of @limbo_prmid is not below the
+ * dirty threshold, reducing the need of free
+ * prmids.
+ * @limbo_rotation_entry: List entry to attach to ilstate_pmonrs_lru when
+ * this pmonr is in (IL)state.
* @rotation_entry: List entry to attach to pmonr rotation lists in
* pkg_data.
* @monr: The monr that contains this pmonr.
@@ -114,6 +138,15 @@ struct monr;
* pmonr, the pmonr utilizes the rmid of its ancestor.
* A pmonr is always in one of the following states:
* - (A)ctive: Has @prmid assigned, @ancestor_pmonr must be NULL.
+ * - (I)nherited: The prmid used is "Inherited" from @ancestor_pmonr.
+ * @ancestor_pmonr must be set. @prmid is unused. This is
+ * a super-state composed of two substates:
+ *
+ * - (IL)state: A pmonr in (I)state that has a valid limbo_prmid.
+ * - (IN)state: A pmonr in (I)state with NO valid limbo_prmid.
+ *
+ * When the distintion between the two substates is
+ * no relevant, the pmonr is simply in the (I)state.
* - (U)nused: No @ancestor_pmonr and no @prmid, hence no available
* prmid and no inhering one either. Not in rotation list.
* This state is unschedulable and a prmid
@@ -124,13 +157,41 @@ struct monr;
* The state transitions are:
* (U) : The initial state. Starts there after allocation.
* (U) -> (A): If on first sched (or initialization) pmonr receives a prmid.
+ * (U) -> (I): If on first sched (or initialization) pmonr cannot find a free
+ * prmid and resort to use its ancestor's.
+ * (A) -> (I): On stealing of prmid from pmonr (by rotation logic only).
* (A) -> (U): On destruction of monr.
+ * (I) -> (A): On receiving a free prmid or on reuse of its @limbo_prmid (by
+ * rotation logic only).
+ * (I) -> (U): On destruction of pmonr.
+ *
+ * Note that the (I) -> (A) transition makes monitoring available, but can
+ * introduce error due to cache lines allocated before the transition. Such
+ * error is likely to decrease over time.
+ * When entering an (I) state, the reported count of the event is unavaiable.
*
- * Each pmonr is contained by a monr.
+ * Each pmonr is contained by a monr. Each monr forms a system-wide hierarchy
+ * that is used by the pmrs to find ancestors and dependants. The per-package
+ * hierarchy spanned by the pmrs follows the monr hierarchy except by
+ * collapsing the nodes in (I)state into a super-node that contains an (A)state
+ * pmonr and all of its dependants (pmonr in pmonr_deps_head).
*/
struct pmonr {

- struct prmid *prmid;
+ /* If set, pmonr is in (I)state. */
+ struct pmonr *ancestor_pmonr;
+
+ union{
+ struct { /* (A)state variables. */
+ struct list_head pmonr_deps_head;
+ struct prmid *prmid;
+ };
+ struct { /* (I)state variables. */
+ struct list_head pmonr_deps_entry;
+ struct prmid *limbo_prmid;
+ struct list_head limbo_rotation_entry;
+ };
+ };

struct monr *monr;
struct list_head rotation_entry;
@@ -148,10 +209,17 @@ struct pmonr {
* XXX: Make it an array of prmids.
* @free_prmid_pool: Free prmids.
* @active_prmid_pool: prmids associated with a (A)state pmonr.
+ * @pmonr_limbo_prmid_pool: limbo prmids referenced by the limbo_prmid of a
+ * pmonr in (I)state.
* @nopmonr_limbo_prmid_pool: prmids in limbo state that are not referenced
* by a pmonr.
* @astate_pmonrs_lru: pmonrs in (A)state. LRU in increasing order of
* pmonr.last_enter_astate.
+ * @istate_pmonrs_lru: pmors In (I)state with no limbo_prmid. LRU in
+ * increasing order of pmonr.last_enter_istate.
+ * @ilsate_pmonrs_lru: pmonrs in (IL)state, these pmonrs have a valid
+ * limbo_prmid. It's a subset of istate_pmonrs_lru.
+ * Sorted increasingly by pmonr.last_enter_istate.
* @pkg_data_mutex: Hold for stability when modifying pmonrs
* hierarchy.
* @pkg_data_lock: Hold to protect variables that may be accessed
@@ -173,9 +241,13 @@ struct pkg_data {
/* Can be modified during task switch with (U)state -> (A)state. */
struct list_head active_prmids_pool;
/* Only modified during rotation logic and deletion. */
+ struct list_head pmonr_limbo_prmids_pool;
struct list_head nopmonr_limbo_prmids_pool;

struct list_head astate_pmonrs_lru;
+ /* Superset of ilstate_pmonrs_lru. */
+ struct list_head istate_pmonrs_lru;
+ struct list_head ilstate_pmonrs_lru;

struct mutex pkg_data_mutex;
raw_spinlock_t pkg_data_lock;
--
2.8.0.rc3.226.g39d4020