[PATCH v3 38/46] perf/x86/intel/cmt: introduce read SLOs for rotation

From: David Carrillo-Cisneros
Date: Sat Oct 29 2016 - 20:41:34 EST


To make rmid rotation more dependable, this patch series introduces
rotation Service Level Objectives (SLOs), which are described in the
code's documentation.

This patch introduces the cmt_{pre,min}_mon_slice SLOs, which protect
against bogus values when a rmid has not been available since the
beginning of monitoring. It also introduces the auxiliary variables
necessary for the SLOs to work and the checks in intel_cmt_event_read
that enforce the SLOs when reading the llc_occupancy event.

Signed-off-by: David Carrillo-Cisneros <davidcc@xxxxxxxxxx>
---
arch/x86/events/intel/cmt.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-
arch/x86/events/intel/cmt.h | 28 +++++++++++++++++++++++++++
2 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index 3ade923..649eb5f 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -51,6 +51,25 @@ static size_t pkg_uflags_size;
static struct pkg_data **cmt_pkgs_data;

/*
+ * Rotation Service Level Objectives (SLO) for monrs with llc_occupancy
+ * monitoring. Note that these are monr level SLOs, therefore all pmonrs in
+ * the monr meet or exceed them.
+ * (A "monitored" monr is a monr with no pmonr in a Dependent state).
+ *
+ * SLOs:
+ *
+ * @__cmt_pre_mon_slice: Min time a monr is monitored before being readable.
+ * @__cmt_min_mon_slice: Min time a monr stays monitored after becoming
+ * readable.
+ */
+#define CMT_DEFAULT_PRE_MON_SLICE 2000 /* ms */
+static u64 __cmt_pre_mon_slice;
+
+#define CMT_DEFAULT_MIN_MON_SLICE 5000 /* ms */
+static u64 __cmt_min_mon_slice;
+
+
+/*
* If @pkgd == NULL, return first online, pkg_data in cmt_pkgs_data.
* Otherwise next online pkg_data or NULL if no more.
*/
@@ -300,6 +319,7 @@ static void pmonr_to_unused(struct pmonr *pmonr)
pmonr_move_all_dependants(pmonr, lender);
}
__set_bit(rmids.read_rmid, pkgd->dirty_rmids);
+ pkgd->nr_dirty_rmids++;

} else if (pmonr->state == PMONR_DEP_IDLE ||
pmonr->state == PMONR_DEP_DIRTY) {
@@ -312,6 +332,11 @@ static void pmonr_to_unused(struct pmonr *pmonr)
__set_bit(rmids.read_rmid, pkgd->dirty_rmids);
else
pkgd->nr_dep_pmonrs--;
+
+
+ if (!atomic_dec_and_test(&pmonr->monr->nr_dep_pmonrs))
+ atomic64_set(&pmonr->monr->last_rmid_recoup,
+ get_jiffies_64());
} else {
WARN_ON_ONCE(true);
return;
@@ -372,6 +397,7 @@ static inline void __pmonr_to_dep_helper(

lender_rmids.value = atomic64_read(&lender->atomic_rmids);
pmonr_set_rmids(pmonr, lender_rmids.sched_rmid, read_rmid);
+ atomic_inc(&pmonr->monr->nr_dep_pmonrs);
}

static inline void pmonr_unused_to_dep_idle(struct pmonr *pmonr)
@@ -390,6 +416,7 @@ static void pmonr_unused_to_off(struct pmonr *pmonr)

static void pmonr_active_to_dep_dirty(struct pmonr *pmonr)
{
+ struct pkg_data *pkgd = pmonr->pkgd;
struct pmonr *lender;
union pmonr_rmids rmids;

@@ -398,6 +425,7 @@ static void pmonr_active_to_dep_dirty(struct pmonr *pmonr)

rmids.value = atomic64_read(&pmonr->atomic_rmids);
__pmonr_to_dep_helper(pmonr, lender, rmids.read_rmid);
+ pkgd->nr_dirty_rmids++;
}

static void __pmonr_dep_to_active_helper(struct pmonr *pmonr, u32 rmid)
@@ -408,6 +436,9 @@ static void __pmonr_dep_to_active_helper(struct pmonr *pmonr, u32 rmid)
pmonr_move_dependants(pmonr->lender, pmonr);
pmonr->lender = NULL;
__pmonr_to_active_helper(pmonr, rmid);
+
+ if (!atomic_dec_and_test(&pmonr->monr->nr_dep_pmonrs))
+ atomic64_set(&pmonr->monr->last_rmid_recoup, get_jiffies_64());
}

static void pmonr_dep_idle_to_active(struct pmonr *pmonr, u32 rmid)
@@ -422,6 +453,7 @@ static void pmonr_dep_dirty_to_active(struct pmonr *pmonr)
union pmonr_rmids rmids;

rmids.value = atomic64_read(&pmonr->atomic_rmids);
+ pmonr->pkgd->nr_dirty_rmids--;
__pmonr_dep_to_active_helper(pmonr, rmids.read_rmid);
}

@@ -1599,7 +1631,7 @@ static int read_all_pkgs(struct monr *monr, int wait_time_ms, u64 *count)
static int intel_cmt_event_read(struct perf_event *event)
{
struct monr *monr = monr_from_event(event);
- u64 count;
+ u64 count, recoup, wait_end;
u16 pkgid = topology_logical_package_id(smp_processor_id());
int err;

@@ -1614,6 +1646,15 @@ static int intel_cmt_event_read(struct perf_event *event)
return -ENXIO;

/*
+ * If a rmid was stolen, only read if enough time has elapsed since
+ * the rmid was recovered.
+ */
+ recoup = atomic64_read(&monr->last_rmid_recoup);
+ wait_end = recoup + __cmt_pre_mon_slice;
+ if (recoup && time_before64(get_jiffies_64(), wait_end))
+ return -EAGAIN;
+
+ /*
* Only event parent can return a value, everyone else share its
* rmid and therefore doesn't track occupancy independently.
*/
@@ -2267,6 +2308,9 @@ static int __init intel_cmt_init(void)
struct pkg_data *pkgd = NULL;
int err = 0;

+ __cmt_pre_mon_slice = msecs_to_jiffies(CMT_DEFAULT_PRE_MON_SLICE);
+ __cmt_min_mon_slice = msecs_to_jiffies(CMT_DEFAULT_MIN_MON_SLICE);
+
if (!x86_match_cpu(intel_cmt_match)) {
err = -ENODEV;
goto err_exit;
diff --git a/arch/x86/events/intel/cmt.h b/arch/x86/events/intel/cmt.h
index 8bb43bd..8756666 100644
--- a/arch/x86/events/intel/cmt.h
+++ b/arch/x86/events/intel/cmt.h
@@ -52,6 +52,24 @@
* schedule and read.
*
*
+ * Rotation
+ *
+ * The number of rmids in hw is relatively small with respect to the number
+ * of potential monitored resources. rmids are rotated among pmonrs that
+ * need one, to give a fair-ish usage of this resource.
+ *
+ * A hw constraint is that occupancy for a rmid cannot be restarted; therefore
+ * a rmid with llc_occupancy needs some time unscheduled until all cache lines
+ * tagged to it are evicted from cache (if this ever happens).
+ *
+ * When a rmid is "rotated", it is stolen from a pmonr and must wait until its
+ * llc_occupancy has decreased enough to be considered "clean". Meanwhile, that
+ * rmid is considered "dirty".
+ *
+ * Rotation logic periodically reads occupancy of these "dirty" rmids and, when
+ * clean, the rmid is either reused or placed in a free pool.
+ *
+ *
* Locking
*
* One global cmt_mutex. One mutex and spin_lock per package.
@@ -62,6 +80,7 @@
* cgroup start/stop.
* - Hold pkg->mutex and pkg->lock in _all_ active packages to traverse or
* change the monr hierarchy.
+ * - pkgd->mutex: Hold in current package for rotation in that pkgd.
* - pkgd->lock: Hold in current package to access that pkgd's members. Hold
* a pmonr's package pkgd->lock for non-atomic access to pmonr.
*/
@@ -225,6 +244,7 @@ struct cmt_csd {
* @dep_dirty_pmonrs: LRU of Dep_Dirty pmonrs.
* @dep_pmonrs: LRU of Dep_Idle and Dep_Dirty pmonrs.
* @nr_dep_pmonrs: nr Dep_Idle + nr Dep_Dirty pmonrs.
+ * @nr_dirty_rmids: "dirty" rmids, both with and without a pmonr.
* @mutex: Hold when modifying this pkg_data.
* @mutex_key: lockdep class for pkg_data's mutex.
* @lock: Hold to protect pmonrs in this pkg_data.
@@ -243,6 +263,7 @@ struct pkg_data {
struct list_head dep_dirty_pmonrs;
struct list_head dep_pmonrs;
int nr_dep_pmonrs;
+ int nr_dirty_rmids;

struct mutex mutex;
raw_spinlock_t lock;
@@ -280,6 +301,10 @@ enum cmt_user_flags {
* @parent: Parent in monr hierarchy.
* @children: List of children in monr hierarchy.
* @parent_entry: Entry in parent's children list.
+ * @last_rmid_recoup: Last time that nr_dep_pmonrs decreased to zero. It's
+ * zero if a rmid has never been stolen from this monr.
+ * @nr_dep_pmonrs: nr of Dep_* pmonrs in this monr. A zero implies that
+ * monr is monitoring in all required packages.
* @flags: monr_flags.
* @nr_has_user: nr of CMT_UF_HAS_USER set in events in mon_events.
* @nr_nolazy_user: nr of CMT_UF_NOLAZY_RMID set in events in mon_events.
@@ -303,6 +328,9 @@ struct monr {
struct list_head children;
struct list_head parent_entry;

+ atomic64_t last_rmid_recoup;
+ atomic_t nr_dep_pmonrs;
+
enum monr_flags flags;
int nr_has_user;
int nr_nolazy_rmid;
--
2.8.0.rc3.226.g39d4020