[PATCH v3 41/46] perf/x86/intel/cmt: add rotation minimum progress SLO

From: David Carrillo-Cisneros
Date: Sat Oct 29 2016 - 20:42:00 EST


Add a rotation SLO: try to move pmonrs to Active state at a rate of at
least __cmt_min_progress_rate pmonrs per second.

Signed-off-by: David Carrillo-Cisneros <davidcc@xxxxxxxxxx>
---
arch/x86/events/intel/cmt.c | 274 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 273 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index 8bf6aa5..ba82f95 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -79,6 +79,14 @@ static u64 __cmt_min_mon_slice;
static unsigned int __cmt_max_threshold; /* bytes */

/*
+ * Rotation SLO for all monr events (including those without llc_occupancy):
+ * @__cmt_min_progress_rate: Minimum number of pmonrs that must go to Active
+ * state per second; otherwise, the recycling occupancy error is increased.
+ */
+#define CMT_DEFAULT_MIN_PROGRESS_RATE 2 /* pmonrs per sec */
+static unsigned int __cmt_min_progress_rate = CMT_DEFAULT_MIN_PROGRESS_RATE;
+
+/*
* If @pkgd == NULL, return first online, pkg_data in cmt_pkgs_data.
* Otherwise next online pkg_data or NULL if no more.
*/
@@ -466,6 +474,21 @@ static void pmonr_dep_dirty_to_active(struct pmonr *pmonr)
__pmonr_dep_to_active_helper(pmonr, rmids.read_rmid);
}

+/*
+ * Move @pmonr from Dep_Dirty to Dep_Idle and hand its former read rmid
+ * (@rmids.read_rmid) over to the package's free_rmids bitmap.
+ * The dirty rmid must be clean enough to go to free_rmids.
+ */
+static void pmonr_dep_dirty_to_dep_idle_helper(struct pmonr *pmonr,
+					       union pmonr_rmids rmids)
+{
+	struct pkg_data *pkg = pmonr->pkgd;
+
+	/* The rmid stops being dirty and becomes available for reuse. */
+	__set_bit(rmids.read_rmid, pkg->free_rmids);
+	pkg->nr_dirty_rmids--;
+
+	/* Queue the pmonr at the tail of the Dep_Idle rotation list. */
+	list_move_tail(&pmonr->rot_entry, &pkg->dep_idle_pmonrs);
+	pkg->nr_dep_pmonrs++;
+
+	pmonr->state = PMONR_DEP_IDLE;
+	pmonr_set_rmids(pmonr, rmids.sched_rmid, INVALID_RMID);
+}
+
static void monr_dealloc(struct monr *monr)
{
u16 p, nr_pkgs = topology_max_packages();
@@ -1311,6 +1334,242 @@ static void smp_call_rmid_read(void *data)
atomic_set(&ccsd->on_read, 0);
}

+/*
+ * Activate the pmonrs at the front of pkgd->dep_pmonrs that are not in
+ * Dep_Idle state, letting each one reuse its dirty rmid. Stops at the first
+ * Dep_Idle pmonr or when the list is empty.
+ * Returns the number of pmonrs activated. Caller must hold pkgd->lock.
+ */
+static int __try_activate_dep_dirty_pmonrs(struct pkg_data *pkgd)
+{
+	struct list_head *deps = &pkgd->dep_pmonrs;
+	struct pmonr *first;
+	int nr_reused = 0;
+
+	lockdep_assert_held(&pkgd->lock);
+
+	for (;;) {
+		first = list_first_entry_or_null(deps, struct pmonr,
+						 pkgd_deps_entry);
+		if (!first || first->state == PMONR_DEP_IDLE)
+			break;
+		pmonr_dep_dirty_to_active(first);
+		nr_reused++;
+	}
+
+	return nr_reused;
+}
+
+/*
+ * Locked variant of __try_activate_dep_dirty_pmonrs(): takes pkgd->lock
+ * (irqsave) around the call and returns its result.
+ */
+static int try_activate_dep_dirty_pmonrs(struct pkg_data *pkgd)
+{
+	unsigned long irq_flags;
+	int activated;
+
+	raw_spin_lock_irqsave(&pkgd->lock, irq_flags);
+	activated = __try_activate_dep_dirty_pmonrs(pkgd);
+	raw_spin_unlock_irqrestore(&pkgd->lock, irq_flags);
+
+	return activated;
+}
+
+/*
+ * Give free rmid @rmid to the first pmonr in pkgd->dep_idle_pmonrs, then
+ * activate any pmonrs that __try_activate_dep_dirty_pmonrs() can pick up.
+ * Returns the total number of pmonrs activated, or 0 if no Dep_Idle pmonr
+ * is queued (in which case @rmid is left untouched).
+ * Caller must hold pkgd->lock.
+ */
+static inline int __try_use_free_rmid(struct pkg_data *pkgd, u32 rmid)
+{
+	struct pmonr *waiter;
+	int nr_activated;
+
+	lockdep_assert_held(&pkgd->lock);
+
+	waiter = list_first_entry_or_null(&pkgd->dep_idle_pmonrs,
+					  struct pmonr, rot_entry);
+	if (waiter) {
+		/*
+		 * The state transition will move the rmid to the active
+		 * list.
+		 */
+		pmonr_dep_idle_to_active(waiter, rmid);
+		nr_activated = 1 + __try_activate_dep_dirty_pmonrs(pkgd);
+	} else {
+		nr_activated = 0;
+	}
+
+	return nr_activated;
+}
+
+/*
+ * Offer each rmid set in pkgd->free_rmids to __try_use_free_rmid(), stopping
+ * as soon as one offer activates nothing.
+ * Returns the number of pmonrs activated.
+ */
+static int __try_use_free_rmids(struct pkg_data *pkgd)
+{
+	int total = 0, rmid;
+
+	for_each_set_bit(rmid, pkgd->free_rmids, CMT_MAX_NR_RMIDS) {
+		/* Removes the rmid from free list if succeeds. */
+		int got = __try_use_free_rmid(pkgd, rmid);
+
+		if (got == 0)
+			break;
+		total += got;
+	}
+
+	return total;
+}
+
+/*
+ * Decide whether @rmid still holds too much occupancy to be reused.
+ * @pkgd:	package the rmid belongs to (currently unused).
+ * @rmid:	rmid to check.
+ * @do_read:	read the occupancy of @rmid. Without a read there is no
+ *		occupancy value to compare, so the rmid is conservatively
+ *		reported as dirty.
+ * @dirty_thld:	highest occupancy at which a rmid is considered clean.
+ * @min_dirty:	in/out, lowered to the occupancy of @rmid when the rmid is
+ *		dirty and its occupancy is below the current value.
+ *
+ * Returns true if the rmid is dirty or its occupancy is unknown (no read
+ * requested, or the read failed), false if it is clean.
+ */
+static bool is_rmid_dirty(struct pkg_data *pkgd, u32 rmid, bool do_read,
+			  unsigned int dirty_thld, unsigned int *min_dirty)
+{
+	u64 val;
+
+	/* Never compare an uninitialized occupancy against the threshold. */
+	if (!do_read)
+		return true;
+	if (WARN_ON_ONCE(cmt_rmid_read(rmid, &val)))
+		return true;
+	if (val <= dirty_thld)
+		return false;
+
+	/* val < *min_dirty <= UINT_MAX here, so the narrowing is lossless. */
+	if (val < *min_dirty)
+		*min_dirty = val;
+
+	return true;
+}
+
+/*
+ * Walk pkgd->dep_dirty_pmonrs and, for every pmonr whose read rmid is no
+ * longer dirty, move the pmonr to Dep_Idle (which frees the rmid) and try to
+ * use that rmid right away.
+ * Returns the number of pmonrs activated.
+ */
+static int try_free_dep_dirty_pmonrs(struct pkg_data *pkgd,
+				     bool do_read,
+				     unsigned int dirty_thld,
+				     unsigned int *min_dirty)
+{
+	struct pmonr *cur, *next;
+	union pmonr_rmids snap;
+	unsigned long irq_flags;
+	int activated = 0;
+	bool dirty;
+
+	/*
+	 * pkgd->dep_dirty_pmonrs is walked without the pkg lock because
+	 * rotation logic is the only user of this list.
+	 */
+	list_for_each_entry_safe(cur, next,
+				 &pkgd->dep_dirty_pmonrs, rot_entry) {
+		snap.value = atomic64_read(&cur->atomic_rmids);
+		dirty = is_rmid_dirty(pkgd, snap.read_rmid,
+				      do_read, dirty_thld, min_dirty);
+		if (!dirty) {
+			raw_spin_lock_irqsave(&pkgd->lock, irq_flags);
+			pmonr_dep_dirty_to_dep_idle_helper(cur, snap);
+			activated += __try_use_free_rmid(pkgd, snap.read_rmid);
+			raw_spin_unlock_irqrestore(&pkgd->lock, irq_flags);
+		}
+	}
+
+	return activated;
+}
+
+/*
+ * Check every rmid in pkgd->dirty_rmids and move the ones that are no longer
+ * dirty to pkgd->free_rmids, trying to use each of them right away.
+ * @rmids_bm is caller-provided scratch space of CMT_MAX_NR_RMIDS_BYTES bytes.
+ * Returns the number of pmonrs activated.
+ */
+static int try_free_dirty_rmids(struct pkg_data *pkgd,
+				bool do_read,
+				unsigned int dirty_thld,
+				unsigned int *min_dirty,
+				unsigned long *rmids_bm)
+{
+	unsigned long irq_flags;
+	int activated = 0, rmid;
+
+	/*
+	 * Reading rmids in hw is slow, so do not hold pkgd->lock for it:
+	 * take the lock once to snapshot the rmids that must be read, then
+	 * read them while unlocked.
+	 */
+	raw_spin_lock_irqsave(&pkgd->lock, irq_flags);
+	memcpy(rmids_bm, pkgd->dirty_rmids, CMT_MAX_NR_RMIDS_BYTES);
+	raw_spin_unlock_irqrestore(&pkgd->lock, irq_flags);
+
+	for_each_set_bit(rmid, rmids_bm, CMT_MAX_NR_RMIDS) {
+		bool dirty = is_rmid_dirty(pkgd, rmid, do_read,
+					   dirty_thld, min_dirty);
+
+		if (dirty)
+			continue;
+
+		raw_spin_lock_irqsave(&pkgd->lock, irq_flags);
+
+		/* dirty -> free, then offer it to a waiting pmonr. */
+		__clear_bit(rmid, pkgd->dirty_rmids);
+		pkgd->nr_dirty_rmids--;
+		__set_bit(rmid, pkgd->free_rmids);
+		activated += __try_use_free_rmid(pkgd, rmid);
+
+		raw_spin_unlock_irqrestore(&pkgd->lock, irq_flags);
+	}
+
+	return activated;
+}
+
+/**
+ * __intel_cmt_rmid_rotate - Rotate rmids among pmonrs and handle dirty rmids.
+ * @pkgd: The package data to rotate rmids on.
+ * @active_goal: Target min nr of pmonrs to put in Active state.
+ * @max_dirty_thld: Upper bound for dirty_thld, in CMT cache units.
+ *
+ * The goals for each iteration of rotation logic are:
+ *   1) to activate @active_goal pmonrs.
+ *
+ * In order to activate Dep_{Dirty,Idle} pmonrs, rotation logic:
+ *   1) activate eligible Dep_Dirty pmonrs: These pmonrs can reuse their former
+ *   rmid, even if it is not clean, without increasing the error.
+ *   2) take clean rmids from Dep_Dirty pmonrs and reuse them for other pmonrs
+ *   or add them to pool of free rmids.
+ *   3) use free rmids to activate Dep_Idle pmonrs.
+ *
+ * Rotation logic also checks the occupancy of dirty rmids and, if now clean,
+ * uses them or adds them to free rmids.
+ * When a Dep_Idle pmonr is activated, any Dep_Dirty pmonr that is immediately
+ * after it in the pkgd->dep_pmonrs list can be activated reusing its dirty
+ * rmid.
+ *
+ * Each pass that misses @active_goal raises the dirty threshold to the
+ * smallest occupancy seen among the rmids that were still dirty, capped at
+ * @max_dirty_thld, and tries again. Rotation stops once the goal is met, no
+ * Dep pmonr is left, or the threshold has reached @max_dirty_thld.
+ *
+ * Must be called with pkgd->mutex held. Allocates with GFP_KERNEL.
+ *
+ * Return: 0 when rotation ran, whether or not @active_goal was reached, or
+ * -ENOMEM if the scratch rmid bitmap could not be allocated.
+ */
+static int __intel_cmt_rmid_rotate(struct pkg_data *pkgd,
+		unsigned int active_goal, unsigned int max_dirty_thld)
+{
+	unsigned int dirty_thld = 0, min_dirty, nr_activated;
+	unsigned int nr_dep_pmonrs;
+	unsigned long flags, *rmids_bm;
+	bool do_active_goal, dirty_is_max;
+	/*
+	 * Occupancy is read again on every pass: is_rmid_dirty() keeps no
+	 * cached values, so skipping the read would leave it nothing valid
+	 * to compare against the new dirty_thld.
+	 */
+	const bool read_dirty = true;
+
+	lockdep_assert_held(&pkgd->mutex);
+
+	rmids_bm = kzalloc(CMT_MAX_NR_RMIDS_BYTES, GFP_KERNEL);
+	if (!rmids_bm)
+		return -ENOMEM;
+
+	nr_activated = try_activate_dep_dirty_pmonrs(pkgd);
+
+again:
+	min_dirty = UINT_MAX;
+
+	/* retry every iteration since dirty_thld may have changed. */
+	nr_activated += try_free_dirty_rmids(pkgd, read_dirty,
+					     dirty_thld, &min_dirty, rmids_bm);
+
+	raw_spin_lock_irqsave(&pkgd->lock, flags);
+	nr_activated += __try_use_free_rmids(pkgd);
+	raw_spin_unlock_irqrestore(&pkgd->lock, flags);
+
+	nr_activated += try_free_dep_dirty_pmonrs(pkgd, read_dirty,
+						  dirty_thld, &min_dirty);
+
+	raw_spin_lock_irqsave(&pkgd->lock, flags);
+	nr_activated += __try_use_free_rmids(pkgd);
+	nr_dep_pmonrs = pkgd->nr_dep_pmonrs;
+	raw_spin_unlock_irqrestore(&pkgd->lock, flags);
+
+	/*
+	 * If there is no room to increase dirty_thld, then no more dirty rmids
+	 * could be reused and must give up active goal.
+	 */
+	dirty_is_max = dirty_thld >= max_dirty_thld;
+	do_active_goal = nr_activated < active_goal && !dirty_is_max;
+
+	/*
+	 * Since Dep_Dirty pmonrs have their own dirty rmid, only Dep_Idle
+	 * pmonrs are waiting for a rmid to be available. Keep going only if
+	 * some pmonr still waits for a rmid and the goal is still pursued.
+	 *
+	 * Try to activate more pmonrs by increasing the dirty threshold.
+	 * Using the minimum observed occupancy in dirty rmids guarantees to
+	 * recover at least one rmid per iteration.
+	 */
+	if (nr_dep_pmonrs && do_active_goal) {
+		dirty_thld = min(min_dirty, max_dirty_thld);
+		goto again;
+	}
+
+	kfree(rmids_bm);
+
+	return 0;
+}
+
static struct pmu intel_cmt_pmu;

/* Schedule rotation in one package. */
@@ -1360,10 +1619,20 @@ static bool intel_cmt_need_rmid_rotation(struct pkg_data *pkgd)

/*
* Rotation function, runs per-package.
+ * If rmids are needed in a package it will steal rmids from pmonrs that have
+ * been active longer than __cmt_pre_mon_slice + __cmt_min_mon_slice.
+ * The hardware doesn't provide a way to free occupancy for a rmid that will
+ * be reused. Therefore, before reusing a rmid, it should stay unscheduled for
+ * a while, hoping that the cache lines counted towards this rmid will
+ * eventually be replaced and the rmid occupancy will decrease below
+ * __cmt_max_threshold.
*/
static void intel_cmt_rmid_rotation_work(struct work_struct *work)
{
struct pkg_data *pkgd;
+ /* not precise elapsed time, but good enough for rotation purposes. */
+ unsigned int elapsed_ms = intel_cmt_pmu.hrtimer_interval_ms;
+ unsigned int active_goal, max_dirty_threshold;

pkgd = container_of(to_delayed_work(work),
struct pkg_data, rotation_work);
@@ -1377,7 +1646,10 @@ static void intel_cmt_rmid_rotation_work(struct work_struct *work)
if (!intel_cmt_need_rmid_rotation(pkgd))
goto exit;

- /* To add call to rotation function in next patch */
+ active_goal = max(1u, (elapsed_ms * __cmt_min_progress_rate) / 1000);
+ max_dirty_threshold = READ_ONCE(__cmt_max_threshold) / cmt_l3_scale;
+
+ __intel_cmt_rmid_rotate(pkgd, active_goal, max_dirty_threshold);

if (intel_cmt_need_rmid_rotation(pkgd))
__intel_cmt_schedule_rotation_for_pkg(pkgd);
--
2.8.0.rc3.226.g39d4020