[PATCH 09/32] perf/x86/intel/cqm: add per-package RMIDs, data and locks

From: David Carrillo-Cisneros
Date: Fri Apr 29 2016 - 00:46:37 EST


First part of the new CQM logic. This patch introduces struct pkg_data,
which contains all per-package CQM data required by the new RMID hierarchy.

The raw RMID value is encapsulated in a Package RMID (prmid) structure
that provides atomic updates and caches recent reads. This caching
throttles the frequency at which (slow) hardware reads are performed and
ameliorates the impact of worst-case scenarios while traversing the
hierarchy of RMIDs (the hierarchy and its operations are introduced in
later patches in this series).
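
For illustration, the throttled-read pattern implemented by
__cqm_prmid_update() below boils down to the following (simplified; the
zero-delta shortcut and error handling are omitted, and min_delta stands
for the function's jiffies_min_delta argument):

  now = jiffies;
  if (time_after(atomic64_read(&prmid->last_read_time) + min_delta, now))
          return 0;  /* cached last_read_value is recent enough */
  wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, prmid->rmid);
  rdmsrl(MSR_IA32_QM_CTR, val);
  atomic64_set(&prmid->last_read_value, val);
  smp_wmb();  /* publish the new value before the new timestamp */
  atomic64_set(&prmid->last_read_time, now);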

There is a set of prmids per physical package (socket) in the system. Each
package may have a different number of prmids (different hw max_rmid_index).
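
Because prmids are per package, a raw rmid is resolved against the current
CPU's package, roughly as __prmid_from_rmid() does in this patch:

  u16 pkg_id = topology_physical_package_id(smp_processor_id());
  struct prmid *prmid = cqm_pkgs_data[pkg_id]->prmids_by_rmid[rmid];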

Each package maintains its own pool of prmids (only a free pool as of this
patch; more pools are added in later patches in this series). Each package
also has its own mutex and spinlock to protect the RMID pools and the
rotation logic. This per-package separation reduces contention on each lock
and mutex compared with the previous version, which used a system-wide
mutex and spinlock.
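
Operations that affect the whole monr hierarchy therefore take the mutex of
every online package in pkg_id order, as the monr_hrchy_acquire_mutexes()
and monr_hrchy_release_mutexes() helpers added to cqm.h do:

  cqm_pkg_id_for_each_online(i)
          mutex_lock_nested(&cqm_pkgs_data[i]->pkg_data_mutex, i);
  /* ... modify the monr hierarchy ... */
  cqm_pkg_id_for_each_online(i)
          mutex_unlock(&cqm_pkgs_data[i]->pkg_data_mutex);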

Reviewed-by: Stephane Eranian <eranian@xxxxxxxxxx>
Signed-off-by: David Carrillo-Cisneros <davidcc@xxxxxxxxxx>
---
arch/x86/events/intel/cqm.c | 426 +++++++++++++++++++++-----------------
arch/x86/events/intel/cqm.h | 154 ++++++++++++++
arch/x86/include/asm/pqr_common.h | 2 +
3 files changed, 392 insertions(+), 190 deletions(-)

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index f678014..541e515 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -12,7 +12,6 @@
#define MSR_IA32_QM_CTR 0x0c8e
#define MSR_IA32_QM_EVTSEL 0x0c8d

-static u32 cqm_max_rmid = -1;
static unsigned int cqm_l3_scale; /* supposedly cacheline size */

#define RMID_VAL_ERROR (1ULL << 63)
@@ -30,39 +29,13 @@ static struct perf_pmu_events_attr event_attr_##v = { \
}

/*
- * Updates caller cpu's cache.
- */
-static inline void __update_pqr_rmid(u32 rmid)
-{
- struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
- if (state->rmid == rmid)
- return;
- state->rmid = rmid;
- wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
-}
-
-/*
* Groups of events that have the same target(s), one RMID per group.
* Protected by cqm_mutex.
*/
static LIST_HEAD(cache_groups);
static DEFINE_MUTEX(cqm_mutex);
-static DEFINE_RAW_SPINLOCK(cache_lock);

-/*
- * Mask of CPUs for reading CQM values. We only need one per-socket.
- */
-static cpumask_t cqm_cpumask;
-
-
-/*
- * This is central to the rotation algorithm in __intel_cqm_rmid_rotate().
- *
- * This rmid is always free and is guaranteed to have an associated
- * near-zero occupancy value, i.e. no cachelines are tagged with this
- * RMID, once __intel_cqm_rmid_rotate() returns.
- */
-static u32 intel_cqm_rotation_rmid;
+struct pkg_data *cqm_pkgs_data[PQR_MAX_NR_PKGS];

/*
* Is @rmid valid for programming the hardware?
@@ -82,162 +55,220 @@ static inline bool __rmid_valid(u32 rmid)

static u64 __rmid_read(u32 rmid)
{
+ /* XXX: Placeholder, will be removed in next patch. */
+ return 0;
+}
+
+/*
+ * Update if enough time has passed since the last read.
+ *
+ * Must be called on a CPU in the package that the prmid belongs to.
+ * This function is safe to call concurrently since it guarantees that
+ * prmid->last_read_value is updated to an occupancy value obtained
+ * after the time stored in prmid->last_read_time.
+ * Return 1 if the value was updated, 0 if not, a negative number on error.
+ */
+static inline int __cqm_prmid_update(struct prmid *prmid,
+ unsigned long jiffies_min_delta)
+{
+ unsigned long now = jiffies;
+ unsigned long last_read_time;
u64 val;

/*
- * Ignore the SDM, this thing is _NOTHING_ like a regular perfcnt,
- * it just says that to increase confusion.
+ * Shortcut the calculation of the elapsed time for the
+ * case jiffies_min_delta == 0.
*/
- wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid);
+ if (jiffies_min_delta > 0) {
+ last_read_time = atomic64_read(&prmid->last_read_time);
+ if (time_after(last_read_time + jiffies_min_delta, now))
+ return 0;
+ }
+
+ wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, prmid->rmid);
rdmsrl(MSR_IA32_QM_CTR, val);

/*
- * Aside from the ERROR and UNAVAIL bits, assume this thing returns
- * the number of cachelines tagged with @rmid.
+ * Ignore this reading on error states and do not update the value.
*/
- return val;
-}
+ WARN_ON_ONCE(val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL));
+ if (val & RMID_VAL_ERROR)
+ return -EINVAL;
+ if (val & RMID_VAL_UNAVAIL)
+ return -ENODATA;

-enum rmid_recycle_state {
- RMID_YOUNG = 0,
- RMID_AVAILABLE,
- RMID_DIRTY,
-};
+ atomic64_set(&prmid->last_read_value, val);
+ /*
+ * Protect last_read_time from being updated before last_read_value is.
+ * This way readers always receive an updated value, even if values
+ * are sometimes updated twice.
+ */
+ smp_wmb();

-struct cqm_rmid_entry {
- u32 rmid;
- enum rmid_recycle_state state;
- struct list_head list;
- unsigned long queue_time;
-};
+ atomic64_set(&prmid->last_read_time, now);

-/*
- * cqm_rmid_free_lru - A least recently used list of RMIDs.
- *
- * Oldest entry at the head, newest (most recently used) entry at the
- * tail. This list is never traversed, it's only used to keep track of
- * the lru order. That is, we only pick entries of the head or insert
- * them on the tail.
- *
- * All entries on the list are 'free', and their RMIDs are not currently
- * in use. To mark an RMID as in use, remove its entry from the lru
- * list.
- *
- *
- * cqm_rmid_limbo_lru - list of currently unused but (potentially) dirty RMIDs.
- *
- * This list is contains RMIDs that no one is currently using but that
- * may have a non-zero occupancy value associated with them. The
- * rotation worker moves RMIDs from the limbo list to the free list once
- * the occupancy value drops below __intel_cqm_threshold.
- *
- * Both lists are protected by cqm_mutex.
- */
-static LIST_HEAD(cqm_rmid_free_lru);
-static LIST_HEAD(cqm_rmid_limbo_lru);
+ return 1;
+}
+
+static inline int cqm_prmid_update(struct prmid *prmid)
+{
+ return __cqm_prmid_update(prmid, __rmid_min_update_time);
+}

/*
- * We use a simple array of pointers so that we can lookup a struct
- * cqm_rmid_entry in O(1). This alleviates the callers of __get_rmid()
- * and __put_rmid() from having to worry about dealing with struct
- * cqm_rmid_entry - they just deal with rmids, i.e. integers.
- *
- * Once this array is initialized it is read-only. No locks are required
- * to access it.
- *
- * All entries for all RMIDs can be looked up in the this array at all
- * times.
+ * Updates caller cpu's cache.
*/
-static struct cqm_rmid_entry **cqm_rmid_ptrs;
-
-static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid)
+static inline void __update_pqr_prmid(struct prmid *prmid)
{
- struct cqm_rmid_entry *entry;
+ struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);

- entry = cqm_rmid_ptrs[rmid];
- WARN_ON(entry->rmid != rmid);
+ if (state->rmid == prmid->rmid)
+ return;
+ state->rmid = prmid->rmid;
+ wrmsr(MSR_IA32_PQR_ASSOC, prmid->rmid, state->closid);
+}

- return entry;
+static inline bool __valid_pkg_id(u16 pkg_id)
+{
+ return pkg_id < PQR_MAX_NR_PKGS;
}

/*
* Returns < 0 on fail.
*
- * We expect to be called with cqm_mutex held.
+ * We expect to be called with cache_mutex held.
*/
static u32 __get_rmid(void)
{
- struct cqm_rmid_entry *entry;
+ /* XXX: Placeholder, will be removed in next patch. */
+ return 0;
+}
+
+static void __put_rmid(u32 rmid)
+{
+ /* XXX: Placeholder, will be removed in next patch. */
+}
+
+/* Init CQM pkg_data for @cpu's package. */
+static int pkg_data_init_cpu(int cpu)
+{
+ struct pkg_data *pkg_data;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+ u16 pkg_id = topology_physical_package_id(cpu);
+
+ if (cqm_pkgs_data[pkg_id])
+ return 0;

- lockdep_assert_held(&cqm_mutex);

- if (list_empty(&cqm_rmid_free_lru))
- return INVALID_RMID;
+ pkg_data = kmalloc_node(sizeof(struct pkg_data),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!pkg_data)
+ return -ENOMEM;
+
+ pkg_data->max_rmid = c->x86_cache_max_rmid;

- entry = list_first_entry(&cqm_rmid_free_lru, struct cqm_rmid_entry, list);
- list_del(&entry->list);
+ /* Does the hardware have more rmids than this driver can handle? */
+ if (WARN_ON(pkg_data->max_rmid >= INVALID_RMID))
+ pkg_data->max_rmid = INVALID_RMID - 1;

- return entry->rmid;
+ if (c->x86_cache_occ_scale != cqm_l3_scale) {
+ pr_err("Multiple LLC scale values, disabling\n");
+ kfree(pkg_data);
+ return -EINVAL;
+ }
+
+ pkg_data->prmids_by_rmid = kmalloc_node(
+ sizeof(struct prmid *) * (1 + pkg_data->max_rmid),
+ GFP_KERNEL, cpu_to_node(cpu));
+
+ if (!pkg_data->prmids_by_rmid) {
+ kfree(pkg_data);
+ return -ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&pkg_data->free_prmids_pool);
+
+ mutex_init(&pkg_data->pkg_data_mutex);
+ raw_spin_lock_init(&pkg_data->pkg_data_lock);
+
+ /* XXX: Chosen arbitrarily. */
+ pkg_data->rotation_cpu = cpu;
+
+ cqm_pkgs_data[pkg_id] = pkg_data;
+ return 0;
}

-static void __put_rmid(u32 rmid)
+static inline bool __valid_rmid(u16 pkg_id, u32 rmid)
{
- struct cqm_rmid_entry *entry;
+ return rmid <= cqm_pkgs_data[pkg_id]->max_rmid;
+}

- lockdep_assert_held(&cqm_mutex);
+static inline bool __valid_prmid(u16 pkg_id, struct prmid *prmid)
+{
+ struct pkg_data *pkg_data = cqm_pkgs_data[pkg_id];
+ bool valid = __valid_rmid(pkg_id, prmid->rmid);

- WARN_ON(!__rmid_valid(rmid));
- entry = __rmid_entry(rmid);
+ WARN_ON_ONCE(valid && pkg_data->prmids_by_rmid[
+ prmid->rmid]->rmid != prmid->rmid);
+ return valid;
+}

- entry->queue_time = jiffies;
- entry->state = RMID_YOUNG;
+static inline struct prmid *
+__prmid_from_rmid(u16 pkg_id, u32 rmid)
+{
+ struct prmid *prmid;

- list_add_tail(&entry->list, &cqm_rmid_limbo_lru);
+ if (!__valid_rmid(pkg_id, rmid))
+ return NULL;
+ prmid = cqm_pkgs_data[pkg_id]->prmids_by_rmid[rmid];
+ WARN_ON_ONCE(!__valid_prmid(pkg_id, prmid));
+ return prmid;
}

-static int intel_cqm_setup_rmid_cache(void)
+static int intel_cqm_setup_pkg_prmid_pools(u16 pkg_id)
{
- struct cqm_rmid_entry *entry;
- unsigned int nr_rmids;
- int r = 0;
-
- nr_rmids = cqm_max_rmid + 1;
- cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry *) *
- nr_rmids, GFP_KERNEL);
- if (!cqm_rmid_ptrs)
- return -ENOMEM;
+ int r;
+ unsigned long flags;
+ struct prmid *prmid;
+ struct pkg_data *pkg_data = cqm_pkgs_data[pkg_id];
+
+ if (!__valid_pkg_id(pkg_id))
+ return -EINVAL;

- for (; r <= cqm_max_rmid; r++) {
- struct cqm_rmid_entry *entry;
+ for (r = 0; r <= pkg_data->max_rmid; r++) {

- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
+ prmid = kmalloc_node(sizeof(struct prmid), GFP_KERNEL,
+ cpu_to_node(pkg_data->rotation_cpu));
+ if (!prmid)
goto fail;

- INIT_LIST_HEAD(&entry->list);
- entry->rmid = r;
- cqm_rmid_ptrs[r] = entry;
+ atomic64_set(&prmid->last_read_value, 0L);
+ atomic64_set(&prmid->last_read_time, 0L);
+ INIT_LIST_HEAD(&prmid->pool_entry);
+ prmid->rmid = r;

- list_add_tail(&entry->list, &cqm_rmid_free_lru);
- }
+ /* Lock needed if called during CPU hotplug. */
+ raw_spin_lock_irqsave_nested(
+ &pkg_data->pkg_data_lock, flags, pkg_id);
+ pkg_data->prmids_by_rmid[r] = prmid;

- /*
- * RMID 0 is special and is always allocated. It's used for all
- * tasks that are not monitored.
- */
- entry = __rmid_entry(0);
- list_del(&entry->list);

- mutex_lock(&cqm_mutex);
- intel_cqm_rotation_rmid = __get_rmid();
- mutex_unlock(&cqm_mutex);
+ /* RMID 0 is special and forms the root of the rmid hierarchy. */
+ if (r != 0)
+ list_add_tail(&prmid->pool_entry,
+ &pkg_data->free_prmids_pool);

+ raw_spin_unlock_irqrestore(&pkg_data->pkg_data_lock, flags);
+ }
return 0;
fail:
- while (r--)
- kfree(cqm_rmid_ptrs[r]);
-
- kfree(cqm_rmid_ptrs);
+ while (!list_empty(&pkg_data->free_prmids_pool)) {
+ prmid = list_first_entry(&pkg_data->free_prmids_pool,
+ struct prmid, pool_entry);
+ list_del(&prmid->pool_entry);
+ kfree(pkg_data->prmids_by_rmid[prmid->rmid]);
+ kfree(prmid);
+ }
return -ENOMEM;
}

@@ -322,8 +353,9 @@ static void intel_cqm_event_read(struct perf_event *event)
unsigned long flags;
u32 rmid;
u64 val;
+ u16 pkg_id = topology_physical_package_id(smp_processor_id());

- raw_spin_lock_irqsave(&cache_lock, flags);
+ raw_spin_lock_irqsave(&cqm_pkgs_data[pkg_id]->pkg_data_lock, flags);
rmid = event->hw.cqm_rmid;

if (!__rmid_valid(rmid))
@@ -339,7 +371,8 @@ static void intel_cqm_event_read(struct perf_event *event)

local64_set(&event->count, val);
out:
- raw_spin_unlock_irqrestore(&cache_lock, flags);
+ raw_spin_unlock_irqrestore(
+ &cqm_pkgs_data[pkg_id]->pkg_data_lock, flags);
}

static inline bool cqm_group_leader(struct perf_event *event)
@@ -349,29 +382,32 @@ static inline bool cqm_group_leader(struct perf_event *event)

static void intel_cqm_event_start(struct perf_event *event, int mode)
{
+ u16 pkg_id = topology_physical_package_id(smp_processor_id());
if (!(event->hw.state & PERF_HES_STOPPED))
return;

event->hw.state &= ~PERF_HES_STOPPED;
- __update_pqr_rmid(event->hw.cqm_rmid);
+ __update_pqr_prmid(__prmid_from_rmid(pkg_id, event->hw.cqm_rmid));
}

static void intel_cqm_event_stop(struct perf_event *event, int mode)
{
+ u16 pkg_id = topology_physical_package_id(smp_processor_id());
if (event->hw.state & PERF_HES_STOPPED)
return;

event->hw.state |= PERF_HES_STOPPED;
intel_cqm_event_read(event);
- __update_pqr_rmid(0);
+ __update_pqr_prmid(__prmid_from_rmid(pkg_id, 0));
}

static int intel_cqm_event_add(struct perf_event *event, int mode)
{
unsigned long flags;
u32 rmid;
+ u16 pkg_id = topology_physical_package_id(smp_processor_id());

- raw_spin_lock_irqsave(&cache_lock, flags);
+ raw_spin_lock_irqsave(&cqm_pkgs_data[pkg_id]->pkg_data_lock, flags);

event->hw.state = PERF_HES_STOPPED;
rmid = event->hw.cqm_rmid;
@@ -379,7 +415,8 @@ static int intel_cqm_event_add(struct perf_event *event, int mode)
if (__rmid_valid(rmid) && (mode & PERF_EF_START))
intel_cqm_event_start(event, mode);

- raw_spin_unlock_irqrestore(&cache_lock, flags);
+ raw_spin_unlock_irqrestore(
+ &cqm_pkgs_data[pkg_id]->pkg_data_lock, flags);
return 0;
}

@@ -503,9 +540,10 @@ max_recycle_threshold_show(
{
ssize_t rv;

- mutex_lock(&cqm_mutex);
- rv = snprintf(page, PAGE_SIZE-1, "%u\n", __intel_cqm_max_threshold);
- mutex_unlock(&cqm_mutex);
+ monr_hrchy_acquire_mutexes();
+ rv = snprintf(page, PAGE_SIZE - 1, "%u\n",
+ __intel_cqm_max_threshold);
+ monr_hrchy_release_mutexes();

return rv;
}
@@ -522,9 +560,12 @@ max_recycle_threshold_store(struct device *dev,
if (ret)
return ret;

- mutex_lock(&cqm_mutex);
+ /* Mutex waits for rotation logic in all packages to complete. */
+ monr_hrchy_acquire_mutexes();
+
__intel_cqm_max_threshold = bytes;
- mutex_unlock(&cqm_mutex);
+
+ monr_hrchy_release_mutexes();

return count;
}
@@ -561,49 +602,42 @@ static struct pmu intel_cqm_pmu = {

static inline void cqm_pick_event_reader(int cpu)
{
- int phys_id = topology_physical_package_id(cpu);
- int i;
-
- for_each_cpu(i, &cqm_cpumask) {
- if (phys_id == topology_physical_package_id(i))
- return; /* already got reader for this socket */
- }
-
- cpumask_set_cpu(cpu, &cqm_cpumask);
+ u16 pkg_id = topology_physical_package_id(cpu);
+ /* XXX: lock, check if rotation cpu is online, maybe */
+ /*
+ * Pick a reader if there isn't one already.
+ */
+ if (cqm_pkgs_data[pkg_id]->rotation_cpu != -1)
+ cqm_pkgs_data[pkg_id]->rotation_cpu = cpu;
}

static void intel_cqm_cpu_starting(unsigned int cpu)
{
struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
struct cpuinfo_x86 *c = &cpu_data(cpu);
+ u16 pkg_id = topology_physical_package_id(cpu);

state->rmid = 0;
state->closid = 0;

- WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
+ /* XXX: lock */
+ /* XXX: Make sure this case is handled when hotplug happens. */
+ WARN_ON(c->x86_cache_max_rmid != cqm_pkgs_data[pkg_id]->max_rmid);
WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
}

static void intel_cqm_cpu_exit(unsigned int cpu)
{
- int phys_id = topology_physical_package_id(cpu);
- int i;
-
/*
* Is @cpu a designated cqm reader?
*/
- if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask))
- return;
-
- for_each_online_cpu(i) {
- if (i == cpu)
- continue;
+ u16 pkg_id = topology_physical_package_id(cpu);

- if (phys_id == topology_physical_package_id(i)) {
- cpumask_set_cpu(i, &cqm_cpumask);
- break;
- }
- }
+ if (cqm_pkgs_data[pkg_id]->rotation_cpu != cpu)
+ return;
+ /* XXX: do remove unused packages */
+ cqm_pkgs_data[pkg_id]->rotation_cpu = cpumask_any_but(
+ topology_core_cpumask(cpu), cpu);
}

static int intel_cqm_cpu_notifier(struct notifier_block *nb,
@@ -616,6 +650,7 @@ static int intel_cqm_cpu_notifier(struct notifier_block *nb,
intel_cqm_cpu_exit(cpu);
break;
case CPU_STARTING:
+ pkg_data_init_cpu(cpu);
intel_cqm_cpu_starting(cpu);
cqm_pick_event_reader(cpu);
break;
@@ -632,12 +667,17 @@ static const struct x86_cpu_id intel_cqm_match[] = {
static int __init intel_cqm_init(void)
{
char *str, scale[20];
- int i, cpu, ret;
+ int i, cpu, ret = 0, min_max_rmid = 0;

if (!x86_match_cpu(intel_cqm_match))
return -ENODEV;

cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale;
+ if (WARN_ON(cqm_l3_scale == 0))
+ cqm_l3_scale = 1;
+
+ for (i = 0; i < PQR_MAX_NR_PKGS; i++)
+ cqm_pkgs_data[i] = NULL;

/*
* It's possible that not all resources support the same number
@@ -650,17 +690,20 @@ static int __init intel_cqm_init(void)
*/
cpu_notifier_register_begin();

+ /* XXX: assert all cpus in pkg have same nr rmids (they should). */
for_each_online_cpu(cpu) {
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-
- if (c->x86_cache_max_rmid < cqm_max_rmid)
- cqm_max_rmid = c->x86_cache_max_rmid;
+ ret = pkg_data_init_cpu(cpu);
+ if (ret)
+ goto error;
+ }

- if (c->x86_cache_occ_scale != cqm_l3_scale) {
- pr_err("Multiple LLC scale values, disabling\n");
- ret = -EINVAL;
- goto out;
- }
+ /* Use the minimum of the per-package max rmids as the limit for
+ * the threshold. XXX: per-package threshold.
+ */
+ cqm_pkg_id_for_each_online(i) {
+ if (!min_max_rmid || min_max_rmid > cqm_pkgs_data[i]->max_rmid)
+ min_max_rmid = cqm_pkgs_data[i]->max_rmid;
+ intel_cqm_setup_pkg_prmid_pools(i);
}

/*
@@ -671,21 +714,17 @@ static int __init intel_cqm_init(void)
* For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
*/
__intel_cqm_max_threshold =
- boot_cpu_data.x86_cache_size * 1024 / (cqm_max_rmid + 1);
+ boot_cpu_data.x86_cache_size * 1024 / (min_max_rmid + 1);

snprintf(scale, sizeof(scale), "%u", cqm_l3_scale);
str = kstrdup(scale, GFP_KERNEL);
if (!str) {
ret = -ENOMEM;
- goto out;
+ goto error;
}

event_attr_intel_cqm_llc_scale.event_str = str;

- ret = intel_cqm_setup_rmid_cache();
- if (ret)
- goto out;
-
for_each_online_cpu(i) {
intel_cqm_cpu_starting(i);
cqm_pick_event_reader(i);
@@ -695,13 +734,20 @@ static int __init intel_cqm_init(void)

ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
if (ret)
- pr_err("Intel CQM perf registration failed: %d\n", ret);
- else
- pr_info("Intel CQM monitoring enabled\n");
+ goto error;

-out:
+ cpu_notifier_register_done();
+
+ pr_info("Intel CQM monitoring enabled with at least %u rmids per package.\n",
+ min_max_rmid + 1);
+
+ return ret;
+
+error:
+ pr_err("Intel CQM perf registration failed: %d\n", ret);
cpu_notifier_register_done();

return ret;
}
+
device_initcall(intel_cqm_init);
diff --git a/arch/x86/events/intel/cqm.h b/arch/x86/events/intel/cqm.h
index e25d0a1..a25d49b 100644
--- a/arch/x86/events/intel/cqm.h
+++ b/arch/x86/events/intel/cqm.h
@@ -19,14 +19,168 @@
#include <asm/pqr_common.h>

/*
+ * struct prmid: Package RMID. Per-package wrapper for a rmid.
+ * @last_read_value: Last value read.
+ * @last_read_time: Time of the last read, used to throttle the read rate.
+ * @pool_entry: Attaches to a prmid pool in cqm_pkg_data.
+ * @rmid: The rmid value to be programmed in hardware.
+ *
+ * Its accessors ensure that CQM events for this rmid are read atomically and
+ * allow the read frequency to be throttled to at most one read every
+ * __rmid_min_update_time ms.
+ */
+struct prmid {
+ atomic64_t last_read_value;
+ atomic64_t last_read_time;
+ struct list_head pool_entry;
+ u32 rmid;
+};
+
+/*
* Minimum time elapsed between reads of occupancy value for an RMID when
* traversing the monr hierarchy.
*/
#define RMID_DEFAULT_MIN_UPDATE_TIME 20 /* ms */
+static unsigned int __rmid_min_update_time = RMID_DEFAULT_MIN_UPDATE_TIME;
+
+static inline int cqm_prmid_update(struct prmid *prmid);

# define INVALID_RMID (-1)

/*
+ * struct pkg_data: Per-package CQM data.
+ * @max_rmid: Max rmid valid for cpus in this package.
+ * @prmids_by_rmid: Utility mapping between rmid values and prmids.
+ * XXX: Make it an array of prmids.
+ * @free_prmids_pool: Free prmids.
+ * @pkg_data_mutex: Hold for stability when modifying the pmonr
+ * hierarchy.
+ * @pkg_data_lock: Hold to protect variables that may be accessed
+ * during process scheduling. The locks for all
+ * packages must be held when modifying the monr
+ * hierarchy.
+ * @rotation_cpu: CPU to run @rotation_work on; it must be in the
+ * package associated with this instance of pkg_data.
+ */
+struct pkg_data {
+ u32 max_rmid;
+ /* Quick map from rmids to prmids. */
+ struct prmid **prmids_by_rmid;
+
+ /*
+ * Pools of prmids used in rotation logic.
+ */
+ struct list_head free_prmids_pool;
+
+ struct mutex pkg_data_mutex;
+ raw_spinlock_t pkg_data_lock;
+
+ int rotation_cpu;
+};
+
+extern struct pkg_data *cqm_pkgs_data[PQR_MAX_NR_PKGS];
+
+static inline u16 __cqm_pkgs_data_next_online(u16 pkg_id)
+{
+ while (++pkg_id < PQR_MAX_NR_PKGS && !cqm_pkgs_data[pkg_id])
+ ;
+ return pkg_id;
+}
+
+static inline u16 __cqm_pkgs_data_first_online(void)
+{
+ if (cqm_pkgs_data[0])
+ return 0;
+ return __cqm_pkgs_data_next_online(0);
+}
+
+/* Iterate over each online package's data. */
+#define cqm_pkg_id_for_each_online(pkg_id__) \
+ for (pkg_id__ = __cqm_pkgs_data_first_online(); \
+ pkg_id__ < PQR_MAX_NR_PKGS; \
+ pkg_id__ = __cqm_pkgs_data_next_online(pkg_id__))
+
+#define __pkg_data(pmonr, member) cqm_pkgs_data[pmonr->pkg_id]->member
+
+/*
+ * Utility functions and macros to manage per-package locks.
+ * Use macros to keep flags on the caller's stack.
+ * Holding the locks of all packages is required to alter the monr hierarchy.
+ */
+static inline void monr_hrchy_acquire_mutexes(void)
+{
+ int i;
+
+ cqm_pkg_id_for_each_online(i)
+ mutex_lock_nested(&cqm_pkgs_data[i]->pkg_data_mutex, i);
+}
+
+# define monr_hrchy_acquire_raw_spin_locks_irq_save(flags, i) \
+ do { \
+ raw_local_irq_save(flags); \
+ cqm_pkg_id_for_each_online(i) {\
+ raw_spin_lock_nested( \
+ &cqm_pkgs_data[i]->pkg_data_lock, i); \
+ } \
+ } while (0)
+
+#define monr_hrchy_acquire_locks(flags, i) \
+ do {\
+ monr_hrchy_acquire_mutexes(); \
+ monr_hrchy_acquire_raw_spin_locks_irq_save(flags, i); \
+ } while (0)
+
+static inline void monr_hrchy_release_mutexes(void)
+{
+ int i;
+
+ cqm_pkg_id_for_each_online(i)
+ mutex_unlock(&cqm_pkgs_data[i]->pkg_data_mutex);
+}
+
+# define monr_hrchy_release_raw_spin_locks_irq_restore(flags, i) \
+ do { \
+ cqm_pkg_id_for_each_online(i) {\
+ raw_spin_unlock(&cqm_pkgs_data[i]->pkg_data_lock); \
+ } \
+ raw_local_irq_restore(flags); \
+ } while (0)
+
+#define monr_hrchy_release_locks(flags, i) \
+ do {\
+ monr_hrchy_release_raw_spin_locks_irq_restore(flags, i); \
+ monr_hrchy_release_mutexes(); \
+ } while (0)
+
+static inline void monr_hrchy_assert_held_mutexes(void)
+{
+ int i;
+
+ cqm_pkg_id_for_each_online(i)
+ lockdep_assert_held(&cqm_pkgs_data[i]->pkg_data_mutex);
+}
+
+static inline void monr_hrchy_assert_held_raw_spin_locks(void)
+{
+ int i;
+
+ cqm_pkg_id_for_each_online(i)
+ lockdep_assert_held(&cqm_pkgs_data[i]->pkg_data_lock);
+}
+#ifdef CONFIG_LOCKDEP
+static inline int monr_hrchy_count_held_raw_spin_locks(void)
+{
+ int i, nr_held = 0;
+
+ cqm_pkg_id_for_each_online(i) {
+ if (lockdep_is_held(&cqm_pkgs_data[i]->pkg_data_lock))
+ nr_held++;
+ }
+ return nr_held;
+}
+#endif
+
+/*
* Time between execution of rotation logic. The frequency of execution does
* not affect the rate at which RMIDs are recycled, except for the delay in
* updating the prmids and their pools.
diff --git a/arch/x86/include/asm/pqr_common.h b/arch/x86/include/asm/pqr_common.h
index 0c2001b..f770637 100644
--- a/arch/x86/include/asm/pqr_common.h
+++ b/arch/x86/include/asm/pqr_common.h
@@ -27,5 +27,7 @@ struct intel_pqr_state {

DECLARE_PER_CPU(struct intel_pqr_state, pqr_state);

+#define PQR_MAX_NR_PKGS 8
+
#endif
#endif
--
2.8.0.rc3.226.g39d4020