On Tue, Mar 01, 2016 at 03:48:26PM -0800, Vikas Shivappa wrote:
A lot of the scheduling code was taken out from Tony's patch and a 3-4
line change was added in intel_cqm_event_read. This change was made
since the timer is no longer added on every context switch.
Is this here to confuse people, or is there some actual information in
it?
Will fix
+/*
+ * MBM Counter is 24bits wide. MBM_CNTR_MAX defines max counter
+ * value
+ */
+#define MBM_CNTR_MAX 0xffffff
Would not something like:

#define MBM_CNTR_WIDTH	24
#define MBM_CNTR_MAX	((1U << MBM_CNTR_WIDTH) - 1)

be nicer?
#define QOS_L3_OCCUP_EVENT_ID (1 << 0)
+/*
+ * MBM Event IDs as defined in SDM section 17.15.5
+ * Event IDs are used to program EVTSEL MSRs before reading mbm event counters
+ */
+enum mbm_evt_type {
+ QOS_MBM_TOTAL_EVENT_ID = 0x02,
+ QOS_MBM_LOCAL_EVENT_ID,
+ QOS_MBM_TOTAL_BW_EVENT_ID,
+ QOS_MBM_LOCAL_BW_EVENT_ID,
+};
QOS_L3_*_EVENT_ID is a define, these are an enum. Rather inconsistent.
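Just as a sketch of what consistent would look like (the enum name here
is made up, and the explicit values simply spell out what the patch's
auto-increment already produces):

enum intel_qos_evt_type {
	QOS_L3_OCCUP_EVENT_ID		= 0x01,
	QOS_MBM_TOTAL_EVENT_ID		= 0x02,
	QOS_MBM_LOCAL_EVENT_ID		= 0x03,
	QOS_MBM_TOTAL_BW_EVENT_ID	= 0x04,
	QOS_MBM_LOCAL_BW_EVENT_ID	= 0x05,
};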
struct rmid_read {
u32 rmid;
Hole, you could've filled with the enum (which ends up being an int I
think).
atomic64_t value;
+ enum mbm_evt_type evt_type;
};
+static bool is_mbm_event(int e)
You had an enum type, you might as well use it.
+{
+ return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_BW_EVENT_ID);
+}
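IOW, something like the below (a sketch; the member order is my
assumption, but the enum is int-sized so it slots into the 4-byte hole
before the atomic64_t):

struct rmid_read {
	u32			rmid;
	enum mbm_evt_type	evt_type;	/* fills the hole before value */
	atomic64_t		value;
};

static bool is_mbm_event(enum mbm_evt_type e)
{
	return e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_BW_EVENT_ID;
}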
+static struct sample *update_sample(unsigned int rmid,
+ enum mbm_evt_type evt_type, int first)
+{
+ ktime_t cur_time;
+ struct sample *mbm_current;
+ u32 vrmid = rmid_2_index(rmid);
+ u64 val, bytes, diff_time;
+ u32 eventid;
+
+ if (evt_type & QOS_MBM_LOCAL_EVENT_MASK) {
+ mbm_current = &mbm_local[vrmid];
+ eventid = QOS_MBM_LOCAL_EVENT_ID;
+ } else {
+ mbm_current = &mbm_total[vrmid];
+ eventid = QOS_MBM_TOTAL_EVENT_ID;
+ }
+
+ cur_time = ktime_get();
+ wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
+ rdmsrl(MSR_IA32_QM_CTR, val);
+ if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+ return mbm_current;
+ val &= MBM_CNTR_MAX;
+ if (val < mbm_current->prev_msr)
+ bytes = MBM_CNTR_MAX - mbm_current->prev_msr + val + 1;
+ else
+ bytes = val - mbm_current->prev_msr;
Would not something like:
shift = 64 - MBM_CNTR_WIDTH;
bytes = (val << shift) - (prev << shift);
bytes >>= shift;
be less obtuse? (and consistent with how every other perf update
function does it).
What guarantee is there we didn't wrap multiple times? Doesn't that
deserve a comment?
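Spelled out as a helper, the suggestion would look something like this
(a sketch only, helper name invented; note it still assumes the counter
wrapped at most once, which is what the question above is about):

static u64 mbm_bytes_delta(u64 val, u64 prev)
{
	int shift = 64 - MBM_CNTR_WIDTH;

	/*
	 * Left-align both 24-bit counter values so the subtraction wraps
	 * naturally in 64 bits, then shift the delta back down. Like the
	 * open-coded version, this only copes with a single wrap between
	 * the two reads.
	 */
	return ((val << shift) - (prev << shift)) >> shift;
}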
+ bytes *= cqm_l3_scale;
+
+ mbm_current->total_bytes += bytes;
+ mbm_current->interval_bytes += bytes;
+ mbm_current->prev_msr = val;
+ diff_time = ktime_ms_delta(cur_time, mbm_current->interval_start);
Here we do a / 1e6
+
+ /*
+ * The b/w measured is really the most recent/current b/w.
+ * We wait till enough time has passed to avoid
+ * arithmetic rounding problems. Having it at >=100ms,
+ * such errors would be <=1%.
+ */
+ if (diff_time > 100) {
This could well be > 100e6 instead, avoiding the above division most of
the time.
+ bytes = mbm_current->interval_bytes * MSEC_PER_SEC;
+ do_div(bytes, diff_time);
+ mbm_current->bandwidth = bytes;
+ mbm_current->interval_bytes = 0;
+ mbm_current->interval_start = cur_time;
+ }
+
+ return mbm_current;
+}
How does the above time tracking deal with the event not actually having
been scheduled the whole time?
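For reference, the "> 100e6" variant of the tail of update_sample()
could look roughly like this (a sketch with the patch's names; the
delta stays in nanoseconds and the division only happens when the 100ms
window actually closes):

	diff_time = ktime_to_ns(ktime_sub(cur_time, mbm_current->interval_start));

	if (diff_time > 100 * NSEC_PER_MSEC) {
		/* Only convert to milliseconds once the window has closed. */
		do_div(diff_time, NSEC_PER_MSEC);
		bytes = mbm_current->interval_bytes * MSEC_PER_SEC;
		do_div(bytes, diff_time);
		mbm_current->bandwidth = bytes;
		mbm_current->interval_bytes = 0;
		mbm_current->interval_start = cur_time;
	}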
+static void init_mbm_sample(u32 rmid, enum mbm_evt_type evt_type)
+{
+ struct rmid_read rr = {
+ .value = ATOMIC64_INIT(0),
+ };
+
+ rr.rmid = rmid;
+ rr.evt_type = evt_type;
That's just sad.. put those two in the struct init as well.
+ /* on each socket, init sample */
+ on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_init, &rr, 1);
+}
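That is, something like (sketch):

static void init_mbm_sample(u32 rmid, enum mbm_evt_type evt_type)
{
	struct rmid_read rr = {
		.rmid		= rmid,
		.evt_type	= evt_type,
		.value		= ATOMIC64_INIT(0),
	};

	/* on each socket, init sample */
	on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_init, &rr, 1);
}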