[RFC PATCH 06/12] perf topdown-parser: Interface for TMA_Metrics.csv.

From: Ian Rogers
Date: Tue Nov 10 2020 - 05:04:59 EST


From: Sandeep Dasgupta <sdasgup@xxxxxxxxxx>

Reads the CSV file, then creates an in-memory model from the data.

Co-authored-by: Stephane Eranian <eranian@xxxxxxxxxx>
Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
Signed-off-by: Sandeep Dasgupta <sdasgup@xxxxxxxxxx>
---
.../topdown-parser/dependence_dag_utils.cpp | 984 ++++++++++++++++++
.../topdown-parser/dependence_dag_utils.h | 178 ++++
2 files changed, 1162 insertions(+)
create mode 100644 tools/perf/pmu-events/topdown-parser/dependence_dag_utils.cpp
create mode 100644 tools/perf/pmu-events/topdown-parser/dependence_dag_utils.h

diff --git a/tools/perf/pmu-events/topdown-parser/dependence_dag_utils.cpp b/tools/perf/pmu-events/topdown-parser/dependence_dag_utils.cpp
new file mode 100644
index 000000000000..7c9eff06e2a9
--- /dev/null
+++ b/tools/perf/pmu-events/topdown-parser/dependence_dag_utils.cpp
@@ -0,0 +1,984 @@
+/*
+ * Copyright 2020 Google LLC.
+ * SPDX-License-Identifier: GPL-2.0
+ */
+
+#include "dependence_dag_utils.h"
+
+#include <cassert>
+#include <fstream>
+#include <regex>
+
+#include "configuration.h"
+#include "general_utils.h"
+#include "logging.h"
+
+namespace topdown_parser
+{
+/**
+ * Perfmon version string. GetPerfmonVersion() fills it from the first
+ * row of the input csv file.
+ */
+char g_PerfmonVersion[VERSION_MAX_STRLEN];
+
+/**
+ * Maps a parent metric name to its TopdownInfo. Allocated by
+ * InitGlobals() and populated by GetTopdownHierarchy().
+ */
+std::map<std::string, TopdownInfo> *g_TopdownHierarchy = nullptr;
+
+/**
+ * CPU names returned by GetRelevantCpus(). Allocated by InitGlobals()
+ * and assigned in ProcessRecords().
+ */
+std::vector<std::string> *g_RelevantCpus = nullptr;
+
+/**
+ * Sets of CPU names that share one column of the csv file (header
+ * written as CPUX/CPUY). Allocated by InitGlobals() and populated by
+ * GetAliasCpus().
+ */
+std::vector<std::set<std::string> > *g_CpuAliasesForEventInfo = nullptr;
+
+namespace
+{
+/**
+ * Column number in the input csv file specifying 'Count Domain'.
+ * Holds UINT_MAX until ProcessRecords() assigns the derived value.
+ */
+size_t g_CountDomainColm = UINT_MAX;
+
+/**
+ * Column number in the input csv file specifying 'Metric Group'.
+ * Holds UINT_MAX until ProcessRecords() assigns the derived value.
+ */
+size_t g_MetricGroupColm = UINT_MAX;
+
+/**
+ * Column number in the input csv file specifying 'Description'.
+ * Holds UINT_MAX until ProcessRecords() assigns the derived value.
+ */
+size_t g_DescColm = UINT_MAX;
+
+/**
+ * header_rowKey is the regex used to derive the row number of the
+ * header (see GetHeaderRow()). The header of the input csv file
+ * specifies information like level numbers, CPU product names, Metric
+ * Description etc. A typical header row looks like:
+ * Key | Level1 | Level2 | SKX | SKL | Count Domain |
+ * Metric Description | ...
+ */
+const char *header_rowKey = "level[0-9]+";
+
+/**
+ * formula_start_colm_Key is the regex matching the level columns of the
+ * header row. GetLevelStartColm() uses the first matching column as the
+ * first level column; GetFormulaStartColm() uses the first
+ * non-matching column as the first column specifying a formula.
+ */
+const char *formula_start_colm_Key = "level[0-9]+";
+
+/**
+ * formula_end_colm_Key is the regex used to derive the last column
+ * number specifying a formula: GetFormulaEndColm() returns the column
+ * to the left of the rightmost header cell matching it.
+ */
+const char *formula_end_colm_Key = "locate-with";
+
+/**
+ * g_CountDomainColmKey is the regex used to derive the column number in
+ * the input csv file specifying 'Count Domain'.
+ */
+const char *g_CountDomainColmKey = "count";
+
+/**
+ * g_DescColmKey is the regex used to derive the column number in the
+ * input csv file specifying 'Description'.
+ */
+const char *g_DescColmKey = "description";
+
+/**
+ * g_MetricGroupColmKey is the regex used to derive the column number in
+ * the input csv file specifying 'Metric Group'.
+ */
+const char *g_MetricGroupColmKey = "group";
+
+/**
+ * Last row number in the input csv file specifying topdown levels.
+ * Holds UINT_MAX until ProcessRecords() assigns the derived value.
+ */
+size_t g_LevelEndRow = UINT_MAX;
+
+/**
+ * g_LevelEndRowKey is the regex used to derive the last row number in
+ * the input csv file specifying topdown levels. It matches a literal
+ * dot; GetLevelEndRow() returns the row above the first row whose
+ * first cell contains one.
+ */
+const char *g_LevelEndRowKey = "\\.";
+
+/**
+ * First and last column numbers in the input csv file specifying
+ * topdown levels. Both hold UINT_MAX until ProcessRecords() assigns the
+ * derived values.
+ */
+size_t g_LevelStartColm = UINT_MAX;
+size_t g_LevelEndColm = UINT_MAX;
+
+/**
+ * Allocate the global containers that have not been allocated yet.
+ * Containers that already exist are left untouched.
+ */
+void InitGlobals()
+{
+	if (!g_TopdownHierarchy)
+		g_TopdownHierarchy = new std::map<std::string, TopdownInfo>;
+
+	if (!g_RelevantCpus)
+		g_RelevantCpus = new std::vector<std::string>;
+
+	if (!g_CpuAliasesForEventInfo)
+		g_CpuAliasesForEventInfo =
+			new std::vector<std::set<std::string> >;
+}
+
+/**
+ * Plot the topdown hierarchy in graphviz dot.
+ *
+ * Writes the file "topdown_hierarchy.dot" under
+ * kConfigParams->output_path_ with one edge per (parent metric, child
+ * metric) pair. The edge color alternates between blue and red from
+ * one parent metric to the next. A parent without children produces no
+ * edges.
+ *
+ * Exits with status 1 when the output file cannot be opened.
+ */
+void PlotTopdownHierarchy(
+	const std::map<std::string, TopdownInfo> &g_TopdownHierarchy)
+{
+	const std::string topdown_hierarchy_dot =
+		kConfigParams->output_path_ + "topdown_hierarchy.dot";
+	std::ofstream ofile_dot(topdown_hierarchy_dot);
+	if (!ofile_dot.is_open()) {
+		ERROR("Error opening file: " << topdown_hierarchy_dot);
+		exit(1);
+	}
+
+	INFO("Generating topdown hierarchy file: " << topdown_hierarchy_dot);
+
+	ofile_dot << "digraph graphname {\n";
+	bool use_red = false;
+	for (const auto &p : g_TopdownHierarchy) {
+		const std::string metric_name = "\"" + p.first + "\"";
+		const char *color = use_red ? "[color=red]" : "[color=blue]";
+		const std::vector<std::string> &children =
+			p.second.child_metrics;
+
+		// Indexing is bounded by children.size() itself, so an
+		// empty child list is simply skipped instead of wrapping
+		// "size() - 1" around.
+		for (size_t i = 0; i < children.size(); ++i) {
+			// The last edge of a parent has always been
+			// written without spaces around the arrow; keep
+			// the generated file byte-identical.
+			const char *arrow =
+				(i + 1 == children.size()) ? "->" : " -> ";
+			ofile_dot << metric_name << arrow << "\""
+				  << children[i] << "\" " << color << '\n';
+		}
+		use_red = !use_red;
+	}
+	ofile_dot << "}\n";
+
+	ofile_dot.close();
+}
+
+/**
+ * GetTopdownHierarchy derives the topdown hierarchy
+ * from the csv file.
+ * The hierarchy looks like
+ *
+ *                    g_LevelStartColm      g_LevelEndColm
+ *                        |                      |
+ *                        V                      V
+ * header_row         | Level1 | Level2 | Level3 |
+ * header_row + 1     |   A    |        |        |
+ *                    |        |   B    |        |
+ *                    |        |        |   C    |
+ *                    |        |   G    |        |
+ *                    |        |   D    |        |
+ *                    |        |        |   E    |
+ * g_LevelEndRow      |        |        |   F    |
+ *
+ * The function populates:
+ * g_TopdownHierarchy["topdown"] --> {"<perf-stat-switch-name>", {"A"}}
+ * g_TopdownHierarchy["A"] --> {"<perf-stat-switch-name>", {"B", "G", "D"}}
+ * g_TopdownHierarchy["B"] --> {"<perf-stat-switch-name>", {"C"}}
+ * g_TopdownHierarchy["D"] --> {"<perf-stat-switch-name>", {"E", "F"}}
+ *
+ * "G" has no sub-metrics and therefore gets no entry of its own.
+ *
+ * <perf-stat-switch-name> for a metric is the name of the switch which
+ * will be used to invoke perf stat on that metric. These names are
+ * provided by the configuration parameter perf_stat_switch_names_ for
+ * each parent metric; a parent without a configured name keeps an
+ * empty switch name.
+ */
+void GetTopdownHierarchy(const std::vector<std::vector<std::string> > &records)
+{
+	assert((UINT_MAX != kConfigParams->header_row &&
+		UINT_MAX != g_LevelEndRow && UINT_MAX != g_LevelStartColm &&
+		UINT_MAX != g_LevelEndColm) &&
+	       "Cannot find topdown hierarchy");
+
+	std::string last_parent("");
+	for (size_t level = g_LevelStartColm; level <= g_LevelEndColm;
+	     level++) {
+		for (size_t i = kConfigParams->header_row + 1;
+		     i <= g_LevelEndRow; ++i) {
+			const std::string &cell = records[i][level];
+
+			// All the metrics in the first level become
+			// the sub-metrics of the "topdown" metric.
+			if (g_LevelStartColm == level) {
+				if (!cell.empty()) {
+					(*g_TopdownHierarchy)[std::string(
+								      "topdown")]
+						.child_metrics.push_back(cell);
+				}
+				continue;
+			}
+
+			const std::string &parent_cell = records[i][level - 1];
+
+			// Nothing to do when neither column is set (the
+			// row belongs to another level) or when both
+			// are set.
+			if (cell.empty() == parent_cell.empty()) {
+				continue;
+			}
+
+			if (cell.empty()) {
+				// For the case
+				// Level1 | Level2 | Level3 |
+				//   A    |        |        |
+				// We register the parent metric "A".
+				last_parent = parent_cell;
+			} else {
+				// For the case
+				// Level1 | Level2 | Level3 |
+				//        |   B    |        |
+				// We make "B" the sub-metric of the
+				// registered parent metric "A".
+				(*g_TopdownHierarchy)[last_parent]
+					.child_metrics.push_back(cell);
+			}
+		}
+	}
+
+	// Assign perf stat switch names to parent metrics. The perf
+	// stat switch names are provided by a configuration parameter
+	// and used while invoking the perf stat command.
+	for (auto &p : *g_TopdownHierarchy) {
+		const auto switch_name =
+			kConfigParams->perf_stat_switch_names_.find(p.first);
+		if (switch_name !=
+		    kConfigParams->perf_stat_switch_names_.end()) {
+			p.second.perf_stat_switch_name = switch_name->second;
+		}
+		// For some targets, like perf_public,
+		// perf_stat_switch_names_ will not be available.
+	}
+}
+
+/**
+ * GetPerfmonVersion extracts the version number from the
+ * input csv file into g_PerfmonVersion. The number is extracted as
+ * follows:
+ * 1. Find the first cell in the first row of the input csv file whose
+ *    whole content matches the keyword "version" (case-insensitive).
+ * 2. The version number is taken from the very next column of the same
+ *    row.
+ *
+ * g_PerfmonVersion is left empty when there is no first row, no such
+ * cell, or no column after it. A version longer than the buffer is
+ * truncated; the result is always NUL-terminated.
+ */
+void GetPerfmonVersion(const std::vector<std::vector<std::string> > &records)
+{
+	const std::regex r("version", std::regex_constants::icase);
+	std::string version;
+
+	if (!records.empty()) {
+		const std::vector<std::string> &first_row = records[0];
+
+		for (size_t j = 0; j < first_row.size(); j++) {
+			if (regex_match(first_row[j], r)) {
+				if (j + 1 < first_row.size()) {
+					version = first_row[j + 1];
+				}
+				break;
+			}
+		}
+	}
+
+	// strncpy() does not terminate the destination when the source
+	// fills it completely, so copy one byte less and terminate
+	// explicitly.
+	strncpy(g_PerfmonVersion, version.c_str(),
+		sizeof(g_PerfmonVersion) - 1);
+	g_PerfmonVersion[sizeof(g_PerfmonVersion) - 1] = '\0';
+}
+
+/**
+ * Determine the level end row. Level end row is defined as the
+ * last row number in the input csv file specifying a topdown
+ * level. Typically the row after it is marked in the csv file with a
+ * 'dot' in its first cell, so the function returns the row above the
+ * first such row found below the header.
+ *
+ * Rows without any cell are skipped. Exits with status 1 when no
+ * marker row is found.
+ */
+size_t GetLevelEndRow(const std::vector<std::vector<std::string> > &records)
+{
+	const std::regex r(g_LevelEndRowKey, std::regex_constants::icase);
+	for (size_t i = kConfigParams->header_row + 1; i < records.size();
+	     ++i) {
+		// An empty row has no first cell to inspect.
+		if (records[i].empty()) {
+			continue;
+		}
+		if (regex_search(records[i][0], r)) {
+			return i - 1;
+		}
+	}
+
+	ERROR("Failed to derive the level end row using level end row"
+	      " key: "
+	      << g_LevelEndRowKey);
+	INFO("Level end row not found. update the 'g_LevelEndRowKey' in"
+	     " dependence_dag_utils.cpp");
+	exit(1);
+
+	return UINT_MAX;
+}
+
+/**
+ * The function determines the row number of input csv file which
+ * specifies if a CPU product is a client or server. A value already
+ * present in kConfigParams->server_identifier_row_ wins; otherwise it
+ * is the row above the header row.
+ *
+ * Exits with status 1 when the row has to be derived but the header is
+ * the first row of the file, since there is no row above it.
+ */
+size_t GetServerIdentifierRow()
+{
+	if (UINT_MAX != kConfigParams->server_identifier_row_) {
+		return kConfigParams->server_identifier_row_;
+	}
+
+	// "header_row - 1" would wrap around for header row 0 and later
+	// be used as a row index.
+	if (kConfigParams->header_row == 0) {
+		ERROR("Server identifier row not found: no row above the "
+		      "header row.");
+		exit(1);
+	}
+
+	return kConfigParams->header_row - 1;
+}
+
+/**
+ * Determine the last column number in the input csv file specifying
+ * topdown levels. It is derived as the column before the one which
+ * starts specifying formulas.
+ *
+ * A typical header row looks like:
+ * Key | Level1 | Level2 | Level3 | SKX | SKL | ...
+ * The function returns the column number for Level3.
+ *
+ * In case the kConfigParams->first_last_ is provided, which specifies
+ * the last level number, then the function returns the column number
+ * corresponding to that level. For example, if
+ * kConfigParams->first_last_ == 2, then the function returns the column
+ * number for Level2. Exits with status 1 when the header row has no
+ * column for that level.
+ */
+size_t GetLevelEndColm(const std::vector<std::vector<std::string> > &records)
+{
+	if (UINT_MAX != kConfigParams->first_last_) {
+		std::string search_string("level");
+		search_string += std::to_string(kConfigParams->first_last_);
+		const std::regex r(search_string.c_str(),
+				   std::regex_constants::icase);
+		const std::vector<std::string> &header =
+			records[kConfigParams->header_row];
+
+		// Bound the search by the header row itself.
+		// g_LevelEndColm cannot serve as the bound: it is the
+		// value being derived here and still holds UINT_MAX.
+		for (size_t j = 1; j < header.size(); j++) {
+			if (regex_search(header[j], r)) {
+				return j;
+			}
+		}
+		ERROR("Wrong specification of last level in "
+		      "configuration file. Current Value: "
+		      << kConfigParams->first_last_);
+		INFO("Assumption is levels are marked in the csv file "
+		     "as Level1, Level2, "
+		     "..., Leveln and the expected values of last level"
+		     " (to be "
+		     "provided in the configuration file) are [1 - n]");
+		exit(1);
+	}
+
+	if (kConfigParams->formula_start_colm_ == UINT_MAX) {
+		assert(0 && "kConfigParams->formula_start_colm_ not set");
+	}
+	return kConfigParams->formula_start_colm_ - 1;
+}
+
+/**
+ * Determine the first column number in the input csv file specifying
+ * topdown levels. It is derived as the first column, in the header row
+ * and counting from the second column, having a regex match with
+ * formula_start_colm_Key.
+ *
+ * A typical header row looks like:
+ * Key | Level1 | Level2 | SKX | SKL | ...
+ * The function returns the column number for Level1.
+ *
+ * In case the kConfigParams->first_level_ is provided, which specifies
+ * the level number to begin with, then the function returns the column
+ * number corresponding to that level.
+ * For example, if kConfigParams->first_level_ == 2, then the function
+ * returns the column number for Level2.
+ *
+ * Exits with status 1 when no matching column is found.
+ */
+size_t GetLevelStartColm(const std::vector<std::vector<std::string> > &records)
+{
+	const std::vector<std::string> &header =
+		records[kConfigParams->header_row];
+
+	if (UINT_MAX != kConfigParams->first_level_) {
+		std::string search_string("level");
+		search_string += std::to_string(kConfigParams->first_level_);
+		const std::regex r(search_string.c_str(),
+				   std::regex_constants::icase);
+
+		for (size_t j = 1; j < header.size(); j++) {
+			if (regex_search(header[j], r)) {
+				return j;
+			}
+		}
+
+		ERROR("Wrong specification of first level in "
+		      "configuration file. Current Value: "
+		      << kConfigParams->first_level_);
+		INFO("Assumption is levels are marked in the csv file "
+		     "as Level1, Level2, "
+		     "..., Leveln and the expected values of first "
+		     "level (to be "
+		     "provided in the configuration file) are [1 - n]");
+		exit(1);
+	}
+
+	if (std::strcmp(formula_start_colm_Key, "") == 0) {
+		FATAL("Set formula_start_colm_Key in "
+		      "dependence_dag_utils.cpp file");
+	}
+
+	const std::regex r(formula_start_colm_Key,
+			   std::regex_constants::icase);
+	for (size_t j = 1; j < header.size(); j++) {
+		if (regex_search(header[j], r)) {
+			return j;
+		}
+	}
+
+	ERROR("Wrong specification of formula start column key "
+	      "Current Value: "
+	      << formula_start_colm_Key);
+	INFO("Assumption is 'g_LevelStartColm' is derived as the first "
+	     "column whose header matches the formula start column key."
+	     " Try updating the formula_start_colm_Key in "
+	     "dependence_dag_utils.cpp.");
+	exit(1);
+
+	return UINT_MAX;
+}
+
+/**
+ * Returns the header row number. A value already present in
+ * kConfigParams->header_row is returned as is. Otherwise the rows are
+ * scanned from the top and the first row having any cell with a
+ * substring match on header_rowKey is the header row.
+ *
+ * Exits with status 1 when no row matches.
+ */
+size_t GetHeaderRow(const std::vector<std::vector<std::string> > &records)
+{
+	if (kConfigParams->header_row != UINT_MAX)
+		return kConfigParams->header_row;
+
+	const std::regex level_re(header_rowKey, std::regex_constants::icase);
+	size_t row_number = 0;
+	for (const auto &row : records) {
+		for (const auto &cell : row) {
+			if (regex_search(cell, level_re))
+				return row_number;
+		}
+		++row_number;
+	}
+
+	ERROR("Header row not found.");
+	INFO("Update the header row keys in dependence_dag_utils.cpp");
+	exit(1);
+	return UINT_MAX;
+}
+
+/**
+ * Returns the first column number specifying a formula. A value
+ * already present in kConfigParams->formula_start_colm_ is returned as
+ * is. Otherwise the header row is walked from its second column (the
+ * leftmost one holds the item "Key") and the first column that does
+ * not match formula_start_colm_Key is returned.
+ *
+ * Asserts, and returns UINT_MAX when asserts are compiled out, if every
+ * inspected column matches.
+ */
+size_t
+GetFormulaStartColm(const std::vector<std::vector<std::string> > &records)
+{
+	if (kConfigParams->formula_start_colm_ != UINT_MAX)
+		return kConfigParams->formula_start_colm_;
+
+	const std::regex level_re(formula_start_colm_Key,
+				  std::regex_constants::icase);
+	const std::vector<std::string> &header =
+		records[kConfigParams->header_row];
+
+	// Step over the run of level columns.
+	size_t colm = 1;
+	while (colm < header.size() && regex_search(header[colm], level_re))
+		++colm;
+
+	if (colm < header.size())
+		return colm;
+
+	assert(0 && "formula start column not found. update the formula "
+		    "start column keys");
+	return UINT_MAX;
+}
+
+/**
+ * Derives "the last column number specifying a formula". A value
+ * already present in kConfigParams->formula_end_colm_ is returned as
+ * is.
+ * Otherwise we find the first column from the right in the header row
+ * which **does** match formula_end_colm_Key. The desired column is the
+ * one to the left of that column.
+ *
+ * The leftmost column is not inspected: it has no column to its left.
+ * Asserts, and returns UINT_MAX when asserts are compiled out, if no
+ * column matches.
+ */
+size_t GetFormulaEndColm(const std::vector<std::vector<std::string> > &records)
+{
+	if (UINT_MAX != kConfigParams->formula_end_colm_) {
+		return kConfigParams->formula_end_colm_;
+	}
+
+	const std::regex r(formula_end_colm_Key, std::regex_constants::icase);
+	const std::vector<std::string> &header =
+		records[kConfigParams->header_row];
+
+	// Stop at column 1 so that "j - 1" can never wrap around.
+	for (size_t j = header.size(); j-- > 1;) {
+		if (regex_search(header[j], r)) {
+			return j - 1;
+		}
+	}
+	assert(0 && "formula end column not found. update the formula end "
+		    "column keys");
+	return UINT_MAX;
+}
+
+/**
+ * Returns the column number of the 'Count Domain' column: the first
+ * header cell, counting from the second column, that has a substring
+ * match with g_CountDomainColmKey.
+ *
+ * Exits with status 1 when no header cell matches.
+ */
+size_t GetCountDomainColm(const std::vector<std::vector<std::string> > &records)
+{
+	const std::regex key_re(g_CountDomainColmKey,
+				std::regex_constants::icase);
+	const std::vector<std::string> &header =
+		records[kConfigParams->header_row];
+
+	for (size_t colm = 1; colm < header.size(); ++colm) {
+		if (regex_search(header[colm], key_re))
+			return colm;
+	}
+
+	ERROR("Count domain column not found.");
+	INFO("Update the formula 'g_CountDomainColmKey' in "
+	     "dependence_dag_utils.cpp");
+	exit(1);
+
+	return UINT_MAX;
+}
+
+/**
+ * Get the alias CPUs, marked in the csv file as CPUX/CPUY.
+ *
+ * Every header cell in the formula columns that contains a '/' is
+ * split on '/'. The trimmed names which are not don't-care CPUs form
+ * one alias set; sets with more than one member are appended to
+ * g_CpuAliasesForEventInfo.
+ */
+void GetAliasCpus(const std::vector<std::vector<std::string> > &records)
+{
+	const std::regex r("\\/");
+	for (size_t j = kConfigParams->formula_start_colm_;
+	     j <= kConfigParams->formula_end_colm_; j++) {
+		const std::string &cell_content =
+			records[kConfigParams->header_row][j];
+		if (!regex_search(cell_content, r)) {
+			continue;
+		}
+
+		std::set<std::string> alias_set;
+		const std::vector<std::string> split_values =
+			Split(cell_content, '/');
+
+		for (const auto &item : split_values) {
+			// Trim before the don't-care lookup, so that the
+			// name which is checked is the name which gets
+			// stored.
+			const std::string cpu = Trim(item);
+			if (kConfigParams->dont_care_cpus_.count(cpu) == 0) {
+				alias_set.insert(cpu);
+			}
+		}
+		if (alias_set.size() > 1) {
+			g_CpuAliasesForEventInfo->push_back(alias_set);
+		}
+	}
+}
+
+/**
+ * Determine the cpus relevant to generate topdown hierarchy.
+ * If g_RelevantCpus is already populated, its content is returned.
+ * If kConfigParams->selected_cpus_ is present (which are the selected
+ * CPUs provided using configuration parameter selected_cpus), then the
+ * function return value == kConfigParams->selected_cpus_.
+ * If not, return value =
+ * (cpu names derived from csv file) - kConfigParams->dont_care_cpus_
+ *
+ * The cpu names are the trimmed header cells of the formula columns; a
+ * header written as CPUx/CPUy contributes one name per part.
+ */
+std::vector<std::string>
+GetRelevantCpus(const std::vector<std::vector<std::string> > &records)
+{
+	if (!g_RelevantCpus->empty()) {
+		return *g_RelevantCpus;
+	}
+
+	if (!kConfigParams->selected_cpus_.empty()) {
+		return (kConfigParams->selected_cpus_);
+	}
+
+	std::vector<std::string> retval;
+
+	// Trim before the don't-care lookup, so that the name which is
+	// checked is the name which gets stored.
+	const auto add_if_relevant = [&retval](const std::string &raw_name) {
+		const std::string cpu = Trim(raw_name);
+		if (kConfigParams->dont_care_cpus_.count(cpu) == 0) {
+			retval.push_back(cpu);
+		}
+	};
+
+	const std::regex r("\\/");
+	for (size_t j = kConfigParams->formula_start_colm_;
+	     j <= kConfigParams->formula_end_colm_; j++) {
+		const std::string &cell_content =
+			records[kConfigParams->header_row][j];
+
+		// Check if the CPUs names are provided as CPUx/CPUy
+		if (regex_search(cell_content, r)) {
+			const std::vector<std::string> split_values =
+				Split(cell_content, '/');
+
+			for (const auto &item : split_values) {
+				add_if_relevant(item);
+			}
+		} else {
+			add_if_relevant(cell_content);
+		}
+	}
+
+	return retval;
+}
+
+/**
+ * Returns the column number of the 'Description' column: the first
+ * header cell, counting from the second column, that has a substring
+ * match with g_DescColmKey.
+ *
+ * Exits with status 1 when no header cell matches.
+ */
+size_t GetDescColm(const std::vector<std::vector<std::string> > &records)
+{
+	const std::regex desc_re(g_DescColmKey, std::regex_constants::icase);
+	const std::vector<std::string> &header =
+		records[kConfigParams->header_row];
+
+	size_t colm = 1;
+	while (colm < header.size()) {
+		if (regex_search(header[colm], desc_re))
+			return colm;
+		++colm;
+	}
+
+	ERROR("Description column not found.");
+	INFO("Update the formula 'g_DescColmKey' in "
+	     "dependence_dag_utils.cpp");
+	exit(1);
+
+	return UINT_MAX;
+}
+
+/**
+ * Returns the column number of the 'Metric Group' column: the first
+ * header cell, counting from the second column, that has a substring
+ * match with g_MetricGroupColmKey.
+ *
+ * Exits with status 1 when no header cell matches.
+ */
+size_t GetMetricGroupColm(const std::vector<std::vector<std::string> > &records)
+{
+	const std::regex group_re(g_MetricGroupColmKey,
+				  std::regex_constants::icase);
+	const std::vector<std::string> &header =
+		records[kConfigParams->header_row];
+	const size_t colm_count = header.size();
+
+	for (size_t colm = 1; colm < colm_count; ++colm) {
+		const bool is_group_colm = regex_search(header[colm], group_re);
+		if (is_group_colm)
+			return colm;
+	}
+
+	ERROR("Metric Group column not found.");
+	INFO("Update the formula 'g_MetricGroupColmKey' in "
+	     "dependence_dag_utils.cpp");
+	exit(1);
+
+	return UINT_MAX;
+}
+
+/**
+ * 'IsServer' tells whether the product in the given column is a server
+ * rather than a client. A product is a server when its cell in the row
+ * kConfigParams->server_identifier_row_ is, ignoring case, exactly
+ * "server".
+ */
+bool IsServer(const std::vector<std::vector<std::string> > &records,
+	      const size_t product_column_number)
+{
+	const std::regex server_re("server", std::regex_constants::icase);
+	const std::string &identifier =
+		records[kConfigParams->server_identifier_row_]
+		       [product_column_number];
+
+	return regex_match(identifier, server_re);
+}
+
+/**
+ * The input csv file has intentionally omitted some formulas for many
+ * metrics. The idea is that those missing formulas can be derived
+ * using an inheritance rule which says:
+ *
+ * Client products (like SNB/IVB/HSW/BDW/SKL) inherit from their
+ * predecessors. E.g. BDW inherits HSW (which inherits IVB)
+ *
+ * Server products (like JKT/IVT/HSX/BDX) inherit a baseline core and
+ * build on predecessors. E.g. HSX inherits HSW and builds-on IVT
+ * (which inherits IVB)
+ *
+ * PopulateEmptyFormulas modifies the `records` (which is the in-memory
+ * representation of the input csv file), to fill in the cells with
+ * missing formulas based on the above inheritance rule.
+ *
+ * Each row below the header is walked from the last formula column to
+ * the first. The most recent non-empty client cell is remembered and
+ * copied into every empty cell, client or server, met afterwards. A
+ * non-empty server cell is kept and never becomes the remembered
+ * value.
+ */
+void PopulateEmptyFormulas(std::vector<std::vector<std::string> > *records)
+{
+	const size_t first_colm = kConfigParams->formula_start_colm_;
+	const size_t last_colm = kConfigParams->formula_end_colm_;
+
+	if (last_colm < first_colm) {
+		return;
+	}
+
+	// Whether a column is a server depends on the column only, so
+	// classify every formula column once instead of once per cell.
+	std::vector<bool> is_server(last_colm - first_colm + 1);
+	for (size_t k = 0; k < is_server.size(); ++k) {
+		is_server[k] = IsServer(*records, first_colm + k);
+	}
+
+	for (size_t i = kConfigParams->header_row + 1; i < records->size();
+	     ++i) {
+		std::vector<std::string> &row = (*records)[i];
+		std::string last_client_data("");
+
+		// Count the offset down with "k-- > 0": a plain
+		// "j >= first_colm; j--" loop never terminates when the
+		// first formula column is 0.
+		for (size_t k = is_server.size(); k-- > 0;) {
+			std::string &cell = row[first_colm + k];
+
+			if (cell.empty()) {
+				// Clients and servers alike inherit
+				// missing data from the preceding
+				// client.
+				cell = last_client_data;
+			} else if (!is_server[k]) {
+				last_client_data = cell;
+			}
+		}
+	}
+}
+
+/**
+ * `records` is the in-memory representation of the input csv file.
+ * `ParseRecordToMappedData` builds the `MappedData` describing the cell
+ * `records[row][column]`.
+ *
+ * For example: For the following csv entry
+ * Level1 | Level2 | SKX     | Count Domain | Description | Metric Group
+ * P      | M      | Formula | Slots        | description | MG
+ *
+ * the information collected for the cell holding "Formula" is:
+ * (1) Row and column number of the cell
+ * (2) Textual content of the cell: "Formula"
+ * (3) Name of the header: "SKX"
+ * (4) The count domain: "Slots"
+ * (5) Descriptive text: "description"
+ * (6) Metric group: "MG"
+ * (7) Metric name: "M", the first non-empty cell found when walking
+ *     left from the first formula column
+ * (8) Prefix: "P", the remaining non-empty cells of that walk,
+ *     concatenated in left-to-right order
+ * (9) Aux data: one "\t * <header>: <content>\n" line for every
+ *     non-empty cell to the right of the last formula column (Count
+ *     Domain, Description, Metric Group etc.)
+ *
+ * Prints a diagnostic and asserts when the row has no metric name.
+ */
+MappedData
+ParseRecordToMappedData(const std::vector<std::vector<std::string> > &records,
+			const size_t &row, const size_t &column)
+{
+	const std::vector<std::string> &cells = records[row];
+	const std::vector<std::string> &header =
+		records[kConfigParams->header_row];
+
+	MappedData obj;
+	obj.row_ = row;
+	obj.column_ = column;
+	obj.cell_content_ = cells[column];
+	obj.header_name_ = header[column];
+	obj.count_domain_ = cells[g_CountDomainColm];
+	obj.description_ = cells[g_DescColm];
+	obj.metric_group_ = cells[g_MetricGroupColm];
+
+	// One walk to the left of the formula columns yields both the
+	// metric name (first non-empty cell) and the prefix (every
+	// further non-empty cell, prepended to keep csv order).
+	bool name_seen = false;
+	for (size_t j = kConfigParams->formula_start_colm_; j-- > 0;) {
+		const std::string &text = cells[j];
+		if (text.empty())
+			continue;
+
+		if (name_seen) {
+			obj.prefix_ = text + obj.prefix_;
+		} else {
+			obj.metric_name_ = text;
+			name_seen = true;
+		}
+	}
+
+	if (obj.metric_name_.empty()) {
+		std::cerr << "key missing for row: " << row
+			  << " column: " << column << "\n";
+		assert(0);
+	}
+
+	// Collect the auxiliary columns right of the formulas.
+	for (size_t j = kConfigParams->formula_end_colm_ + 1;
+	     j < cells.size(); j++) {
+		if (cells[j].empty())
+			continue;
+
+		obj.aux_data_ += "\t * " + header[j] + ": " + cells[j] + "\n";
+	}
+
+	return obj;
+}
+
+/**
+ * Builds the dependence dag key of `data`: "<metric name>_<header name>".
+ */
+std::string GetKey(const MappedData &data)
+{
+	std::string key(data.metric_name_);
+	key += "_";
+	key += data.header_name_;
+	return key;
+}
+
+/**
+ * Create a dependence dag using the 'records' data-structure
+ * (which is the in-memory representation of the input csv file).
+ * The "dependence dag" is implemented as a map as follows:
+ * Suppose we have rows
+ * Level1     Level2 ||   SKL       BDW
+ * K          L1     ||   P1*L2     P2*L2
+ * K          L2     ||   P3*Q3     P4*Q4
+ *
+ * The information we will be storing in the map is:
+ *
+ * Map Key         -> an object of `MappedData`
+ * -----------------------------------------
+ * <metric>_<CPU>  -> {<header>, <textual formula>, <prefix>, ...}
+ * L1_SKL          -> {SKL, P1*L2, "K", ...}
+ * L1_BDW          -> {BDW, P2*L2, "K", ...}
+ * L2_SKL          -> {SKL, P3*Q3, "K", ...}
+ * L2_BDW          -> {BDW, P4*Q4, "K", ...}
+ *
+ * Three steps are performed:
+ * 1. Every formula cell below the header row is parsed and stored
+ *    under its key. Cells whose metric name is empty or "." are
+ *    skipped.
+ * 2. An entry whose header names several CPUs (CPUX/CPUY) is replaced
+ *    by one copy per CPU.
+ * 3. An entry "topdown_<CPU>" describing the dummy metric "topdown" is
+ *    added (or updated) for every CPU in g_RelevantCpus.
+ *
+ * A key collision in step 1 or 2 is reported on stderr and asserted.
+ */
+std::unordered_map<std::string, MappedData>
+CreateDependenceDag(const std::vector<std::vector<std::string> > &records)
+{
+	std::unordered_map<std::string, MappedData> dependence_dag;
+
+	// Step 1: one entry per (metric row, formula column) cell.
+	const size_t first_row = kConfigParams->header_row + 1;
+	for (size_t i = first_row; i < records.size(); ++i) {
+		for (size_t j = kConfigParams->formula_start_colm_;
+		     j <= kConfigParams->formula_end_colm_; j++) {
+			const MappedData data =
+				ParseRecordToMappedData(records, i, j);
+
+			// Irrelevant keys never reach the map.
+			if (data.metric_name_.empty() ||
+			    data.metric_name_ == ".")
+				continue;
+
+			const std::string key = GetKey(data);
+
+			// emplace() leaves an existing entry untouched,
+			// so a duplicate never overwrites the first one.
+			const bool inserted =
+				dependence_dag.emplace(key, data).second;
+			if (!inserted) {
+				std::cerr << "Duplicate key: " << key
+					  << " Row: " << i << " Colm: " << j
+					  << "\n";
+				assert(0 && "duplicate!!");
+			}
+		}
+	}
+
+	assert(!dependence_dag.empty() && "empty dependence dag");
+
+	// Step 2: replace the entries with a column header label such
+	// as SKL/BDW by a separate entry per CPU. The map is not
+	// modified while it is being walked; removals and insertions
+	// are collected first and applied afterwards.
+	const std::regex r("\\/");
+	std::vector<std::string> keys_to_remove;
+	std::vector<std::pair<std::string, MappedData> > keys_to_insert;
+	for (const auto &entry : dependence_dag) {
+		const MappedData &value = entry.second;
+
+		if (!regex_search(value.header_name_, r))
+			continue;
+
+		const std::vector<std::string> split_values =
+			Split(value.header_name_, '/');
+
+		for (const auto &item : split_values) {
+			MappedData new_value = value;
+			new_value.header_name_ = Trim(item);
+			const std::string new_mapkey = GetKey(new_value);
+			if (dependence_dag.count(new_mapkey) > 0) {
+				std::cerr << "Duplicate key: " << new_mapkey
+					  << "\n";
+				assert(0 && "Duplicate 2");
+			}
+			keys_to_insert.emplace_back(new_mapkey, new_value);
+		}
+		keys_to_remove.push_back(entry.first);
+	}
+
+	for (const auto &delkey : keys_to_remove)
+		dependence_dag.erase(delkey);
+
+	for (const auto &insertkey : keys_to_insert)
+		dependence_dag.insert(insertkey);
+
+	// Step 3: adding description for dummy metric topdown.
+	// NOTE(review): unlike the entries above, metric_name_ here
+	// carries the "_<CPU>" suffix - confirm that this is intended.
+	for (const auto &known_cpu : *g_RelevantCpus) {
+		const std::string topdown_key =
+			std::string("topdown") + "_" + known_cpu;
+		MappedData &data = dependence_dag[topdown_key];
+		data.header_name_ = known_cpu;
+		data.metric_name_ = topdown_key;
+		data.description_ = std::string(
+			"Intel Topdown analysis expressed in % of issue"
+			" slots");
+	}
+
+	return dependence_dag;
+}
+
+/**
+ * Print Diagnosis results.
+ *
+ * Logs the csv layout derived by ProcessRecords(). Row numbers are
+ * printed one-based and column numbers as spreadsheet column labels
+ * (A..Z, AA, AB, ...). The relevant CPUs, the don't-care CPUs and the
+ * CPU alias sets are logged as well.
+ */
+void PrintConfigVars()
+{
+	// Spreadsheet label of a zero-based column number:
+	// 0 -> "A", 25 -> "Z", 26 -> "AA", 27 -> "AB", ...
+	// A plain "'A' + column" is only correct for the first 26
+	// columns.
+	const auto column_label = [](size_t column) {
+		std::string label;
+		for (;;) {
+			label.insert(label.begin(),
+				     static_cast<char>('A' + column % 26));
+			if (column < 26) {
+				return label;
+			}
+			column = column / 26 - 1;
+		}
+	};
+
+	std::cout << std::endl;
+	INFO("Important csv artifacts");
+	INFO(std::string("Header row number = ") +
+	     std::to_string(kConfigParams->header_row + 1));
+	INFO(std::string("Server identifier row number = ") +
+	     std::to_string(kConfigParams->server_identifier_row_ + 1));
+	INFO(std::string("Formula start column = ") +
+	     column_label(kConfigParams->formula_start_colm_));
+	INFO(std::string("Formula end column = ") +
+	     column_label(kConfigParams->formula_end_colm_));
+	INFO(std::string("Level start column = ") +
+	     column_label(g_LevelStartColm));
+	INFO(std::string("Level end column = ") +
+	     column_label(g_LevelEndColm));
+	INFO(std::string("Level end row number = ") +
+	     std::to_string(g_LevelEndRow + 1));
+	INFO(std::string("Count Domain column = ") +
+	     column_label(g_CountDomainColm));
+	INFO(std::string("Description column = ") +
+	     column_label(g_DescColm));
+
+	std::cout << std::endl;
+	INFO("Relevant CPUs = " << *g_RelevantCpus);
+	INFO("Don't care CPUs = " << kConfigParams->dont_care_cpus_);
+	INFO("CPU alias sets for event encodings");
+	for (auto &alias_set : *g_CpuAliasesForEventInfo) {
+		INFO("\t{" << alias_set << "} ");
+	}
+}
+
+} // namespace
+
+/**
+ * Entry point of this file: turns the in-memory csv `records` into the
+ * dependence dag returned by CreateDependenceDag().
+ *
+ * The steps depend on each other through kConfigParams and the file
+ * level globals, so their order matters. `records` is modified in
+ * place by PopulateEmptyFormulas().
+ */
+std::unordered_map<std::string, MappedData>
+ProcessRecords(std::vector<std::vector<std::string> > *records)
+{
+ // Allocate the global containers used below.
+ InitGlobals();
+
+ // Task 0: extract the perfmon version into g_PerfmonVersion.
+ GetPerfmonVersion(*records);
+
+ // Task 1: derive the layout of the csv file. The header row comes
+ // first because every later lookup searches that row; the formula
+ // start column is needed by GetLevelEndColm().
+ kConfigParams->header_row = GetHeaderRow(*records);
+ kConfigParams->formula_start_colm_ = GetFormulaStartColm(*records);
+ kConfigParams->formula_end_colm_ = GetFormulaEndColm(*records);
+ g_CountDomainColm = GetCountDomainColm(*records);
+ g_DescColm = GetDescColm(*records);
+ g_MetricGroupColm = GetMetricGroupColm(*records);
+ g_LevelStartColm = GetLevelStartColm(*records);
+ g_LevelEndColm = GetLevelEndColm(*records);
+ g_LevelEndRow = GetLevelEndRow(*records);
+ (*g_RelevantCpus) = GetRelevantCpus(*records);
+ kConfigParams->server_identifier_row_ = GetServerIdentifierRow();
+
+ // Task 2: build the topdown hierarchy and write it as a dot file.
+ GetTopdownHierarchy(*records);
+ PlotTopdownHierarchy(*g_TopdownHierarchy);
+
+ // Task 3: collect the CPU alias sets (headers like CPUX/CPUY).
+ GetAliasCpus(*records);
+
+ // Task 4: fill in the omitted formulas. This reads the server
+ // identifier row set in Task 1.
+ PopulateEmptyFormulas(records);
+
+ // Task 5: log the derived layout.
+ PrintConfigVars();
+
+ // Task 6: build the dependence dag from the completed records.
+ return CreateDependenceDag(*records);
+}
+
+} // namespace topdown_parser
diff --git a/tools/perf/pmu-events/topdown-parser/dependence_dag_utils.h b/tools/perf/pmu-events/topdown-parser/dependence_dag_utils.h
new file mode 100644
index 000000000000..e7f992f98e45
--- /dev/null
+++ b/tools/perf/pmu-events/topdown-parser/dependence_dag_utils.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+// --------------------------------------------------------
+// File: dependence_dag_utils.h
+// --------------------------------------------------------
+//
+// The header provides the interface to read the input csv file, process it and
+// populate an in-memory model.
+//
+// The cells of the input csv file can be broadly divided into two types:
+// (1) Ones specifying the top down metric and (2) Ones specifying the
+// metric expression for a metric and CPU pair. (CPU is specified in the csv
+// file by the column and metric by the row).
+//
+// A formula might involve the following:
+// (1) Raw PMU events
+// (2) Constants
+// (3) External parameters: Such components are not defined in the input csv
+// file and must come from elsewhere. For example, the formula component
+// `SMT_on`, which specifies whether hyper-threading is enabled on the CPU,
+// needs to be extracted from the host machine.
+// Example, ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CLKS
+// (4) Another (sub-)metric, as in
+// 1 - ( Frontend_Bound + Bad_Speculation + Retiring )
+//
+// We represent the formula as a dependence dag where the root of the dag
+// represents a topdown metric (hence a formula), the intermediate nodes
+// represent sub-formulas and the leaves represent the PMU events, constants or
+// external parameters. We implement this dependence dag using a map.
+
+#ifndef TOPDOWN_PARSER_DEPENDENCE_DAG_UTILS_H_
+#define TOPDOWN_PARSER_DEPENDENCE_DAG_UTILS_H_
+
+#include <map>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace topdown_parser
+{
+/**
+ * For each metric, the data structure `TopdownInfo` stores:
+ * (1) the perf-metric switch name to be used when invoking perf stat.
+ * Note: This field is not required for all targets.
+ * (2) the names of all its sub-metrics.
+ */
+struct TopdownInfo {
+ std::string perf_stat_switch_name; // (1) perf stat switch name
+ std::vector<std::string> child_metrics; // (2) names of the sub-metrics
+};
+
+/**
+ * `g_TopdownHierarchy` stores the topdown hierarchy.
+ *
+ * An example: The metric `Topdown` has four sub-metrics and each of the
+ * sub-metrics can be further broken down.
+ * Topdown
+ * Frontend_Bound
+ * Frontend_Latency
+ * Frontend_Bandwidth
+ * Backend_Bound
+ * ...
+ * Bad_Speculation
+ * ...
+ * Retiring
+ * ...
+ *
+ * g_TopdownHierarchy is a map from the parent metric name to an object of
+ * type TopdownInfo, which contains:
+ * 1. Name of perf stat switch: This is derived from the configuration
+ * parameter `perf_stat_switch_names_`.
+ * 2. Names of all the child metrics
+ *
+ * For example, in the context of the running example,
+ *
+ * g_TopdownHierarchy["Topdown"] --> {"topdown",
+ * {"Frontend_Bound", "Backend_Bound", "Bad_Speculation", "Retiring"}}
+ * g_TopdownHierarchy["Frontend_Bound"] --> {"topdown_fe",
+ * {"Frontend_Latency", "Frontend_Bandwidth"}}
+ */
+extern std::map<std::string, TopdownInfo> *g_TopdownHierarchy;
+
+/**
+ * The version number of the input csv file.
+ */
+#define VERSION_MAX_STRLEN 100
+extern char g_PerfmonVersion[VERSION_MAX_STRLEN];
+
+/**
+ * The CPUs actually used for generating topdown files. This takes into account
+ * the CPUs derived from the input csv file and the ones included or excluded
+ * by configuration parameters `selected_cpus_` and `dont_care_cpus_`
+ */
+extern std::vector<std::string> *g_RelevantCpus;
+
+/**
+ * Sets of CPU names that are aliases of each other, i.e. that are specified
+ * together in one column header of the input csv file as CPUX/CPUY.
+ */
+extern std::vector<std::set<std::string> > *g_CpuAliasesForEventInfo;
+
+/**
+ * Each textual entry of the input csv file is parsed into the following
+ * data structure.
+ */
+struct MappedData {
+ // Row and column of the textual entry (no default initializers).
+ size_t row_, column_;
+ // The textual content of the cell.
+ std::string cell_content_;
+ // Prefix is used to make the
+ // function name more informative.
+ std::string prefix_;
+ // Auxiliary data about the entry.
+ std::string aux_data_;
+ // The header value for the entry, which equals the CPU model.
+ std::string header_name_;
+ // Name of the metric the entry belongs to.
+ std::string metric_name_;
+ // The value of the 'Count Domain'
+ // column for the entry.
+ std::string count_domain_;
+ // The value of the 'Description'
+ // column for the entry.
+ std::string description_;
+ // The value of the 'Metric Group'
+ // column for the entry.
+ std::string metric_group_;
+};
+
+std::ostream &operator<<(std::ostream &, const MappedData &); // one entry
+std::ostream &operator<<(std::ostream &, // a whole map of entries
+ const std::unordered_map<std::string, MappedData> &);
+
+/**
+ * ProcessRecords parses and processes the entries of the csv file and
+ * creates an in-memory model. It processes the list of rows 'records' of the
+ * csv file in the following way.
+ * Task 0. Determine the version number of the input csv file.
+ *
+ * Task 1. Derive information from the input csv file.
+ *
+ * Task 2. Generate the topdown hierarchy.
+ *
+ * Task 3. Determine the alias CPUs.
+ * If the csv file has column headers like "CPUX/CPUY", then we consider the
+ * CPUs as aliases for event encoding look-up purposes. That is, if the event
+ * encoding JSON file for CPUX is missing or an event is not found in the
+ * event encoding file for CPUX, then we will look it up in the file of CPUY.
+ *
+ * Task 4. Populate the missing cell values.
+ *
+ * Task 5. Print the configuration values derived in the earlier tasks.
+ *
+ * Task 6. Create a map storing the records in the following fashion.
+ * Example
+ * A. Suppose we have rows
+ * Level1 Level2 || SKL BDW
+ * K L1 || P1*L2 P2*L2
+ * K L2 || P3*Q3 P4*Q4
+ *
+ * The information we will be storing in the map is as follows:
+ *
+ * Map Key -> Some of the mapped values
+ *
+ * <metric>_<CPU> -> {<header>, <textual formula>, <prefix>, ...}
+ * L1_SKL -> {SKL, P1*L2, "K", ...}
+ * L1_BDW -> {BDW, P2*L2, "K", ...}
+ * L2_SKL -> {SKL, P3*Q3, "K", ...}
+ * L2_BDW -> {BDW, P4*Q4, "K", ...}
+ */
+std::unordered_map<std::string, MappedData>
+ProcessRecords(std::vector<std::vector<std::string> > *);
+
+} // namespace topdown_parser
+
+#endif // TOPDOWN_PARSER_DEPENDENCE_DAG_UTILS_H_
--
2.29.2.222.g5d2a92d10f8-goog