[PATCH] perf mem/c2c: Fix perf_mem_events to support powerpc
From: Ravi Bangoria
Date: Sun Jan 13 2019 - 23:12:53 EST
Powerpc hw does not have inbuilt latency filter (--ldlat) for mem-load
event and, perf_mem_events by default includes ldlat=30 which is
causing failure on powerpc. Refactor code to support perf mem/c2c on
powerpc.
This patch depends on kernel side changes done my Madhavan:
https://lists.ozlabs.org/pipermail/linuxppc-dev/2018-December/182596.html
Signed-off-by: Ravi Bangoria <ravi.bangoria@xxxxxxxxxxxxx>
---
tools/perf/Documentation/perf-c2c.txt | 16 ++++++++++++----
tools/perf/Documentation/perf-mem.txt | 2 +-
tools/perf/arch/x86/util/Build | 1 +
tools/perf/arch/x86/util/mem-events.c | 25 +++++++++++++++++++++++++
tools/perf/util/mem-events.c | 26 ++++----------------------
tools/perf/util/mem-events.h | 2 ++
6 files changed, 45 insertions(+), 27 deletions(-)
create mode 100644 tools/perf/arch/x86/util/mem-events.c
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
index 095aebd..4e12551 100644
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -19,8 +19,11 @@ C2C stands for Cache To Cache.
The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows
you to track down the cacheline contentions.
-The tool is based on x86's load latency and precise store facility events
-provided by Intel CPUs. These events provide:
+On x86, the tool is based on load latency and precise store facility events
+provided by Intel CPUs. On PowerPC, the tool uses random instruction sampling
+with thresholding feature.
+
+These events provide:
- memory address of the access
- type of the access (load and store details)
- latency (in cycles) of the load access
@@ -46,7 +49,7 @@ RECORD OPTIONS
-l::
--ldlat::
- Configure mem-loads latency.
+ Configure mem-loads latency. (x86 only)
-k::
--all-kernel::
@@ -119,11 +122,16 @@ Following perf record options are configured by default:
-W,-d,--phys-data,--sample-cpu
Unless specified otherwise with '-e' option, following events are monitored by
-default:
+default on x86:
cpu/mem-loads,ldlat=30/P
cpu/mem-stores/P
+and following on PowerPC:
+
+ cpu/mem-loads/P
+ cpu/mem-stores/P
+
User can pass any 'perf record' option behind '--' mark, like (to enable
callchains and system wide monitoring):
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index f8d2167..199ea0f 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -82,7 +82,7 @@ RECORD OPTIONS
Be more verbose (show counter open errors, etc)
--ldlat <n>::
- Specify desired latency for loads event.
+ Specify desired latency for loads event. (x86 only)
In addition, for report all perf report options are valid, and for record
all perf record options.
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 844b8f3..9827240 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -6,6 +6,7 @@ libperf-y += perf_regs.o
libperf-y += group.o
libperf-y += machine.o
libperf-y += event.o
+libperf-y += mem-events.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c
new file mode 100644
index 0000000..5b4dcfe
--- /dev/null
+++ b/tools/perf/arch/x86/util/mem-events.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "mem-events.h"
+
+struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
+ PERF_MEM_EVENT("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "mem-loads"),
+ PERF_MEM_EVENT("ldlat-stores", "cpu/mem-stores/P", "mem-stores"),
+};
+
+static char mem_loads_name[100];
+static bool mem_loads_name__init;
+
+char *perf_mem_events__name(int i)
+{
+ if (i == PERF_MEM_EVENTS__LOAD) {
+ if (!mem_loads_name__init) {
+ mem_loads_name__init = true;
+ scnprintf(mem_loads_name, sizeof(mem_loads_name),
+ perf_mem_events[i].name,
+ perf_mem_events__loads_ldlat);
+ }
+ return mem_loads_name;
+ }
+
+ return (char *)perf_mem_events[i].name;
+}
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 93f74d8..1ffefd3 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -15,31 +15,13 @@
unsigned int perf_mem_events__loads_ldlat = 30;
-#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
-
-struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
- E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "mem-loads"),
- E("ldlat-stores", "cpu/mem-stores/P", "mem-stores"),
+struct perf_mem_event __weak perf_mem_events[PERF_MEM_EVENTS__MAX] = {
+ PERF_MEM_EVENT("ldlat-loads", "cpu/mem-loads/P", "mem-loads"),
+ PERF_MEM_EVENT("ldlat-stores", "cpu/mem-stores/P", "mem-stores"),
};
-#undef E
-
-#undef E
-
-static char mem_loads_name[100];
-static bool mem_loads_name__init;
-char *perf_mem_events__name(int i)
+char * __weak perf_mem_events__name(int i)
{
- if (i == PERF_MEM_EVENTS__LOAD) {
- if (!mem_loads_name__init) {
- mem_loads_name__init = true;
- scnprintf(mem_loads_name, sizeof(mem_loads_name),
- perf_mem_events[i].name,
- perf_mem_events__loads_ldlat);
- }
- return mem_loads_name;
- }
-
return (char *)perf_mem_events[i].name;
}
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index a889ec2..4cb6935 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -16,6 +16,8 @@ struct perf_mem_event {
const char *sysfs_name;
};
+#define PERF_MEM_EVENT(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
+
enum {
PERF_MEM_EVENTS__LOAD,
PERF_MEM_EVENTS__STORE,
--
1.8.3.1