[RFC 3/4] perf kvm: Enable 'report' on powerpc
From: Ravi Bangoria
Date: Wed Feb 24 2016 - 04:09:32 EST
'perf kvm record' on powerpc records the kvm_hv:kvm_guest_exit event
instead of cycles. However, to get some kind of periodicity, we can't
use all the kvm exits; we can only use exits which are bound to happen at
regular intervals. The HV_DECREMENTER interrupt forces the threads to exit
after an interval of 10 ms.
This patch makes use of the 'kvm_guest_exit' tracepoint and checks the
exit reason of every kvm exit. If it is HV_DECREMENTER, the instruction
pointer recorded along with this tracepoint is retrieved and resolved
against the guest kallsyms.
Signed-off-by: Ravi Bangoria <ravi.bangoria@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Hemant Kumar <hemant@xxxxxxxxxxxxxxxxxx>
---
tools/perf/util/event.c | 7 +++--
tools/perf/util/evsel.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++
tools/perf/util/evsel.h | 7 +++++
tools/perf/util/session.c | 7 +++--
4 files changed, 92 insertions(+), 6 deletions(-)
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index bc0a3f0..31bbc50 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1299,15 +1299,16 @@ int perf_event__preprocess_sample(const union perf_event *event,
struct machine *machine,
struct addr_location *al,
struct perf_sample *sample,
- struct perf_evsel *evsel __maybe_unused)
+ struct perf_evsel *evsel)
{
- u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ u8 cpumode;
struct thread *thread = machine__findnew_thread(machine, sample->pid,
sample->tid);
-
if (thread == NULL)
return -1;
+ al->cpumode = cpumode = arch__get_cpumode(event, evsel, sample);
+
dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
/*
* Have we already created the kernel maps for this machine?
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 0902fe4..a4d309e 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1622,6 +1622,82 @@ static inline bool overflow(const void *endp, u16 max_size, const void *offset,
#define OVERFLOW_CHECK_u64(offset) \
OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64))
+#define KVMPPC_EXIT "kvm_hv:kvm_guest_exit" /* tracepoint name matched by is_kvmppc_exit_event() */
+#define HV_DECREMENTER 2432 /* 0x980: "trap" value that is_hv_dec_trap() looks for */
+#define HV_BIT 3 /* MSR bit index, counted from the most significant bit (shifted as PPC_MAX - HV_BIT) */
+#define PR_BIT 49 /* MSR bit index, counted from the most significant bit (shifted as PPC_MAX - PR_BIT) */
+#define PPC_MAX 63 /* highest bit index; (PPC_MAX - n) turns bit index n into a left-shift count */
+
+/*
+ * is_kvmppc_exit_event - test whether @evsel is the KVMPPC_EXIT tracepoint.
+ *
+ * The first time an evsel with a matching name is seen, its tracepoint id
+ * (attr.config) is cached in a function-local static; afterwards only the
+ * ids are compared, so the string comparison is done at most once per
+ * successful lookup.
+ *
+ * Returns false for non-tracepoint events, for an evsel whose name is not
+ * set, and for tracepoints with a different name/id.
+ */
+bool is_kvmppc_exit_event(struct perf_evsel *evsel)
+{
+	/*
+	 * 0 means "not resolved yet". Kept 64 bits wide so that the value
+	 * copied from attr.config is never truncated.
+	 */
+	static u64 kvmppc_exit;
+
+	if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+		return false;
+
+	if (unlikely(kvmppc_exit == 0)) {
+		/* strcmp() on a NULL name would be undefined behaviour. */
+		if (evsel->name == NULL || strcmp(KVMPPC_EXIT, evsel->name))
+			return false;
+
+		kvmppc_exit = evsel->attr.config;
+		return true;
+	}
+
+	return kvmppc_exit == evsel->attr.config;
+}
+
+/*
+ * is_hv_dec_trap - check whether the "trap" field of @sample equals
+ * HV_DECREMENTER.
+ *
+ * The field is fetched with perf_evsel__intval() and narrowed to int before
+ * the comparison.
+ */
+bool is_hv_dec_trap(struct perf_evsel *evsel, struct perf_sample *sample)
+{
+	const int exit_reason = perf_evsel__intval(evsel, sample, "trap");
+
+	if (exit_reason != HV_DECREMENTER)
+		return false;
+
+	return true;
+}
+
+/*
+ * is_perf_data_reorded_on_ppc - report whether the arch string stored in
+ * @evlist's environment is "ppc64" or "ppc64le".
+ *
+ * Returns false when @evlist, its env, or the arch string is missing.
+ */
+bool is_perf_data_reorded_on_ppc(struct perf_evlist *evlist)
+{
+	const char *arch;
+
+	if (!evlist || !evlist->env || !evlist->env->arch)
+		return false;
+
+	arch = evlist->env->arch;
+	if (strcmp(arch, "ppc64") == 0)
+		return true;
+
+	return strcmp(arch, "ppc64le") == 0;
+}
+
+/*
+ * arch__get_cpumode - compute the cpumode to use for @event.
+ *
+ * By default this is the cpumode encoded in the event header. When the data
+ * comes from ppc64/ppc64le, perf is in guest-only mode, and the sample is a
+ * KVMPPC_EXIT tracepoint whose "trap" field is HV_DECREMENTER, the cpumode is
+ * instead derived from the "msr" field of the tracepoint:
+ *   HV clear and PR set -> PERF_RECORD_MISC_GUEST_USER
+ *   anything else       -> PERF_RECORD_MISC_GUEST_KERNEL
+ *
+ * @evsel may be NULL (e.g. when no evsel could be resolved for the event);
+ * the header cpumode is returned in that case.
+ */
+u8 arch__get_cpumode(const union perf_event *event,
+		     struct perf_evsel *evsel,
+		     struct perf_sample *sample)
+{
+	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	u64 msr;
+	bool hv, pr;
+
+	if (!evsel)
+		return cpumode;
+
+	if (!is_perf_data_reorded_on_ppc(evsel->evlist) ||
+	    !perf_guest_only() ||
+	    !is_kvmppc_exit_event(evsel))
+		return cpumode;
+
+	/* Tracepoint fields can only be read when raw data is present. */
+	if (!sample->raw_data || !is_hv_dec_trap(evsel, sample))
+		return cpumode;
+
+	/*
+	 * Use 64-bit arithmetic explicitly: with 'unsigned long' a 32-bit
+	 * build would truncate the MSR and shift by more than the type width,
+	 * which is undefined behaviour.
+	 */
+	msr = perf_evsel__intval(evsel, sample, "msr");
+	hv = (msr & (1ULL << (PPC_MAX - HV_BIT))) != 0;
+	pr = (msr & (1ULL << (PPC_MAX - PR_BIT))) != 0;
+
+	if (!hv && pr)
+		return PERF_RECORD_MISC_GUEST_USER;
+
+	return PERF_RECORD_MISC_GUEST_KERNEL;
+}
+
+/*
+ * arch__get_ip - pick the instruction pointer to report for @sample.
+ *
+ * For a KVMPPC_EXIT tracepoint sample with "trap" == HV_DECREMENTER, taken
+ * from ppc64/ppc64le data in guest-only mode, the "pc" field of the
+ * tracepoint is returned. In every other case, including a sample that
+ * carries no raw data, sample->ip is returned unchanged.
+ */
+u64 arch__get_ip(struct perf_evsel *evsel, struct perf_sample *sample)
+{
+	if (!is_perf_data_reorded_on_ppc(evsel->evlist) ||
+	    !perf_guest_only() ||
+	    !is_kvmppc_exit_event(evsel))
+		return sample->ip;
+
+	/* Same guard as arch__get_cpumode(): no raw data, no fields to read. */
+	if (!sample->raw_data || !is_hv_dec_trap(evsel, sample))
+		return sample->ip;
+
+	return perf_evsel__intval(evsel, sample, "pc");
+}
+
int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
struct perf_sample *data)
{
@@ -1795,6 +1871,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
OVERFLOW_CHECK(array, data->raw_size, max_size);
data->raw_data = (void *)array;
array = (void *)array + data->raw_size;
+ data->ip = arch__get_ip(evsel, data);
}
if (type & PERF_SAMPLE_BRANCH_STACK) {
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index efad78f..a5f5cb5 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -408,4 +408,11 @@ typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *);
int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
attr__fprintf_f attr__fprintf, void *priv);
+u64 arch__get_ip(struct perf_evsel *evsel, struct perf_sample *sample); /* tracepoint "pc" for HV_DECREMENTER guest exits, else sample->ip */
+u8 arch__get_cpumode(const union perf_event *event, struct perf_evsel *evsel,
+ struct perf_sample *sample); /* header cpumode, or guest user/kernel derived from the tracepoint "msr" */
+bool is_kvmppc_exit_event(struct perf_evsel *evsel); /* true if evsel is the kvm_hv:kvm_guest_exit tracepoint */
+bool is_hv_dec_trap(struct perf_evsel *evsel, struct perf_sample *sample); /* true if the sample's "trap" field is HV_DECREMENTER */
+bool is_perf_data_reorded_on_ppc(struct perf_evlist *evlist); /* true if evlist->env->arch is "ppc64" or "ppc64le" */
+
+
#endif /* __PERF_EVSEL_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 40b7a0d..52beee8 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1131,9 +1131,10 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
static struct machine *machines__find_for_cpumode(struct machines *machines,
union perf_event *event,
- struct perf_sample *sample)
+ struct perf_sample *sample,
+ struct perf_evsel *evsel)
{
- const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ const u8 cpumode = arch__get_cpumode(event, evsel, sample);
struct machine *machine;
if (perf_guest &&
@@ -1237,7 +1238,7 @@ static int machines__deliver_event(struct machines *machines,
evsel = perf_evlist__id2evsel(evlist, sample->id);
- machine = machines__find_for_cpumode(machines, event, sample);
+ machine = machines__find_for_cpumode(machines, event, sample, evsel);
switch (event->header.type) {
case PERF_RECORD_SAMPLE:
--
2.1.4