[PATCH v2 2/3] perf kvm: enable record|report feature on powerpc

From: Ravi Bangoria
Date: Fri Jan 22 2016 - 01:00:39 EST


This patch contains core logic for enabling perf kvm {record|report} on
powerpc.

For perf kvm record:
This patch replaces the default event (cycles) with kvm_hv:kvm_guest_exit
when recording guest-only data from the host.

For perf kvm report:
This patch uses the 'kvm_guest_exit' tracepoint and checks the exit
reason of each KVM exit. If it is HV_DECREMENTER, the instruction pointer
recorded with the tracepoint is retrieved and resolved against the guest
kallsyms.

Signed-off-by: Ravi Bangoria <ravi.bangoria@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Hemant Kumar <hemant@xxxxxxxxxxxxxxxxxx>
---
changes in v2:
- Breakdown of v1 patch into two sub patches
- Merged parse-tp.c and evlist.c from tools/perf/arch/powerpc/util/ into
single file with name kvm.c

tools/perf/arch/powerpc/util/Build | 1 +
tools/perf/arch/powerpc/util/kvm.c | 104 +++++++++++++++++++++++++++++++++++++
tools/perf/util/event.c | 12 ++++-
tools/perf/util/evlist.c | 9 ++++
tools/perf/util/evlist.h | 1 +
tools/perf/util/evsel.c | 7 +++
tools/perf/util/evsel.h | 4 ++
tools/perf/util/session.c | 9 ++--
tools/perf/util/util.c | 5 ++
tools/perf/util/util.h | 1 +
10 files changed, 147 insertions(+), 6 deletions(-)
create mode 100644 tools/perf/arch/powerpc/util/kvm.c

diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index 7b8b0d1..eb819e0 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,6 @@
libperf-y += header.o
libperf-y += sym-handling.o
+libperf-y += kvm.o

libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/kvm.c b/tools/perf/arch/powerpc/util/kvm.c
new file mode 100644
index 0000000..317f29a
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/kvm.c
@@ -0,0 +1,104 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright (C) 2016 Hemant Kumar Shaw, IBM Corporation
+ * Copyright (C) 2016 Ravikumar B. Bangoria, IBM Corporation
+ */
+
+#include <linux/err.h>
+#include "../../../util/evsel.h"
+#include "../../../util/evlist.h"
+#include "../../../util/trace-event.h"
+#include "../../../util/session.h"
+#include "../../../util/util.h"
+
+#define KVMPPC_EXIT "kvm_hv:kvm_guest_exit"
+#define HV_DECREMENTER 2432
+#define HV_BIT 3
+#define PR_BIT 49
+#define PPC_MAX 63
+
+/*
+ * Arch hook for the default event: when only the guest is being sampled
+ * (perf_guest_only()), add the kvm_hv:kvm_guest_exit tracepoint to @evlist
+ * instead of the usual default event (cycles).
+ *
+ * Returns 0 if the tracepoint was added, the PTR_ERR() value if creating
+ * it failed, and -1 when not in guest-only mode.
+ */
+int perf_evlist__arch_add_default(struct perf_evlist *evlist)
+{
+	struct perf_evsel *tp;
+
+	if (perf_guest_only()) {
+		tp = perf_evsel__newtp_idx("kvm_hv", "kvm_guest_exit", 0);
+		if (IS_ERR(tp))
+			return PTR_ERR(tp);
+
+		perf_evlist__add(evlist, tp);
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Tell whether @evsel is the kvm_hv:kvm_guest_exit tracepoint.
+ *
+ * The first match is found by name; its attr.config value is then cached
+ * in a function-local static so that later calls only compare config
+ * values. Returns false for a NULL @evsel, for non-tracepoint events and
+ * for events that have no name to compare against.
+ */
+static bool is_kvmppc_exit_event(struct perf_evsel *evsel)
+{
+	/*
+	 * attr.config of the exit tracepoint; 0 until first recognised.
+	 * Kept as u64 so the cached value is never narrower than attr.config.
+	 */
+	static u64 kvmppc_exit;
+
+	/* The evsel may come from an id lookup that found nothing. */
+	if (!evsel || evsel->attr.type != PERF_TYPE_TRACEPOINT)
+		return false;
+
+	if (unlikely(kvmppc_exit == 0)) {
+		/* strcmp() on a NULL name is undefined, so check it first. */
+		if (!evsel->name || strcmp(KVMPPC_EXIT, evsel->name))
+			return false;
+		kvmppc_exit = evsel->attr.config;
+		return true;
+	}
+
+	return kvmppc_exit == evsel->attr.config;
+}
+
+/*
+ * Tell whether the "trap" field of @sample, read through @evsel, equals
+ * HV_DECREMENTER.
+ */
+static bool is_hv_dec_trap(struct perf_evsel *evsel, struct perf_sample *sample)
+{
+	const int exit_reason = perf_evsel__intval(evsel, sample, "trap");
+
+	if (exit_reason != HV_DECREMENTER)
+		return false;
+
+	return true;
+}
+
+/*
+ * Pick the instruction pointer to report for @sample.
+ *
+ * In guest-only mode, a kvm_hv:kvm_guest_exit sample whose trap is
+ * HV_DECREMENTER yields the tracepoint's "pc" field; every other sample
+ * keeps sample->ip.
+ */
+u64 arch__get_ip(struct perf_evsel *evsel, struct perf_sample *sample)
+{
+	if (!perf_guest_only())
+		return sample->ip;
+
+	if (!is_kvmppc_exit_event(evsel))
+		return sample->ip;
+
+	if (!is_hv_dec_trap(evsel, sample))
+		return sample->ip;
+
+	return perf_evsel__intval(evsel, sample, "pc");
+}
+
+/*
+ * Work out the cpumode for @sample.
+ *
+ * Starts from the cpumode bits in the event header. In guest-only mode, a
+ * kvm_hv:kvm_guest_exit sample that carries raw data and whose trap is
+ * HV_DECREMENTER is reclassified from the "msr" tracepoint field: PR set
+ * with HV clear gives PERF_RECORD_MISC_GUEST_USER, anything else gives
+ * PERF_RECORD_MISC_GUEST_KERNEL.
+ */
+u8 arch__get_cpumode(const union perf_event *event, struct perf_evsel *evsel,
+		     struct perf_sample *sample)
+{
+	/*
+	 * HV_BIT and PR_BIT are subtracted from PPC_MAX (63), so the masks
+	 * reach up to bit 60 and must be built in 64 bits: shifting an
+	 * unsigned long that far is undefined where long is 32 bits wide.
+	 */
+	const u64 hv_mask = 1ULL << (PPC_MAX - HV_BIT);
+	const u64 pr_mask = 1ULL << (PPC_MAX - PR_BIT);
+	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	u64 msr;
+
+	/* No evsel to inspect: keep the cpumode from the header. */
+	if (!evsel)
+		return cpumode;
+
+	if (!perf_guest_only() || !is_kvmppc_exit_event(evsel))
+		return cpumode;
+
+	if (!sample->raw_data || !is_hv_dec_trap(evsel, sample))
+		return cpumode;
+
+	msr = perf_evsel__intval(evsel, sample, "msr");
+	if ((msr & pr_mask) && !(msr & hv_mask))
+		return PERF_RECORD_MISC_GUEST_USER;
+
+	return PERF_RECORD_MISC_GUEST_KERNEL;
+}
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index f86e172..b8105a6 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1291,6 +1291,13 @@ void thread__find_addr_location(struct thread *thread,
al->sym = NULL;
}

+/*
+ * Default cpumode lookup: the cpumode bits of the event header, unchanged.
+ * Declared __weak so that an architecture can supply its own version.
+ */
+u8 __weak arch__get_cpumode(const union perf_event *event,
+			    struct perf_evsel *evsel __maybe_unused,
+			    struct perf_sample *sample __maybe_unused)
+{
+	const u8 header_cpumode = event->header.misc &
+				  PERF_RECORD_MISC_CPUMODE_MASK;
+
+	return header_cpumode;
+}
+
/*
* Callers need to drop the reference to al->thread, obtained in
* machine__findnew_thread()
@@ -1301,13 +1308,14 @@ int perf_event__preprocess_sample(const union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel)
{
- u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ u8 cpumode;
struct thread *thread = machine__findnew_thread(machine, sample->pid,
sample->tid);
-
if (thread == NULL)
return -1;

+ al->cpumode = cpumode = arch__get_cpumode(event, evsel, sample);
+
dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
/*
* Have we already created the kernel maps for this machine?
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index d81f13d..d0dca72 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -231,6 +231,12 @@ void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr)
}
}

+/*
+ * Default arch hook: adds nothing to the evlist and returns -1.
+ * Declared __weak so that an architecture can supply its own version.
+ */
+int __weak
+perf_evlist__arch_add_default(struct perf_evlist *evlist __maybe_unused)
+{
+ return -1;
+}
+
int perf_evlist__add_default(struct perf_evlist *evlist)
{
struct perf_event_attr attr = {
@@ -239,6 +245,9 @@ int perf_evlist__add_default(struct perf_evlist *evlist)
};
struct perf_evsel *evsel;

+ if (!perf_evlist__arch_add_default(evlist))
+ return 0;
+
event_attr_init(&attr);

perf_event_attr__set_max_precise_ip(&attr);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 7c4d9a2..98e24cd 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -75,6 +75,7 @@ void perf_evlist__delete(struct perf_evlist *evlist);

void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry);
void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel);
+int perf_evlist__arch_add_default(struct perf_evlist *evlist);
int perf_evlist__add_default(struct perf_evlist *evlist);
int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
struct perf_event_attr *attrs, size_t nr_attrs);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 4678086..afe1091 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1607,6 +1607,12 @@ static inline bool overflow(const void *endp, u16 max_size, const void *offset,
#define OVERFLOW_CHECK_u64(offset) \
OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64))

+/*
+ * Default instruction-pointer lookup: returns sample->ip unchanged.
+ * Declared __weak so that an architecture can supply its own version.
+ */
+u64 __weak arch__get_ip(struct perf_evsel *evsel __maybe_unused,
+ struct perf_sample *sample)
+{
+ return sample->ip;
+}
+
int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
struct perf_sample *data)
{
@@ -1780,6 +1786,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
OVERFLOW_CHECK(array, data->raw_size, max_size);
data->raw_data = (void *)array;
array = (void *)array + data->raw_size;
+ data->ip = arch__get_ip(evsel, data);
}

if (type & PERF_SAMPLE_BRANCH_STACK) {
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 8e75434..eb6f52e 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -400,4 +400,8 @@ typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *);
int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
attr__fprintf_f attr__fprintf, void *priv);

+u64 arch__get_ip(struct perf_evsel *evsel, struct perf_sample *sample);
+u8 arch__get_cpumode(const union perf_event *event, struct perf_evsel *evsel,
+ struct perf_sample *sample);
+
#endif /* __PERF_EVSEL_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 40b7a0d..1081ee0 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1130,10 +1130,11 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
}

static struct machine *machines__find_for_cpumode(struct machines *machines,
- union perf_event *event,
- struct perf_sample *sample)
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel)
{
- const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ u8 cpumode = arch__get_cpumode(event, evsel, sample);
struct machine *machine;

if (perf_guest &&
@@ -1237,7 +1238,7 @@ static int machines__deliver_event(struct machines *machines,

evsel = perf_evlist__id2evsel(evlist, sample->id);

- machine = machines__find_for_cpumode(machines, event, sample);
+ machine = machines__find_for_cpumode(machines, event, sample, evsel);

switch (event->header.type) {
case PERF_RECORD_SAMPLE:
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 7a2da7e..5e48ef1 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -37,6 +37,11 @@ bool test_attr__enabled;
bool perf_host = true;
bool perf_guest = false;

+/*
+ * True when sampling is restricted to the guest: perf_guest is set and
+ * perf_host is clear.
+ */
+bool perf_guest_only(void)
+{
+	if (perf_host)
+		return false;
+
+	return perf_guest;
+}
+
void event_attr_init(struct perf_event_attr *attr)
{
if (!perf_host)
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 61650f0..eff1d8f 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -344,5 +344,6 @@ int fetch_kernel_version(unsigned int *puint,

const char *perf_tip(const char *dirpath);
bool is_regular_file(const char *file);
+bool perf_guest_only(void);

#endif /* GIT_COMPAT_UTIL_H */
--
2.1.4