[PATCH v2] perf record: collect user registers set jointly with dwarf stacks

From: Alexey Budankov
Date: Fri Apr 19 2019 - 15:10:56 EST



When dwarf stacks are collected together with a user-specified register
set via the --user-regs option, as in the example below, the full register
context is still captured in every sample:

$ perf record -g --call-graph dwarf,1024 --user-regs=IP,SP,BP -- matrix.gcc.g.O3

188143843893585 0x6b48 [0x4f8]: PERF_RECORD_SAMPLE(IP, 0x4002): 23828/23828: 0x401236 period: 1363819 addr: 0x7ffedbdd51ac
... FP chain: nr:0
... user regs: mask 0xff0fff ABI 64-bit
.... AX 0x53b
.... BX 0x7ffedbdd3cc0
.... CX 0xffffffff
.... DX 0x33d3a
.... SI 0x7f09b74c38d0
.... DI 0x0
.... BP 0x401260
.... SP 0x7ffedbdd3cc0
.... IP 0x401236
.... FLAGS 0x20a
.... CS 0x33
.... SS 0x2b
.... R8 0x7f09b74c3800
.... R9 0x7f09b74c2da0
.... R10 0xfffffffffffff3ce
.... R11 0x246
.... R12 0x401070
.... R13 0x7ffedbdd5db0
.... R14 0x0
.... R15 0x0
... ustack: size 1024, offset 0xe0
. data_src: 0x5080021
... thread: stack_test2.g.O:23828
...... dso: /root/abudanko/stacks/stack_test2.g.O3

After applying the change suggested in this patch, the sample data contains
only the user-specified register values, plus the IP and SP registers that
are always collected for dwarf stacks:

$ perf record -g --call-graph dwarf,1024 --user-regs=BP -- matrix.gcc.g.O3

188368474305373 0x5e40 [0x470]: PERF_RECORD_SAMPLE(IP, 0x4002): 23839/23839: 0x401236 period: 1260507 addr: 0x7ffd3d85e96c
... FP chain: nr:0
... user regs: mask 0x1c0 ABI 64-bit
.... BP 0x401260
.... SP 0x7ffd3d85cc20
.... IP 0x401236
... ustack: size 1024, offset 0x58
. data_src: 0x5080021
... thread: stack_test2.g.O:23839
...... dso: /root/abudanko/stacks/stack_test2.g.O3

IP and SP registers (dwarf_regs) are collected anyway, regardless of
the --user-regs option value provided on the command line:

  -g call-graph dwarf,K                        full_regs
  -g call-graph dwarf,K --user-regs=user_regs  user_regs | dwarf_regs
  --user-regs=user_regs                        user_regs

Signed-off-by: Alexey Budankov <alexey.budankov@xxxxxxxxxxxxxxx>
---
Changes in v2:
- implemented dwarf register set to avoid corrupted trace
when --user-regs option value omits IP,SP

---
tools/perf/arch/arm/include/perf_regs.h | 3 +++
tools/perf/arch/arm64/include/perf_regs.h | 3 +++
tools/perf/arch/powerpc/include/perf_regs.h | 3 +++
tools/perf/arch/s390/include/perf_regs.h | 3 +++
tools/perf/arch/x86/include/perf_regs.h | 3 +++
tools/perf/util/evsel.c | 5 ++++-
tools/perf/util/perf_regs.h | 1 +
7 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/tools/perf/arch/arm/include/perf_regs.h b/tools/perf/arch/arm/include/perf_regs.h
index ed20e0253e25..550642e4b651 100644
--- a/tools/perf/arch/arm/include/perf_regs.h
+++ b/tools/perf/arch/arm/include/perf_regs.h
@@ -15,6 +15,9 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_ARM_PC
#define PERF_REG_SP PERF_REG_ARM_SP

+#define PERF_DWARF_REGS_MASK ((1ULL << PERF_REG_IP) | \
+ (1ULL << PERF_REG_SP))
+
static inline const char *perf_reg_name(int id)
{
switch (id) {
diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
index baaa5e64a3fb..74391ab6f5a6 100644
--- a/tools/perf/arch/arm64/include/perf_regs.h
+++ b/tools/perf/arch/arm64/include/perf_regs.h
@@ -15,6 +15,9 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_ARM64_PC
#define PERF_REG_SP PERF_REG_ARM64_SP

+#define PERF_DWARF_REGS_MASK ((1ULL << PERF_REG_IP) | \
+ (1ULL << PERF_REG_SP))
+
static inline const char *perf_reg_name(int id)
{
switch (id) {
diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h
index e18a3556f5e3..a061f0ed12b0 100644
--- a/tools/perf/arch/powerpc/include/perf_regs.h
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -19,6 +19,9 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_POWERPC_NIP
#define PERF_REG_SP PERF_REG_POWERPC_R1

+#define PERF_DWARF_REGS_MASK ((1ULL << PERF_REG_IP) | \
+ (1ULL << PERF_REG_SP))
+
static const char *reg_names[] = {
[PERF_REG_POWERPC_R0] = "r0",
[PERF_REG_POWERPC_R1] = "r1",
diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h
index bcfbaed78cc2..c11b04c3385d 100644
--- a/tools/perf/arch/s390/include/perf_regs.h
+++ b/tools/perf/arch/s390/include/perf_regs.h
@@ -14,6 +14,9 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_S390_PC
#define PERF_REG_SP PERF_REG_S390_R15

+#define PERF_DWARF_REGS_MASK ((1ULL << PERF_REG_IP) | \
+ (1ULL << PERF_REG_SP))
+
static inline const char *perf_reg_name(int id)
{
switch (id) {
diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h
index 7f6d538f8a89..f7866be7b44f 100644
--- a/tools/perf/arch/x86/include/perf_regs.h
+++ b/tools/perf/arch/x86/include/perf_regs.h
@@ -24,6 +24,9 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_X86_IP
#define PERF_REG_SP PERF_REG_X86_SP

+#define PERF_DWARF_REGS_MASK ((1ULL << PERF_REG_IP) | \
+ (1ULL << PERF_REG_SP))
+
static inline const char *perf_reg_name(int id)
{
switch (id) {
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 84cfb9fe2fc6..e21746f4c76d 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -702,7 +702,10 @@ static void __perf_evsel__config_callchain(struct perf_evsel *evsel,
if (!function) {
perf_evsel__set_sample_bit(evsel, REGS_USER);
perf_evsel__set_sample_bit(evsel, STACK_USER);
- attr->sample_regs_user |= PERF_REGS_MASK;
+ if (opts->sample_user_regs)
+ attr->sample_regs_user |= PERF_DWARF_REGS_MASK;
+ else
+ attr->sample_regs_user |= PERF_REGS_MASK;
attr->sample_stack_user = param->dump_size;
attr->exclude_callchain_user = 1;
} else {
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index c9319f8d17a6..3d2c3e299081 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -31,6 +31,7 @@ int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
#else
#define PERF_REGS_MASK 0
#define PERF_REGS_MAX 0
+#define PERF_DWARF_REGS_MASK PERF_REGS_MASK

static inline const char *perf_reg_name(int id __maybe_unused)
{
--
2.20.1