[PATCH] perf ctf: Convert invalid chars in a string before set value

From: Wang Nan
Date: Wed May 25 2016 - 06:40:01 EST


We observed that some crazy apps on Android set their comm to an unprintable
string. For example:

# cat /proc/10607/task/*/comm
tencent.qqmusic
...
Binder_2
日志输出线 <-- Chinese word 'log output thread'
WifiManager
...

'perf data convert' fails to convert a perf.data file containing such a string to CTF format.

For example:
# cat << EOF > ./badguy.c
#include <sys/prctl.h>
int main(int argc, char *argv[])
{
prctl(PR_SET_NAME, "\xe6\x97\xa5\xe5\xbf\x97\xe8\xbe\x93\xe5\x87\xba\xe7\xba\xbf");
while(1)
sleep(1);
return 0;
}
# gcc ./badguy.c
# perf record -e sched:* ./a.out
# perf data convert --to-ctf ./bad.ctf
CTF stream 4 flush failed
[ perf data convert: Converted 'perf.data' into CTF data './bad.ctf' ]
[ perf data convert: Converted and wrote 0.008 MB (78 samples) ]
# babeltrace ./bad.ctf/
[error] Packet size (18446744073709551615 bits) is larger than remaining file size (262144 bits).
[error] Stream index creation error.
[error] Open file stream error.
[warning] [Context] Cannot open_trace of format ctf at path ./bad.ctf.
[warning] [Context] cannot open trace "./bad.ctf" from ./bad.ctf/ for reading.
[error] Cannot open any trace for reading.

[error] opening trace "./bad.ctf/" for reading.

[error] none of the specified trace paths could be opened.

This patch converts each unprintable character to a hexadecimal escape sequence of the form \xNN.

After applying this patch the above test works correctly:

# ~/perf data convert --to-ctf ./good.ctf
[ perf data convert: Converted 'perf.data' into CTF data './good.ctf' ]
[ perf data convert: Converted and wrote 0.008 MB (78 samples) ]
# babeltrace ./good.ctf
..
[23:14:35.491665268] (+0.000001100) sched:sched_wakeup: { cpu_id = 4 }, { perf_ip = 0xFFFFFFFF810AEF33, perf_tid = 0, perf_pid = 0, perf_id = 5123, perf_period = 1, common_type = 270, common_flags = 45, common_preempt_count = 4, common_pid = 0, comm = "\xe6\x97\xa5\xe5\xbf\x97\xe8\xbe\x93\xe5\x87\xba\xe7\xba\xbf", pid = 1057, prio = 120, success = 1, target_cpu = 4 }
[23:14:35.491666230] (+0.000000962) sched:sched_wakeup: { cpu_id = 4 }, { perf_ip = 0xFFFFFFFF810AEF33, perf_tid = 0, perf_pid = 0, perf_id = 5122, perf_period = 1, common_type = 270, common_flags = 45, common_preempt_count = 4, common_pid = 0, comm = "\xe6\x97\xa5\xe5\xbf\x97\xe8\xbe\x93\xe5\x87\xba\xe7\xba\xbf", pid = 1057, prio = 120, success = 1, target_cpu = 4 }
..

Signed-off-by: Wang Nan <wangnan0@xxxxxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
---
tools/perf/util/data-convert-bt.c | 31 +++++++++++++++++++++++++++++--
1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index bbf69d2..c66490e 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -204,6 +204,34 @@ static unsigned long long adjust_signedness(unsigned long long value_int, int si
return (value_int & value_mask) | ~value_mask;
}

+/* Set @field to @string with non-printable bytes escaped as "\xNN". */
+static int string_set_value(struct bt_ctf_field *field, const char *string)
+{
+	size_t len = strlen(string), i, p = 0;
+	char *buffer = zalloc(len * 4 + 2);	/* worst case: 4 chars per byte */
+	int err;
+
+	if (!buffer)	/* best effort: hand over the raw string */
+		return bt_ctf_field_string_set_value(field, string);
+
+	for (i = 0; i < len; i++) {
+		/* ctype needs a non-negative value; char may be signed */
+		unsigned char c = string[i];
+
+		if (isprint(c)) {
+			buffer[p++] = c;
+		} else {
+			/* append in place; no strncat()/strlen() rescans */
+			snprintf(buffer + p, 5, "\\x%02x", (unsigned int)c);
+			p += 4;
+		}
+	}
+
+	err = bt_ctf_field_string_set_value(field, buffer);
+	free(buffer);
+	return err;
+}
+
static int add_tracepoint_field_value(struct ctf_writer *cw,
struct bt_ctf_event_class *event_class,
struct bt_ctf_event *event,
@@ -270,8 +298,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
}

if (flags & FIELD_IS_STRING)
- ret = bt_ctf_field_string_set_value(field,
- data + offset + i * len);
+ ret = string_set_value(field, data + offset + i * len);
else {
unsigned long long value_int;

--
1.8.3.4