[PATCH 05/12] perf trace: Deref sys_enter pointer args with contents from probe:vfs_getname

From: Arnaldo Carvalho de Melo
Date: Wed Aug 05 2015 - 16:12:07 EST


From: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>

To work like strace and dereference syscall pointer args we need to
insert probes (or tracepoints) right after we copy those bytes from
userspace.

Since we're formatting the syscall args at raw_syscalls:sys_enter time,
we need to have a formatter that just stores the position where, later,
when we get the probe:vfs_getname, we can insert the pointer contents.

Now, if a probe:vfs_getname with this format is in place:

# perf probe -l
probe:vfs_getname (on getname_flags:72@/home/git/linux/fs/namei.c with pathname)

That was, in this case, put in place with:

# perf probe 'vfs_getname=getname_flags:72 pathname=filename:string'
Added new event:
probe:vfs_getname (on getname_flags:72 with pathname=filename:string)

You can now use it in all perf tools, such as:

perf record -e probe:vfs_getname -aR sleep 1
#

Then 'perf trace' will notice that and do the pointer -> contents
expansion:

# trace -e open touch /tmp/bla
0.165 (0.010 ms): touch/17752 open(filename: /etc/ld.so.cache, flags: CLOEXEC) = 3
0.195 (0.011 ms): touch/17752 open(filename: /lib64/libc.so.6, flags: CLOEXEC) = 3
0.512 (0.012 ms): touch/17752 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) = 3
0.582 (0.012 ms): touch/17752 open(filename: /tmp/bla, flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: 438) = 3
#

Roughly equivalent to strace's output:

# strace -rT -e open touch /tmp/bla
0.000000 open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 <0.000039>
0.000317 open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 <0.000102>
0.001461 open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3 <0.000072>
0.000405 open("/tmp/bla", O_WRONLY|O_CREAT|O_NOCTTY|O_NONBLOCK, 0666) = 3 <0.000055>
0.000641 +++ exited with 0 +++
#

Now we need to either look for at all syscalls that are marked as
pointers and have some well known names ("filename", "pathname", etc)
and set the arg formatter to the one used for the "open" syscall in this
patch.

This implementation works for syscalls with just a string being copied
from userspace, for matching syscalls with more than one string being
copied via the same probe/trace point (vfs_getname) we need to extend
the vfs_getname probe spec to include the pointer too, but there are
some problems with that in 'perf probe' or the kernel kprobes code, need
to investigate before considering supporting multiple strings per
syscall.

Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxx>
Cc: David Ahern <dsahern@xxxxxxxxx>
Cc: Frederic Weisbecker <fweisbec@xxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Milian Wolff <mail@xxxxxxxxxx>
Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
Cc: Stephane Eranian <eranian@xxxxxxxxxx>
Link: http://lkml.kernel.org/n/tip-xvuwx6nuj8cf389kf9s2ue2s@xxxxxxxxxxxxxx
Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
---
tools/perf/builtin-trace.c | 76 +++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 75 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 12d6fc0227b1..0255dd892662 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -744,6 +744,11 @@ static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,

#define SCA_ACCMODE syscall_arg__scnprintf_access_mode

+static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
+ struct syscall_arg *arg);
+
+#define SCA_FILENAME syscall_arg__scnprintf_filename
+
static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
struct syscall_arg *arg)
{
@@ -1088,7 +1093,8 @@ static struct syscall_fmt {
{ .name = "newfstatat", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
{ .name = "open", .errmsg = true,
- .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
+ [1] = SCA_OPEN_FLAGS, /* flags */ }, },
{ .name = "open_by_handle_at", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
[2] = SCA_OPEN_FLAGS, /* flags */ }, },
@@ -1208,6 +1214,11 @@ static size_t fprintf_duration(unsigned long t, FILE *fp)
return printed + fprintf(fp, "): ");
}

+/**
+ * filename.ptr: The filename char pointer that will be vfs_getname'd
+ * filename.entry_str_pos: Where to insert the string translated from
+ * filename.ptr by the vfs_getname tracepoint/kprobe.
+ */
struct thread_trace {
u64 entry_time;
u64 exit_time;
@@ -1216,6 +1227,10 @@ struct thread_trace {
unsigned long pfmaj, pfmin;
char *entry_str;
double runtime_ms;
+ struct {
+ unsigned long ptr;
+ int entry_str_pos;
+ } filename;
struct {
int max;
char **table;
@@ -1418,6 +1433,27 @@ static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
return printed;
}

+static void thread__set_filename_pos(struct thread *thread, const char *bf,
+ unsigned long ptr)
+{
+ struct thread_trace *ttrace = thread__priv(thread);
+
+ ttrace->filename.ptr = ptr;
+ ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
+}
+
+static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ unsigned long ptr = arg->val;
+
+ if (!arg->trace->vfs_getname)
+ return scnprintf(bf, size, "%#x", ptr);
+
+ thread__set_filename_pos(arg->thread, bf, ptr);
+ return 0;
+}
+
static bool trace__filter_duration(struct trace *trace, double t)
{
return t < (trace->duration_filter * NSEC_PER_MSEC);
@@ -1938,7 +1974,45 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
union perf_event *event __maybe_unused,
struct perf_sample *sample)
{
+ struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+ struct thread_trace *ttrace;
+ size_t filename_len, entry_str_len, to_move;
+ ssize_t remaining_space;
+ char *pos;
+ const char *filename;
+
trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
+
+ if (!thread)
+ goto out;
+
+ ttrace = thread__priv(thread);
+ if (!ttrace)
+ goto out;
+
+ if (!ttrace->filename.ptr)
+ goto out;
+
+ entry_str_len = strlen(ttrace->entry_str);
+ remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
+ if (remaining_space <= 0)
+ goto out;
+
+ filename = trace->last_vfs_getname;
+ filename_len = strlen(filename);
+ if (filename_len > (size_t)remaining_space) {
+ filename += filename_len - remaining_space;
+ filename_len = remaining_space;
+ }
+
+ to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
+ pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
+ memmove(pos + filename_len, pos, to_move);
+ memcpy(pos, filename, filename_len);
+
+ ttrace->filename.ptr = 0;
+ ttrace->filename.entry_str_pos = 0;
+out:
return 0;
}

--
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/