[PATCH v2] perf tools: Allow passing perf's own pid to '--filter'

From: Wang Nan
Date: Mon Jul 06 2015 - 23:39:24 EST


This patch allows passing perf's own PID to '--filter' by using
'$PERFPID'. This is useful when capturing tracepoint events
system-wide.

Before this patch, when doing something like:

# perf record -a -e syscalls:sys_enter_write <cmd>

One could easily get a result like this:

# perf report --stdio
...
# Overhead Command Shared Object Symbol
# ........ ....... .................. ....................
#
99.99% perf libpthread-2.18.so [.] __write_nocancel
0.01% ls libc-2.18.so [.] write
0.01% sshd libc-2.18.so [.] write
...

Where most events are generated by perf itself.

A shell trick can be done to filter perf itself out:

# cat << EOF > ./tmp
> #!/bin/sh
> exec perf record -e ... --filter="common_pid != \$\$" -a sleep 10
> EOF
# chmod a+x ./tmp
# ./tmp

However, doing so is not user-friendly.

This patch introduces a '$PERFPID' placeholder to perf's filter. Users
can now achieve the same result with:

# perf record -e ... --filter='common_pid != $PERFPID' -a sleep 10

This patch adds the variable replacement code to perf_evsel__apply_filter(),
before the PERF_EVENT_IOC_SET_FILTER ioctl, so not only 'perf record' but all
subcommands which use a filter can utilize $PERFPID.

Andi Kleen sent a similar patch in 2014, but it wasn't applied; the
reason is not clear.

Signed-off-by: Wang Nan <wangnan0@xxxxxxxxxx>
---

This patch is based on Arnaldo Carvalho de Melo's git tree:

https://git.kernel.org/cgit/linux/kernel/git/acme/linux.git/commit/?h=perf/core

---
tools/perf/Documentation/perf-record.txt | 5 +-
tools/perf/util/evsel.c | 110 ++++++++++++++++++++++++++++++-
2 files changed, 111 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 9b9d9d0..9c67482 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -61,7 +61,10 @@ OPTIONS
"perf report" to view group events together.

--filter=<filter>::
- Event filter.
+ Event filter. $PERFPID is allowed to be used to represent perf's own pid.
+ Note that '$' has special meaning for shell. Don't forget to use ''
+ quotation marks or to use '\' to escape when using '$PERFPID' in command
+ line.

-a::
--all-cpus::
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 83c0803..7f2a1a5 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -816,12 +816,116 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ncpus, int nthrea
return 0;
}

+static int
+perf_evsel__append_filter_token(const char *key, char *new_filter,
+ ssize_t *pspace)
+{
+ if (strcmp(key, "PERFPID") == 0) {
+ char pid_buf[32];
+ pid_t self_pid = getpid();
+
+ snprintf(pid_buf, sizeof(pid_buf), "%d", self_pid);
+ strncat(new_filter, pid_buf, *pspace);
+ *pspace -= strlen(pid_buf);
+ if (*pspace < 0)
+ return -1;
+ return 0;
+ }
+
+ return -1;
+}
+
+static const char *
+perf_evsel__postproc_filter(const char *filter)
+{
+ char *dollar = NULL, *sep = NULL, *p;
+ char *old_filter = NULL, *new_filter = NULL;
+ ssize_t space;
+
+ if (!filter)
+ return NULL;
+
+ dollar = strchr(filter, '$');
+ if (!dollar)
+ return filter;
+
+ p = old_filter = strdup(filter);
+ if (!old_filter) {
+ pr_warning("Can't alloc memory when postprocing filter '%s'\n",
+ filter);
+ return filter;
+ }
+
+ dollar = old_filter + (dollar - filter);
+
+ /*
+ * See perf_event_set_filter(). Length of a valid filter is
+ * limited by page_size.
+ */
+ new_filter = malloc(page_size);
+ if (!new_filter) {
+ pr_warning("Can't alloc memory when postprocing filter '%s'\n",
+ filter);
+ goto errout;
+ }
+
+ *new_filter = '\0';
+ space = page_size - 1;
+
+ while (1) {
+ if (dollar)
+ *dollar = '\0';
+ strncat(new_filter, p, space);
+ space -= strlen(p);
+ if (space < 0)
+ goto errout;
+ if (!dollar)
+ break;
+
+ sep = strchr(dollar + 1, ' ');
+ if (sep)
+ *sep = '\0';
+
+ if (perf_evsel__append_filter_token(dollar + 1, new_filter,
+ &space)) {
+ pr_warning("Filter become too long: '%s'\n", filter);
+ goto errout;
+ }
+
+ if (!sep)
+ break;
+
+ p = sep;
+ *p = ' ';
+ dollar = strchr(p, '$');
+ }
+
+ free(old_filter);
+ return new_filter;
+
+errout:
+ free(old_filter);
+ free(new_filter);
+ return filter;
+}
+
int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
const char *filter)
{
- return perf_evsel__run_ioctl(evsel, ncpus, nthreads,
- PERF_EVENT_IOC_SET_FILTER,
- (void *)filter);
+ const char *real_filter;
+ int err;
+
+ real_filter = perf_evsel__postproc_filter(filter);
+ if (!real_filter)
+ real_filter = filter;
+
+ pr_debug("set filter: '%s'\n", real_filter);
+ err = perf_evsel__run_ioctl(evsel, ncpus, nthreads,
+ PERF_EVENT_IOC_SET_FILTER,
+ (void *)real_filter);
+ if (real_filter != filter)
+ free((void *)real_filter);
+ return err;
}

int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter)
--
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/