[PATCH/RFC v4 2/2] perf tools: monitoring per task counter with per cgroup event
From: Song Liu
Date: Wed Oct 03 2018 - 17:29:56 EST
This is just a prototype.
The previous patch enables sharing a hardware PMU among perf_events within the
same perf_event_context. This sharing comes with the limitation that a per-CPU
event cannot share a hardware PMU with a per-task event. The limitation becomes
a blocker when certain events can only use a specific PMU, for example,
ref-cycles on some Intel CPUs. The following two commands will not share
the PMU (when run in parallel):
perf stat -e ref-cycles -I 1000
perf stat -e ref-cycles -I 1000 --pid <pid>
This patch shows a prototype that solves this problem with cgroup events.
With this patch, the following two commands can share the PMU:
perf stat -e ref-cycles -I 1000
perf stat -e ref-cycles -I 1000 --pid <pid> --create-cgroup
The second command creates a cgroup for the pid and moves the pid into
that cgroup. Then a cgroup event (instead of a task event) is created
to monitor the process.
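For reference, this does roughly the equivalent of the manual steps below.
The mount point and the "1234" in the cgroup name are only illustrative:
the prototype names the cgroup "perf.<pid of perf itself>" and locates the
perf_event cgroup mount with cgroupfs_find_mountpoint():
# create a cgroup in the perf_event hierarchy and move the target pid into it
mkdir /sys/fs/cgroup/perf_event/perf.1234
echo <pid> > /sys/fs/cgroup/perf_event/perf.1234/cgroup.procs
# count with a cgroup event instead of a task event
perf stat -e ref-cycles -I 1000 -G perf.1234
On exit, perf stat moves the pids back to the root cgroup and removes the
cgroup directory.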
Alternatively, we could add a mechanism in the kernel that is very
similar to cgroup perf events. I am open to other suggestions as well.
Signed-off-by: Song Liu <songliubraving@xxxxxx>
Cc: Tejun Heo <tj@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
---
tools/perf/builtin-stat.c | 26 ++++++++++++++
tools/perf/util/cgroup.c | 76 +++++++++++++++++++++++++++++++++++++++
tools/perf/util/cgroup.h | 5 +++
tools/perf/util/target.h | 1 +
4 files changed, 108 insertions(+)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index b86aba1c8028..66a4da2d506e 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -646,6 +646,17 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
ts, argc, argv);
}
+static void cleanup(void)
+{
+ /* clean up cgroups */
+ if (target.create_cgroup) {
+ char name[32];
+
+ scnprintf(name, 31, "perf.%u", getpid());
+ cgroup__cleanup(name);
+ }
+}
+
static volatile int signr = -1;
static void skip_signal(int signo)
@@ -661,6 +672,7 @@ static void skip_signal(int signo)
* and fast PID recycling
*/
child_pid = -1;
+ cleanup();
}
static void sig_atexit(void)
@@ -725,6 +737,8 @@ static const struct option stat_options[] = {
"stat events on existing process id"),
OPT_STRING('t', "tid", &target.tid, "tid",
"stat events on existing thread id"),
+ OPT_BOOLEAN(0, "create-cgroup", &target.create_cgroup,
+ "create a cgroup for the pid/tid"),
OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
"system-wide collection from all CPUs"),
OPT_BOOLEAN('g', "group", &group,
@@ -1607,6 +1621,17 @@ int cmd_stat(int argc, const char **argv)
perf_stat__collect_metric_expr(evsel_list);
perf_stat__init_shadow_stats();
+ if (target.create_cgroup) {
+ char name[32];
+
+ scnprintf(name, 31, "perf.%u", getpid());
+ cgroup__create(name);
+ cgroup__add_pid(name, strtoul(target.pid, NULL, 0));
+
+ cgroup__add_evlist(name, evsel_list);
+ target.pid = NULL;
+ }
+
if (stat_config.csv_sep) {
stat_config.csv_output = true;
if (!strcmp(stat_config.csv_sep, "\\t"))
@@ -1906,5 +1931,6 @@ int cmd_stat(int argc, const char **argv)
runtime_stat_delete(&stat_config);
+ cleanup();
return status;
}
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index ccd02634a616..f3e706f6fa96 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -249,3 +249,79 @@ int parse_cgroups(const struct option *opt, const char *str,
}
return 0;
}
+
+int cgroup__add_evlist(const char *name, struct perf_evlist *evlist)
+{
+ return add_cgroup(evlist, name);
+}
+
+int cgroup__create(const char *name)
+{
+ char path[PATH_MAX + 1];
+ char mnt[PATH_MAX + 1];
+
+ if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1))
+ return -1;
+
+ scnprintf(path, PATH_MAX, "%s/%s", mnt, name);
+
+ return mkdir(path, 0755);
+}
+
+int cgroup__add_pid(const char *name, pid_t pid)
+{
+ char path[PATH_MAX + 1];
+ char mnt[PATH_MAX + 1];
+ char buf[32];
+ int fd;
+
+ if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1))
+ return -1;
+
+ scnprintf(path, PATH_MAX, "%s/%s/cgroup.procs", mnt, name);
+ fd = open(path, O_WRONLY);
+ if (fd < 0)
+ return -1;
+ scnprintf(buf, 31, "%u", pid);
+ if (write(fd, buf, strlen(buf)) < 0)
+ fprintf(stderr, "Error writing %s to %s\n", buf, path);
+
+ close(fd);
+ return 0;
+}
+
+int cgroup__cleanup(const char *name)
+{
+ char path[PATH_MAX + 1];
+ char mnt[PATH_MAX + 1];
+ char *line = NULL;
+ size_t len = 0;
+ FILE *fp;
+ int fd;
+
+ if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1))
+ return -1;
+
+ scnprintf(path, PATH_MAX, "%s/%s/cgroup.procs", mnt, name);
+ fp = fopen(path, "r");
+
+ if (fp == NULL)
+ return -1;
+
+ scnprintf(path, PATH_MAX, "%s/cgroup.procs", mnt);
+ fd = open(path, O_WRONLY);
+ if (fd < 0) {
+ fclose(fp);
+ return -1;
+ }
+
+ while (getline(&line, &len, fp) != -1) {
+ if (write(fd, line, strlen(line)) < 0)
+ fprintf(stderr, "Error writing %s to %s\n", line, path);
+ }
+ close(fd);
+ fclose(fp);
+
+ scnprintf(path, PATH_MAX, "%s/%s", mnt, name);
+ return rmdir(path);
+}
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
index f033a80c1b14..7bdd8d99d130 100644
--- a/tools/perf/util/cgroup.h
+++ b/tools/perf/util/cgroup.h
@@ -26,4 +26,9 @@ void evlist__set_default_cgroup(struct perf_evlist *evlist, struct cgroup *cgrou
int parse_cgroups(const struct option *opt, const char *str, int unset);
+int cgroup__create(const char *name);
+int cgroup__cleanup(const char *name);
+int cgroup__add_pid(const char *name, pid_t pid);
+int cgroup__add_evlist(const char *name, struct perf_evlist *evlist);
+
#endif /* __CGROUP_H__ */
diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
index 6ef01a83b24e..03c9ac06660a 100644
--- a/tools/perf/util/target.h
+++ b/tools/perf/util/target.h
@@ -15,6 +15,7 @@ struct target {
bool uses_mmap;
bool default_per_cpu;
bool per_thread;
+ bool create_cgroup;
};
enum target_errno {
--
2.17.1