[PATCH 2/9] perf/core: Add PERF_SAMPLE_CGROUP feature

From: Namhyung Kim
Date: Mon Dec 23 2019 - 01:08:21 EST


The PERF_SAMPLE_CGROUP bit is to save (perf_event) cgroup information
in the sample. It will add a 64-bit id to identify current cgroup and
it's the file handle in the cgroup file system. Userspace should use
this information with PERF_RECORD_CGROUP event to match which cgroup
it belongs.

I put it before PERF_SAMPLE_AUX for simplicity since it just needs a
64-bit word. But if we want bigger samples, I can work on that
direction too.

Cc: Tejun Heo <tj@xxxxxxxxxx>
Cc: Li Zefan <lizefan@xxxxxxxxxx>
Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx>
---
include/linux/perf_event.h | 1 +
include/uapi/linux/perf_event.h | 3 ++-
init/Kconfig | 3 ++-
kernel/events/core.c | 22 ++++++++++++++++++++++
4 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 6d4c22aee384..17b5bff045a6 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1001,6 +1001,7 @@ struct perf_sample_data {
u64 stack_user_size;

u64 phys_addr;
+ u64 cgroup;
} ____cacheline_aligned;

/* default value for data source */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index de2ab87ca92c..3a81e9806cb1 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -142,8 +142,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_REGS_INTR = 1U << 18,
PERF_SAMPLE_PHYS_ADDR = 1U << 19,
PERF_SAMPLE_AUX = 1U << 20,
+ PERF_SAMPLE_CGROUP = 1U << 21,

- PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */
+ PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */

__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
};
diff --git a/init/Kconfig b/init/Kconfig
index 128b68a16951..fedd7b503bf3 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1046,7 +1046,8 @@ config CGROUP_PERF
help
This option extends the perf per-cpu mode to restrict monitoring
to threads which belong to the cgroup specified and run on the
- designated cpu.
+ designated cpu. Or this can be used to have cgroup ID in samples
+ so that it can monitor performance events among cgroups.

Say N if unsure.

diff --git a/kernel/events/core.c b/kernel/events/core.c
index b0aa1b921769..db04ef695a33 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1754,6 +1754,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
if (sample_type & PERF_SAMPLE_PHYS_ADDR)
size += sizeof(data->phys_addr);

+ if (sample_type & PERF_SAMPLE_CGROUP)
+ size += sizeof(data->cgroup);
+
event->header_size = size;
}

@@ -6699,6 +6702,9 @@ void perf_output_sample(struct perf_output_handle *handle,
if (sample_type & PERF_SAMPLE_PHYS_ADDR)
perf_output_put(handle, data->phys_addr);

+ if (sample_type & PERF_SAMPLE_CGROUP)
+ perf_output_put(handle, data->cgroup);
+
if (sample_type & PERF_SAMPLE_AUX) {
perf_output_put(handle, data->aux_size);

@@ -6895,6 +6901,16 @@ void perf_prepare_sample(struct perf_event_header *header,
if (sample_type & PERF_SAMPLE_PHYS_ADDR)
data->phys_addr = perf_virt_to_phys(data->addr);

+#ifdef CONFIG_CGROUP_PERF
+ if (sample_type & PERF_SAMPLE_CGROUP) {
+ struct cgroup *cgrp;
+
+ /* protected by RCU */
+ cgrp = task_css_check(current, perf_event_cgrp_id, 1)->cgroup;
+ data->cgroup = cgroup_id(cgrp);
+ }
+#endif
+
if (sample_type & PERF_SAMPLE_AUX) {
u64 size;

@@ -11090,6 +11106,12 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,

if (attr->sample_type & PERF_SAMPLE_REGS_INTR)
ret = perf_reg_validate(attr->sample_regs_intr);
+
+#ifndef CONFIG_CGROUP_PERF
+ if (attr->sample_type & PERF_SAMPLE_CGROUP)
+ return -EINVAL;
+#endif
+
out:
return ret;

--
2.24.1.735.g03f4e72817-goog