Re: [PATCH 1/3] perf: add PERF_RECORD_NAMESPACES to include namespaces related info
From: Peter Zijlstra
Date: Thu Nov 10 2016 - 08:19:22 EST
On Thu, Nov 10, 2016 at 05:08:06PM +0530, Hari Bathini wrote:
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index c66a485..575aed6 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -344,7 +344,8 @@ struct perf_event_attr {
> use_clockid : 1, /* use @clockid for time fields */
> context_switch : 1, /* context switch data */
> write_backward : 1, /* Write ring buffer from end to beginning */
> - __reserved_1 : 36;
> + namespaces : 1, /* include namespaces data */
> + __reserved_1 : 35;
>
> union {
> __u32 wakeup_events; /* wakeup every n events */
> @@ -862,6 +863,24 @@ enum perf_event_type {
> */
> PERF_RECORD_SWITCH_CPU_WIDE = 15,
>
> + /*
> + * struct {
> + * struct perf_event_header header;
> + *
> + * u32 pid, tid;
> + * u64 time;
> + * u32 uts_ns_inum;
> + * u32 ipc_ns_inum;
> + * u32 mnt_ns_inum;
> + * u32 pid_ns_inum;
> + * u32 net_ns_inum;
> + * u32 cgroup_ns_inum;
> + * u32 user_ns_inum;
> + * struct sample_id sample_id;
> + * };
> + */
> + PERF_RECORD_NAMESPACES = 16,
So this format is not extensible, that is, if someone adds yet another
namespace, we'll need to introduce PERF_RECORD_NAMESPACES2.
Is there a 'natural' and exposed namespace index that we can use to
change it like:
u32 nr_nss;
u32 namespace[nr_nss];
?
> +static void perf_event_namespaces_output(struct perf_event *event,
> + void *data)
> +{
> + struct perf_namespaces_event *namespaces_event = data;
> + struct perf_output_handle handle;
> + struct perf_sample_data sample;
> + int size = namespaces_event->event_id.header.size;
> + struct nsproxy *nsproxy;
> + int ret;
> +
> + if (!perf_event_namespaces_match(event))
> + return;
> +
> + perf_event_header__init_id(&namespaces_event->event_id.header,
> + &sample, event);
> + ret = perf_output_begin(&handle, event,
> + namespaces_event->event_id.header.size);
> +
> + if (ret)
> + goto out;
If you were to introduce:
struct ns_event_id *ei = &namespace_event->event_id;
> +
> + namespaces_event->event_id.pid = perf_event_pid(event,
> + namespaces_event->task);
> + namespaces_event->event_id.tid = perf_event_tid(event,
> + namespaces_event->task);
> +
> + if (namespaces_event->task != current)
> + task_lock(namespaces_event->task);
> +
> + nsproxy = namespaces_event->task->nsproxy;
> + if (nsproxy != NULL) {
> + namespaces_event->event_id.uts_ns_inum =
> + nsproxy->uts_ns->ns.inum;
> +#ifdef CONFIG_IPC_NS
> + namespaces_event->event_id.ipc_ns_inum =
> + nsproxy->ipc_ns->ns.inum;
> +#endif
> + namespaces_event->event_id.mnt_ns_inum =
> + nsproxy->mnt_ns->ns.inum;
> + namespaces_event->event_id.pid_ns_inum =
> + nsproxy->pid_ns_for_children->ns.inum;
> +#ifdef CONFIG_NET
> + namespaces_event->event_id.net_ns_inum =
> + nsproxy->net_ns->ns.inum;
> +#endif
> +#ifdef CONFIG_CGROUPS
> + namespaces_event->event_id.cgroup_ns_inum =
> + nsproxy->cgroup_ns->ns.inum;
> +#endif
> + }
> +
> + namespaces_event->event_id.user_ns_inum =
> + __task_cred(namespaces_event->task)->user_ns->ns.inum;
You can do s/namespace_event->event_id./ei->/ which is tons shorter and
would result in less wrapping of lines and generally improve
readability.
> +
> + if (namespaces_event->task != current)
> + task_unlock(namespaces_event->task);
> +
> + namespaces_event->event_id.time = perf_event_clock(event);
> +
> + perf_output_put(&handle, namespaces_event->event_id);
> +
> + perf_event__output_id_sample(event, &handle, &sample);
> +
> + perf_output_end(&handle);
> +out:
> + namespaces_event->event_id.header.size = size;
> +}
> +
> +void perf_event_namespaces(struct task_struct *task)
> +{
> + struct perf_namespaces_event namespaces_event;
> +
> + if (!atomic_read(&nr_namespaces_events))
> + return;
> +
> + namespaces_event = (struct perf_namespaces_event){
> + .task = task,
> + .event_id = {
> + .header = {
> + .type = PERF_RECORD_NAMESPACES,
> + .misc = 0,
> + .size = sizeof(namespaces_event.event_id),
> + },
> + /* .pid */
> + /* .tid */
> + /* .time */
> + /* .uts_ns_inum */
> + /* .ipc_ns_inum */
> + /* .mnt_ns_inum */
> + /* .pid_ns_inum */
> + /* .net_ns_inum */
> + /* .cgroup_ns_inum */
> + /* .user_ns_inum */
> + },
> + };
> +
> + perf_iterate_sb(perf_event_namespaces_output,
> + &namespaces_event,
> + NULL);
> +}
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 997ac1d..3faca3d 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1818,6 +1818,7 @@ static __latent_entropy struct task_struct *copy_process(
> cgroup_post_fork(p);
> threadgroup_change_end(current);
> perf_event_fork(p);
> + perf_event_namespaces(p);
I would much prefer calling perf_event_namespace() from
perf_event_fork() and reduce the external interface.