On Wed, Feb 08, 2017 at 02:01:24PM +0530, Hari Bathini wrote:
With the advert of container technologies like docker, that depend'While device numbers are the same ..." ?
on namespaces for isolation, there is a need for tracing support for
namespaces. This patch introduces new PERF_RECORD_NAMESPACES event
for tracing based on namespaces related info. This event records
the device and inode numbers for every namespace of all processes.
While device number is same for all namespaces currently, that may
change in future, to avoid the need for a namespace of namespaces.Unfinished sentence
Recording device number along with inode number will take care of suchMore words on why this is so? Because I've no clue.
scenario.
@@ -6584,6 +6590,135 @@ void perf_event_comm(struct task_struct *task, bool exec)Contains the same hole and makes the record depend on architecture
}
/*
+ * namespaces tracking
+ */
+
+struct namespaces_event_id {
+ struct perf_event_header header;
+
+ u32 pid;
+ u32 tid;
+ u32 nr_namespaces;
+ struct perf_ns_link_info link_info[NAMESPACES_MAX];
+};
alignment requirements.
+static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,Two points here, since you've given these thingies a string identifier,
+ struct task_struct *task,
+ const struct proc_ns_operations *ns_ops)
+{
+ struct path ns_path;
+ struct inode *ns_inode;
+ void *error;
+
+ error = ns_get_path(&ns_path, task, ns_ops);
+ if (!error) {
+ snprintf(ns_link_info->name, NS_NAME_SIZE,
+ "%s", ns_path.dentry->d_iname);
+
+ ns_inode = ns_path.dentry->d_inode;
+ ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
+ ns_link_info->ino = ns_inode->i_ino;
+ }
+}
+
+static void perf_event_namespaces_output(struct perf_event *event,
+ void *data)
+{
+ struct perf_namespaces_event *namespaces_event = data;
+ struct perf_output_handle handle;
+ struct perf_sample_data sample;
+ struct namespaces_event_id *ei;
+ struct task_struct *task = namespaces_event->task;
+ int ret;
+
+ if (!perf_event_namespaces_match(event))
+ return;
+
+ ei = &namespaces_event->event_id;
+ perf_event_header__init_id(&ei->header, &sample, event);
+ ret = perf_output_begin(&handle, event, ei->header.size);
+ if (ret)
+ return;
+
+ ei->pid = perf_event_pid(event, task);
+ ei->tid = perf_event_tid(event, task);
+
+ ei->nr_namespaces = NAMESPACES_MAX;
+
+ perf_fill_ns_link_info(&ei->link_info[MNT_NS_INDEX],
+ task, &mntns_operations);
+
+#ifdef CONFIG_USER_NS
+ perf_fill_ns_link_info(&ei->link_info[USER_NS_INDEX],
+ task, &userns_operations);
+#endif
+#ifdef CONFIG_NET_NS
+ perf_fill_ns_link_info(&ei->link_info[NET_NS_INDEX],
+ task, &netns_operations);
+#endif
+#ifdef CONFIG_UTS_NS
+ perf_fill_ns_link_info(&ei->link_info[UTS_NS_INDEX],
+ task, &utsns_operations);
+#endif
+#ifdef CONFIG_IPC_NS
+ perf_fill_ns_link_info(&ei->link_info[IPC_NS_INDEX],
+ task, &ipcns_operations);
+#endif
+#ifdef CONFIG_PID_NS
+ perf_fill_ns_link_info(&ei->link_info[PID_NS_INDEX],
+ task, &pidns_operations);
+#endif
+#ifdef CONFIG_CGROUPS
+ perf_fill_ns_link_info(&ei->link_info[CGROUP_NS_INDEX],
+ task, &cgroupns_operations);
+#endif
do you still need to rely on their positional index? If not, you could
generate smaller events if we lack some of these CONFIG knobs.
Secondly, does anything in here depend on @event? Afaict you're
generating the exact same information for each event.
The reason we set {pid,tid} for each event is because of PID_NS, we
report the pid number as per the namespace associated with the event.
But from what I can tell, there is no such namespace of namespaces
dependency here and this should live in the function below.
@@ -9667,6 +9804,11 @@ SYSCALL_DEFINE5(perf_event_open,Many times we allow users access to such information; should this be
return -EACCES;
}
+ if (attr.namespaces) {
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ }
qualified with something like perf_paranoid_kernel() or somesuch?
+That seems like a very odd place, why not put it right before the
if (attr.freq) {
if (attr.sample_freq > sysctl_perf_event_sample_rate)
return -EINVAL;
diff --git a/kernel/fork.c b/kernel/fork.c
index 11c5c8a..fd77e67 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2289,6 +2289,9 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
free_fs_struct(new_fs);
bad_unshare_out:
+ if (!err)
+ perf_event_namespaces(current);
+
return err;
}
bad_unshare_cleanup_cred: label?
@@ -264,6 +265,10 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)Same again..
switch_task_namespaces(tsk, new_nsproxy);
out:
fput(file);
+
+ if (!err)
+ perf_event_namespaces(tsk);
+
return err;
}