[RFC PATCH 1/2] perf/namespaces: Add a new namespace for isolated tracing

From: Aravinda Prasad
Date: Tue Jun 14 2016 - 12:49:58 EST


From: Hari Bathini <hbathini@xxxxxxxxxxxxxxxxxx>

This patch adds a new namespace to the kernel, in line with the existing
namespaces such as pid and uts. The aim of this namespace is to support
isolated tracing within the context of the new namespace.

Signed-off-by: Hari Bathini <hbathini@xxxxxxxxxxxxxxxxxx>
---
fs/proc/namespaces.c | 4 +
include/linux/nsproxy.h | 2 +
include/linux/perf_namespace.h | 52 +++++++++++++++++
include/linux/proc_ns.h | 2 +
include/uapi/linux/sched.h | 1
init/Kconfig | 7 ++
kernel/Makefile | 1
kernel/fork.c | 3 +
kernel/nsproxy.c | 20 ++++++
kernel/perf_namespace.c | 124 ++++++++++++++++++++++++++++++++++++++++
10 files changed, 213 insertions(+), 3 deletions(-)
create mode 100644 include/linux/perf_namespace.h
create mode 100644 kernel/perf_namespace.c

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 51b8b0a..f9812fc 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -8,6 +8,7 @@
#include <linux/ipc_namespace.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
+#include <linux/perf_namespace.h>
#include "internal.h"


@@ -31,6 +32,9 @@ static const struct proc_ns_operations *ns_entries[] = {
#ifdef CONFIG_CGROUPS
&cgroupns_operations,
#endif
+#ifdef CONFIG_PERF_NS
+ &perfns_operations,
+#endif
};

static const char *proc_ns_get_link(struct dentry *dentry,
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index ac0d65b..7e83e63 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -9,6 +9,7 @@ struct uts_namespace;
struct ipc_namespace;
struct pid_namespace;
struct cgroup_namespace;
+struct perf_namespace;
struct fs_struct;

/*
@@ -35,6 +36,7 @@ struct nsproxy {
struct pid_namespace *pid_ns_for_children;
struct net *net_ns;
struct cgroup_namespace *cgroup_ns;
+ struct perf_namespace *perf_ns;
};
extern struct nsproxy init_nsproxy;

diff --git a/include/linux/perf_namespace.h b/include/linux/perf_namespace.h
new file mode 100644
index 0000000..9713724
--- /dev/null
+++ b/include/linux/perf_namespace.h
@@ -0,0 +1,52 @@
+#ifndef _LINUX_PERF_NS_H
+#define _LINUX_PERF_NS_H
+
+#include <linux/nsproxy.h>
+#include <linux/kref.h>
+#include <linux/ns_common.h>
+
+struct user_namespace;
+extern struct user_namespace init_user_ns;
+
+struct perf_namespace { /* One perf namespace instance, reference counted through kref */
+ struct kref kref; /* Reference count; put_perf_ns() releases via free_perf_ns() */
+ struct user_namespace *user_ns; /* Owning user namespace */
+ struct ns_common ns; /* Common namespace data (inum, ops) */
+};
+extern struct perf_namespace init_perf_ns; /* Initial namespace, defined in kernel/perf_namespace.c */
+
+#ifdef CONFIG_PERF_NS
+extern struct perf_namespace *copy_perf_ns(unsigned long flags,
+ struct user_namespace *user_ns, struct perf_namespace *old_ns);
+extern void free_perf_ns(struct kref *kref);
+
+static inline void get_perf_ns(struct perf_namespace *ns)
+{ /* Take a reference on @ns. */
+ kref_get(&ns->kref);
+}
+
+static inline void put_perf_ns(struct perf_namespace *ns)
+{ /* Drop a reference on @ns; free_perf_ns() is the kref release callback. */
+ kref_put(&ns->kref, free_perf_ns);
+}
+
+#else /* !CONFIG_PERF_NS */
+static inline void get_perf_ns(struct perf_namespace *ns)
+{ /* !CONFIG_PERF_NS stub: intentionally does nothing. */
+}
+
+static inline void put_perf_ns(struct perf_namespace *ns)
+{ /* !CONFIG_PERF_NS stub: intentionally does nothing. */
+}
+
+static inline struct perf_namespace *copy_perf_ns(unsigned long flags,
+ struct user_namespace *user_ns, struct perf_namespace *old_ns)
+{ /* !CONFIG_PERF_NS stub: reject CLONE_NEWPERF, otherwise pass @old_ns through. */
+ if (flags & CLONE_NEWPERF) /* a new perf namespace cannot be created in this configuration */
+ return ERR_PTR(-EINVAL);
+
+ return old_ns; /* returned unchanged; no reference is taken here */
+}
+#endif /* CONFIG_PERF_NS */
+
+#endif /* _LINUX_PERF_NS_H */
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index de0e771..c2916a7 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -27,6 +27,7 @@ extern const struct proc_ns_operations pidns_operations;
extern const struct proc_ns_operations userns_operations;
extern const struct proc_ns_operations mntns_operations;
extern const struct proc_ns_operations cgroupns_operations;
+extern const struct proc_ns_operations perfns_operations;

/*
* We always define these enumerators
@@ -38,6 +39,7 @@ enum {
PROC_USER_INIT_INO = 0xEFFFFFFDU,
PROC_PID_INIT_INO = 0xEFFFFFFCU,
PROC_CGROUP_INIT_INO = 0xEFFFFFFBU,
+ PROC_PERF_INIT_INO = 0xEFFFFFFAU,
};

#ifdef CONFIG_PROC_FS
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 5f0fe01..6a13d40 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -9,6 +9,7 @@
#define CLONE_FS 0x00000200 /* set if fs info shared between processes */
#define CLONE_FILES 0x00000400 /* set if open files shared between processes */
#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */
+#define CLONE_NEWPERF 0x00001000 /* New perf namespace */
#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */
#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */
#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */
diff --git a/init/Kconfig b/init/Kconfig
index f755a60..e0b23f2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1221,6 +1221,13 @@ config NET_NS
Allow user space to create what appear to be multiple instances
of the network stack.

+config PERF_NS
+ bool "Perf Namespaces"
+ default y if PERF_EVENTS
+ help
+ Support perf namespaces. A perf namespace provides isolated
+ tracing within the context of that namespace.
+
endif # NAMESPACES

config SCHED_AUTOGROUP
diff --git a/kernel/Makefile b/kernel/Makefile
index e2ec54e..ee94119 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_UTS_NS) += utsname.o
obj-$(CONFIG_USER_NS) += user_namespace.o
obj-$(CONFIG_PID_NS) += pid_namespace.o
+obj-$(CONFIG_PERF_NS) += perf_namespace.o
obj-$(CONFIG_IKCONFIG) += configs.o
obj-$(CONFIG_SMP) += stop_machine.o
obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
diff --git a/kernel/fork.c b/kernel/fork.c
index 5c2c355..d53756c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1926,7 +1926,8 @@ static int check_unshare_flags(unsigned long unshare_flags)
if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
- CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP))
+ CLONE_NEWUSER|CLONE_NEWPID|
+ CLONE_NEWCGROUP|CLONE_NEWPERF))
return -EINVAL;
/*
* Not implemented, but pretend it works if there is nothing
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 782102e..b9a9831 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -22,6 +22,7 @@
#include <linux/pid_namespace.h>
#include <net/net_namespace.h>
#include <linux/ipc_namespace.h>
+#include <linux/perf_namespace.h>
#include <linux/proc_ns.h>
#include <linux/file.h>
#include <linux/syscalls.h>
@@ -43,6 +44,9 @@ struct nsproxy init_nsproxy = {
#ifdef CONFIG_CGROUPS
.cgroup_ns = &init_cgroup_ns,
#endif
+#ifdef CONFIG_PERF_NS
+ .perf_ns = &init_perf_ns,
+#endif
};

static inline struct nsproxy *create_nsproxy(void)
@@ -103,6 +107,12 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
goto out_cgroup;
}

+ new_nsp->perf_ns = copy_perf_ns(flags, user_ns, tsk->nsproxy->perf_ns);
+ if (IS_ERR(new_nsp->perf_ns)) {
+ err = PTR_ERR(new_nsp->perf_ns);
+ goto out_perf;
+ }
+
new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
if (IS_ERR(new_nsp->net_ns)) {
err = PTR_ERR(new_nsp->net_ns);
@@ -113,6 +123,9 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,

out_net:
+ if (new_nsp->perf_ns)
+ put_perf_ns(new_nsp->perf_ns); /* net_ns failed: drop the perf_ns acquired just before it */
+out_perf:
put_cgroup_ns(new_nsp->cgroup_ns);
out_cgroup:
if (new_nsp->pid_ns_for_children)
put_pid_ns(new_nsp->pid_ns_for_children);
@@ -142,7 +155,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)

if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
CLONE_NEWPID | CLONE_NEWNET |
- CLONE_NEWCGROUP)))) {
+ CLONE_NEWCGROUP | CLONE_NEWPERF)))) {
get_nsproxy(old_ns);
return 0;
}
@@ -177,6 +190,8 @@ void free_nsproxy(struct nsproxy *ns)
put_uts_ns(ns->uts_ns);
if (ns->ipc_ns)
put_ipc_ns(ns->ipc_ns);
+ if (ns->perf_ns)
+ put_perf_ns(ns->perf_ns);
if (ns->pid_ns_for_children)
put_pid_ns(ns->pid_ns_for_children);
put_cgroup_ns(ns->cgroup_ns);
@@ -195,7 +210,8 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
int err = 0;

if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
- CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP)))
+ CLONE_NEWNET | CLONE_NEWPID |
+ CLONE_NEWCGROUP | CLONE_NEWPERF)))
return 0;

user_ns = new_cred ? new_cred->user_ns : current_user_ns();
diff --git a/kernel/perf_namespace.c b/kernel/perf_namespace.c
new file mode 100644
index 0000000..5b76fd8
--- /dev/null
+++ b/kernel/perf_namespace.c
@@ -0,0 +1,124 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <linux/export.h>
+#include <linux/perf_namespace.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/user_namespace.h>
+#include <linux/proc_ns.h>
+
+static struct perf_namespace *create_perf_ns(struct user_namespace *user_ns)
+{ /* Allocate a perf namespace owned by @user_ns; returns ERR_PTR() on failure. */
+ struct perf_namespace *perf_ns;
+ int err;
+
+ perf_ns = kmalloc(sizeof(struct perf_namespace), GFP_KERNEL);
+ if (!perf_ns)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&perf_ns->kref); /* the initial reference is the one returned to the caller */
+ err = ns_alloc_inum(&perf_ns->ns);
+ if (err) {
+ kfree(perf_ns); /* user_ns reference not taken yet, so only the allocation is undone */
+ return ERR_PTR(err);
+ }
+
+ perf_ns->ns.ops = &perfns_operations;
+ perf_ns->user_ns = get_user_ns(user_ns); /* pairs with put_user_ns() in free_perf_ns() */
+ return perf_ns;
+}
+
+struct perf_namespace *copy_perf_ns(unsigned long flags,
+ struct user_namespace *user_ns, struct perf_namespace *old_ns)
+{ /* Share @old_ns, or create a fresh namespace when CLONE_NEWPERF is set. */
+ struct perf_namespace *new_ns;
+
+ BUG_ON(!old_ns);
+ get_perf_ns(old_ns); /* becomes the caller's reference when the namespace is shared */
+
+ if (!(flags & CLONE_NEWPERF))
+ return old_ns;
+
+ new_ns = create_perf_ns(user_ns); /* may be an ERR_PTR; passed straight back to the caller */
+
+ put_perf_ns(old_ns); /* drop the reference taken above; the old namespace is not returned */
+ return new_ns;
+}
+
+void free_perf_ns(struct kref *kref)
+{ /* kref release callback passed to kref_put() by put_perf_ns(). */
+ struct perf_namespace *ns;
+
+ ns = container_of(kref, struct perf_namespace, kref);
+ put_user_ns(ns->user_ns); /* release the reference taken in create_perf_ns() */
+ ns_free_inum(&ns->ns); /* release the inum obtained with ns_alloc_inum() */
+ kfree(ns);
+}
+
+static inline struct perf_namespace *to_perf_ns(struct ns_common *ns)
+{ /* Map an embedded ns_common back to its containing perf_namespace. */
+ return container_of(ns, struct perf_namespace, ns);
+}
+
+static struct ns_common *perfns_get(struct task_struct *task)
+{ /* .get handler: return @task's perf namespace with a reference held, or NULL. */
+ struct perf_namespace *ns = NULL;
+ struct nsproxy *nsproxy;
+
+ task_lock(task); /* task->nsproxy is read under task_lock() */
+ nsproxy = task->nsproxy;
+ if (nsproxy) { /* with no nsproxy, ns stays NULL and NULL is returned */
+ ns = nsproxy->perf_ns;
+ get_perf_ns(ns); /* reference handed to the caller; perfns_put() is the matching release */
+ }
+ task_unlock(task);
+
+ return ns ? &ns->ns : NULL;
+}
+
+static void perfns_put(struct ns_common *ns)
+{ /* .put handler: drop one reference on the perf namespace embedding @ns. */
+ put_perf_ns(to_perf_ns(ns));
+}
+
+static int perfns_install(struct nsproxy *nsproxy, struct ns_common *new)
+{ /* .install handler: switch @nsproxy to the perf namespace behind @new; 0 or -EPERM. */
+ struct perf_namespace *ns = to_perf_ns(new);
+
+ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || /* CAP_SYS_ADMIN over the target's owning user ns ... */
+ !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) /* ... and in the caller's current user ns */
+ return -EPERM;
+
+ get_perf_ns(ns); /* take the new reference before dropping the old one */
+ put_perf_ns(nsproxy->perf_ns);
+ nsproxy->perf_ns = ns;
+ return 0;
+}
+
+const struct proc_ns_operations perfns_operations = { /* proc namespace hooks for the perf namespace */
+ .name = "perf", /* presumably the /proc/<pid>/ns entry name; confirm against fs/proc/namespaces.c */
+ .type = CLONE_NEWPERF,
+ .get = perfns_get,
+ .put = perfns_put,
+ .install = perfns_install,
+};
+
+/*
+ * TODO: Find a better place to put this..
+ */
+struct perf_namespace init_perf_ns = { /* Initial perf namespace, referenced by init_nsproxy */
+ .kref = {
+ .refcount = ATOMIC_INIT(2), /* NOTE(review): rationale for an initial count of 2 is not visible here; confirm */
+ },
+ .user_ns = &init_user_ns,
+ .ns.inum = PROC_PERF_INIT_INO, /* fixed inode number from the enum in proc_ns.h */
+#ifdef CONFIG_PERF_NS
+ .ns.ops = &perfns_operations, /* NOTE(review): file is built only with CONFIG_PERF_NS (kernel/Makefile), so the guard looks redundant */
+#endif
+};
+EXPORT_SYMBOL_GPL(init_perf_ns);