Re: [PATCH v4 1/3] nsproxy: add struct nsset

From: Serge E. Hallyn
Date: Fri May 08 2020 - 00:04:56 EST


On Tue, May 05, 2020 at 04:04:30PM +0200, Christian Brauner wrote:
> Add a simple struct nsset. It holds all necessary pieces to switch to a new
> set of namespaces without leaving a task in a half-switched state which we
> will make use of in the next patch. This patch switches the existing setns
> logic over without causing a change in setns() behavior. This brings
> setns() closer to how unshare() works(). The prepare_ns() function is
> responsible to prepare all necessary information. This has two reasons.
> First it minimizes dependencies between individual namespaces, i.e. all
> install handler can expect that all fields are properly initialized
> independent in what order they are called in. Second, this makes the code
> easier to maintain and easier to follow if it needs to be changed.
>
> The prepare_ns() helper will only be switched over to use a flags argument
> in the next patch. Here it will still use nstype as a simple integer
> argument which was argued would be clearer. I'm not particularly
> opinionated about this if it really helps or not. The struct nsset itself
> already contains the flags field since its name already indicates that it
> can contain information required by different namespaces. None of this
> should have functional consequences.
>
> Cc: Eric W. Biederman <ebiederm@xxxxxxxxxxxx>
> Cc: Serge Hallyn <serge@xxxxxxxxxx>

Reviewed-by: Serge Hallyn <serge@xxxxxxxxxx>

Thanks, Christian.

> Cc: Jann Horn <jannh@xxxxxxxxxx>
> Cc: Michael Kerrisk <mtk.manpages@xxxxxxxxx>
> Cc: Aleksa Sarai <cyphar@xxxxxxxxxx>
> Signed-off-by: Christian Brauner <christian.brauner@xxxxxxxxxx>
> ---
> /* v2 */
> patch introduced
>
> /* v3 */
> - Eric W. Biederman <ebiederm@xxxxxxxxxxxx>:
> - Remove the prior ns_capable_cred() patch and simplify the permission
> check from ns_capable_cred(nsset, nsset->cred->user_ns, CAP_SYS_ADMIN))
> to from ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN)).
>
> /* v4 */
> - Eric W. Biederman <ebiederm@xxxxxxxxxxxx>:
> - Fix nstype == 0 case.
> ---
> fs/namespace.c | 10 ++--
> include/linux/mnt_namespace.h | 1 +
> include/linux/nsproxy.h | 24 ++++++++++
> include/linux/proc_ns.h | 4 +-
> ipc/namespace.c | 7 ++-
> kernel/cgroup/namespace.c | 5 +-
> kernel/nsproxy.c | 90 ++++++++++++++++++++++++++++++-----
> kernel/pid_namespace.c | 5 +-
> kernel/time/namespace.c | 5 +-
> kernel/user_namespace.c | 8 ++--
> kernel/utsname.c | 5 +-
> net/core/net_namespace.c | 5 +-
> 12 files changed, 132 insertions(+), 37 deletions(-)
>
> diff --git a/fs/namespace.c b/fs/namespace.c
> index a28e4db075ed..62899fad4a04 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -3954,16 +3954,18 @@ static void mntns_put(struct ns_common *ns)
> put_mnt_ns(to_mnt_ns(ns));
> }
>
> -static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
> +static int mntns_install(struct nsset *nsset, struct ns_common *ns)
> {
> - struct fs_struct *fs = current->fs;
> + struct nsproxy *nsproxy = nsset->nsproxy;
> + struct fs_struct *fs = nsset->fs;
> struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
> + struct user_namespace *user_ns = nsset->cred->user_ns;
> struct path root;
> int err;
>
> if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
> - !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
> - !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
> + !ns_capable(user_ns, CAP_SYS_CHROOT) ||
> + !ns_capable(user_ns, CAP_SYS_ADMIN))
> return -EPERM;
>
> if (is_anon_ns(mnt_ns))
> diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
> index 35942084cd40..007cfa52efb2 100644
> --- a/include/linux/mnt_namespace.h
> +++ b/include/linux/mnt_namespace.h
> @@ -6,6 +6,7 @@
> struct mnt_namespace;
> struct fs_struct;
> struct user_namespace;
> +struct ns_common;
>
> extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
> struct user_namespace *, struct fs_struct *);
> diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
> index 074f395b9ad2..cdb171efc7cb 100644
> --- a/include/linux/nsproxy.h
> +++ b/include/linux/nsproxy.h
> @@ -41,6 +41,30 @@ struct nsproxy {
> };
> extern struct nsproxy init_nsproxy;
>
> +/*
> + * A structure to encompass all bits needed to install
> + * a partial or complete new set of namespaces.
> + *
> + * If a new user namespace is requested cred will
> + * point to a modifiable set of credentials. If a pointer
> + * to a modifiable set is needed nsset_cred() must be
> + * used and tested.
> + */
> +struct nsset {
> + unsigned flags;
> + struct nsproxy *nsproxy;
> + struct fs_struct *fs;
> + const struct cred *cred;
> +};
> +
> +static inline struct cred *nsset_cred(struct nsset *set)
> +{
> + if (set->flags & CLONE_NEWUSER)
> + return (struct cred *)set->cred;
> +
> + return NULL;
> +}
> +
> /*
> * the namespaces access rules are:
> *
> diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
> index 6abe85c34681..75807ecef880 100644
> --- a/include/linux/proc_ns.h
> +++ b/include/linux/proc_ns.h
> @@ -8,7 +8,7 @@
> #include <linux/ns_common.h>
>
> struct pid_namespace;
> -struct nsproxy;
> +struct nsset;
> struct path;
> struct task_struct;
> struct inode;
> @@ -19,7 +19,7 @@ struct proc_ns_operations {
> int type;
> struct ns_common *(*get)(struct task_struct *task);
> void (*put)(struct ns_common *ns);
> - int (*install)(struct nsproxy *nsproxy, struct ns_common *ns);
> + int (*install)(struct nsset *nsset, struct ns_common *ns);
> struct user_namespace *(*owner)(struct ns_common *ns);
> struct ns_common *(*get_parent)(struct ns_common *ns);
> } __randomize_layout;
> diff --git a/ipc/namespace.c b/ipc/namespace.c
> index b3ca1476ca51..fdc3b5f3f53a 100644
> --- a/ipc/namespace.c
> +++ b/ipc/namespace.c
> @@ -177,15 +177,14 @@ static void ipcns_put(struct ns_common *ns)
> return put_ipc_ns(to_ipc_ns(ns));
> }
>
> -static int ipcns_install(struct nsproxy *nsproxy, struct ns_common *new)
> +static int ipcns_install(struct nsset *nsset, struct ns_common *new)
> {
> + struct nsproxy *nsproxy = nsset->nsproxy;
> struct ipc_namespace *ns = to_ipc_ns(new);
> if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
> - !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
> + !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
> return -EPERM;
>
> - /* Ditch state from the old ipc namespace */
> - exit_sem(current);
> put_ipc_ns(nsproxy->ipc_ns);
> nsproxy->ipc_ns = get_ipc_ns(ns);
> return 0;
> diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c
> index b05f1dd58a62..812a61afd538 100644
> --- a/kernel/cgroup/namespace.c
> +++ b/kernel/cgroup/namespace.c
> @@ -95,11 +95,12 @@ static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns)
> return container_of(ns, struct cgroup_namespace, ns);
> }
>
> -static int cgroupns_install(struct nsproxy *nsproxy, struct ns_common *ns)
> +static int cgroupns_install(struct nsset *nsset, struct ns_common *ns)
> {
> + struct nsproxy *nsproxy = nsset->nsproxy;
> struct cgroup_namespace *cgroup_ns = to_cg_ns(ns);
>
> - if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN) ||
> + if (!ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN) ||
> !ns_capable(cgroup_ns->user_ns, CAP_SYS_ADMIN))
> return -EPERM;
>
> diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
> index ed9882108cd2..b7954fd60475 100644
> --- a/kernel/nsproxy.c
> +++ b/kernel/nsproxy.c
> @@ -19,6 +19,7 @@
> #include <net/net_namespace.h>
> #include <linux/ipc_namespace.h>
> #include <linux/time_namespace.h>
> +#include <linux/fs_struct.h>
> #include <linux/proc_ns.h>
> #include <linux/file.h>
> #include <linux/syscalls.h>
> @@ -257,12 +258,79 @@ void exit_task_namespaces(struct task_struct *p)
> switch_task_namespaces(p, NULL);
> }
>
> +static void put_nsset(struct nsset *nsset)
> +{
> + unsigned flags = nsset->flags;
> +
> + if (flags & CLONE_NEWUSER)
> + put_cred(nsset_cred(nsset));
> + if (nsset->nsproxy)
> + free_nsproxy(nsset->nsproxy);
> +}
> +
> +static int prepare_nsset(int nstype, struct nsset *nsset)
> +{
> + struct task_struct *me = current;
> +
> + nsset->nsproxy = create_new_namespaces(0, me, current_user_ns(), me->fs);
> + if (IS_ERR(nsset->nsproxy))
> + return PTR_ERR(nsset->nsproxy);
> +
> + if (nstype == CLONE_NEWUSER)
> + nsset->cred = prepare_creds();
> + else
> + nsset->cred = current_cred();
> + if (!nsset->cred)
> + goto out;
> +
> + if (nstype == CLONE_NEWNS)
> + nsset->fs = me->fs;
> +
> + nsset->flags = nstype;
> + return 0;
> +
> +out:
> + put_nsset(nsset);
> + return -ENOMEM;
> +}
> +
> +/*
> + * This is the point of no return. There are just a few namespaces
> + * that do some actual work here and it's sufficiently minimal that
> + * a separate ns_common operation seems unnecessary for now.
> + * Unshare is doing the same thing. If we'll end up needing to do
> + * more in a given namespace or a helper here is ultimately not
> + * exported anymore a simple commit handler for each namespace
> + * should be added to ns_common.
> + */
> +static void commit_nsset(struct nsset *nsset)
> +{
> + unsigned flags = nsset->flags;
> + struct task_struct *me = current;
> +
> +#ifdef CONFIG_USER_NS
> + if (flags & CLONE_NEWUSER) {
> + /* transfer ownership */
> + commit_creds(nsset_cred(nsset));
> + nsset->cred = NULL;
> + }
> +#endif
> +
> +#ifdef CONFIG_IPC_NS
> + if (flags & CLONE_NEWIPC)
> + exit_sem(me);
> +#endif
> +
> + /* transfer ownership */
> + switch_task_namespaces(me, nsset->nsproxy);
> + nsset->nsproxy = NULL;
> +}
> +
> SYSCALL_DEFINE2(setns, int, fd, int, nstype)
> {
> - struct task_struct *tsk = current;
> - struct nsproxy *new_nsproxy;
> struct file *file;
> struct ns_common *ns;
> + struct nsset nsset = {};
> int err;
>
> file = proc_ns_fget(fd);
> @@ -274,20 +342,16 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
> if (nstype && (ns->ops->type != nstype))
> goto out;
>
> - new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs);
> - if (IS_ERR(new_nsproxy)) {
> - err = PTR_ERR(new_nsproxy);
> + err = prepare_nsset(ns->ops->type, &nsset);
> + if (err)
> goto out;
> - }
>
> - err = ns->ops->install(new_nsproxy, ns);
> - if (err) {
> - free_nsproxy(new_nsproxy);
> - goto out;
> + err = ns->ops->install(&nsset, ns);
> + if (!err) {
> + commit_nsset(&nsset);
> + perf_event_namespaces(current);
> }
> - switch_task_namespaces(tsk, new_nsproxy);
> -
> - perf_event_namespaces(tsk);
> + put_nsset(&nsset);
> out:
> fput(file);
> return err;
> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
> index 01f8ba32cc0c..11db2bdbb41e 100644
> --- a/kernel/pid_namespace.c
> +++ b/kernel/pid_namespace.c
> @@ -378,13 +378,14 @@ static void pidns_put(struct ns_common *ns)
> put_pid_ns(to_pid_ns(ns));
> }
>
> -static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns)
> +static int pidns_install(struct nsset *nsset, struct ns_common *ns)
> {
> + struct nsproxy *nsproxy = nsset->nsproxy;
> struct pid_namespace *active = task_active_pid_ns(current);
> struct pid_namespace *ancestor, *new = to_pid_ns(ns);
>
> if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) ||
> - !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
> + !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
> return -EPERM;
>
> /*
> diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c
> index 53bce347cd50..5d9fc22d836a 100644
> --- a/kernel/time/namespace.c
> +++ b/kernel/time/namespace.c
> @@ -280,8 +280,9 @@ static void timens_put(struct ns_common *ns)
> put_time_ns(to_time_ns(ns));
> }
>
> -static int timens_install(struct nsproxy *nsproxy, struct ns_common *new)
> +static int timens_install(struct nsset *nsset, struct ns_common *new)
> {
> + struct nsproxy *nsproxy = nsset->nsproxy;
> struct time_namespace *ns = to_time_ns(new);
> int err;
>
> @@ -289,7 +290,7 @@ static int timens_install(struct nsproxy *nsproxy, struct ns_common *new)
> return -EUSERS;
>
> if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
> - !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
> + !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
> return -EPERM;
>
> timens_set_vvar_page(current, ns);
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 8eadadc478f9..87804e0371fe 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -1253,7 +1253,7 @@ static void userns_put(struct ns_common *ns)
> put_user_ns(to_user_ns(ns));
> }
>
> -static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
> +static int userns_install(struct nsset *nsset, struct ns_common *ns)
> {
> struct user_namespace *user_ns = to_user_ns(ns);
> struct cred *cred;
> @@ -1274,14 +1274,14 @@ static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
> if (!ns_capable(user_ns, CAP_SYS_ADMIN))
> return -EPERM;
>
> - cred = prepare_creds();
> + cred = nsset_cred(nsset);
> if (!cred)
> - return -ENOMEM;
> + return -EINVAL;
>
> put_user_ns(cred->user_ns);
> set_cred_user_ns(cred, get_user_ns(user_ns));
>
> - return commit_creds(cred);
> + return 0;
> }
>
> struct ns_common *ns_get_owner(struct ns_common *ns)
> diff --git a/kernel/utsname.c b/kernel/utsname.c
> index f0e491193009..e488d0e2ab45 100644
> --- a/kernel/utsname.c
> +++ b/kernel/utsname.c
> @@ -140,12 +140,13 @@ static void utsns_put(struct ns_common *ns)
> put_uts_ns(to_uts_ns(ns));
> }
>
> -static int utsns_install(struct nsproxy *nsproxy, struct ns_common *new)
> +static int utsns_install(struct nsset *nsset, struct ns_common *new)
> {
> + struct nsproxy *nsproxy = nsset->nsproxy;
> struct uts_namespace *ns = to_uts_ns(new);
>
> if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
> - !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
> + !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
> return -EPERM;
>
> get_uts_ns(ns);
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index 190ca66a383b..dcd61aca343e 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -1353,12 +1353,13 @@ static void netns_put(struct ns_common *ns)
> put_net(to_net_ns(ns));
> }
>
> -static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
> +static int netns_install(struct nsset *nsset, struct ns_common *ns)
> {
> + struct nsproxy *nsproxy = nsset->nsproxy;
> struct net *net = to_net_ns(ns);
>
> if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
> - !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
> + !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
> return -EPERM;
>
> put_net(nsproxy->net_ns);
> --
> 2.26.2