Re: [RFC PATCH] cgroup namespaces: add a 'nsroot=' mountinfo field

From: Tycho Andersen
Date: Tue Mar 29 2016 - 09:59:01 EST


Hi Serge,

On Mon, Mar 21, 2016 at 06:41:33PM -0500, Serge E. Hallyn wrote:
> One practical problem I've found with cgroup namespaces is that there
> is no way to disambiguate between a cgroupfs mount which was done in
> a cgroup namespace, and a bind mount of a cgroupfs directory. So
> whether I do
>
> unshare --cgroup -- bash -c "mount -t cgroup -o freezer f /mnt; cat /proc/self/mountinfo"
>
> or whether I just
>
> mount --bind /sys/fs/cgroup/freezer/$(awk -F: '/freezer/ { print $3 }' /proc/self/cgroup) /mnt
>
> 'mount root' field (field 3) in /proc/self/mountinfo will show the
> same thing, the result of awk -F: '/freezer/ { print $3 }' /proc/self/cgroup.
>
> This patch adds a 'nsroot=' field to cgroup mountinfo entries, so that
> userspace can distinguish a mount made in a cgroup namespace from a bind
> mount from a cgroup subdirectory.

With this patch, mountinfo shows nsroot= in the mount options, but the
actual mount() call for cgroups doesn't allow nsroot. Would it be
possible to allow passing nsroot= to mount, as long as it does in fact
match the current nsroot?

The motivation for this is that CRIU just copies the mount options and
uses them on restore, so with this patch we have to add a special case
to trim off nsroot= before we restore.

Tycho

> Signed-off-by: Serge Hallyn <serge.hallyn@xxxxxxxxxx>
> ---
> fs/kernfs/mount.c | 2 +-
> include/linux/kernfs.h | 3 ++-
> kernel/cgroup.c | 29 ++++++++++++++++++++++++++++-
> 3 files changed, 31 insertions(+), 3 deletions(-)
>
> diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
> index b67dbcc..58f59fd 100644
> --- a/fs/kernfs/mount.c
> +++ b/fs/kernfs/mount.c
> @@ -36,7 +36,7 @@ static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
> struct kernfs_syscall_ops *scops = root->syscall_ops;
>
> if (scops && scops->show_options)
> - return scops->show_options(sf, root);
> + return scops->show_options(sf, dentry, root);
> return 0;
> }
>
> diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
> index c06c442..3124b91 100644
> --- a/include/linux/kernfs.h
> +++ b/include/linux/kernfs.h
> @@ -145,7 +145,8 @@ struct kernfs_node {
> */
> struct kernfs_syscall_ops {
> int (*remount_fs)(struct kernfs_root *root, int *flags, char *data);
> - int (*show_options)(struct seq_file *sf, struct kernfs_root *root);
> + int (*show_options)(struct seq_file *sf, struct dentry *dentry,
> + struct kernfs_root *root);
>
> int (*mkdir)(struct kernfs_node *parent, const char *name,
> umode_t mode);
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 671dc05..806d1e7 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -1593,7 +1593,32 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
> return 0;
> }
>
> -static int cgroup_show_options(struct seq_file *seq,
> +static void cgroup_show_nsroot(struct seq_file *seq, struct dentry *dentry,
> + struct kernfs_root *kf_root)
> +{
> + struct kernfs_node *d_kn = dentry->d_fsdata;
> + char *nsroot;
> + int len, ret;
> +
> + if (!kf_root)
> + return;
> + len = kernfs_path_from_node(d_kn, kf_root->kn, NULL, 0);
> + if (len <= 0)
> + return;
> + nsroot = kzalloc(len + 1, GFP_ATOMIC);
> + if (!nsroot)
> + return;
> + ret = kernfs_path_from_node(d_kn, kf_root->kn, nsroot, len + 1);
> + if (ret <= 0 || ret > len)
> + goto out;
> +
> + seq_show_option(seq, "nsroot", nsroot);
> +
> +out:
> + kfree(nsroot);
> +}
> +
> +static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry,
> struct kernfs_root *kf_root)
> {
> struct cgroup_root *root = cgroup_root_from_kf(kf_root);
> @@ -1619,6 +1644,8 @@ static int cgroup_show_options(struct seq_file *seq,
> seq_puts(seq, ",clone_children");
> if (strlen(root->name))
> seq_show_option(seq, "name", root->name);
> + cgroup_show_nsroot(seq, dentry, kf_root);
> +
> return 0;
> }
>
> --
> 2.7.3
>