[RFC PATCH 1/4] namespacefs: Introduce 'namespacefs'

From: Yordan Karadzhov (VMware)
Date: Thu Nov 18 2021 - 13:13:14 EST


Introducing a simple read-only pseudo file system that aims to provide
a direct mechanism for examining the existing hierarchy of namespaces on
the system. When fully functional, 'namespacefs' will allow the user to
see all namespaces that are active on the system and to easily retrieve
the specific data managed by each namespace, for example the PIDs of
all tasks enclosed in each individual PID namespace.

Here we introduce only the basic definitions of the virtual filesystem
that are based off of 'fs/debugfs/inode.c' and 'fs/tracefs/inode.c'.
The actual coupling between the new filesystem and the namespaces and
all methods for adding/removing namespace directories and files will be
added later.

Signed-off-by: Yordan Karadzhov (VMware) <y.karadz@xxxxxxxxx>
---
fs/Kconfig | 1 +
fs/Makefile | 1 +
fs/namespacefs/Kconfig | 6 +
fs/namespacefs/Makefile | 4 +
fs/namespacefs/inode.c | 213 ++++++++++++++++++++++++++++++++++++
include/linux/idr-seq.h | 0
include/linux/namespacefs.h | 47 ++++++++
include/uapi/linux/magic.h | 2 +
8 files changed, 274 insertions(+)
create mode 100644 fs/namespacefs/Kconfig
create mode 100644 fs/namespacefs/Makefile
create mode 100644 fs/namespacefs/inode.c
create mode 100644 include/linux/idr-seq.h
create mode 100644 include/linux/namespacefs.h

diff --git a/fs/Kconfig b/fs/Kconfig
index a6313a969bc5..84c220160615 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -268,6 +268,7 @@ config ARCH_HAS_GIGANTIC_PAGE

source "fs/configfs/Kconfig"
source "fs/efivarfs/Kconfig"
+source "fs/namespacefs/Kconfig"

endmenu

diff --git a/fs/Makefile b/fs/Makefile
index 84c5e4cdfee5..5c850f6a7cb0 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -138,3 +138,4 @@ obj-$(CONFIG_EFIVAR_FS) += efivarfs/
obj-$(CONFIG_EROFS_FS) += erofs/
obj-$(CONFIG_VBOXSF_FS) += vboxsf/
obj-$(CONFIG_ZONEFS_FS) += zonefs/
+obj-$(CONFIG_NAMESPACE_FS) += namespacefs/
diff --git a/fs/namespacefs/Kconfig b/fs/namespacefs/Kconfig
new file mode 100644
index 000000000000..f26bc62376d4
--- /dev/null
+++ b/fs/namespacefs/Kconfig
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config NAMESPACE_FS
+ bool "NameSpace Filesystem support"
+ help
+ This option enables support for namespacefs - a pseudo filesystem
+ that allows the user to examine the hierarchy of namespaces.
diff --git a/fs/namespacefs/Makefile b/fs/namespacefs/Makefile
new file mode 100644
index 000000000000..23628d3207e3
--- /dev/null
+++ b/fs/namespacefs/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+namespacefs-objs := inode.o
+obj-$(CONFIG_NAMESPACE_FS) += namespacefs.o
diff --git a/fs/namespacefs/inode.c b/fs/namespacefs/inode.c
new file mode 100644
index 000000000000..0f6293b0877d
--- /dev/null
+++ b/fs/namespacefs/inode.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * inode.c - part of namespacefs, pseudo filesystem for examining namespaces.
+ *
+ * Copyright 2021 VMware Inc, Yordan Karadzhov (VMware) <y.karadz@xxxxxxxxx>
+ */
+
+#include <linux/fs.h>
+#include <linux/sysfs.h>
+#include <linux/namei.h>
+#include <linux/fsnotify.h>
+#include <linux/magic.h>
+
+static struct vfsmount *namespacefs_mount;
+static int namespacefs_mount_count;
+
+/* Superblock operations; only statfs is provided, through simple_statfs. */
+static const struct super_operations namespacefs_super_operations = {
+ .statfs = simple_statfs,
+};
+
+/* Read, respectively execute, permission bits for user, group and others. */
+#define S_IRALL (S_IRUSR | S_IRGRP | S_IROTH)
+#define S_IXALL (S_IXUSR | S_IXGRP | S_IXOTH)
+
+/*
+ * Set up a new namespacefs superblock.
+ *
+ * Runs simple_fill_super() with a file table that holds a single,
+ * empty-named entry, then installs the namespacefs super operations and
+ * adds read permission for user, group and others to the root inode.
+ * 'data' and 'silent' are not used.
+ *
+ * Returns 0 on success or the error reported by simple_fill_super().
+ */
+static int fill_super(struct super_block *sb, void *data, int silent)
+{
+	static const struct tree_descr no_files[] = {{""}};
+	int ret = simple_fill_super(sb, NAMESPACEFS_MAGIC, no_files);
+
+	if (ret)
+		return ret;
+
+	sb->s_root->d_inode->i_mode |= S_IRALL;
+	sb->s_op = &namespacefs_super_operations;
+
+	return 0;
+}
+
+/*
+ * Mount callback of the filesystem type: forwards to mount_single() with
+ * fill_super() as the superblock initializer. 'dev_name' is not used.
+ */
+static struct dentry *ns_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *data)
+{
+ return mount_single(fs_type, flags, data, fill_super);
+}
+
+/*
+ * Filesystem type registered by namespacefs_init() and pinned by pin_fs().
+ * NOTE(review): FS_USERNS_MOUNT is combined with mount_single() here;
+ * confirm that mounting from non-initial user namespaces is intended.
+ */
+static struct file_system_type namespacefs_fs_type = {
+ .name = "namespacefs",
+ .mount = ns_mount,
+ .kill_sb = kill_litter_super,
+ .fs_flags = FS_USERNS_MOUNT,
+};
+
+/*
+ * Counterpart of pin_fs(): passes the internal mount pointer and its
+ * counter to simple_release_fs().
+ */
+static inline void release_namespacefs(void)
+{
+ simple_release_fs(&namespacefs_mount, &namespacefs_mount_count);
+}
+
+/* Return the inode of the directory that contains 'dentry'. */
+static inline struct inode *parent_inode(struct dentry *dentry)
+{
+	struct dentry *dir = dentry->d_parent;
+
+	return dir->d_inode;
+}
+
+/*
+ * Allocate a new inode on 'sb', give it the next inode number and set
+ * its access, modification and change times to the current time.
+ *
+ * Returns the inode, or NULL when new_inode() fails.
+ */
+static struct inode *get_inode(struct super_block *sb)
+{
+	struct inode *inode = new_inode(sb);
+
+	if (!inode)
+		return NULL;
+
+	inode->i_ino = get_next_ino();
+	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+
+	return inode;
+}
+
+/*
+ * Turn 'inode' into a regular file that is readable by its owner and
+ * group, uses 'fops' as file operations and carries 'data' in i_private.
+ */
+static inline void set_file_inode(struct inode *inode,
+				  const struct file_operations *fops,
+				  void *data)
+{
+	inode->i_mode = S_IFREG | S_IRUSR | S_IRGRP;
+	inode->i_private = data;
+	inode->i_fop = fops;
+}
+
+/*
+ * Turn 'inode' into a directory that everybody may read and search,
+ * served by the generic simple_dir_* inode and file operations.
+ */
+static inline void set_dir_inode(struct inode *inode)
+{
+	inode->i_mode = S_IFDIR | S_IXALL | S_IRALL;
+	inode->i_fop = &simple_dir_operations;
+	inode->i_op = &simple_dir_inode_operations;
+}
+
+/*
+ * Pass the filesystem type, the internal mount pointer and its counter to
+ * simple_pin_fs() and return that call's result. Callers in this file
+ * treat a non-zero result as failure and undo a successful call with
+ * release_namespacefs().
+ */
+static inline int pin_fs(void)
+{
+ return simple_pin_fs(&namespacefs_fs_type,
+ &namespacefs_mount,
+ &namespacefs_mount_count);
+}
+
+/*
+ * create - add a regular file or a directory to namespacefs.
+ * @name: name of the new entry.
+ * @parent: directory that will contain the entry; NULL selects the root of
+ *	    the internal namespacefs mount.
+ * @user_ns: user namespace whose owner and group become the uid and gid of
+ *	     the new inode. Must not be NULL.
+ * @fops: file operations of a regular file; NULL requests a directory.
+ * @data: stored in i_private of a regular file; unused for directories.
+ *
+ * On success the new dentry is returned and the pin taken by pin_fs() is
+ * kept. On any failure ERR_PTR(-ESTALE) is returned; failures that occur
+ * after the filesystem was pinned unlock the parent and drop the pin.
+ */
+static struct dentry *create(const char *name, struct dentry *parent,
+			     const struct user_namespace *user_ns,
+			     const struct file_operations *fops,
+			     void *data)
+{
+	struct dentry *dentry = NULL;
+	struct inode *inode;
+
+	if (pin_fs())
+		return ERR_PTR(-ESTALE);
+
+	/*
+	 * If the parent is not specified, we create it in the root.
+	 * We need the root dentry to do this, which is in the super
+	 * block. A pointer to that is in the struct vfsmount that we
+	 * have around.
+	 */
+	if (!parent)
+		parent = namespacefs_mount->mnt_root;
+
+	inode_lock(parent->d_inode);
+	/*
+	 * The parent is locked and the filesystem is pinned at this point,
+	 * so bail out through 'fail' instead of returning directly.
+	 */
+	if (unlikely(IS_DEADDIR(parent->d_inode)))
+		goto fail;
+
+	dentry = lookup_one_len(name, parent, strlen(name));
+	/* Give up if the lookup failed or the name is already in use. */
+	if (IS_ERR(dentry) || dentry->d_inode)
+		goto fail;
+
+	inode = get_inode(dentry->d_sb);
+	if (unlikely(!inode))
+		goto fail;
+
+	inode->i_uid = user_ns->owner;
+	inode->i_gid = user_ns->group;
+
+	if (fops) {
+		/* Create a file. */
+		set_file_inode(inode, fops, data);
+		d_instantiate(dentry, inode);
+		fsnotify_create(parent_inode(dentry), dentry);
+	} else {
+		/* Create a directory. */
+		set_dir_inode(inode);
+		d_instantiate(dentry, inode);
+		set_nlink(inode, 2);
+		inc_nlink(parent_inode(dentry));
+		fsnotify_mkdir(parent_inode(dentry), dentry);
+	}
+
+	inode_unlock(parent_inode(dentry));
+	return dentry;
+
+ fail:
+	/* 'dentry' is NULL or an error pointer when the lookup never succeeded. */
+	if (!IS_ERR_OR_NULL(dentry))
+		dput(dentry);
+
+	inode_unlock(parent->d_inode);
+	release_namespacefs();
+
+	return ERR_PTR(-ESTALE);
+}
+
+/*
+ * namespacefs_create_file - create a regular file in namespacefs.
+ *
+ * Forwards all arguments unchanged to create(). 'fops' selects the kind of
+ * entry there: a NULL 'fops' makes create() build a directory instead of a
+ * file. Returns whatever create() returns.
+ */
+struct dentry *
+namespacefs_create_file(const char *name, struct dentry *parent,
+ const struct user_namespace *user_ns,
+ const struct file_operations *fops,
+ void *data)
+{
+ return create(name, parent, user_ns, fops, data);
+}
+
+/*
+ * namespacefs_create_dir - create a directory in namespacefs.
+ *
+ * Calls create() with NULL file operations and NULL data, which selects the
+ * directory branch there. Returns whatever create() returns.
+ */
+struct dentry *
+namespacefs_create_dir(const char *name, struct dentry *parent,
+ const struct user_namespace *user_ns)
+{
+ return create(name, parent, user_ns, NULL, NULL);
+}
+
+/*
+ * Callback handed to simple_recursive_removal(); 'd' is not used.
+ * NOTE(review): presumably this drops the pin that create() keeps for each
+ * entry it adds - confirm it is invoked once per removed dentry.
+ */
+static void remove_one(struct dentry *d)
+{
+ release_namespacefs();
+}
+
+/*
+ * namespacefs_remove_dir - remove an entry together with everything below it.
+ * @dentry: entry to remove; NULL and error pointers are ignored.
+ *
+ * The filesystem is pinned for the duration of the removal. Nothing is
+ * removed when pinning fails.
+ */
+void namespacefs_remove_dir(struct dentry *dentry)
+{
+	/* pin_fs() is only attempted for a usable dentry. */
+	if (IS_ERR_OR_NULL(dentry) || pin_fs())
+		return;
+
+	simple_recursive_removal(dentry, remove_one);
+	release_namespacefs();
+}
+
+#define _NS_MOUNT_DIR "namespaces"
+
+/*
+ * Create the sysfs mount point for namespacefs under fs_kobj and register
+ * the filesystem type. The mount point is removed again when the
+ * registration fails.
+ *
+ * Returns 0 on success or the error code of the step that failed.
+ */
+static int __init namespacefs_init(void)
+{
+	int ret = sysfs_create_mount_point(fs_kobj, _NS_MOUNT_DIR);
+
+	if (ret)
+		return ret;
+
+	ret = register_filesystem(&namespacefs_fs_type);
+	if (ret)
+		sysfs_remove_mount_point(fs_kobj, _NS_MOUNT_DIR);
+
+	return ret;
+}
+
+fs_initcall(namespacefs_init);
diff --git a/include/linux/idr-seq.h b/include/linux/idr-seq.h
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/include/linux/namespacefs.h b/include/linux/namespacefs.h
new file mode 100644
index 000000000000..44a760080df7
--- /dev/null
+++ b/include/linux/namespacefs.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * namespacefs.h - a pseudo file system for examining namespaces.
+ */
+
+#ifndef _NAMESPACEFS_H_
+#define _NAMESPACEFS_H_
+
+/*
+ * Included unconditionally: the stubs below also need declarations of
+ * struct dentry, struct user_namespace and struct file_operations.
+ */
+#include <linux/fs.h>
+
+#ifdef CONFIG_NAMESPACE_FS
+
+/*
+ * Create a regular file called 'name' in 'parent' (NULL selects the root of
+ * namespacefs). The inode is owned by the owner/group of 'user_ns', uses
+ * 'fops' as file operations and carries 'data' in i_private.
+ * Returns the new dentry or an error pointer.
+ */
+struct dentry *
+namespacefs_create_file(const char *name, struct dentry *parent,
+			const struct user_namespace *user_ns,
+			const struct file_operations *fops,
+			void *data);
+
+/*
+ * Create a directory called 'name' in 'parent' (NULL selects the root of
+ * namespacefs), owned by the owner/group of 'user_ns'.
+ * Returns the new dentry or an error pointer.
+ */
+struct dentry *
+namespacefs_create_dir(const char *name, struct dentry *parent,
+		       const struct user_namespace *user_ns);
+
+/* Remove 'dentry' and everything below it; NULL and error pointers are ignored. */
+void namespacefs_remove_dir(struct dentry *dentry);
+
+#else
+
+/*
+ * Stubs used when namespacefs is not built.
+ * NOTE(review): the stubs return NULL while the real functions report
+ * failure with an error pointer; callers have to cope with both.
+ */
+static inline struct dentry *
+namespacefs_create_file(const char *name, struct dentry *parent,
+			const struct user_namespace *user_ns,
+			const struct file_operations *fops,
+			void *data)
+{
+	return NULL;
+}
+
+static inline struct dentry *
+namespacefs_create_dir(const char *name, struct dentry *parent,
+		       const struct user_namespace *user_ns)
+{
+	return NULL;
+}
+
+static inline void namespacefs_remove_dir(struct dentry *dentry)
+{
+}
+
+#endif /* CONFIG_NAMESPACE_FS */
+
+#endif
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 35687dcb1a42..36b432be0d22 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -62,6 +62,8 @@
#define CGROUP_SUPER_MAGIC 0x27e0eb
#define CGROUP2_SUPER_MAGIC 0x63677270

+#define NAMESPACEFS_MAGIC 0x458728fa
+
#define RDTGROUP_SUPER_MAGIC 0x7655821

#define STACK_END_MAGIC 0x57AC6E9D
--
2.33.1