[PATCH v2 4/5] stats_fs fs: virtual fs to show stats to the end-user
From: Emanuele Giuseppe Esposito
Date: Mon May 04 2020 - 07:04:17 EST
Add virtual fs that maps stats_fs sources with directories, and values
(simple or aggregates) to files.
Every time a file is read/cleared, the fs internally invokes the stats_fs
API to get/set the requested value.
fs/stats_fs/inode.c is very similar to fs/debugfs/inode.c, except that
the API consists only of stats_fs_create_file, stats_fs_create_dir and
stats_fs_remove.
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@xxxxxxxxxx>
---
fs/stats_fs/Makefile | 2 +-
fs/stats_fs/inode.c | 337 +++++++++++++++++++++++++++++++++++++
fs/stats_fs/internal.h | 15 ++
fs/stats_fs/stats_fs.c | 163 ++++++++++++++++++
include/linux/stats_fs.h | 15 ++
include/uapi/linux/magic.h | 1 +
tools/lib/api/fs/fs.c | 21 +++
7 files changed, 553 insertions(+), 1 deletion(-)
create mode 100644 fs/stats_fs/inode.c
diff --git a/fs/stats_fs/Makefile b/fs/stats_fs/Makefile
index 9db130fac6b6..ac12c27545f6 100644
--- a/fs/stats_fs/Makefile
+++ b/fs/stats_fs/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-stats_fs-objs := stats_fs.o
+stats_fs-objs := inode.o stats_fs.o
stats_fs-tests-objs := stats_fs-tests.o
obj-$(CONFIG_STATS_FS) += stats_fs.o
diff --git a/fs/stats_fs/inode.c b/fs/stats_fs/inode.c
new file mode 100644
index 000000000000..865ee91656ba
--- /dev/null
+++ b/fs/stats_fs/inode.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * inode.c - part of stats_fs, a tiny little stats_fs file system
+ *
+ * Copyright (C) 2020 Emanuele Giuseppe Esposito <eesposit@xxxxxxxxxx>
+ * Copyright (C) 2020 Redhat
+ */
+#define pr_fmt(fmt) "stats_fs: " fmt
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/init.h>
+#include <linux/stats_fs.h>
+#include <linux/string.h>
+#include <linux/seq_file.h>
+#include <linux/parser.h>
+#include <linux/magic.h>
+#include <linux/slab.h>
+
+#include "internal.h"
+
+#define STATS_FS_DEFAULT_MODE 0700
+
+static struct simple_fs stats_fs;
+static bool stats_fs_registered;
+
+/* Mount options for stats_fs; filled in by stats_fs_parse_options(). */
+struct stats_fs_mount_opts {
+ kuid_t uid; /* owner applied to the root inode */
+ kgid_t gid; /* group applied to the root inode */
+ umode_t mode; /* permission bits applied to the root inode */
+};
+
+/* Token identifiers used in the mount option table and its switch. */
+enum {
+ Opt_uid,
+ Opt_gid,
+ Opt_mode,
+ Opt_err
+};
+
+/*
+ * Patterns for the mount options understood by stats_fs_parse_options().
+ * The entry with a NULL pattern terminates the table.
+ */
+static const match_table_t tokens = {
+ {Opt_uid, "uid=%u"},
+ {Opt_gid, "gid=%u"},
+ {Opt_mode, "mode=%o"},
+ {Opt_err, NULL}
+};
+
+/* Per-superblock private data, stored in sb->s_fs_info. */
+struct stats_fs_fs_info {
+ struct stats_fs_mount_opts mount_opts;
+};
+
+/*
+ * Parse the comma-separated mount option string @data into @opts.
+ *
+ * The mode is reset to STATS_FS_DEFAULT_MODE before parsing; uid and gid
+ * are written only when the corresponding option is present. Options that
+ * are not recognised are skipped. Returns 0 on success, or -EINVAL when a
+ * recognised option carries a value that cannot be parsed or mapped.
+ */
+static int stats_fs_parse_options(char *data, struct stats_fs_mount_opts *opts)
+{
+	substring_t args[MAX_OPT_ARGS];
+	char *opt;
+	int value;
+
+	opts->mode = STATS_FS_DEFAULT_MODE;
+
+	for (opt = strsep(&data, ","); opt != NULL; opt = strsep(&data, ",")) {
+		kuid_t uid;
+		kgid_t gid;
+
+		/* skip empty fields, e.g. two consecutive commas */
+		if (*opt == '\0')
+			continue;
+
+		switch (match_token(opt, tokens, args)) {
+		case Opt_uid:
+			if (match_int(&args[0], &value))
+				return -EINVAL;
+			uid = make_kuid(current_user_ns(), value);
+			if (!uid_valid(uid))
+				return -EINVAL;
+			opts->uid = uid;
+			break;
+		case Opt_gid:
+			if (match_int(&args[0], &value))
+				return -EINVAL;
+			gid = make_kgid(current_user_ns(), value);
+			if (!gid_valid(gid))
+				return -EINVAL;
+			opts->gid = gid;
+			break;
+		case Opt_mode:
+			if (match_octal(&args[0], &value))
+				return -EINVAL;
+			opts->mode = value & S_IALLUGO;
+			break;
+		default:
+			/*
+			 * Bad mount options could be reported here, but
+			 * they are deliberately ignored.
+			 */
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Copy the mount options stored in the superblock's private data onto the
+ * root inode: permission bits, owner and group. Always returns 0.
+ */
+static int stats_fs_apply_options(struct super_block *sb)
+{
+	struct stats_fs_fs_info *info = sb->s_fs_info;
+	struct stats_fs_mount_opts *mo = &info->mount_opts;
+	struct inode *root = d_inode(sb->s_root);
+
+	/* replace only the permission bits, keep the file type bits */
+	root->i_mode = (root->i_mode & ~S_IALLUGO) | mo->mode;
+	root->i_uid = mo->uid;
+	root->i_gid = mo->gid;
+
+	return 0;
+}
+
+/*
+ * Remount handler: re-parse the option string into the superblock's
+ * mount options and, if parsing succeeded, apply them to the root inode.
+ * Returns 0 or the error from stats_fs_parse_options().
+ */
+static int stats_fs_remount(struct super_block *sb, int *flags, char *data)
+{
+	struct stats_fs_fs_info *info = sb->s_fs_info;
+	int ret;
+
+	sync_filesystem(sb);
+
+	ret = stats_fs_parse_options(data, &info->mount_opts);
+	if (!ret)
+		stats_fs_apply_options(sb);
+
+	return ret;
+}
+
+/*
+ * Print the mount options that differ from the defaults (root uid, root
+ * gid, STATS_FS_DEFAULT_MODE) into @m. Always returns 0.
+ */
+static int stats_fs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct stats_fs_fs_info *info = root->d_sb->s_fs_info;
+	struct stats_fs_mount_opts *mo = &info->mount_opts;
+
+	if (!uid_eq(mo->uid, GLOBAL_ROOT_UID)) {
+		seq_printf(m, ",uid=%u",
+			   from_kuid_munged(&init_user_ns, mo->uid));
+	}
+
+	if (!gid_eq(mo->gid, GLOBAL_ROOT_GID)) {
+		seq_printf(m, ",gid=%u",
+			   from_kgid_munged(&init_user_ns, mo->gid));
+	}
+
+	if (mo->mode != STATS_FS_DEFAULT_MODE)
+		seq_printf(m, ",mode=%o", mo->mode);
+
+	return 0;
+}
+
+
+/*
+ * Free the data attached to the inode as i_private (set up by
+ * stats_fs_create_file() for value files), then the inode itself.
+ */
+static void stats_fs_free_inode(struct inode *inode)
+{
+ kfree(inode->i_private);
+ free_inode_nonrcu(inode);
+}
+
+/* Superblock operations installed by stats_fs_fill_super(). */
+static const struct super_operations stats_fs_super_operations = {
+ .statfs = simple_statfs,
+ .remount_fs = stats_fs_remount,
+ .show_options = stats_fs_show_options,
+ .free_inode = stats_fs_free_inode,
+};
+
+/*
+ * Set up a new stats_fs superblock: allocate the private info, parse the
+ * mount options from @data, populate the superblock via simple_fill_super()
+ * and apply the options to the root inode.
+ *
+ * Returns 0 on success. On failure the private info is freed,
+ * sb->s_fs_info is left NULL and a negative errno is returned.
+ */
+static int stats_fs_fill_super(struct super_block *sb, void *data, int silent)
+{
+	static const struct tree_descr stats_fs_files[] = {{""}};
+	struct stats_fs_fs_info *info;
+	int ret;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	sb->s_fs_info = info;
+	if (!info)
+		return -ENOMEM;
+
+	ret = stats_fs_parse_options(data, &info->mount_opts);
+	if (!ret)
+		ret = simple_fill_super(sb, STATSFS_MAGIC, stats_fs_files);
+	if (ret) {
+		kfree(info);
+		sb->s_fs_info = NULL;
+		return ret;
+	}
+
+	sb->s_op = &stats_fs_super_operations;
+	stats_fs_apply_options(sb);
+
+	return 0;
+}
+
+/*
+ * Mount callback: delegates to mount_single() with stats_fs_fill_super()
+ * as the fill function. @dev_name is not used.
+ */
+static struct dentry *stats_fs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *data)
+{
+ return mount_single(fs_type, flags, data, stats_fs_fill_super);
+}
+
+/* Filesystem type registered under the name "statsfs" by stats_fs_init(). */
+static struct file_system_type stats_fs_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "statsfs",
+ .mount = stats_fs_mount,
+ .kill_sb = kill_litter_super,
+};
+MODULE_ALIAS_FS("statsfs");
+
+
+/**
+ * stats_fs_create_file - create a file in the stats_fs filesystem
+ * @val: a pointer to a stats_fs_value containing all the information about
+ *       the file to create (name, permission)
+ * @src: a pointer to a stats_fs_source containing the dentry of the
+ *       directory in which to add this file
+ *
+ * This function will return a pointer to a dentry if it succeeds. This
+ * pointer must be passed to the stats_fs_remove() function when the file is
+ * to be removed (no automatic cleanup happens if your module is unloaded,
+ * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be
+ * returned.
+ *
+ * @val and @src are wrapped in a stats_fs_data_inode struct that is stored
+ * as inode->i_private and used by the get/set attribute functions (see
+ * stats_fs_ops in stats_fs.c). The struct is freed together with the inode;
+ * if the file cannot be created it is freed here instead.
+ */
+struct dentry *stats_fs_create_file(struct stats_fs_value *val, struct stats_fs_source *src)
+{
+	struct stats_fs_data_inode *val_inode;
+	struct dentry *dentry;
+	struct inode *inode;
+
+	val_inode = kzalloc(sizeof(*val_inode), GFP_KERNEL);
+	if (!val_inode) {
+		printk(KERN_ERR
+		       "Kzalloc failure in stats_fs_create_files (ENOMEM)\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	val_inode->src = src;
+	val_inode->val = val;
+
+	dentry = simplefs_create_file(&stats_fs, &stats_fs_fs_type,
+				      val->name, stats_fs_val_get_mode(val),
+				      src->source_dentry, val_inode, &inode);
+	if (IS_ERR(dentry)) {
+		/*
+		 * No inode took ownership of val_inode, so it would leak
+		 * unless freed here.
+		 * NOTE(review): assumes simplefs_create_file() never attaches
+		 * its data argument to an inode on a failing path - confirm.
+		 */
+		kfree(val_inode);
+		return dentry;
+	}
+
+	inode->i_fop = &stats_fs_ops;
+
+	return simplefs_finish_dentry(dentry, inode);
+}
+/**
+ * stats_fs_create_dir - create a directory in the stats_fs filesystem
+ * @name: name of the directory to create.
+ * @parent: dentry of the parent directory. If this parameter is NULL, the
+ *          directory will be created in the root of the stats_fs filesystem.
+ *
+ * Creates a directory with mode 0755 and the simple directory inode
+ * operations.
+ *
+ * Returns the dentry of the new directory on success; it must later be
+ * passed to stats_fs_remove() (no automatic cleanup happens if your module
+ * is unloaded, you are responsible here.) If an error occurs,
+ * ERR_PTR(-ERROR) is returned.
+ */
+struct dentry *stats_fs_create_dir(const char *name, struct dentry *parent)
+{
+	struct inode *dir_inode;
+	struct dentry *dir;
+
+	dir = simplefs_create_dir(&stats_fs, &stats_fs_fs_type, name, 0755,
+				  parent, &dir_inode);
+	if (!IS_ERR(dir)) {
+		dir_inode->i_op = &simple_dir_inode_operations;
+		dir = simplefs_finish_dentry(dir, dir_inode);
+	}
+
+	return dir;
+}
+
+/*
+ * Callback handed to simple_recursive_removal() by stats_fs_remove();
+ * calls simple_release_fs() on the stats_fs mount. @victim is not used.
+ */
+static void remove_one(struct dentry *victim)
+{
+ simple_release_fs(&stats_fs);
+}
+
+/**
+ * stats_fs_remove - recursively removes a directory
+ * @dentry: a pointer to the dentry of the directory to be removed. If this
+ * parameter is NULL or an error value, nothing will be done.
+ *
+ * This function recursively removes a directory tree in stats_fs that
+ * was previously created with a call to another stats_fs function
+ * (like stats_fs_create_file() or variants thereof.)
+ *
+ * This function is required to be called in order for the file to be
+ * removed, no automatic cleanup of files will happen when a module is
+ * removed, you are responsible here.
+ */
+void stats_fs_remove(struct dentry *dentry)
+{
+ if (IS_ERR_OR_NULL(dentry))
+ return;
+
+ /* pin the filesystem for the duration of the removal */
+ simple_pin_fs(&stats_fs, &stats_fs_fs_type);
+ simple_recursive_removal(dentry, remove_one);
+ simple_release_fs(&stats_fs);
+}
+/**
+ * stats_fs_initialized - Tells whether stats_fs has been registered
+ *
+ * Returns true once stats_fs_init() has successfully registered the
+ * filesystem type, false otherwise.
+ */
+bool stats_fs_initialized(void)
+{
+ return stats_fs_registered;
+}
+EXPORT_SYMBOL_GPL(stats_fs_initialized);
+
+/*
+ * Create the "statsfs" mount point under the kernel kobject and register
+ * the filesystem type. The mount point is removed again if registration
+ * fails. Returns 0 on success or a negative errno.
+ */
+static int __init stats_fs_init(void)
+{
+	int ret;
+
+	ret = sysfs_create_mount_point(kernel_kobj, "statsfs");
+	if (ret)
+		return ret;
+
+	ret = register_filesystem(&stats_fs_fs_type);
+	if (ret) {
+		sysfs_remove_mount_point(kernel_kobj, "statsfs");
+		return ret;
+	}
+
+	stats_fs_registered = true;
+	return 0;
+}
+core_initcall(stats_fs_init);
diff --git a/fs/stats_fs/internal.h b/fs/stats_fs/internal.h
index ddf262a60736..1f7bb1da6c3c 100644
--- a/fs/stats_fs/internal.h
+++ b/fs/stats_fs/internal.h
@@ -15,6 +15,21 @@ struct stats_fs_value_source {
struct list_head list_element;
};
+/*
+ * Per-file data stored in inode->i_private: the value exposed by the file
+ * and the source it belongs to.
+ */
+struct stats_fs_data_inode {
+ struct stats_fs_source *src;
+ struct stats_fs_value *val;
+};
+
+extern const struct file_operations stats_fs_ops;
+
+struct dentry *stats_fs_create_file(struct stats_fs_value *val,
+ struct stats_fs_source *src);
+
+struct dentry *stats_fs_create_dir(const char *name, struct dentry *parent);
+
+void stats_fs_remove(struct dentry *dentry);
+#define stats_fs_remove_recursive stats_fs_remove
+
int stats_fs_val_get_mode(struct stats_fs_value *val);
#endif /* _STATS_FS_INTERNAL_H_ */
diff --git a/fs/stats_fs/stats_fs.c b/fs/stats_fs/stats_fs.c
index b63de12769e2..4ac6fe1ec62e 100644
--- a/fs/stats_fs/stats_fs.c
+++ b/fs/stats_fs/stats_fs.c
@@ -17,16 +17,114 @@ struct stats_fs_aggregate_value {
uint32_t count, count_zero;
};
+static void stats_fs_source_remove_files(struct stats_fs_source *src);
+
static int is_val_signed(struct stats_fs_value *val)
{
return val->type & STATS_FS_SIGN;
}
+/*
+ * Attribute "get" callback: @data is the file's stats_fs_data_inode.
+ * Reads the value through stats_fs_source_get_value() into @val and
+ * returns that function's result.
+ */
+static int stats_fs_attr_get(void *data, u64 *val)
+{
+	struct stats_fs_data_inode *di = data;
+
+	return stats_fs_source_get_value(di->src, di->val, val);
+}
+
+/*
+ * Attribute "set" callback: @data is the file's stats_fs_data_inode.
+ * Only writing 0 is accepted, which clears the value through
+ * stats_fs_source_clear(); any other @val yields -EINVAL.
+ */
+static int stats_fs_attr_clear(void *data, u64 val)
+{
+	struct stats_fs_data_inode *di = data;
+
+	if (val != 0)
+		return -EINVAL;
+
+	return stats_fs_source_clear(di->src, di->val);
+}
+
int stats_fs_val_get_mode(struct stats_fs_value *val)
{
return val->mode ? val->mode : 0644;
}
+/*
+ * Open handler for value files.
+ *
+ * Takes a reference on the file's source for as long as the file is open
+ * (dropped in stats_fs_attr_release()), then sets up the simple attribute
+ * with a signed or unsigned format. Files without any write permission bit
+ * get no "set" callback.
+ *
+ * Returns 0 on success, -ENOENT if the source is already being destroyed,
+ * or the error returned by simple_attr_open().
+ */
+static int stats_fs_attr_data_open(struct inode *inode, struct file *file)
+{
+	struct stats_fs_data_inode *val_inode = inode->i_private;
+	int (*clear)(void *, u64) = NULL;
+	const char *fmt;
+	int r;
+
+	/*
+	 * Inodes hold a pointer to the source which is not included in the
+	 * refcount, so the files can be opened while destroy is running, but
+	 * values are removed (base_addr = NULL) before the source is
+	 * destroyed.
+	 */
+	if (!kref_get_unless_zero(&val_inode->src->refcount))
+		return -ENOENT;
+
+	fmt = is_val_signed(val_inode->val) ? "%lld\n" : "%llu\n";
+
+	if (stats_fs_val_get_mode(val_inode->val) & 0222)
+		clear = stats_fs_attr_clear;
+
+	r = simple_attr_open(inode, file, stats_fs_attr_get, clear, fmt);
+	if (r) {
+		/* undo the reference taken above and report the real error */
+		stats_fs_source_put(val_inode->src);
+		return r;
+	}
+
+	return 0;
+}
+
+/*
+ * Release handler for value files: tears down the simple attribute and
+ * drops the source reference taken in stats_fs_attr_data_open().
+ * Always returns 0.
+ */
+static int stats_fs_attr_release(struct inode *inode, struct file *file)
+{
+ struct stats_fs_data_inode *val_inode;
+
+ val_inode = (struct stats_fs_data_inode *)inode->i_private;
+
+ simple_attr_release(inode, file);
+ stats_fs_source_put(val_inode->src);
+
+ return 0;
+}
+
+/* File operations installed on value files by stats_fs_create_file(). */
+const struct file_operations stats_fs_ops = {
+ .owner = THIS_MODULE,
+ .open = stats_fs_attr_data_open,
+ .release = stats_fs_attr_release,
+ .read = simple_attr_read,
+ .write = simple_attr_write,
+ .llseek = no_llseek,
+};
+
+/*
+ * Remove the files of every subordinate of @src, then remove @src's own
+ * directory and reset its dentry to NULL. Does nothing if @src has no
+ * dentry.
+ *
+ * Called with rwsem held for writing
+ */
+static void stats_fs_source_remove_files_locked(struct stats_fs_source *src)
+{
+ struct stats_fs_source *child;
+
+ if (src->source_dentry == NULL)
+ return;
+
+ /* each child takes its own rwsem in stats_fs_source_remove_files() */
+ list_for_each_entry (child, &src->subordinates_head, list_element)
+ stats_fs_source_remove_files(child);
+
+ stats_fs_remove_recursive(src->source_dentry);
+ src->source_dentry = NULL;
+}
+
+/*
+ * Locking wrapper: takes @src's rwsem for writing around
+ * stats_fs_source_remove_files_locked().
+ */
+static void stats_fs_source_remove_files(struct stats_fs_source *src)
+{
+ down_write(&src->rwsem);
+ stats_fs_source_remove_files_locked(src);
+ up_write(&src->rwsem);
+}
+
static struct stats_fs_value *find_value(struct stats_fs_value_source *src,
struct stats_fs_value *val)
{
@@ -57,6 +155,62 @@ search_value_in_source(struct stats_fs_source *src, struct stats_fs_value *arg,
return NULL;
}
+/*
+ * Create a file for every value of @source whose files have not been
+ * created yet. Does nothing when @source has no usable directory dentry,
+ * i.e. it is NULL or an error pointer left by a failed directory creation.
+ *
+ * Called with rwsem held for writing
+ */
+static void stats_fs_create_files_locked(struct stats_fs_source *source)
+{
+	struct stats_fs_value_source *val_src;
+	struct stats_fs_value *val;
+
+	/*
+	 * source_dentry may hold an ERR_PTR stored from stats_fs_create_dir();
+	 * it must never be used as the parent of a new file.
+	 */
+	if (IS_ERR_OR_NULL(source->source_dentry))
+		return;
+
+	list_for_each_entry (val_src, &source->values_head, list_element) {
+		if (val_src->files_created)
+			continue;
+
+		/* the values array is terminated by an entry without a name */
+		for (val = val_src->values; val->name; val++)
+			stats_fs_create_file(val, source);
+
+		val_src->files_created = true;
+	}
+}
+
+/*
+ * Make sure @source has a directory below @parent_dentry, create the files
+ * for its values, then do the same for every subordinate that has no
+ * directory yet.
+ *
+ * If the directory cannot be created, source_dentry is left NULL and the
+ * whole subtree is skipped, so that no error pointer is ever used as a
+ * parent dentry.
+ *
+ * Called with rwsem held for writing
+ */
+static void
+stats_fs_create_files_recursive_locked(struct stats_fs_source *source,
+				       struct dentry *parent_dentry)
+{
+	struct stats_fs_source *child;
+	struct dentry *dir;
+
+	/* first check values in this folder, since it might be new */
+	if (!source->source_dentry) {
+		dir = stats_fs_create_dir(source->name, parent_dentry);
+		if (IS_ERR(dir))
+			return;
+		source->source_dentry = dir;
+	}
+
+	stats_fs_create_files_locked(source);
+
+	list_for_each_entry (child, &source->subordinates_head, list_element) {
+		if (child->source_dentry == NULL) {
+			/* assume that if child has a folder,
+			 * also the sub-child have that.
+			 */
+			down_write(&child->rwsem);
+			stats_fs_create_files_recursive_locked(
+				child, source->source_dentry);
+			up_write(&child->rwsem);
+		}
+	}
+}
+
+/*
+ * Create the directory and files for @source and its subordinates, with a
+ * NULL parent dentry. Takes @source's rwsem for writing around the
+ * recursive creation.
+ */
+void stats_fs_source_register(struct stats_fs_source *source)
+{
+ down_write(&source->rwsem);
+ stats_fs_create_files_recursive_locked(source, NULL);
+ up_write(&source->rwsem);
+}
+EXPORT_SYMBOL_GPL(stats_fs_source_register);
+
/* Called with rwsem held for writing */
static struct stats_fs_value_source *create_value_source(void *base)
{
@@ -93,6 +247,9 @@ int stats_fs_source_add_values(struct stats_fs_source *source,
/* add the val_src to the source list */
list_add(&val_src->list_element, &source->values_head);
+ /* create the files for the new values if the source has a directory */
+ stats_fs_create_files_locked(source);
+
up_write(&source->rwsem);
return 0;
@@ -106,6 +263,9 @@ void stats_fs_source_add_subordinate(struct stats_fs_source *source,
stats_fs_source_get(sub);
list_add(&sub->list_element, &source->subordinates_head);
+ if (source->source_dentry)
+ stats_fs_create_files_recursive_locked(sub,
+ source->source_dentry);
up_write(&source->rwsem);
}
@@ -122,6 +282,7 @@ stats_fs_source_remove_subordinate_locked(struct stats_fs_source *source,
list_element) {
if (src_entry == sub) {
list_del_init(&src_entry->list_element);
+ stats_fs_source_remove_files(src_entry);
stats_fs_source_put(src_entry);
return;
}
@@ -565,6 +726,8 @@ static void stats_fs_source_destroy(struct kref *kref_source)
stats_fs_source_remove_subordinate_locked(source, child);
}
+ stats_fs_source_remove_files_locked(source);
+
up_write(&source->rwsem);
kfree(source->name);
kfree(source);
diff --git a/include/linux/stats_fs.h b/include/linux/stats_fs.h
index dc2d2e11f5ea..b04c42d827cf 100644
--- a/include/linux/stats_fs.h
+++ b/include/linux/stats_fs.h
@@ -87,6 +87,18 @@ struct stats_fs_source {
*/
struct stats_fs_source *stats_fs_source_create(const char *fmt, ...);
+/**
+ * stats_fs_source_register - register a source in the stats_fs filesystem
+ * @source: a pointer to the source that will be registered
+ *
+ * Add the given source as a direct child of /sys/kernel/statsfs.
+ * Its subordinate sources are then visited recursively, and any directories
+ * and files that do not exist yet are created. After registration, every
+ * add_subordinate call on this source also creates the corresponding
+ * files and directories.
+ */
+void stats_fs_source_register(struct stats_fs_source *source);
+
/**
* stats_fs_source_add_values - adds values to the given source
* @source: a pointer to the source that will receive the values
@@ -235,6 +247,9 @@ static inline struct stats_fs_source *stats_fs_source_create(const char *fmt,
return ERR_PTR(-ENODEV);
}
+static inline void stats_fs_source_register(struct stats_fs_source *source)
+{ }
+
static inline int stats_fs_source_add_values(struct stats_fs_source *source,
struct stats_fs_value *val,
void *base_ptr)
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index d78064007b17..46c66ea3fc9e 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -10,6 +10,7 @@
#define CRAMFS_MAGIC 0x28cd3d45 /* some random number */
#define CRAMFS_MAGIC_WEND 0x453dcd28 /* magic number with the wrong endianess */
#define DEBUGFS_MAGIC 0x64626720
+#define STATSFS_MAGIC 0x73746174
#define SECURITYFS_MAGIC 0x73636673
#define SELINUX_MAGIC 0xf97cff8c
#define SMACK_MAGIC 0x43415d53 /* "SMAC" */
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 027b18f7ed8c..6fe306206dfb 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -35,6 +35,10 @@
#define TRACEFS_MAGIC 0x74726163
#endif
+#ifndef STATSFS_MAGIC
+#define STATSFS_MAGIC 0x73746174
+#endif
+
#ifndef HUGETLBFS_MAGIC
#define HUGETLBFS_MAGIC 0x958458f6
#endif
@@ -76,6 +80,16 @@ static const char * const tracefs__known_mountpoints[] = {
0,
};
+#ifndef STATSFS_DEFAULT_PATH
+#define STATSFS_DEFAULT_PATH "/sys/kernel/statsfs"
+#endif
+
+static const char * const statsfs__known_mountpoints[] = {
+ STATSFS_DEFAULT_PATH,
+ "/statsfs",
+ 0,
+};
+
static const char * const hugetlbfs__known_mountpoints[] = {
0,
};
@@ -100,6 +114,7 @@ enum {
FS__TRACEFS = 3,
FS__HUGETLBFS = 4,
FS__BPF_FS = 5,
+ FS__STATSFS = 6,
};
#ifndef TRACEFS_MAGIC
@@ -127,6 +142,11 @@ static struct fs fs__entries[] = {
.mounts = tracefs__known_mountpoints,
.magic = TRACEFS_MAGIC,
},
+ [FS__STATSFS] = {
+ .name = "statsfs",
+ .mounts = statsfs__known_mountpoints,
+ .magic = STATSFS_MAGIC,
+ },
[FS__HUGETLBFS] = {
.name = "hugetlbfs",
.mounts = hugetlbfs__known_mountpoints,
@@ -297,6 +317,7 @@ FS(sysfs, FS__SYSFS);
FS(procfs, FS__PROCFS);
FS(debugfs, FS__DEBUGFS);
FS(tracefs, FS__TRACEFS);
+FS(statsfs, FS__STATSFS);
FS(hugetlbfs, FS__HUGETLBFS);
FS(bpf_fs, FS__BPF_FS);
--
2.25.2