[RFC] sysfs: defer instantiation of default attrs

From: Edward Cree
Date: Sat Feb 20 2016 - 05:42:44 EST


Ok, here's a rather ugly proof-of-concept patch. I've gone through some
contortions to avoid double-locking the kernfs_mutex; I should really
try to think of a better approach.
However, (within /sys/class/net) it only seems to do anything for the
queue kobjects, not the netdevice or statistics; those appear to be done
by a different method (attribute groups rather than default attributes).
It should be possible to do something similar for attribute groups, but I
will need to spend some more time thinking about it.

Testing this in a VM, and with udevd disabled (being too lazy to do it
properly), created 1024 bridges. On ls'ing
/sys/class/net/bridge*/queues/*/, saw free memory drop by 64kB,
suggesting that much had been saved by deferral. It's not very much,
hopefully deferring attribute groups will do better.

Changes behaviour if someone tries to add a node that clashes with one of
the default attrs. Previously the add would fail with EEXIST, now it
succeeds while later the deferred populate will fail to add the default
attr node, pr_warn(), and carry on (no point passing the EEXIST back to
the caller, as there's nothing they can do about it).
I've gone with that as it's simple and it doesn't seem likely to matter -
name collisions should be found promptly either way - but if it's a
problem I could change it.

Signed-off-by: Edward Cree <ec429@xxxxxxxxxx>
---
fs/kernfs/dir.c | 100 +++++++++++++++++++++++++++++---------------
fs/kernfs/file.c | 53 ++++++++++++++---------
fs/kernfs/kernfs-internal.h | 2 +
fs/sysfs/dir.c | 9 ++++
fs/sysfs/file.c | 32 ++++++++++----
fs/sysfs/mount.c | 2 +
fs/sysfs/sysfs.h | 1 +
include/linux/kernfs.h | 11 +++++
include/linux/kobject.h | 2 +
include/linux/sysfs.h | 12 +++++-
lib/kobject.c | 48 ++++++++++++++-------
11 files changed, 194 insertions(+), 78 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 996b774..8ad0531 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -577,26 +577,17 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
return kn;
}

-/**
- * kernfs_add_one - add kernfs_node to parent without warning
- * @kn: kernfs_node to be added
- *
- * The caller must already have initialized @kn->parent. This
- * function increments nlink of the parent's inode if @kn is a
- * directory and link into the children list of the parent.
- *
- * RETURNS:
- * 0 on success, -EEXIST if entry with the given name already
- * exists.
- */
-int kernfs_add_one(struct kernfs_node *kn)
+static void __kernfs_activate(struct kernfs_node *kn, bool locked);
+
+int __kernfs_add_one(struct kernfs_node *kn, bool locked)
{
struct kernfs_node *parent = kn->parent;
struct kernfs_iattrs *ps_iattr;
bool has_ns;
int ret;

- mutex_lock(&kernfs_mutex);
+ if (!locked)
+ mutex_lock(&kernfs_mutex);

ret = -EINVAL;
has_ns = kernfs_ns_enabled(parent);
@@ -627,7 +618,8 @@ int kernfs_add_one(struct kernfs_node *kn)
ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
}

- mutex_unlock(&kernfs_mutex);
+ if (!locked)
+ mutex_unlock(&kernfs_mutex);

/*
* Activate the new node unless CREATE_DEACTIVATED is requested.
@@ -637,14 +629,44 @@ int kernfs_add_one(struct kernfs_node *kn)
* trigger deactivation.
*/
if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
- kernfs_activate(kn);
+ __kernfs_activate(kn, locked);
return 0;

out_unlock:
- mutex_unlock(&kernfs_mutex);
+ if (!locked)
+ mutex_unlock(&kernfs_mutex);
return ret;
}

+
+/**
+ * kernfs_add_one - add kernfs_node to parent without warning
+ * @kn: kernfs_node to be added
+ *
+ * The caller must already have initialized @kn->parent. This
+ * function increments nlink of the parent's inode if @kn is a
+ * directory and link into the children list of the parent.
+ *
+ * RETURNS:
+ * 0 on success, -EEXIST if entry with the given name already
+ * exists.
+ */
+int kernfs_add_one(struct kernfs_node *kn)
+{
+ return __kernfs_add_one(kn, false);
+}
+
+/* Caller must ensure kernfs_mutex held */
+void kernfs_ensure_populated(struct kernfs_node *kn)
+{
+ if (unlikely(kn->flags & KERNFS_UNPOPULATED)) {
+ struct kernfs_root *kr = kernfs_root(kn);
+
+ kr->populate(kn);
+ kn->flags &= ~KERNFS_UNPOPULATED;
+ }
+}
+
/**
* kernfs_find_ns - find kernfs_node with the given name
* @parent: kernfs_node to search under
@@ -664,6 +686,8 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,

lockdep_assert_held(&kernfs_mutex);

+ kernfs_ensure_populated(parent);
+
if (has_ns != (bool)ns) {
WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
has_ns ? "required" : "invalid", parent->name, name);
@@ -790,6 +814,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
kn->priv = priv;
kn->dir.root = root;

+ root->populate = NULL;
root->syscall_ops = scops;
root->flags = flags;
root->kn = kn;
@@ -1052,24 +1077,12 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
return pos->parent;
}

-/**
- * kernfs_activate - activate a node which started deactivated
- * @kn: kernfs_node whose subtree is to be activated
- *
- * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node
- * needs to be explicitly activated. A node which hasn't been activated
- * isn't visible to userland and deactivation is skipped during its
- * removal. This is useful to construct atomic init sequences where
- * creation of multiple nodes should either succeed or fail atomically.
- *
- * The caller is responsible for ensuring that this function is not called
- * after kernfs_remove*() is invoked on @kn.
- */
-void kernfs_activate(struct kernfs_node *kn)
+static void __kernfs_activate(struct kernfs_node *kn, bool locked)
{
struct kernfs_node *pos;

- mutex_lock(&kernfs_mutex);
+ if (!locked)
+ mutex_lock(&kernfs_mutex);

pos = NULL;
while ((pos = kernfs_next_descendant_post(pos, kn))) {
@@ -1083,7 +1096,26 @@ void kernfs_activate(struct kernfs_node *kn)
pos->flags |= KERNFS_ACTIVATED;
}

- mutex_unlock(&kernfs_mutex);
+ if (!locked)
+ mutex_unlock(&kernfs_mutex);
+}
+
+/**
+ * kernfs_activate - activate a node which started deactivated
+ * @kn: kernfs_node whose subtree is to be activated
+ *
+ * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node
+ * needs to be explicitly activated. A node which hasn't been activated
+ * isn't visible to userland and deactivation is skipped during its
+ * removal. This is useful to construct atomic init sequences where
+ * creation of multiple nodes should either succeed or fail atomically.
+ *
+ * The caller is responsible for ensuring that this function is not called
+ * after kernfs_remove*() is invoked on @kn.
+ */
+void kernfs_activate(struct kernfs_node *kn)
+{
+ __kernfs_activate(kn, false);
}

static void __kernfs_remove(struct kernfs_node *kn)
@@ -1479,6 +1511,8 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
return 0;
mutex_lock(&kernfs_mutex);

+ kernfs_ensure_populated(parent);
+
if (kernfs_ns_enabled(parent))
ns = kernfs_info(dentry->d_sb)->ns;

diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 7247252..1454257 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -892,25 +892,13 @@ const struct file_operations kernfs_file_fops = {
.poll = kernfs_fop_poll,
};

-/**
- * __kernfs_create_file - kernfs internal function to create a file
- * @parent: directory to create the file in
- * @name: name of the file
- * @mode: mode of the file
- * @size: size of the file
- * @ops: kernfs operations for the file
- * @priv: private data for the file
- * @ns: optional namespace tag of the file
- * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
- *
- * Returns the created node on success, ERR_PTR() value on error.
- */
-struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
- const char *name,
- umode_t mode, loff_t size,
- const struct kernfs_ops *ops,
- void *priv, const void *ns,
- struct lock_class_key *key)
+struct kernfs_node *___kernfs_create_file(struct kernfs_node *parent,
+ const char *name,
+ umode_t mode, loff_t size,
+ const struct kernfs_ops *ops,
+ void *priv, const void *ns,
+ struct lock_class_key *key,
+ bool locked)
{
struct kernfs_node *kn;
unsigned flags;
@@ -944,10 +932,35 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
if (ops->mmap)
kn->flags |= KERNFS_HAS_MMAP;

- rc = kernfs_add_one(kn);
+ rc = __kernfs_add_one(kn, locked);
if (rc) {
kernfs_put(kn);
return ERR_PTR(rc);
}
return kn;
}
+
+
+/**
+ * __kernfs_create_file - kernfs internal function to create a file
+ * @parent: directory to create the file in
+ * @name: name of the file
+ * @mode: mode of the file
+ * @size: size of the file
+ * @ops: kernfs operations for the file
+ * @priv: private data for the file
+ * @ns: optional namespace tag of the file
+ * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
+ *
+ * Returns the created node on success, ERR_PTR() value on error.
+ */
+struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
+ const char *name,
+ umode_t mode, loff_t size,
+ const struct kernfs_ops *ops,
+ void *priv, const void *ns,
+ struct lock_class_key *key)
+{
+ return ___kernfs_create_file(parent, name, mode, size, ops, priv, ns,
+ key, false);
+}
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 6762bfb..b978181 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -98,10 +98,12 @@ extern const struct inode_operations kernfs_dir_iops;

struct kernfs_node *kernfs_get_active(struct kernfs_node *kn);
void kernfs_put_active(struct kernfs_node *kn);
+int __kernfs_add_one(struct kernfs_node *kn, bool locked);
int kernfs_add_one(struct kernfs_node *kn);
struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
const char *name, umode_t mode,
unsigned flags);
+void kernfs_ensure_populated(struct kernfs_node *kn);

/*
* file.c
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 94374e4..9c4fa2c 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -98,6 +98,15 @@ void sysfs_remove_dir(struct kobject *kobj)
}
}

+void sysfs_populate_dir(struct kernfs_node *kn)
+{
+ struct kobject *kobj = kobject_get(kn->priv);
+
+ if (!WARN_ON(!kobj))
+ kobject_populate_dir(kobj);
+ kobject_put(kobj);
+}
+
int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name,
const void *new_ns)
{
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index f35523d..41b0a72 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -237,9 +237,9 @@ static const struct kernfs_ops sysfs_bin_kfops_mmap = {
.mmap = sysfs_kf_bin_mmap,
};

-int sysfs_add_file_mode_ns(struct kernfs_node *parent,
- const struct attribute *attr, bool is_bin,
- umode_t mode, const void *ns)
+int __sysfs_add_file_mode_ns(struct kernfs_node *parent,
+ const struct attribute *attr, bool is_bin,
+ umode_t mode, const void *ns, bool locked)
{
struct lock_class_key *key = NULL;
const struct kernfs_ops *ops;
@@ -296,8 +296,8 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
if (!attr->ignore_lockdep)
key = attr->key ?: (struct lock_class_key *)&attr->skey;
#endif
- kn = __kernfs_create_file(parent, attr->name, mode & 0777, size, ops,
- (void *)attr, ns, key);
+ kn = ___kernfs_create_file(parent, attr->name, mode & 0777, size, ops,
+ (void *)attr, ns, key, locked);
if (IS_ERR(kn)) {
if (PTR_ERR(kn) == -EEXIST)
sysfs_warn_dup(parent, attr->name);
@@ -306,12 +306,30 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
return 0;
}

+
+int sysfs_add_file_mode_ns(struct kernfs_node *parent,
+ const struct attribute *attr, bool is_bin,
+ umode_t mode, const void *ns)
+{
+ return __sysfs_add_file_mode_ns(parent, attr, is_bin, mode, ns, false);
+}
+
int sysfs_add_file(struct kernfs_node *parent, const struct attribute *attr,
bool is_bin)
{
return sysfs_add_file_mode_ns(parent, attr, is_bin, attr->mode, NULL);
}

+int __sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
+ const void *ns, bool locked)
+{
+ BUG_ON(!kobj || !kobj->sd || !attr);
+
+ return __sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, ns,
+ locked);
+}
+EXPORT_SYMBOL_GPL(__sysfs_create_file_ns);
+
/**
* sysfs_create_file_ns - create an attribute file for an object with custom ns
* @kobj: object we're creating for
@@ -321,9 +339,7 @@ int sysfs_add_file(struct kernfs_node *parent, const struct attribute *attr,
int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
const void *ns)
{
- BUG_ON(!kobj || !kobj->sd || !attr);
-
- return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, ns);
+ return __sysfs_create_file_ns(kobj, attr, ns, false);

}
EXPORT_SYMBOL_GPL(sysfs_create_file_ns);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index f3db820..0c7144c 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -71,6 +71,8 @@ int __init sysfs_init(void)
if (IS_ERR(sysfs_root))
return PTR_ERR(sysfs_root);

+ sysfs_root->populate = sysfs_populate_dir;
+
sysfs_root_kn = sysfs_root->kn;

err = register_filesystem(&sysfs_fs_type);
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 0e2f1cc..ea56275 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -24,6 +24,7 @@ extern struct kernfs_node *sysfs_root_kn;
extern spinlock_t sysfs_symlink_target_lock;

void sysfs_warn_dup(struct kernfs_node *parent, const char *name);
+void sysfs_populate_dir(struct kernfs_node *kn);

/*
* file.c
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index af51df3..3f8110d 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -46,6 +46,7 @@ enum kernfs_node_flag {
KERNFS_SUICIDAL = 0x0400,
KERNFS_SUICIDED = 0x0800,
KERNFS_EMPTY_DIR = 0x1000,
+ KERNFS_UNPOPULATED = 0x2000,
};

/* @flags for kernfs_create_root() */
@@ -159,6 +160,9 @@ struct kernfs_root {
struct kernfs_node *kn;
unsigned int flags; /* KERNFS_ROOT_* flags */

+ /* deferred directory populate */
+ void (*populate)(struct kernfs_node *kn);
+
/* private fields, do not use outside kernfs proper */
struct ida ino_ida;
struct kernfs_syscall_ops *syscall_ops;
@@ -292,6 +296,13 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
void *priv, const void *ns);
struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
const char *name);
+struct kernfs_node *___kernfs_create_file(struct kernfs_node *parent,
+ const char *name,
+ umode_t mode, loff_t size,
+ const struct kernfs_ops *ops,
+ void *priv, const void *ns,
+ struct lock_class_key *key,
+ bool locked);
struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
const char *name,
umode_t mode, loff_t size,
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index e628459..14693f2 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -110,6 +110,8 @@ extern int __must_check kobject_move(struct kobject *, struct kobject *);
extern struct kobject *kobject_get(struct kobject *kobj);
extern void kobject_put(struct kobject *kobj);

+extern void kobject_populate_dir(struct kobject *kobj);
+
extern const void *kobject_namespace(struct kobject *kobj);
extern char *kobject_get_path(struct kobject *kobj, gfp_t flag);

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index c6f0f0d..3cc8255 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -488,10 +488,20 @@ static inline void sysfs_enable_ns(struct kernfs_node *kn)

#endif /* CONFIG_SYSFS */

+int __sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
+ const void *ns, bool locked);
+
+static inline int __must_check __sysfs_create_file(struct kobject *kobj,
+ const struct attribute *attr,
+ bool locked)
+{
+ return __sysfs_create_file_ns(kobj, attr, NULL, locked);
+}
+
static inline int __must_check sysfs_create_file(struct kobject *kobj,
const struct attribute *attr)
{
- return sysfs_create_file_ns(kobj, attr, NULL);
+ return __sysfs_create_file(kobj, attr, false);
}

static inline void sysfs_remove_file(struct kobject *kobj,
diff --git a/lib/kobject.c b/lib/kobject.c
index 7cbccd2..c56b199 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -18,6 +18,8 @@
#include <linux/stat.h>
#include <linux/slab.h>
#include <linux/random.h>
+#include <linux/printk.h>
+#include <uapi/linux/limits.h>

/**
* kobject_namespace - return @kobj's namespace tag
@@ -37,52 +39,66 @@ const void *kobject_namespace(struct kobject *kobj)
return kobj->ktype->namespace(kobj);
}

+static void warn_populate_fail(struct kernfs_node *parent, const char *name,
+ int error)
+{
+ char *buf, *path = NULL;
+
+ buf = kzalloc(PATH_MAX, GFP_KERNEL);
+ if (buf)
+ path = kernfs_path(parent, buf, PATH_MAX);
+
+ pr_warn("sysfs: cannot create attribute file '%s/%s': %d\n",
+ path, name, error);
+
+ kfree(buf);
+}
+
/*
- * populate_dir - populate directory with attributes.
+ * kobject_populate_dir - populate sysfs directory with attributes.
* @kobj: object we're working on.
*
* Most subsystems have a set of default attributes that are associated
- * with an object that registers with them. This is a helper called during
- * object registration that loops through the default attributes of the
- * subsystem and creates attributes files for them in sysfs.
+ * with an object that registers with them. This is a helper called
+ * when the kobject's sysfs dir is first touched, that loops through the
+ * default attributes of the subsystem and creates attribute files for
+ * them in sysfs.
+ *
+ * We don't report errors from the file creations to the caller, because
+ * nothing could be done about them anyway.
*/
-static int populate_dir(struct kobject *kobj)
+void kobject_populate_dir(struct kobject *kobj)
{
struct kobj_type *t = get_ktype(kobj);
struct attribute *attr;
- int error = 0;
+ int error;
int i;

if (t && t->default_attrs) {
for (i = 0; (attr = t->default_attrs[i]) != NULL; i++) {
- error = sysfs_create_file(kobj, attr);
+ error = __sysfs_create_file(kobj, attr, true);
if (error)
- break;
+ warn_populate_fail(kobj->sd, attr->name, error);
}
}
- return error;
}

static int create_dir(struct kobject *kobj)
{
const struct kobj_ns_type_operations *ops;
+ struct kernfs_node *kn;
int error;

error = sysfs_create_dir_ns(kobj, kobject_namespace(kobj));
if (error)
return error;

- error = populate_dir(kobj);
- if (error) {
- sysfs_remove_dir(kobj);
- return error;
- }
-
/*
* @kobj->sd may be deleted by an ancestor going away. Hold an
* extra reference so that it stays until @kobj is gone.
*/
- sysfs_get(kobj->sd);
+ kn = sysfs_get(kobj->sd);
+ kn->flags |= KERNFS_UNPOPULATED;

/*
* If @kobj has ns_ops, its children need to be filtered based on
--
2.6.4