[PATCH v2 1/2] kernfs: Make it possible to use RCU for kernfs_node::name lookup.

From: Sebastian Andrzej Siewior
Date: Tue Nov 12 2024 - 11:54:14 EST


Instead of using kernfs_rename_lock for lookups of ::name allow to use
RCU protection for lookup. Rely on kn's kernfs_root::kernfs_rwsem for
update synchronisation.

KERNFS_ROOT_SAME_PARENT is added to signal that the parent never
changes. kernfs_rename_ns() checks that flag and if it is seen, it
ensures that the parent is the same and then does not acquire
kernfs_rename_lock during parent/ name assignment and updates only the
name attribute. Without the flag, the update is performed as always.

kernfs_name_rcu() is a copy of kernfs_name() which is using RCU
protection while accessing the kernfs_node::name. Both functions
validate the KERNFS_ROOT_SAME_PARENT flag. The same is true for
kernfs_path_from_node() and kernfs_path_from_node_rcu().

Suggested-by: Tejun Heo <tj@xxxxxxxxxx>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
---
fs/kernfs/dir.c | 151 ++++++++++++++++++++++++++++++-----------
include/linux/kernfs.h | 23 ++++++-
2 files changed, 133 insertions(+), 41 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 458519e416fe7..41c87ee76aa70 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -51,14 +51,6 @@ static bool kernfs_lockdep(struct kernfs_node *kn)
#endif
}

-static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
-{
- if (!kn)
- return strscpy(buf, "(null)", buflen);
-
- return strscpy(buf, kn->parent ? kn->name : "/", buflen);
-}
-
/* kernfs_node_depth - compute depth from @from to @to */
static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to)
{
@@ -168,10 +160,13 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,

/* Calculate how many bytes we need for the rest */
for (i = depth_to - 1; i >= 0; i--) {
+ const char *name;
+
for (kn = kn_to, j = 0; j < i; j++)
kn = kn->parent;

- len += scnprintf(buf + len, buflen - len, "/%s", kn->name);
+ name = rcu_dereference_check(kn->name_rcu, lockdep_is_held(&kernfs_rename_lock));
+ len += scnprintf(buf + len, buflen - len, "/%s", name);
}

return len;
@@ -184,7 +179,8 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
* @buflen: size of @buf
*
* Copies the name of @kn into @buf of @buflen bytes. The behavior is
- * similar to strscpy().
+ * similar to strscpy(). The root node must not be initialized with
+ * KERNFS_ROOT_SAME_PARENT.
*
* Fills buffer with "(null)" if @kn is %NULL.
*
@@ -195,13 +191,47 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
*/
int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
{
- unsigned long flags;
- int ret;
+ struct kernfs_root *root;

- read_lock_irqsave(&kernfs_rename_lock, flags);
- ret = kernfs_name_locked(kn, buf, buflen);
- read_unlock_irqrestore(&kernfs_rename_lock, flags);
- return ret;
+ guard(read_lock_irqsave)(&kernfs_rename_lock);
+ if (kn) {
+ root = kernfs_root(kn);
+ if (WARN_ON_ONCE(root->flags & KERNFS_ROOT_SAME_PARENT))
+ kn = NULL;
+ }
+
+ if (!kn)
+ return strscpy(buf, "(null)", buflen);
+
+ return strscpy(buf, kn->parent ? kn->name : "/", buflen);
+}
+
+/**
+ * kernfs_name_rcu - obtain the name of a given node
+ * @kn: kernfs_node of interest
+ * @buf: buffer to copy @kn's name into
+ * @buflen: size of @buf
+ *
+ * Same as kernfs_name except it uses RCU for name lookup. The root node must
+ * be with KERNFS_ROOT_SAME_PARENT.
+ *
+ * This function can be called from any context.
+ */
+
+int kernfs_name_rcu(struct kernfs_node *kn, char *buf, size_t buflen)
+{
+ struct kernfs_root *root;
+
+ if (kn) {
+ root = kernfs_root(kn);
+ if (WARN_ON_ONCE(!(root->flags & KERNFS_ROOT_SAME_PARENT)))
+ kn = NULL;
+ }
+ if (!kn)
+ return strscpy(buf, "(null)", buflen);
+
+ guard(rcu)();
+ return strscpy(buf, kn->parent ? rcu_dereference(kn->name_rcu) : "/", buflen);
}

/**
@@ -214,7 +244,8 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
* Builds @to's path relative to @from in @buf. @from and @to must
* be on the same kernfs-root. If @from is not parent of @to, then a relative
* path (which includes '..'s) as needed to reach from @from to @to is
- * returned.
+ * returned. The root node must not be initialized with
+ * KERNFS_ROOT_SAME_PARENT.
*
* Return: the length of the constructed path. If the path would have been
* greater than @buflen, @buf contains the truncated path with the trailing
@@ -223,16 +254,42 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
char *buf, size_t buflen)
{
- unsigned long flags;
- int ret;
+ struct kernfs_root *root;

- read_lock_irqsave(&kernfs_rename_lock, flags);
- ret = kernfs_path_from_node_locked(to, from, buf, buflen);
- read_unlock_irqrestore(&kernfs_rename_lock, flags);
- return ret;
+ guard(read_lock_irqsave)(&kernfs_rename_lock);
+ if (to) {
+ root = kernfs_root(to);
+ if (WARN_ON_ONCE(root->flags & KERNFS_ROOT_SAME_PARENT))
+ to = NULL;
+ }
+ return kernfs_path_from_node_locked(to, from, buf, buflen);
}
EXPORT_SYMBOL_GPL(kernfs_path_from_node);

+/**
+ * kernfs_path_from_node_rcu - build path of node @to relative to @from.
+ * @from: parent kernfs_node relative to which we need to build the path
+ * @to: kernfs_node of interest
+ * @buf: buffer to copy @to's path into
+ * @buflen: size of @buf
+ *
+ * Same as kernfs_path_from_node. Uses RCU for the name lookup. The root node
+ * must be initialized with KERNFS_ROOT_SAME_PARENT.
+ */
+int kernfs_path_from_node_rcu(struct kernfs_node *to, struct kernfs_node *from,
+ char *buf, size_t buflen)
+{
+ struct kernfs_root *root;
+
+ if (to) {
+ root = kernfs_root(to);
+ if (WARN_ON_ONCE(!(root->flags & KERNFS_ROOT_SAME_PARENT)))
+ to = NULL;
+ }
+ guard(rcu)();
+ return kernfs_path_from_node_locked(to, from, buf, buflen);
+}
+
/**
* pr_cont_kernfs_name - pr_cont name of a kernfs_node
* @kn: kernfs_node of interest
@@ -1717,7 +1774,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
{
struct kernfs_node *old_parent;
struct kernfs_root *root;
- const char *old_name = NULL;
+ bool rcu_name = false;
+ const char *kn_name;
int error;

/* can't move or rename root */
@@ -1732,9 +1790,18 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
(new_parent->flags & KERNFS_EMPTY_DIR))
goto out;

+ if (root->flags & KERNFS_ROOT_SAME_PARENT) {
+ error = -EINVAL;
+ if (WARN_ON_ONCE(kn->parent != new_parent))
+ goto out;
+ rcu_name = true;
+ }
+
error = 0;
+ kn_name = rcu_dereference_check(kn->name_rcu,
+ lockdep_is_held(&root->kernfs_rwsem));
if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
- (strcmp(kn->name, new_name) == 0))
+ (strcmp(kn_name, new_name) == 0))
goto out; /* nothing to rename */

error = -EEXIST;
@@ -1742,7 +1809,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
goto out;

/* rename kernfs_node */
- if (strcmp(kn->name, new_name) != 0) {
+ if (strcmp(kn_name, new_name) != 0) {
error = -ENOMEM;
new_name = kstrdup_const(new_name, GFP_KERNEL);
if (!new_name)
@@ -1755,27 +1822,33 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
* Move to the appropriate place in the appropriate directories rbtree.
*/
kernfs_unlink_sibling(kn);
- kernfs_get(new_parent);

/* rename_lock protects ->parent and ->name accessors */
- write_lock_irq(&kernfs_rename_lock);
+ if (!rcu_name) {
+ write_lock_irq(&kernfs_rename_lock);

- old_parent = kn->parent;
- kn->parent = new_parent;
+ kernfs_get(new_parent);
+ old_parent = kn->parent;
+ kn->parent = new_parent;

- kn->ns = new_ns;
- if (new_name) {
- old_name = kn->name;
- kn->name = new_name;
+ kn->ns = new_ns;
+ if (new_name)
+ kn->name = new_name;
+
+ write_unlock_irq(&kernfs_rename_lock);
+ kernfs_put(old_parent);
+ } else {
+ /* name assignment is RCU protected, parent is the same */
+ kn->ns = new_ns;
+ if (new_name)
+ rcu_assign_pointer(kn->name_rcu, new_name);
}

- write_unlock_irq(&kernfs_rename_lock);
-
- kn->hash = kernfs_name_hash(kn->name, kn->ns);
+ kn->hash = kernfs_name_hash(new_name ?: kn_name, kn->ns);
kernfs_link_sibling(kn);

- kernfs_put(old_parent);
- kfree_const(old_name);
+ if (new_name && !is_kernel_rodata((unsigned long)kn_name))
+ kfree_rcu_mightsleep(kn_name);

error = 0;
out:
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 87c79d076d6d7..b52393f1045c6 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -147,6 +147,11 @@ enum kernfs_root_flag {
* Support user xattrs to be written to nodes rooted at this root.
*/
KERNFS_ROOT_SUPPORT_USER_XATTR = 0x0008,
+
+ /*
+ * Renames must not change the parent node.
+ */
+ KERNFS_ROOT_SAME_PARENT = 0x0010,
};

/* type-specific structures for kernfs_node union members */
@@ -200,7 +205,10 @@ struct kernfs_node {
* parent directly.
*/
struct kernfs_node *parent;
- const char *name;
+ union {
+ const char __rcu *name_rcu;
+ const char *name;
+ };

struct rb_node rb;

@@ -395,8 +403,11 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
}

int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen);
-int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn,
+int kernfs_name_rcu(struct kernfs_node *kn, char *buf, size_t buflen);
+int kernfs_path_from_node(struct kernfs_node *kn_to, struct kernfs_node *kn_from,
char *buf, size_t buflen);
+int kernfs_path_from_node_rcu(struct kernfs_node *kn_to, struct kernfs_node *kn_from,
+ char *buf, size_t buflen);
void pr_cont_kernfs_name(struct kernfs_node *kn);
void pr_cont_kernfs_path(struct kernfs_node *kn);
struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn);
@@ -475,11 +486,19 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
{ return -ENOSYS; }

+static inline int kernfs_name_rcu(struct kernfs_node *kn, char *buf, size_t buflen)
+{ return -ENOSYS; }
+
static inline int kernfs_path_from_node(struct kernfs_node *root_kn,
struct kernfs_node *kn,
char *buf, size_t buflen)
{ return -ENOSYS; }

+static inline int kernfs_path_from_node_rcu(struct kernfs_node *root_kn,
+ struct kernfs_node *kn,
+ char *buf, size_t buflen)
+{ return -ENOSYS; }
+
static inline void pr_cont_kernfs_name(struct kernfs_node *kn) { }
static inline void pr_cont_kernfs_path(struct kernfs_node *kn) { }

--
2.45.2