Re: [PATCH 06/14] xattr: remove rbtree-based simple_xattr infrastructure
From: Jan Kara
Date: Fri Feb 27 2026 - 10:21:29 EST
On Mon 16-02-26 14:32:02, Christian Brauner wrote:
> Now that all consumers (shmem, kernfs, pidfs) have been converted to
> use the rhashtable-based simple_xattrs with pointer-based lazy
> allocation, remove the legacy rbtree code path. The rhashtable
> implementation provides O(1) average-case lookup with RCU-based lockless
> reads, replacing the O(log n) rbtree with reader-writer spinlock
> contention.
>
> Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx>
Looks good. Feel free to add:
Reviewed-by: Jan Kara <jack@xxxxxxx>
Honza
> ---
> fs/xattr.c | 387 +++++++++++++-------------------------------------
> include/linux/xattr.h | 12 +-
> 2 files changed, 103 insertions(+), 296 deletions(-)
>
> diff --git a/fs/xattr.c b/fs/xattr.c
> index eb45ae0fd17f..64803097e1dc 100644
> --- a/fs/xattr.c
> +++ b/fs/xattr.c
> @@ -1200,20 +1200,18 @@ void simple_xattr_free(struct simple_xattr *xattr)
>
> static void simple_xattr_rcu_free(struct rcu_head *head)
> {
> - struct simple_xattr *xattr;
> + struct simple_xattr *xattr = container_of(head, struct simple_xattr, rcu);
>
> - xattr = container_of(head, struct simple_xattr, rcu);
> simple_xattr_free(xattr);
> }
>
> /**
> - * simple_xattr_free_rcu - free an xattr object after an RCU grace period
> + * simple_xattr_free_rcu - free an xattr object with RCU delay
> * @xattr: the xattr object
> *
> - * Schedule RCU-deferred freeing of an xattr entry. This is used by
> - * rhashtable-based callers of simple_xattr_set() that replace or remove
> - * an existing entry while concurrent RCU readers may still be accessing
> - * it.
> + * Free the xattr object after an RCU grace period. This must be used when
> + * the xattr was removed from a data structure that concurrent RCU readers
> + * may still be traversing. Can handle @xattr being NULL.
> */
> void simple_xattr_free_rcu(struct simple_xattr *xattr)
> {
> @@ -1254,43 +1252,6 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
> return new_xattr;
> }
>
> -/**
> - * rbtree_simple_xattr_cmp - compare xattr name with current rbtree xattr entry
> - * @key: xattr name
> - * @node: current node
> - *
> - * Compare the xattr name with the xattr name attached to @node in the rbtree.
> - *
> - * Return: Negative value if continuing left, positive if continuing right, 0
> - * if the xattr attached to @node matches @key.
> - */
> -static int rbtree_simple_xattr_cmp(const void *key, const struct rb_node *node)
> -{
> - const char *xattr_name = key;
> - const struct simple_xattr *xattr;
> -
> - xattr = rb_entry(node, struct simple_xattr, rb_node);
> - return strcmp(xattr->name, xattr_name);
> -}
> -
> -/**
> - * rbtree_simple_xattr_node_cmp - compare two xattr rbtree nodes
> - * @new_node: new node
> - * @node: current node
> - *
> - * Compare the xattr attached to @new_node with the xattr attached to @node.
> - *
> - * Return: Negative value if continuing left, positive if continuing right, 0
> - * if the xattr attached to @new_node matches the xattr attached to @node.
> - */
> -static int rbtree_simple_xattr_node_cmp(struct rb_node *new_node,
> - const struct rb_node *node)
> -{
> - struct simple_xattr *xattr;
> - xattr = rb_entry(new_node, struct simple_xattr, rb_node);
> - return rbtree_simple_xattr_cmp(xattr->name, node);
> -}
> -
> static u32 simple_xattr_hashfn(const void *data, u32 len, u32 seed)
> {
> const char *name = data;
> @@ -1336,41 +1297,19 @@ static const struct rhashtable_params simple_xattr_params = {
> int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
> void *buffer, size_t size)
> {
> - struct simple_xattr *xattr = NULL;
> + struct simple_xattr *xattr;
> int ret = -ENODATA;
>
> - if (xattrs->use_rhashtable) {
> - guard(rcu)();
> - xattr = rhashtable_lookup(&xattrs->ht, name,
> - simple_xattr_params);
> - if (xattr) {
> - ret = xattr->size;
> - if (buffer) {
> - if (size < xattr->size)
> - ret = -ERANGE;
> - else
> - memcpy(buffer, xattr->value,
> - xattr->size);
> - }
> - }
> - } else {
> - struct rb_node *rbp;
> -
> - read_lock(&xattrs->lock);
> - rbp = rb_find(name, &xattrs->rb_root,
> - rbtree_simple_xattr_cmp);
> - if (rbp) {
> - xattr = rb_entry(rbp, struct simple_xattr, rb_node);
> - ret = xattr->size;
> - if (buffer) {
> - if (size < xattr->size)
> - ret = -ERANGE;
> - else
> - memcpy(buffer, xattr->value,
> - xattr->size);
> - }
> + guard(rcu)();
> + xattr = rhashtable_lookup(&xattrs->ht, name, simple_xattr_params);
> + if (xattr) {
> + ret = xattr->size;
> + if (buffer) {
> + if (size < xattr->size)
> + ret = -ERANGE;
> + else
> + memcpy(buffer, xattr->value, xattr->size);
> }
> - read_unlock(&xattrs->lock);
> }
> return ret;
> }
> @@ -1398,6 +1337,11 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
> * nothing if XATTR_CREATE is specified in @flags or @flags is zero. For
> * XATTR_REPLACE we fail as mentioned above.
> *
> + * Note: Callers must externally serialize writes. All current callers hold
> + * the inode lock for write operations. The lookup->replace/remove sequence
> + * is not atomic with respect to the rhashtable's per-bucket locking, but
> + * is safe because writes are serialized by the caller.
> + *
> * Return: On success, the removed or replaced xattr is returned, to be freed
> * by the caller; or NULL if none. On failure a negative error code is returned.
> */
> @@ -1406,7 +1350,7 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
> size_t size, int flags)
> {
> struct simple_xattr *old_xattr = NULL;
> - int err = 0;
> + int err;
>
> CLASS(simple_xattr, new_xattr)(value, size);
> if (IS_ERR(new_xattr))
> @@ -1418,119 +1362,52 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
> return ERR_PTR(-ENOMEM);
> }
>
> - if (xattrs->use_rhashtable) {
> - /*
> - * Lookup is safe without RCU here since writes are
> - * serialized by the caller.
> - */
> - old_xattr = rhashtable_lookup_fast(&xattrs->ht, name,
> - simple_xattr_params);
> -
> - if (old_xattr) {
> - /* Fail if XATTR_CREATE is requested and the xattr exists. */
> - if (flags & XATTR_CREATE)
> - return ERR_PTR(-EEXIST);
> -
> - if (new_xattr) {
> - err = rhashtable_replace_fast(&xattrs->ht,
> - &old_xattr->hash_node,
> - &new_xattr->hash_node,
> - simple_xattr_params);
> - if (err)
> - return ERR_PTR(err);
> - } else {
> - err = rhashtable_remove_fast(&xattrs->ht,
> - &old_xattr->hash_node,
> - simple_xattr_params);
> - if (err)
> - return ERR_PTR(err);
> - }
> - } else {
> - /* Fail if XATTR_REPLACE is requested but no xattr is found. */
> - if (flags & XATTR_REPLACE)
> - return ERR_PTR(-ENODATA);
> -
> - /*
> - * If XATTR_CREATE or no flags are specified together
> - * with a new value simply insert it.
> - */
> - if (new_xattr) {
> - err = rhashtable_insert_fast(&xattrs->ht,
> - &new_xattr->hash_node,
> - simple_xattr_params);
> - if (err)
> - return ERR_PTR(err);
> - }
> -
> - /*
> - * If XATTR_CREATE or no flags are specified and
> - * neither an old or new xattr exist then we don't
> - * need to do anything.
> - */
> - }
> - } else {
> - struct rb_node *parent = NULL, **rbp;
> - int ret;
> -
> - write_lock(&xattrs->lock);
> - rbp = &xattrs->rb_root.rb_node;
> - while (*rbp) {
> - parent = *rbp;
> - ret = rbtree_simple_xattr_cmp(name, *rbp);
> - if (ret < 0)
> - rbp = &(*rbp)->rb_left;
> - else if (ret > 0)
> - rbp = &(*rbp)->rb_right;
> - else
> - old_xattr = rb_entry(*rbp, struct simple_xattr,
> - rb_node);
> - if (old_xattr)
> - break;
> - }
> + /* Lookup is safe without RCU here since writes are serialized. */
> + old_xattr = rhashtable_lookup_fast(&xattrs->ht, name,
> + simple_xattr_params);
>
> - if (old_xattr) {
> - /* Fail if XATTR_CREATE is requested and the xattr exists. */
> - if (flags & XATTR_CREATE) {
> - err = -EEXIST;
> - goto out_unlock;
> - }
> + if (old_xattr) {
> + /* Fail if XATTR_CREATE is requested and the xattr exists. */
> + if (flags & XATTR_CREATE)
> + return ERR_PTR(-EEXIST);
>
> - if (new_xattr)
> - rb_replace_node(&old_xattr->rb_node,
> - &new_xattr->rb_node,
> - &xattrs->rb_root);
> - else
> - rb_erase(&old_xattr->rb_node,
> - &xattrs->rb_root);
> + if (new_xattr) {
> + err = rhashtable_replace_fast(&xattrs->ht,
> + &old_xattr->hash_node,
> + &new_xattr->hash_node,
> + simple_xattr_params);
> + if (err)
> + return ERR_PTR(err);
> } else {
> - /* Fail if XATTR_REPLACE is requested but no xattr is found. */
> - if (flags & XATTR_REPLACE) {
> - err = -ENODATA;
> - goto out_unlock;
> - }
> -
> - /*
> - * If XATTR_CREATE or no flags are specified together
> - * with a new value simply insert it.
> - */
> - if (new_xattr) {
> - rb_link_node(&new_xattr->rb_node, parent, rbp);
> - rb_insert_color(&new_xattr->rb_node,
> - &xattrs->rb_root);
> - }
> + err = rhashtable_remove_fast(&xattrs->ht,
> + &old_xattr->hash_node,
> + simple_xattr_params);
> + if (err)
> + return ERR_PTR(err);
> + }
> + } else {
> + /* Fail if XATTR_REPLACE is requested but no xattr is found. */
> + if (flags & XATTR_REPLACE)
> + return ERR_PTR(-ENODATA);
>
> - /*
> - * If XATTR_CREATE or no flags are specified and
> - * neither an old or new xattr exist then we don't
> - * need to do anything.
> - */
> + /*
> + * If XATTR_CREATE or no flags are specified together with a
> + * new value simply insert it.
> + */
> + if (new_xattr) {
> + err = rhashtable_insert_fast(&xattrs->ht,
> + &new_xattr->hash_node,
> + simple_xattr_params);
> + if (err)
> + return ERR_PTR(err);
> }
>
> -out_unlock:
> - write_unlock(&xattrs->lock);
> - if (err)
> - return ERR_PTR(err);
> + /*
> + * If XATTR_CREATE or no flags are specified and neither an
> + * old or new xattr exist then we don't need to do anything.
> + */
> }
> +
> retain_and_null_ptr(new_xattr);
> return old_xattr;
> }
> @@ -1572,6 +1449,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
> char *buffer, size_t size)
> {
> bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
> + struct rhashtable_iter iter;
> struct simple_xattr *xattr;
> ssize_t remaining_size = size;
> int err = 0;
> @@ -1595,77 +1473,34 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
> if (!xattrs)
> return size - remaining_size;
>
> - if (xattrs->use_rhashtable) {
> - struct rhashtable_iter iter;
> -
> - rhashtable_walk_enter(&xattrs->ht, &iter);
> - rhashtable_walk_start(&iter);
> -
> - while ((xattr = rhashtable_walk_next(&iter)) != NULL) {
> - if (IS_ERR(xattr)) {
> - if (PTR_ERR(xattr) == -EAGAIN)
> - continue;
> - err = PTR_ERR(xattr);
> - break;
> - }
> -
> - /* skip "trusted." attributes for unprivileged callers */
> - if (!trusted && xattr_is_trusted(xattr->name))
> - continue;
> + rhashtable_walk_enter(&xattrs->ht, &iter);
> + rhashtable_walk_start(&iter);
>
> - /* skip MAC labels; these are provided by LSM above */
> - if (xattr_is_maclabel(xattr->name))
> + while ((xattr = rhashtable_walk_next(&iter)) != NULL) {
> + if (IS_ERR(xattr)) {
> + if (PTR_ERR(xattr) == -EAGAIN)
> continue;
> -
> - err = xattr_list_one(&buffer, &remaining_size,
> - xattr->name);
> - if (err)
> - break;
> + err = PTR_ERR(xattr);
> + break;
> }
>
> - rhashtable_walk_stop(&iter);
> - rhashtable_walk_exit(&iter);
> - } else {
> - struct rb_node *rbp;
> -
> - read_lock(&xattrs->lock);
> - for (rbp = rb_first(&xattrs->rb_root); rbp;
> - rbp = rb_next(rbp)) {
> - xattr = rb_entry(rbp, struct simple_xattr, rb_node);
> -
> - /* skip "trusted." attributes for unprivileged callers */
> - if (!trusted && xattr_is_trusted(xattr->name))
> - continue;
> + /* skip "trusted." attributes for unprivileged callers */
> + if (!trusted && xattr_is_trusted(xattr->name))
> + continue;
>
> - /* skip MAC labels; these are provided by LSM above */
> - if (xattr_is_maclabel(xattr->name))
> - continue;
> + /* skip MAC labels; these are provided by LSM above */
> + if (xattr_is_maclabel(xattr->name))
> + continue;
>
> - err = xattr_list_one(&buffer, &remaining_size,
> - xattr->name);
> - if (err)
> - break;
> - }
> - read_unlock(&xattrs->lock);
> + err = xattr_list_one(&buffer, &remaining_size, xattr->name);
> + if (err)
> + break;
> }
>
> - return err ? err : size - remaining_size;
> -}
> + rhashtable_walk_stop(&iter);
> + rhashtable_walk_exit(&iter);
>
> -/**
> - * rbtree_simple_xattr_less - compare two xattr rbtree nodes
> - * @new_node: new node
> - * @node: current node
> - *
> - * Compare the xattr attached to @new_node with the xattr attached to @node.
> - * Note that this function technically tolerates duplicate entries.
> - *
> - * Return: True if insertion point in the rbtree is found.
> - */
> -static bool rbtree_simple_xattr_less(struct rb_node *new_node,
> - const struct rb_node *node)
> -{
> - return rbtree_simple_xattr_node_cmp(new_node, node) < 0;
> + return err ? err : size - remaining_size;
> }
>
> /**
> @@ -1676,33 +1511,29 @@ static bool rbtree_simple_xattr_less(struct rb_node *new_node,
> * Add an xattr object to @xattrs. This assumes no replacement or removal
> * of matching xattrs is wanted. Should only be called during inode
> * initialization when a few distinct initial xattrs are supposed to be set.
> + *
> + * Return: On success zero is returned. On failure a negative error code is
> + * returned.
> */
> int simple_xattr_add(struct simple_xattrs *xattrs,
> struct simple_xattr *new_xattr)
> {
> - if (xattrs->use_rhashtable)
> - return rhashtable_insert_fast(&xattrs->ht,
> - &new_xattr->hash_node,
> - simple_xattr_params);
> -
> - write_lock(&xattrs->lock);
> - rb_add(&new_xattr->rb_node, &xattrs->rb_root,
> - rbtree_simple_xattr_less);
> - write_unlock(&xattrs->lock);
> - return 0;
> + return rhashtable_insert_fast(&xattrs->ht, &new_xattr->hash_node,
> + simple_xattr_params);
> }
>
> /**
> * simple_xattrs_init - initialize new xattr header
> * @xattrs: header to initialize
> *
> - * Initialize relevant fields of a an xattr header.
> + * Initialize the rhashtable used to store xattr objects.
> + *
> + * Return: On success zero is returned. On failure a negative error code is
> + * returned.
> */
> -void simple_xattrs_init(struct simple_xattrs *xattrs)
> +int simple_xattrs_init(struct simple_xattrs *xattrs)
> {
> - xattrs->use_rhashtable = false;
> - xattrs->rb_root = RB_ROOT;
> - rwlock_init(&xattrs->lock);
> + return rhashtable_init(&xattrs->ht, &simple_xattr_params);
> }
>
> /**
> @@ -1710,7 +1541,8 @@ void simple_xattrs_init(struct simple_xattrs *xattrs)
> *
> * Dynamically allocate a simple_xattrs header and initialize the
> * underlying rhashtable. This is intended for consumers that want
> - * rhashtable-based xattr storage.
> + * to lazily allocate xattr storage only when the first xattr is set,
> + * avoiding the per-inode rhashtable overhead when no xattrs are used.
> *
> * Return: On success a new simple_xattrs is returned. On failure an
> * ERR_PTR is returned.
> @@ -1718,14 +1550,15 @@ void simple_xattrs_init(struct simple_xattrs *xattrs)
> struct simple_xattrs *simple_xattrs_alloc(void)
> {
> struct simple_xattrs *xattrs __free(kfree) = NULL;
> + int ret;
>
> xattrs = kzalloc(sizeof(*xattrs), GFP_KERNEL);
> if (!xattrs)
> return ERR_PTR(-ENOMEM);
>
> - xattrs->use_rhashtable = true;
> - if (rhashtable_init(&xattrs->ht, &simple_xattr_params))
> - return ERR_PTR(-ENOMEM);
> + ret = simple_xattrs_init(xattrs);
> + if (ret)
> + return ERR_PTR(ret);
>
> return no_free_ptr(xattrs);
> }
> @@ -1784,28 +1617,10 @@ static void simple_xattr_ht_free(void *ptr, void *arg)
> */
> void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space)
> {
> + might_sleep();
> +
> if (freed_space)
> *freed_space = 0;
> -
> - if (xattrs->use_rhashtable) {
> - rhashtable_free_and_destroy(&xattrs->ht,
> - simple_xattr_ht_free, freed_space);
> - } else {
> - struct rb_node *rbp;
> -
> - rbp = rb_first(&xattrs->rb_root);
> - while (rbp) {
> - struct simple_xattr *xattr;
> - struct rb_node *rbp_next;
> -
> - rbp_next = rb_next(rbp);
> - xattr = rb_entry(rbp, struct simple_xattr, rb_node);
> - rb_erase(&xattr->rb_node, &xattrs->rb_root);
> - if (freed_space)
> - *freed_space += simple_xattr_space(xattr->name,
> - xattr->size);
> - simple_xattr_free(xattr);
> - rbp = rbp_next;
> - }
> - }
> + rhashtable_free_and_destroy(&xattrs->ht, simple_xattr_ht_free,
> + freed_space);
> }
> diff --git a/include/linux/xattr.h b/include/linux/xattr.h
> index 3063ecf0004d..f60357d9f938 100644
> --- a/include/linux/xattr.h
> +++ b/include/linux/xattr.h
> @@ -107,18 +107,10 @@ static inline const char *xattr_prefix(const struct xattr_handler *handler)
> }
>
> struct simple_xattrs {
> - bool use_rhashtable;
> - union {
> - struct {
> - struct rb_root rb_root;
> - rwlock_t lock;
> - };
> - struct rhashtable ht;
> - };
> + struct rhashtable ht;
> };
>
> struct simple_xattr {
> - struct rb_node rb_node;
> struct rhash_head hash_node;
> struct rcu_head rcu;
> char *name;
> @@ -126,7 +118,7 @@ struct simple_xattr {
> char value[];
> };
>
> -void simple_xattrs_init(struct simple_xattrs *xattrs);
> +int simple_xattrs_init(struct simple_xattrs *xattrs);
> struct simple_xattrs *simple_xattrs_alloc(void);
> struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp,
> const void *value, int flags);
>
> --
> 2.47.3
>
--
Jan Kara <jack@xxxxxxxx>
SUSE Labs, CR