Re: [PATCH 04/14] kernfs: adapt to rhashtable-based simple_xattrs with lazy allocation
From: Jan Kara
Date: Fri Feb 27 2026 - 10:08:24 EST
On Mon 16-02-26 14:32:00, Christian Brauner wrote:
> Adapt kernfs to use the rhashtable-based xattr path and switch from an
> embedded struct to pointer-based lazy allocation.
>
> Change kernfs_iattrs.xattrs from embedded 'struct simple_xattrs' to a
> pointer 'struct simple_xattrs *', initialized to NULL (zeroed by
> kmem_cache_zalloc). Since kernfs_iattrs is already lazily allocated
> itself, this adds a second level of lazy allocation specifically for
> the xattr store.
>
> The xattr store is allocated on first setxattr. Read paths
> check for NULL and return -ENODATA or empty list.
>
> Replaced xattr entries are freed via simple_xattr_free_rcu() to allow
> concurrent RCU readers to finish.
>
> The cleanup paths in kernfs_free_rcu() and __kernfs_new_node() error
> handling conditionally free the xattr store only when allocated.
>
> Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx>
...
> @@ -584,6 +582,12 @@ void kernfs_put(struct kernfs_node *kn)
> if (kernfs_type(kn) == KERNFS_LINK)
> kernfs_put(kn->symlink.target_kn);
>
> + if (kn->iattr && kn->iattr->xattrs) {
> + simple_xattrs_free(kn->iattr->xattrs, NULL);
> + kfree(kn->iattr->xattrs);
> + kn->iattr->xattrs = NULL;
> + }
> +
> spin_lock(&root->kernfs_idr_lock);
> idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
> spin_unlock(&root->kernfs_idr_lock);
This is a slight change in the lifetime rules because previously kernfs
xattrs could be safely accessed only under RCU but after this change you
have to hold inode reference *and* RCU to safely access them. I don't think
anybody would be accessing xattrs without holding inode reference so this
should be safe but it would be good to mention this in the changelog.
Otherwise feel free to add:
Reviewed-by: Jan Kara <jack@xxxxxxx>
Honza
> @@ -682,7 +686,10 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
>
> err_out4:
> if (kn->iattr) {
> - simple_xattrs_free(&kn->iattr->xattrs, NULL);
> + if (kn->iattr->xattrs) {
> + simple_xattrs_free(kn->iattr->xattrs, NULL);
> + kfree(kn->iattr->xattrs);
> + }
> kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
> }
> err_out3:
> diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
> index a36aaee98dce..dfc3315b5afc 100644
> --- a/fs/kernfs/inode.c
> +++ b/fs/kernfs/inode.c
> @@ -45,7 +45,6 @@ static struct kernfs_iattrs *__kernfs_iattrs(struct kernfs_node *kn, bool alloc)
> ret->ia_mtime = ret->ia_atime;
> ret->ia_ctime = ret->ia_atime;
>
> - simple_xattrs_init(&ret->xattrs);
> atomic_set(&ret->nr_user_xattrs, 0);
> atomic_set(&ret->user_xattr_size, 0);
>
> @@ -146,7 +145,8 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size)
> if (!attrs)
> return -ENOMEM;
>
> - return simple_xattr_list(d_inode(dentry), &attrs->xattrs, buf, size);
> + return simple_xattr_list(d_inode(dentry), READ_ONCE(attrs->xattrs),
> + buf, size);
> }
>
> static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
> @@ -298,27 +298,38 @@ int kernfs_xattr_get(struct kernfs_node *kn, const char *name,
> void *value, size_t size)
> {
> struct kernfs_iattrs *attrs = kernfs_iattrs_noalloc(kn);
> + struct simple_xattrs *xattrs;
> +
> if (!attrs)
> return -ENODATA;
>
> - return simple_xattr_get(&attrs->xattrs, name, value, size);
> + xattrs = READ_ONCE(attrs->xattrs);
> + if (!xattrs)
> + return -ENODATA;
> +
> + return simple_xattr_get(xattrs, name, value, size);
> }
>
> int kernfs_xattr_set(struct kernfs_node *kn, const char *name,
> const void *value, size_t size, int flags)
> {
> struct simple_xattr *old_xattr;
> + struct simple_xattrs *xattrs;
> struct kernfs_iattrs *attrs;
>
> attrs = kernfs_iattrs(kn);
> if (!attrs)
> return -ENOMEM;
>
> - old_xattr = simple_xattr_set(&attrs->xattrs, name, value, size, flags);
> + xattrs = simple_xattrs_lazy_alloc(&attrs->xattrs, value, flags);
> + if (IS_ERR_OR_NULL(xattrs))
> + return PTR_ERR(xattrs);
> +
> + old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
> if (IS_ERR(old_xattr))
> return PTR_ERR(old_xattr);
>
> - simple_xattr_free(old_xattr);
> + simple_xattr_free_rcu(old_xattr);
> return 0;
> }
>
> @@ -376,7 +387,7 @@ static int kernfs_vfs_user_xattr_add(struct kernfs_node *kn,
>
> ret = 0;
> size = old_xattr->size;
> - simple_xattr_free(old_xattr);
> + simple_xattr_free_rcu(old_xattr);
> dec_size_out:
> atomic_sub(size, sz);
> dec_count_out:
> @@ -403,7 +414,7 @@ static int kernfs_vfs_user_xattr_rm(struct kernfs_node *kn,
>
> atomic_sub(old_xattr->size, sz);
> atomic_dec(nr);
> - simple_xattr_free(old_xattr);
> + simple_xattr_free_rcu(old_xattr);
> return 0;
> }
>
> @@ -415,6 +426,7 @@ static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
> {
> const char *full_name = xattr_full_name(handler, suffix);
> struct kernfs_node *kn = inode->i_private;
> + struct simple_xattrs *xattrs;
> struct kernfs_iattrs *attrs;
>
> if (!(kernfs_root(kn)->flags & KERNFS_ROOT_SUPPORT_USER_XATTR))
> @@ -424,11 +436,15 @@ static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
> if (!attrs)
> return -ENOMEM;
>
> + xattrs = simple_xattrs_lazy_alloc(&attrs->xattrs, value, flags);
> + if (IS_ERR_OR_NULL(xattrs))
> + return PTR_ERR(xattrs);
> +
> if (value)
> - return kernfs_vfs_user_xattr_add(kn, full_name, &attrs->xattrs,
> + return kernfs_vfs_user_xattr_add(kn, full_name, xattrs,
> value, size, flags);
> else
> - return kernfs_vfs_user_xattr_rm(kn, full_name, &attrs->xattrs,
> + return kernfs_vfs_user_xattr_rm(kn, full_name, xattrs,
> value, size, flags);
>
> }
> diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
> index 6061b6f70d2a..1324ed8c0661 100644
> --- a/fs/kernfs/kernfs-internal.h
> +++ b/fs/kernfs/kernfs-internal.h
> @@ -26,7 +26,7 @@ struct kernfs_iattrs {
> struct timespec64 ia_mtime;
> struct timespec64 ia_ctime;
>
> - struct simple_xattrs xattrs;
> + struct simple_xattrs *xattrs;
> atomic_t nr_user_xattrs;
> atomic_t user_xattr_size;
> };
>
> --
> 2.47.3
>
--
Jan Kara <jack@xxxxxxxx>
SUSE Labs, CR