Re: [External] [PATCH v4 3/3] fs: cache the string generated by reading /proc/filesystems

From: yunhui cui

Date: Tue Jun 09 2026 - 07:31:12 EST


Hi Mateusz,

On Sat, May 30, 2026 at 1:27 AM Mateusz Guzik <mjguzik@xxxxxxxxx> wrote:
>
> It is being read surprisingly often (e.g., by mkdir, ls and even sed!).
>
> This is lock-protected pointer chasing over a linked list to pay for
> sprintf for every fs (32 on my boxen).
>
> Instead cache the result.
>
> While here make the file as permanent to avoid spurious ref trips in
> procfs.
>
> Signed-off-by: Mateusz Guzik <mjguzik@xxxxxxxxx>
> ---
> fs/filesystems.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 151 insertions(+), 2 deletions(-)
>
> diff --git a/fs/filesystems.c b/fs/filesystems.c
> index 7976366d4197..673a03b5f32b 100644
> --- a/fs/filesystems.c
> +++ b/fs/filesystems.c
> @@ -31,6 +31,36 @@
> static HLIST_HEAD(file_systems);
> static DEFINE_SPINLOCK(file_systems_lock);
>
> +#ifdef CONFIG_PROC_FS
> +/*
> + * Cache a stringified version of the filesystem list.
> + *
> + * The fs list gets queried a lot by userspace because of libselinux, including
> + * rather surprising programs (would you guess *sed* is on the list?). In order
> + * to reduce the overhead we cache the resulting string, which normally hangs
> + * around below 512 bytes in size.
> + *
> + * As the list almost never changes, its creation is not particularly optimized
> + * to keep things simple.
> + *
> + * We sort it out on read in order to not introduce a failure point for fs
> + * registration (in principle we may be unable to alloc memory for the list).
> + */
> +struct file_systems_string {
> + struct rcu_head rcu;
> + unsigned long gen;
> + size_t len;
> + char string[];
> +};
> +
> +static unsigned long file_systems_gen;
> +static struct file_systems_string __read_mostly __rcu *file_systems_string;
> +
> +static void invalidate_filesystems_string(void);
> +#else
> +static inline void invalidate_filesystems_string(void) { }
> +#endif
> +
> /* WARNING: This can be used only if we _already_ own a reference */
> struct file_system_type *get_filesystem(struct file_system_type *fs)
> {
> @@ -80,6 +110,7 @@ int register_filesystem(struct file_system_type *fs)
> if (find_filesystem(fs->name, strlen(fs->name)))
> return -EBUSY;
> hlist_add_tail_rcu(&fs->list, &file_systems);
> + invalidate_filesystems_string();
> return 0;
> }
> EXPORT_SYMBOL(register_filesystem);
> @@ -101,6 +132,7 @@ int unregister_filesystem(struct file_system_type *fs)
> if (hlist_unhashed(&fs->list))
> return -EINVAL;
> hlist_del_init_rcu(&fs->list);
> + invalidate_filesystems_string();
> }
> synchronize_rcu();
> return 0;
> @@ -209,7 +241,100 @@ int __init list_bdev_fs_names(char *buf, size_t size)
> }
>
> #ifdef CONFIG_PROC_FS
> -static int filesystems_proc_show(struct seq_file *m, void *v)
> +static void invalidate_filesystems_string(void)
> +{
> + struct file_systems_string *old;
> +
> + lockdep_assert_held_write(&file_systems_lock);
> + file_systems_gen++;
> + old = rcu_replace_pointer(file_systems_string, NULL,
> + lockdep_is_held(&file_systems_lock));
> + if (old)
> + kfree_rcu(old, rcu);
> +}
> +
> +static __cold noinline int regen_filesystems_string(void)
> +{
> + struct file_system_type *p;
> + struct file_systems_string *old, *new;
> + size_t newlen, usedlen;
> + unsigned long gen;
> +
> +retry:
> + newlen = 0;
> +
> + /* pre-calc space for each fs */
> + spin_lock(&file_systems_lock);
> + gen = file_systems_gen;
> + hlist_for_each_entry_rcu(p, &file_systems, list) {
> + if (!(p->fs_flags & FS_REQUIRES_DEV))
> + newlen += strlen("nodev");
> + newlen += strlen("\t") + strlen(p->name) + strlen("\n");
> + }
> + spin_unlock(&file_systems_lock);
> +
> + new = kmalloc(offsetof(struct file_systems_string, string) + newlen + 1,
> + GFP_KERNEL);
> + if (!new)
> + return -ENOMEM;
> +
> + new->gen = gen;
> + new->len = newlen;
> + new->string[newlen] = '\0';
> +
> + spin_lock(&file_systems_lock);
> + old = file_systems_string;
> +
> + /*
> + * Did someone beat us to it?
> + */
> + if (old && old->gen == file_systems_gen) {
> + spin_unlock(&file_systems_lock);


It looks like linux-next still carries the older version of this patch and
misses the unlock here:

if (old && old->gen == file_systems_gen) {
kfree(new);
return 0;
}

The May 29 version has:
spin_unlock(&file_systems_lock);

before kfree(new), but that seems to be lost in linux-next.
Could you please check?

> + kfree(new);
> + return 0;
> + }
> +
> + /*
> + * Did the list change in the meantime?
> + */
> + if (gen != file_systems_gen) {
> + spin_unlock(&file_systems_lock);
> + kfree(new);
> + goto retry;
> + }
> +
> + /*
> + * Populate the string.
> + *
> + * We know we have just enough space because we calculated the right
> + * size the previous time we had the lock and confirmed the list has
> + * not changed after reacquiring it.
> + */
> + usedlen = 0;
> + hlist_for_each_entry_rcu(p, &file_systems, list) {
> + usedlen += sprintf(&new->string[usedlen], "%s\t%s\n",
> + (p->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
> + p->name);
> + }
> +
> + if (WARN_ON_ONCE(new->len != strlen(new->string))) {
> + /*
> + * Should never happen of course, keep this in case someone changes string
> + * generation above and messes it up.
> + */
> + spin_unlock(&file_systems_lock);
> + kfree(new);
> + return -EINVAL;
> + }
> +
> + rcu_assign_pointer(file_systems_string, new);
> + spin_unlock(&file_systems_lock);
> + if (old)
> + kfree_rcu(old, rcu);
> + return 0;
> +}
> +
> +static __cold noinline int filesystems_proc_show_fallback(struct seq_file *m, void *v)
> {
> struct file_system_type *p;
>
> @@ -222,9 +347,33 @@ static int filesystems_proc_show(struct seq_file *m, void *v)
> return 0;
> }
>
> +static int filesystems_proc_show(struct seq_file *m, void *v)
> +{
> + struct file_systems_string *fss;
> +
> + for (;;) {
> + scoped_guard(rcu) {
> + fss = rcu_dereference(file_systems_string);
> + if (likely(fss)) {
> + seq_write(m, fss->string, fss->len);
> + return 0;
> + }
> + }
> +
> + int err = regen_filesystems_string();
> + if (unlikely(err))
> + return filesystems_proc_show_fallback(m, v);
> + }
> +}
> +
> static int __init proc_filesystems_init(void)
> {
> - proc_create_single("filesystems", 0, NULL, filesystems_proc_show);
> + struct proc_dir_entry *pde;
> +
> + pde = proc_create_single("filesystems", 0, NULL, filesystems_proc_show);
> + if (!pde)
> + return -ENOMEM;
> + proc_make_permanent(pde);
> return 0;
> }
> module_init(proc_filesystems_init);
> --
> 2.48.1
>
>

Thanks,
Yunhui