[PATCH v3 42/49] fs: super: dynamically allocate the s_shrink

From: Qi Zheng
Date: Thu Jul 27 2023 - 04:31:12 EST


In preparation for implementing lockless slab shrink, use the new shrinker
APIs (shrinker_alloc(), shrinker_register() and shrinker_free()) to
dynamically allocate s_shrink, so that it can be freed asynchronously
using kfree_rcu(). Releasing the struct super_block then no longer needs
to wait for RCU read-side critical sections to complete.

Signed-off-by: Qi Zheng <zhengqi.arch@xxxxxxxxxxxxx>
Reviewed-by: Muchun Song <songmuchun@xxxxxxxxxxxxx>
---
fs/btrfs/super.c | 2 +-
fs/kernfs/mount.c | 2 +-
fs/proc/root.c | 2 +-
fs/super.c | 36 ++++++++++++++++++++----------------
include/linux/fs.h | 2 +-
5 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index cffdd6f7f8e8..4c9c878b0da4 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1519,7 +1519,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
error = -EBUSY;
} else {
snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
- shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", fs_type->name,
+ shrinker_debugfs_rename(s->s_shrink, "sb-%s:%s", fs_type->name,
s->s_id);
btrfs_sb(s)->bdev_holder = fs_type;
error = btrfs_fill_super(s, fs_devices, data);
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index d49606accb07..2657ff1181f1 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -256,7 +256,7 @@ static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *k
sb->s_time_gran = 1;

/* sysfs dentries and inodes don't require IO to create */
- sb->s_shrink.seeks = 0;
+ sb->s_shrink->seeks = 0;

/* get root inode, initialize and unlock it */
down_read(&kf_root->kernfs_rwsem);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index a86e65a608da..22b78b28b477 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -188,7 +188,7 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;

/* procfs dentries and inodes don't require IO to create */
- s->s_shrink.seeks = 0;
+ s->s_shrink->seeks = 0;

pde_get(&proc_root);
root_inode = proc_get_inode(s, &proc_root);
diff --git a/fs/super.c b/fs/super.c
index da68584815e4..68b3877af941 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -67,7 +67,7 @@ static unsigned long super_cache_scan(struct shrinker *shrink,
long dentries;
long inodes;

- sb = container_of(shrink, struct super_block, s_shrink);
+ sb = shrink->private_data;

/*
* Deadlock avoidance. We may hold various FS locks, and we don't want
@@ -120,7 +120,7 @@ static unsigned long super_cache_count(struct shrinker *shrink,
struct super_block *sb;
long total_objects = 0;

- sb = container_of(shrink, struct super_block, s_shrink);
+ sb = shrink->private_data;

/*
* We don't call trylock_super() here as it is a scalability bottleneck,
@@ -182,7 +182,7 @@ static void destroy_unused_super(struct super_block *s)
security_sb_free(s);
put_user_ns(s->s_user_ns);
kfree(s->s_subtype);
- free_prealloced_shrinker(&s->s_shrink);
+ shrinker_free(s->s_shrink);
/* no delays needed */
destroy_super_work(&s->destroy_work);
}
@@ -259,16 +259,20 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
s->s_time_min = TIME64_MIN;
s->s_time_max = TIME64_MAX;

- s->s_shrink.seeks = DEFAULT_SEEKS;
- s->s_shrink.scan_objects = super_cache_scan;
- s->s_shrink.count_objects = super_cache_count;
- s->s_shrink.batch = 1024;
- s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE;
- if (prealloc_shrinker(&s->s_shrink, "sb-%s", type->name))
+ s->s_shrink = shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE,
+ "sb-%s", type->name);
+ if (!s->s_shrink)
goto fail;
- if (list_lru_init_memcg(&s->s_dentry_lru, &s->s_shrink))
+
+ s->s_shrink->seeks = DEFAULT_SEEKS;
+ s->s_shrink->scan_objects = super_cache_scan;
+ s->s_shrink->count_objects = super_cache_count;
+ s->s_shrink->batch = 1024;
+ s->s_shrink->private_data = s;
+
+ if (list_lru_init_memcg(&s->s_dentry_lru, s->s_shrink))
goto fail;
- if (list_lru_init_memcg(&s->s_inode_lru, &s->s_shrink))
+ if (list_lru_init_memcg(&s->s_inode_lru, s->s_shrink))
goto fail;
return s;

@@ -326,7 +330,7 @@ void deactivate_locked_super(struct super_block *s)
{
struct file_system_type *fs = s->s_type;
if (atomic_dec_and_test(&s->s_active)) {
- unregister_shrinker(&s->s_shrink);
+ shrinker_free(s->s_shrink);
fs->kill_sb(s);

/*
@@ -599,7 +603,7 @@ struct super_block *sget_fc(struct fs_context *fc,
hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
spin_unlock(&sb_lock);
get_filesystem(s->s_type);
- register_shrinker_prepared(&s->s_shrink);
+ shrinker_register(s->s_shrink);
return s;

share_extant_sb:
@@ -678,7 +682,7 @@ struct super_block *sget(struct file_system_type *type,
hlist_add_head(&s->s_instances, &type->fs_supers);
spin_unlock(&sb_lock);
get_filesystem(type);
- register_shrinker_prepared(&s->s_shrink);
+ shrinker_register(s->s_shrink);
return s;
}
EXPORT_SYMBOL(sget);
@@ -1312,7 +1316,7 @@ int get_tree_bdev(struct fs_context *fc,
down_write(&s->s_umount);
} else {
snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
- shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s",
+ shrinker_debugfs_rename(s->s_shrink, "sb-%s:%s",
fc->fs_type->name, s->s_id);
sb_set_blocksize(s, block_size(bdev));
error = fill_super(s, fc);
@@ -1385,7 +1389,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
down_write(&s->s_umount);
} else {
snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
- shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s",
+ shrinker_debugfs_rename(s->s_shrink, "sb-%s:%s",
fs_type->name, s->s_id);
sb_set_blocksize(s, block_size(bdev));
error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 891cf662b26f..500238213fd9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1232,7 +1232,7 @@ struct super_block {

const struct dentry_operations *s_d_op; /* default d_op for dentries */

- struct shrinker s_shrink; /* per-sb shrinker handle */
+ struct shrinker *s_shrink; /* per-sb shrinker handle */

/* Number of inodes with nlink == 0 but still referenced */
atomic_long_t s_remove_count;
--
2.30.2