[RESEND PATCH 3/5] kernfs: Introduce interface to access per-fs rwsem.

From: Imran Khan
Date: Wed Aug 10 2022 - 07:11:36 EST


per-fs rwsem is used across kernfs for synchronization purposes.
Having an interface to access it not only avoids code duplication, it
can also help in changing the underlying locking mechanism without needing
to change the lock users. For example next patch modifies this interface
to make use of hashed rwsems in place of per-fs rwsem.

Signed-off-by: Imran Khan <imran.f.khan@xxxxxxxxxx>
---
fs/kernfs/dir.c | 119 ++++++++++++++++++------------------
fs/kernfs/file.c | 5 +-
fs/kernfs/inode.c | 26 ++++----
fs/kernfs/kernfs-internal.h | 78 +++++++++++++++++++++++
fs/kernfs/mount.c | 6 +-
fs/kernfs/symlink.c | 6 +-
6 files changed, 159 insertions(+), 81 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index d2a0b4acd073..73f4ebc1464e 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -33,7 +33,7 @@ static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */

static bool kernfs_active(struct kernfs_node *kn)
{
- lockdep_assert_held(&kernfs_root(kn)->kernfs_rwsem);
+ kernfs_rwsem_assert_held(kn);
return atomic_read(&kn->active) >= 0;
}

@@ -467,12 +467,20 @@ static void kernfs_drain(struct kernfs_node *kn)
__releases(&kernfs_root(kn)->kernfs_rwsem)
__acquires(&kernfs_root(kn)->kernfs_rwsem)
{
+ struct rw_semaphore *rwsem;
struct kernfs_root *root = kernfs_root(kn);

- lockdep_assert_held_write(&root->kernfs_rwsem);
+ /**
+ * kn has the same root as its ancestor, so it can be used to get
+ * per-fs rwsem.
+ */
+ rwsem = kernfs_rwsem_ptr(kn);
+
+ kernfs_rwsem_assert_held_write(kn);
+
WARN_ON_ONCE(kernfs_active(kn));

- up_write(&root->kernfs_rwsem);
+ kernfs_up_write(rwsem);

if (kernfs_lockdep(kn)) {
rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
@@ -491,7 +499,7 @@ static void kernfs_drain(struct kernfs_node *kn)

kernfs_drain_open_files(kn);

- down_write(&root->kernfs_rwsem);
+ kernfs_down_write(kn);
}

/**
@@ -726,12 +734,12 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
int kernfs_add_one(struct kernfs_node *kn)
{
struct kernfs_node *parent = kn->parent;
- struct kernfs_root *root = kernfs_root(parent);
struct kernfs_iattrs *ps_iattr;
+ struct rw_semaphore *rwsem;
bool has_ns;
int ret;

- down_write(&root->kernfs_rwsem);
+ rwsem = kernfs_down_write(parent);

ret = -EINVAL;
has_ns = kernfs_ns_enabled(parent);
@@ -762,7 +770,7 @@ int kernfs_add_one(struct kernfs_node *kn)
ps_iattr->ia_mtime = ps_iattr->ia_ctime;
}

- up_write(&root->kernfs_rwsem);
+ kernfs_up_write(rwsem);

/*
* Activate the new node unless CREATE_DEACTIVATED is requested.
@@ -776,7 +784,7 @@ int kernfs_add_one(struct kernfs_node *kn)
return 0;

out_unlock:
- up_write(&root->kernfs_rwsem);
+ kernfs_up_write(rwsem);
return ret;
}

@@ -797,7 +805,7 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
bool has_ns = kernfs_ns_enabled(parent);
unsigned int hash;

- lockdep_assert_held(&kernfs_root(parent)->kernfs_rwsem);
+ kernfs_rwsem_assert_held(parent);

if (has_ns != (bool)ns) {
WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
@@ -829,7 +837,7 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
size_t len;
char *p, *name;

- lockdep_assert_held_read(&kernfs_root(parent)->kernfs_rwsem);
+ kernfs_rwsem_assert_held_read(parent);

spin_lock_irq(&kernfs_pr_cont_lock);

@@ -867,12 +875,12 @@ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
const char *name, const void *ns)
{
struct kernfs_node *kn;
- struct kernfs_root *root = kernfs_root(parent);
+ struct rw_semaphore *rwsem;

- down_read(&root->kernfs_rwsem);
+ rwsem = kernfs_down_read(parent);
kn = kernfs_find_ns(parent, name, ns);
kernfs_get(kn);
- up_read(&root->kernfs_rwsem);
+ kernfs_up_read(rwsem);

return kn;
}
@@ -892,12 +900,12 @@ struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
const char *path, const void *ns)
{
struct kernfs_node *kn;
- struct kernfs_root *root = kernfs_root(parent);
+ struct rw_semaphore *rwsem;

- down_read(&root->kernfs_rwsem);
+ rwsem = kernfs_down_read(parent);
kn = kernfs_walk_ns(parent, path, ns);
kernfs_get(kn);
- up_read(&root->kernfs_rwsem);
+ kernfs_up_read(rwsem);

return kn;
}
@@ -1062,7 +1070,7 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
{
struct kernfs_node *kn;
- struct kernfs_root *root;
+ struct rw_semaphore *rwsem;

if (flags & LOOKUP_RCU)
return -ECHILD;
@@ -1078,13 +1086,12 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
parent = kernfs_dentry_node(dentry->d_parent);
if (parent) {
spin_unlock(&dentry->d_lock);
- root = kernfs_root(parent);
- down_read(&root->kernfs_rwsem);
+ rwsem = kernfs_down_read(parent);
if (kernfs_dir_changed(parent, dentry)) {
- up_read(&root->kernfs_rwsem);
+ kernfs_up_read(rwsem);
return 0;
}
- up_read(&root->kernfs_rwsem);
+ kernfs_up_read(rwsem);
} else
spin_unlock(&dentry->d_lock);

@@ -1095,8 +1102,7 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
}

kn = kernfs_dentry_node(dentry);
- root = kernfs_root(kn);
- down_read(&root->kernfs_rwsem);
+ rwsem = kernfs_down_read(kn);

/* The kernfs node has been deactivated */
if (!kernfs_active(kn))
@@ -1115,10 +1121,10 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
kernfs_info(dentry->d_sb)->ns != kn->ns)
goto out_bad;

- up_read(&root->kernfs_rwsem);
+ kernfs_up_read(rwsem);
return 1;
out_bad:
- up_read(&root->kernfs_rwsem);
+ kernfs_up_read(rwsem);
return 0;
}

@@ -1132,12 +1138,11 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
{
struct kernfs_node *parent = dir->i_private;
struct kernfs_node *kn;
- struct kernfs_root *root;
struct inode *inode = NULL;
const void *ns = NULL;
+ struct rw_semaphore *rwsem;

- root = kernfs_root(parent);
- down_read(&root->kernfs_rwsem);
+ rwsem = kernfs_down_read(parent);
if (kernfs_ns_enabled(parent))
ns = kernfs_info(dir->i_sb)->ns;

@@ -1148,7 +1153,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
* create a negative.
*/
if (!kernfs_active(kn)) {
- up_read(&root->kernfs_rwsem);
+ kernfs_up_read(rwsem);
return NULL;
}
inode = kernfs_get_inode(dir->i_sb, kn);
@@ -1163,7 +1168,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
*/
if (!IS_ERR(inode))
kernfs_set_rev(parent, dentry);
- up_read(&root->kernfs_rwsem);
+ kernfs_up_read(rwsem);

/* instantiate and hash (possibly negative) dentry */
return d_splice_alias(inode, dentry);
@@ -1286,7 +1291,7 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
{
struct rb_node *rbn;

- lockdep_assert_held_write(&kernfs_root(root)->kernfs_rwsem);
+ kernfs_rwsem_assert_held_write(root);

/* if first iteration, visit leftmost descendant which may be root */
if (!pos)
@@ -1321,9 +1326,9 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
void kernfs_activate(struct kernfs_node *kn)
{
struct kernfs_node *pos;
- struct kernfs_root *root = kernfs_root(kn);
+ struct rw_semaphore *rwsem;

- down_write(&root->kernfs_rwsem);
+ rwsem = kernfs_down_write(kn);

pos = NULL;
while ((pos = kernfs_next_descendant_post(pos, kn))) {
@@ -1337,7 +1342,7 @@ void kernfs_activate(struct kernfs_node *kn)
pos->flags |= KERNFS_ACTIVATED;
}

- up_write(&root->kernfs_rwsem);
+ kernfs_up_write(rwsem);
}

static void __kernfs_remove(struct kernfs_node *kn)
@@ -1348,7 +1353,7 @@ static void __kernfs_remove(struct kernfs_node *kn)
if (!kn)
return;

- lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem);
+ kernfs_rwsem_assert_held_write(kn);

/*
* This is for kernfs_remove_self() which plays with active ref
@@ -1417,16 +1422,14 @@ static void __kernfs_remove(struct kernfs_node *kn)
*/
void kernfs_remove(struct kernfs_node *kn)
{
- struct kernfs_root *root;
+ struct rw_semaphore *rwsem;

if (!kn)
return;

- root = kernfs_root(kn);
-
- down_write(&root->kernfs_rwsem);
+ rwsem = kernfs_down_write(kn);
__kernfs_remove(kn);
- up_write(&root->kernfs_rwsem);
+ kernfs_up_write(rwsem);
}

/**
@@ -1512,9 +1515,9 @@ void kernfs_unbreak_active_protection(struct kernfs_node *kn)
bool kernfs_remove_self(struct kernfs_node *kn)
{
bool ret;
- struct kernfs_root *root = kernfs_root(kn);
+ struct rw_semaphore *rwsem;

- down_write(&root->kernfs_rwsem);
+ rwsem = kernfs_down_write(kn);
kernfs_break_active_protection(kn);

/*
@@ -1542,9 +1545,9 @@ bool kernfs_remove_self(struct kernfs_node *kn)
atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
break;

- up_write(&root->kernfs_rwsem);
+ kernfs_up_write(rwsem);
schedule();
- down_write(&root->kernfs_rwsem);
+ rwsem = kernfs_down_write(kn);
}
finish_wait(waitq, &wait);
WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
@@ -1557,7 +1560,7 @@ bool kernfs_remove_self(struct kernfs_node *kn)
*/
kernfs_unbreak_active_protection(kn);

- up_write(&root->kernfs_rwsem);
+ kernfs_up_write(rwsem);
return ret;
}

@@ -1574,7 +1577,7 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
const void *ns)
{
struct kernfs_node *kn;
- struct kernfs_root *root;
+ struct rw_semaphore *rwsem;

if (!parent) {
WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
@@ -1582,14 +1585,14 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
return -ENOENT;
}

- root = kernfs_root(parent);
- down_write(&root->kernfs_rwsem);
+
+ rwsem = kernfs_down_write(parent);

kn = kernfs_find_ns(parent, name, ns);
if (kn)
__kernfs_remove(kn);

- up_write(&root->kernfs_rwsem);
+ kernfs_up_write(rwsem);

if (kn)
return 0;
@@ -1608,16 +1611,15 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
const char *new_name, const void *new_ns)
{
struct kernfs_node *old_parent;
- struct kernfs_root *root;
const char *old_name = NULL;
+ struct rw_semaphore *rwsem;
int error;

/* can't move or rename root */
if (!kn->parent)
return -EINVAL;

- root = kernfs_root(kn);
- down_write(&root->kernfs_rwsem);
+ rwsem = kernfs_down_write(kn);

error = -ENOENT;
if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
@@ -1671,7 +1673,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,

error = 0;
out:
- up_write(&root->kernfs_rwsem);
+ kernfs_up_write(rwsem);
return error;
}

@@ -1742,14 +1744,13 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
struct dentry *dentry = file->f_path.dentry;
struct kernfs_node *parent = kernfs_dentry_node(dentry);
struct kernfs_node *pos = file->private_data;
- struct kernfs_root *root;
const void *ns = NULL;
+ struct rw_semaphore *rwsem;

if (!dir_emit_dots(file, ctx))
return 0;

- root = kernfs_root(parent);
- down_read(&root->kernfs_rwsem);
+ rwsem = kernfs_down_read(parent);

if (kernfs_ns_enabled(parent))
ns = kernfs_info(dentry->d_sb)->ns;
@@ -1766,12 +1767,12 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
file->private_data = pos;
kernfs_get(pos);

- up_read(&root->kernfs_rwsem);
+ kernfs_up_read(rwsem);
if (!dir_emit(ctx, name, len, ino, type))
return 0;
- down_read(&root->kernfs_rwsem);
+ rwsem = kernfs_down_read(parent);
}
- up_read(&root->kernfs_rwsem);
+ kernfs_up_read(rwsem);
file->private_data = NULL;
ctx->pos = INT_MAX;
return 0;
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 812165d8ab4f..669619e01be2 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -911,6 +911,7 @@ static void kernfs_notify_workfn(struct work_struct *work)
struct kernfs_node *kn;
struct kernfs_super_info *info;
struct kernfs_root *root;
+ struct rw_semaphore *rwsem;
repeat:
/* pop one off the notify_list */
spin_lock_irq(&kernfs_notify_lock);
@@ -925,7 +926,7 @@ static void kernfs_notify_workfn(struct work_struct *work)

root = kernfs_root(kn);
/* kick fsnotify */
- down_write(&root->kernfs_rwsem);
+ rwsem = kernfs_down_write(kn);

down_write(&root->supers_rwsem);
list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
@@ -965,7 +966,7 @@ static void kernfs_notify_workfn(struct work_struct *work)
}
up_write(&root->supers_rwsem);

- up_write(&root->kernfs_rwsem);
+ kernfs_up_write(rwsem);
kernfs_put(kn);
goto repeat;
}
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index 3d783d80f5da..efe5ae98abf4 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -99,11 +99,11 @@ int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
{
int ret;
- struct kernfs_root *root = kernfs_root(kn);
+ struct rw_semaphore *rwsem;

- down_write(&root->kernfs_rwsem);
+ rwsem = kernfs_down_write(kn);
ret = __kernfs_setattr(kn, iattr);
- up_write(&root->kernfs_rwsem);
+ kernfs_up_write(rwsem);
return ret;
}

@@ -112,14 +112,13 @@ int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
{
struct inode *inode = d_inode(dentry);
struct kernfs_node *kn = inode->i_private;
- struct kernfs_root *root;
+ struct rw_semaphore *rwsem;
int error;

if (!kn)
return -EINVAL;

- root = kernfs_root(kn);
- down_write(&root->kernfs_rwsem);
+ rwsem = kernfs_down_write(kn);
error = setattr_prepare(&init_user_ns, dentry, iattr);
if (error)
goto out;
@@ -132,7 +131,7 @@ int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
setattr_copy(&init_user_ns, inode, iattr);

out:
- up_write(&root->kernfs_rwsem);
+ kernfs_up_write(rwsem);
return error;
}

@@ -187,14 +186,14 @@ int kernfs_iop_getattr(struct user_namespace *mnt_userns,
{
struct inode *inode = d_inode(path->dentry);
struct kernfs_node *kn = inode->i_private;
- struct kernfs_root *root = kernfs_root(kn);
+ struct rw_semaphore *rwsem;

- down_read(&root->kernfs_rwsem);
+ rwsem = kernfs_down_read(kn);
spin_lock(&inode->i_lock);
kernfs_refresh_inode(kn, inode);
generic_fillattr(&init_user_ns, inode, stat);
spin_unlock(&inode->i_lock);
- up_read(&root->kernfs_rwsem);
+ kernfs_up_read(rwsem);

return 0;
}
@@ -277,22 +276,21 @@ void kernfs_evict_inode(struct inode *inode)
int kernfs_iop_permission(struct user_namespace *mnt_userns,
struct inode *inode, int mask)
{
+ struct rw_semaphore *rwsem;
struct kernfs_node *kn;
- struct kernfs_root *root;
int ret;

if (mask & MAY_NOT_BLOCK)
return -ECHILD;

kn = inode->i_private;
- root = kernfs_root(kn);

- down_read(&root->kernfs_rwsem);
+ rwsem = kernfs_down_read(kn);
spin_lock(&inode->i_lock);
kernfs_refresh_inode(kn, inode);
ret = generic_permission(&init_user_ns, inode, mask);
spin_unlock(&inode->i_lock);
- up_read(&root->kernfs_rwsem);
+ kernfs_up_read(rwsem);

return ret;
}
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 3cd17c100d10..0babc3dc4f4a 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -169,4 +169,82 @@ extern const struct inode_operations kernfs_symlink_iops;
* kernfs locks
*/
extern struct kernfs_global_locks *kernfs_locks;
+
+static inline struct rw_semaphore *kernfs_rwsem_ptr(struct kernfs_node *kn)
+{
+ struct kernfs_root *root = kernfs_root(kn);
+
+ return &root->kernfs_rwsem;
+}
+
+static inline void kernfs_rwsem_assert_held(struct kernfs_node *kn)
+{
+ lockdep_assert_held(kernfs_rwsem_ptr(kn));
+}
+
+static inline void kernfs_rwsem_assert_held_write(struct kernfs_node *kn)
+{
+ lockdep_assert_held_write(kernfs_rwsem_ptr(kn));
+}
+
+static inline void kernfs_rwsem_assert_held_read(struct kernfs_node *kn)
+{
+ lockdep_assert_held_read(kernfs_rwsem_ptr(kn));
+}
+
+/**
+ * kernfs_down_write() - Acquire kernfs rwsem
+ *
+ * @kn: kernfs_node for which rwsem needs to be taken
+ *
+ * Return: pointer to acquired rwsem
+ */
+static inline struct rw_semaphore *kernfs_down_write(struct kernfs_node *kn)
+{
+ struct rw_semaphore *rwsem = kernfs_rwsem_ptr(kn);
+
+ down_write(rwsem);
+
+ return rwsem;
+}
+
+/**
+ * kernfs_up_write - Release kernfs rwsem
+ *
+ * @rwsem: address of rwsem to release
+ *
+ * Return: void
+ */
+static inline void kernfs_up_write(struct rw_semaphore *rwsem)
+{
+ up_write(rwsem);
+}
+
+/**
+ * kernfs_down_read() - Acquire kernfs rwsem
+ *
+ * @kn: kernfs_node for which rwsem needs to be taken
+ *
+ * Return: pointer to acquired rwsem
+ */
+static inline struct rw_semaphore *kernfs_down_read(struct kernfs_node *kn)
+{
+ struct rw_semaphore *rwsem = kernfs_rwsem_ptr(kn);
+
+ down_read(rwsem);
+
+ return rwsem;
+}
+
+/**
+ * kernfs_up_read - Release kernfs rwsem
+ *
+ * @rwsem: address of rwsem to release
+ *
+ * Return: void
+ */
+static inline void kernfs_up_read(struct rw_semaphore *rwsem)
+{
+ up_read(rwsem);
+}
#endif /* __KERNFS_INTERNAL_H */
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index d2be1c304715..3c5334b74f36 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -237,9 +237,9 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *kfc)
{
struct kernfs_super_info *info = kernfs_info(sb);
- struct kernfs_root *kf_root = kfc->root;
struct inode *inode;
struct dentry *root;
+ struct rw_semaphore *rwsem;

info->sb = sb;
/* Userspace would break if executables or devices appear on sysfs */
@@ -257,9 +257,9 @@ static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *k
sb->s_shrink.seeks = 0;

/* get root inode, initialize and unlock it */
- down_read(&kf_root->kernfs_rwsem);
+ rwsem = kernfs_down_read(info->root->kn);
inode = kernfs_get_inode(sb, info->root->kn);
- up_read(&kf_root->kernfs_rwsem);
+ kernfs_up_read(rwsem);
if (!inode) {
pr_debug("kernfs: could not get root inode\n");
return -ENOMEM;
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index 0ab13824822f..9d4103602554 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -113,12 +113,12 @@ static int kernfs_getlink(struct inode *inode, char *path)
struct kernfs_node *kn = inode->i_private;
struct kernfs_node *parent = kn->parent;
struct kernfs_node *target = kn->symlink.target_kn;
- struct kernfs_root *root = kernfs_root(parent);
+ struct rw_semaphore *rwsem;
int error;

- down_read(&root->kernfs_rwsem);
+ rwsem = kernfs_down_read(parent);
error = kernfs_get_target_path(parent, target, path);
- up_read(&root->kernfs_rwsem);
+ kernfs_up_read(rwsem);

return error;
}
--
2.30.2