[PATCH 10/11] VFS: take a shared lock for create/remove directory operations.

From: NeilBrown
Date: Thu Dec 19 2024 - 22:12:16 EST


With this patch the VFS takes a shared lock on the directory (i_rwsem)
when performing create or remove operations. Rename is as yet
unchanged.

Not all callers are changed, only the common ones in fs/namei.c

While the directory only has a shared lock, the dentry being updated has
an exclusive lock using a bit in ->d_flags. Waiters use
wait_var_event_spinlock(), and a wakeup is only sent in the unusual case
that some other task is actually waiting - indicated by another d_flags
bit.

Once the exclusive "update" lock is obtained on the dentry we must make
sure it wasn't unlinked or renamed while we slept. If it was we repeat
the lookup.

The filesystem operations that expect an exclusive lock are still
provided with exclusion, but this is handled by inode_dir_lock().

Signed-off-by: NeilBrown <neilb@xxxxxxx>
---
fs/dcache.c | 9 ++++++-
fs/namei.c | 53 ++++++++++++++++++++++++++++++++++++++----
include/linux/dcache.h | 4 ++++
3 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index ebe849474bd8..3fb3af83add5 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1636,9 +1636,10 @@ EXPORT_SYMBOL(d_invalidate);
* available. On a success the dentry is returned. The name passed in is
* copied and the copy passed in may be reused after this call.
*/
-
+
static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
{
+ static struct lock_class_key __key;
struct dentry *dentry;
char *dname;
int err;
@@ -1697,6 +1698,8 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
INIT_HLIST_NODE(&dentry->d_sib);
d_set_d_op(dentry, dentry->d_sb->s_d_op);

+ lockdep_init_map(&dentry->d_update_map, "DCACHE_PAR_UPDATE", &__key, 0);
+
if (dentry->d_op && dentry->d_op->d_init) {
err = dentry->d_op->d_init(dentry);
if (err) {
@@ -3030,6 +3033,10 @@ static int __d_unalias(struct dentry *dentry, struct dentry *alias)
* In that case, we know that the inode will be a regular file, and also this
* will only occur during atomic_open. So we need to check for the dentry
* being already hashed only in the final case.
+ *
+ * @dentry must have a valid ->d_parent and that directory must be
+ * locked (i_rwsem) either exclusively or shared. If shared then
+ * @dentry must have %DCACHE_PAR_LOOKUP or %DCACHE_PAR_UPDATE set.
*/
struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
{
diff --git a/fs/namei.c b/fs/namei.c
index 68750b15dbf4..fb40ae64dc8d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1703,6 +1703,15 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
}
EXPORT_SYMBOL(lookup_one_qstr_excl);

+static bool check_dentry_locked(struct dentry *de)
+{
+ if (de->d_flags & DCACHE_PAR_UPDATE) {
+ de->d_flags |= DCACHE_PAR_WAITER;
+ return true;
+ }
+ return false;
+}
+
static struct dentry *lookup_and_lock(const struct qstr *last,
struct dentry *base,
unsigned int lookup_flags)
@@ -1710,10 +1719,36 @@ static struct dentry *lookup_and_lock(const struct qstr *last,
struct dentry *dentry;
int err;

- inode_lock_nested(base->d_inode, I_MUTEX_PARENT);
+ inode_lock_shared_nested(base->d_inode, I_MUTEX_PARENT);
+retry:
dentry = lookup_one_qstr_excl(last, base, lookup_flags);
if (IS_ERR(dentry))
goto out;
+ lock_acquire_exclusive(&dentry->d_update_map, 0, 0, NULL, _THIS_IP_);
+ spin_lock(&dentry->d_lock);
+ wait_var_event_spinlock(&dentry->d_flags,
+ !check_dentry_locked(dentry),
+ &dentry->d_lock);
+ if (d_is_positive(dentry)) {
+ rcu_read_lock(); /* needed for d_same_name() */
+ if (
+ /* Was unlinked while we waited ?*/
+ d_unhashed(dentry) ||
+ /* Or was dentry renamed ?? */
+ dentry->d_parent != base ||
+ dentry->d_name.hash != last->hash ||
+ !d_same_name(dentry, base, last)
+ ) {
+ rcu_read_unlock();
+ spin_unlock(&dentry->d_lock);
+ lock_map_release(&dentry->d_update_map);
+ dput(dentry);
+ goto retry;
+ }
+ rcu_read_unlock();
+ }
+ dentry->d_flags |= DCACHE_PAR_UPDATE;
+ spin_unlock(&dentry->d_lock);
err = -EEXIST;
if ((lookup_flags & LOOKUP_EXCL) && d_is_positive(dentry))
goto err;
@@ -1723,10 +1758,11 @@ static struct dentry *lookup_and_lock(const struct qstr *last,
return dentry;

err:
- dput(dentry);
- dentry = ERR_PTR(err);
+ done_lookup_and_lock(base, dentry);
+ return ERR_PTR(err);
+
out:
- inode_unlock(base->d_inode);
+ inode_unlock_shared(base->d_inode);
return dentry;
}

@@ -2795,8 +2831,15 @@ EXPORT_SYMBOL(user_path_locked_at);

void done_lookup_and_lock(struct dentry *parent, struct dentry *child)
{
+ lock_map_release(&child->d_update_map);
+ spin_lock(&child->d_lock);
+ if (child->d_flags & DCACHE_PAR_WAITER)
+ wake_up_var_locked(&child->d_flags, &child->d_lock);
+ child->d_flags &= ~(DCACHE_PAR_UPDATE | DCACHE_PAR_WAITER);
+ spin_unlock(&child->d_lock);
+
+ inode_unlock_shared(parent->d_inode);
dput(child);
- inode_unlock(d_inode(parent));
}
EXPORT_SYMBOL(done_lookup_and_lock);

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index fc7f571bd5bb..6d404c296ac0 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -102,6 +102,8 @@ struct dentry {
* possible!
*/

+ /* lockdep tracking of DCACHE_PAR_UPDATE locks */
+ struct lockdep_map d_update_map;
union {
struct list_head d_lru; /* LRU list */
wait_queue_head_t *d_wait; /* in-lookup ones only */
@@ -220,6 +222,8 @@ struct dentry_operations {
#define DCACHE_DENTRY_CURSOR BIT(25)
#define DCACHE_NORCU BIT(26) /* No RCU delay for freeing */

+#define DCACHE_PAR_UPDATE BIT(27) /* Locked for update */
+#define DCACHE_PAR_WAITER BIT(28) /* someone is waiting for PAR_UPDATE */
extern seqlock_t rename_lock;

/*
--
2.47.0