[PATCH v2] fs/9p: fix inode nlink accounting

From: Eric Van Hensbergen
Date: Mon Jan 08 2024 - 10:16:26 EST


I was running some regressions and noticed a (race-y) kernel warning
that happens when nlink becomes less than zero. Looking through the
code, it looks like we aren't good about holding the inode lock when
manipulating nlink, and some code that was added several years ago to
protect against bugs in underlying file systems' nlink handling didn't
look quite right either. I took a look at what NFS was doing and tried
to follow similar approaches in the 9p code.

Signed-off-by: Eric Van Hensbergen <ericvh@xxxxxxxxxx>
---
Changes in v2:
- add wrappers
- fix checkpatch errors
- Link to v1: https://lore.kernel.org/r/20240107-fix-nlink-handling-v1-1-8b1f65ebc9b2@xxxxxxxxxx
---
fs/9p/v9fs_vfs.h | 1 +
fs/9p/vfs_inode.c | 49 +++++++++++++++++++++++++++++++++++--------------
fs/9p/vfs_inode_dotl.c | 2 +-
3 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 731e3d14b67d..6019909445d4 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -37,6 +37,7 @@ extern const struct file_operations v9fs_dir_operations_dotl;
extern const struct dentry_operations v9fs_dentry_operations;
extern const struct dentry_operations v9fs_cached_dentry_operations;
extern struct kmem_cache *v9fs_inode_cache;
+extern void v9fs_inc_nlink(struct inode *inode);

struct inode *v9fs_alloc_inode(struct super_block *sb);
void v9fs_free_inode(struct inode *inode);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index b845ee18a80b..fbd87f43aea2 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -506,21 +506,43 @@ static int v9fs_at_to_dotl_flags(int flags)
}

/**
- * v9fs_dec_count - helper functon to drop i_nlink.
+ * v9fs_dec_nlink - helper function to drop i_nlink.
*
- * If a directory had nlink <= 2 (including . and ..), then we should not drop
- * the link count, which indicates the underlying exported fs doesn't maintain
- * nlink accurately. e.g.
+ * Put a guard around this so we only drop nlink
+ * if it would be valid. This prevents bugs in underlying
+ * filesystem implementations from triggering kernel WARNs. We'll
+ * still print 9p debug messages if the underlying filesystem is wrong.
+ *
+ * known underlying filesystems which might exhibit this issue:
* - overlayfs sets nlink to 1 for merged dir
* - ext4 (with dir_nlink feature enabled) sets nlink to 1 if a dir has more
* than EXT4_LINK_MAX (65000) links.
*
* @inode: inode whose nlink is being dropped
*/
-static void v9fs_dec_count(struct inode *inode)
+static void v9fs_dec_nlink(struct inode *inode)
{
- if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
+ spin_lock(&inode->i_lock);
+ if (inode->i_nlink > 0)
drop_nlink(inode);
+ else
+ p9_debug(P9_DEBUG_ERROR, "WARNING: nlink is already 0 inode %p\n",
+ inode);
+ spin_unlock(&inode->i_lock);
+}
+
+static void v9fs_clear_nlink(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ clear_nlink(inode);
+ spin_unlock(&inode->i_lock);
+}
+
+void v9fs_inc_nlink(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ inc_nlink(inode);
+ spin_unlock(&inode->i_lock);
}

/**
@@ -566,10 +588,9 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
* link count
*/
if (flags & AT_REMOVEDIR) {
- clear_nlink(inode);
- v9fs_dec_count(dir);
+ v9fs_clear_nlink(inode);
} else
- v9fs_dec_count(inode);
+ v9fs_dec_nlink(inode);

v9fs_invalidate_inode_attr(inode);
v9fs_invalidate_inode_attr(dir);
@@ -713,7 +734,7 @@ static int v9fs_vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
err = PTR_ERR(fid);
fid = NULL;
} else {
- inc_nlink(dir);
+ v9fs_inc_nlink(dir);
v9fs_invalidate_inode_attr(dir);
}

@@ -963,14 +984,14 @@ v9fs_vfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (!retval) {
if (new_inode) {
if (S_ISDIR(new_inode->i_mode))
- clear_nlink(new_inode);
+ v9fs_clear_nlink(new_inode);
else
- v9fs_dec_count(new_inode);
+ v9fs_dec_nlink(new_inode);
}
if (S_ISDIR(old_inode->i_mode)) {
if (!new_inode)
- inc_nlink(new_dir);
- v9fs_dec_count(old_dir);
+ v9fs_inc_nlink(new_dir);
+ v9fs_dec_nlink(old_dir);
}
v9fs_invalidate_inode_attr(old_inode);
v9fs_invalidate_inode_attr(old_dir);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index c7319af2f471..fe0c41d042a1 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -427,7 +427,7 @@ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap,
v9fs_set_create_acl(inode, fid, dacl, pacl);
d_instantiate(dentry, inode);
}
- inc_nlink(dir);
+ v9fs_inc_nlink(dir);
v9fs_invalidate_inode_attr(dir);
error:
p9_fid_put(fid);

---
base-commit: 0dd3ee31125508cd67f7e7172247f05b7fd1753a
change-id: 20240107-fix-nlink-handling-3c0646f5d927

Best regards,
--
Eric Van Hensbergen <ericvh@xxxxxxxxxx>