[PATCH v3] vfs: avoid spurious dentry ref/unref cycle on open
From: Mateusz Guzik
Date: Tue Mar 04 2025 - 12:26:53 EST
Opening a file grabs a reference on the terminal dentry in
__legitimize_path(), then another one in do_dentry_open() and finally
drops the initial reference in terminate_walk().
That's 2 modifications which don't need to be there -- do_dentry_open()
can consume the already held reference instead.
When benchmarking on a 20-core vm using will-it-scale's open3_processes
("Same file open/close"), the results are (ops/s):
before: 3087010
after: 4173977 (+35%)
Signed-off-by: Mateusz Guzik <mjguzik@xxxxxxxxx>
---
Al, while I originally brought this up you objected to my patch, which I
then rewrote to the form below. You wrote your own more invovled
variant, but the effort to get this in seem stalled:
https://lore.kernel.org/linux-fsdevel/20240822003359.GO504335@ZenIV/
(I think there was a bigger git branch somewhere, but now I can't find
it.)
The lockref thing is increasingly getting in the way of some other stuff
and is just overhead which is not need to be there.
If you don't have time to finish your patches, how about the variant
below? This is rebased v2 I sent a while back and which got no feedback.
I am indifferent as to what lands, as long as the extra ref cycle is
eliminated.
fs/internal.h | 3 ++-
fs/namei.c | 15 ++++++++++++---
fs/open.c | 44 +++++++++++++++++++++++++++++++++++++++++---
3 files changed, 55 insertions(+), 7 deletions(-)
diff --git a/fs/internal.h b/fs/internal.h
index 3d05a989e4fa..1d0eb25a7598 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -193,7 +193,8 @@ int chmod_common(const struct path *path, umode_t mode);
int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
int flag);
int chown_common(const struct path *path, uid_t user, gid_t group);
-extern int vfs_open(const struct path *, struct file *);
+int vfs_open_consume(struct path *, struct file *);
+int vfs_open(const struct path *, struct file *);
/*
* inode.c
diff --git a/fs/namei.c b/fs/namei.c
index d00443e38d3a..8ce8e6038346 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3796,6 +3796,7 @@ static const char *open_last_lookups(struct nameidata *nd,
static int do_open(struct nameidata *nd,
struct file *file, const struct open_flags *op)
{
+ struct vfsmount *mnt;
struct mnt_idmap *idmap;
int open_flag = op->open_flag;
bool do_truncate;
@@ -3833,11 +3834,17 @@ static int do_open(struct nameidata *nd,
error = mnt_want_write(nd->path.mnt);
if (error)
return error;
+ /*
+ * We grab an additional reference here because after the call to
+ * vfs_open_consume() we no longer own the reference in nd->path.mnt
+ * while we need to undo write access below.
+ */
+ mnt = mntget(nd->path.mnt);
do_truncate = true;
}
error = may_open(idmap, &nd->path, acc_mode, open_flag);
if (!error && !(file->f_mode & FMODE_OPENED))
- error = vfs_open(&nd->path, file);
+ error = vfs_open_consume(&nd->path, file);
if (!error)
error = security_file_post_open(file, op->acc_mode);
if (!error && do_truncate)
@@ -3846,8 +3853,10 @@ static int do_open(struct nameidata *nd,
WARN_ON(1);
error = -EINVAL;
}
- if (do_truncate)
- mnt_drop_write(nd->path.mnt);
+ if (do_truncate) {
+ mnt_drop_write(mnt);
+ mntput(mnt);
+ }
return error;
}
diff --git a/fs/open.c b/fs/open.c
index f2fcfaeb2232..fc1c6118eb30 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -891,6 +891,15 @@ static inline int file_get_write_access(struct file *f)
return error;
}
+/*
+ * Populate struct file
+ *
+ * NOTE: it assumes f_path is populated and consumes the caller's reference.
+ *
+ * The caller must not path_put on it regardless of the error code -- the
+ * routine will either clean it up on its own or rely on fput, which must
+ * be issued anyway.
+ */
static int do_dentry_open(struct file *f,
int (*open)(struct inode *, struct file *))
{
@@ -898,7 +907,6 @@ static int do_dentry_open(struct file *f,
struct inode *inode = f->f_path.dentry->d_inode;
int error;
- path_get(&f->f_path);
f->f_inode = inode;
f->f_mapping = inode->i_mapping;
f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
@@ -1042,6 +1050,7 @@ int finish_open(struct file *file, struct dentry *dentry,
BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
file->f_path.dentry = dentry;
+ path_get(&file->f_path);
return do_dentry_open(file, open);
}
EXPORT_SYMBOL(finish_open);
@@ -1074,15 +1083,22 @@ char *file_path(struct file *filp, char *buf, int buflen)
EXPORT_SYMBOL(file_path);
/**
- * vfs_open - open the file at the given path
+ * vfs_open_consume - open the file at the given path and consume the reference
* @path: path to open
* @file: newly allocated file with f_flag initialized
*/
-int vfs_open(const struct path *path, struct file *file)
+int vfs_open_consume(struct path *path, struct file *file)
{
int ret;
file->f_path = *path;
+ path->mnt = NULL;
+ path->dentry = NULL;
+
+ /*
+ * do_dentry_open() consumes the reference regardless of its
+ * return value
+ */
ret = do_dentry_open(file, NULL);
if (!ret) {
/*
@@ -1095,6 +1111,27 @@ int vfs_open(const struct path *path, struct file *file)
return ret;
}
+/**
+ * vfs_open - open the file at the given path
+ * @path: path to open
+ * @file: newly allocated file with f_flag initialized
+ *
+ * See commentary in vfs_open_consume. The difference here is that this routine
+ * grabs its own reference and does not clean up the passed path.
+ */
+int vfs_open(const struct path *path, struct file *file)
+{
+ int ret;
+
+ file->f_path = *path;
+ path_get(&file->f_path);
+ ret = do_dentry_open(file, NULL);
+ if (!ret) {
+ fsnotify_open(file);
+ }
+ return ret;
+}
+
struct file *dentry_open(const struct path *path, int flags,
const struct cred *cred)
{
@@ -1197,6 +1234,7 @@ struct file *kernel_file_open(const struct path *path, int flags,
return f;
f->f_path = *path;
+ path_get(&f->f_path);
error = do_dentry_open(f, NULL);
if (error) {
fput(f);
--
2.43.0