[PATCH v4 17/17] vfs: have faccessat retry once on an ESTALE error

From: Jeff Layton
Date: Thu Jul 26 2012 - 07:56:05 EST


Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
---
fs/open.c | 70 ++++++++++++++++++++++++++++++++++++---------------------------
1 file changed, 40 insertions(+), 30 deletions(-)

diff --git a/fs/open.c b/fs/open.c
index a7b94ac..8dd37aa 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -313,6 +313,9 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
struct path path;
struct inode *inode;
int res;
+ unsigned int lookup_flags = LOOKUP_FOLLOW;
+ unsigned int try = 0;
+ char *name;

if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
return -EINVAL;
@@ -334,44 +337,51 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
override_cred->cap_permitted;
}

+ name = getname_flags(filename, lookup_flags, NULL);
+ if (IS_ERR(name))
+ return PTR_ERR(name);
+
old_cred = override_creds(override_cred);

- res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
- if (res)
- goto out;
+ do {
+ res = kern_path_at(dfd, name, lookup_flags, &path);
+ if (res)
+ break;

- inode = path.dentry->d_inode;
+ inode = path.dentry->d_inode;

- if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
+ if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
+ /*
+ * MAY_EXEC on regular files is denied if the fs is
+ * mounted with the "noexec" flag.
+ */
+ res = -EACCES;
+ if (path.mnt->mnt_flags & MNT_NOEXEC)
+ goto out_path_release;
+ }
+
+ res = inode_permission(inode, mode | MAY_ACCESS);
+ /* SuS v2 requires we report a read only fs too */
+ if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
+ goto out_path_release;
/*
- * MAY_EXEC on regular files is denied if the fs is mounted
- * with the "noexec" flag.
+ * This is a rare case where using __mnt_is_readonly()
+ * is OK without a mnt_want/drop_write() pair. Since
+ * no actual write to the fs is performed here, we do
+ * not need to telegraph that to anyone.
+ *
+ * By doing this, we accept that this access is
+ * inherently racy and know that the fs may change
+ * state before we even see this result.
*/
- res = -EACCES;
- if (path.mnt->mnt_flags & MNT_NOEXEC)
- goto out_path_release;
- }
-
- res = inode_permission(inode, mode | MAY_ACCESS);
- /* SuS v2 requires we report a read only fs too */
- if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
- goto out_path_release;
- /*
- * This is a rare case where using __mnt_is_readonly()
- * is OK without a mnt_want/drop_write() pair. Since
- * no actual write to the fs is performed here, we do
- * not need to telegraph to that to anyone.
- *
- * By doing this, we accept that this access is
- * inherently racy and know that the fs may change
- * state before we even see this result.
- */
- if (__mnt_is_readonly(path.mnt))
- res = -EROFS;
+ if (__mnt_is_readonly(path.mnt))
+ res = -EROFS;

out_path_release:
- path_put(&path);
-out:
+ path_put(&path);
+ lookup_flags |= LOOKUP_REVAL;
+ } while (retry_estale(res, try++));
+ putname(name);
revert_creds(old_cred);
put_cred(override_cred);
return res;
--
1.7.11.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/