[PATCH RFC v2] fhandle: expose u64 mount id to name_to_handle_at(2)

From: Aleksa Sarai
Date: Thu May 23 2024 - 16:58:16 EST


Now that we provide a unique 64-bit mount ID interface in statx, we can
now provide a race-free way for name_to_handle_at(2) to provide a file
handle and corresponding mount without needing to worry about racing
with /proc/mountinfo parsing.

While this is not necessary if you are using AT_EMPTY_PATH and don't
care about an extra statx(2) call, users that pass full paths into
name_to_handle_at(2) need to know which mount the file handle comes from
(to make sure they don't try to open_by_handle_at a file handle from a
different filesystem) and switching to AT_EMPTY_PATH would require
allocating a file for every name_to_handle_at(2) call, turning

err = name_to_handle_at(-EBADF, "/foo/bar/baz", &handle, &mntid,
AT_HANDLE_MNT_ID_UNIQUE);

into

int fd = openat(-EBADF, "/foo/bar/baz", O_PATH | O_CLOEXEC);
err1 = name_to_handle_at(fd, "", &handle, &unused_mntid, AT_EMPTY_PATH);
err2 = statx(fd, "", AT_EMPTY_PATH, STATX_MNT_ID_UNIQUE, &statxbuf);
mntid = statxbuf.stx_mnt_id;
close(fd);

Unlike AT_HANDLE_FID, as per Amir's suggestion, AT_HANDLE_MNT_ID_UNIQUE
uses a new AT_* bit from the historically-unused 0xFF range (which we
now define as being the "per-syscall" range for AT_* bits).

Signed-off-by: Aleksa Sarai <cyphar@xxxxxxxxxx>
---
Changes in v2:
- Fixed a few minor compiler warnings and a buggy copy_to_user() check.
- Rename AT_HANDLE_UNIQUE_MOUNT_ID -> AT_HANDLE_MNT_ID_UNIQUE to match statx.
- Switched to using an AT_* bit from 0xFF and defining that range as
being "per-syscall" for future usage.
- Sync tools/ copy of <linux/fcntl.h> to include changes.
- v1: <https://lore.kernel.org/r/20240520-exportfs-u64-mount-id-v1-1-f55fd9215b8e@xxxxxxxxxx>
---
fs/fhandle.c | 29 ++++++++++++++++++++++-------
include/linux/syscalls.h | 2 +-
include/uapi/linux/fcntl.h | 28 +++++++++++++++++++++-------
tools/include/uapi/linux/fcntl.h | 28 +++++++++++++++++++++-------
4 files changed, 65 insertions(+), 22 deletions(-)

diff --git a/fs/fhandle.c b/fs/fhandle.c
index 8a7f86c2139a..c6e90161b900 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -16,7 +16,8 @@

static long do_sys_name_to_handle(const struct path *path,
struct file_handle __user *ufh,
- int __user *mnt_id, int fh_flags)
+ void __user *mnt_id, bool unique_mntid,
+ int fh_flags)
{
long retval;
struct file_handle f_handle;
@@ -69,9 +70,19 @@ static long do_sys_name_to_handle(const struct path *path,
} else
retval = 0;
/* copy the mount id */
- if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) ||
- copy_to_user(ufh, handle,
- struct_size(handle, f_handle, handle_bytes)))
+ if (unique_mntid) {
+ if (put_user(real_mount(path->mnt)->mnt_id_unique,
+ (u64 __user *) mnt_id))
+ retval = -EFAULT;
+ } else {
+ if (put_user(real_mount(path->mnt)->mnt_id,
+ (int __user *) mnt_id))
+ retval = -EFAULT;
+ }
+ /* copy the handle */
+ if (retval != -EFAULT &&
+ copy_to_user(ufh, handle,
+ struct_size(handle, f_handle, handle_bytes)))
retval = -EFAULT;
kfree(handle);
return retval;
@@ -83,6 +94,7 @@ static long do_sys_name_to_handle(const struct path *path,
* @name: name that should be converted to handle.
* @handle: resulting file handle
* @mnt_id: mount id of the file system containing the file
+ * (u64 if AT_HANDLE_MNT_ID_UNIQUE, otherwise int)
* @flag: flag value to indicate whether to follow symlink or not
* and whether a decodable file handle is required.
*
@@ -92,7 +104,7 @@ static long do_sys_name_to_handle(const struct path *path,
* value required.
*/
SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
- struct file_handle __user *, handle, int __user *, mnt_id,
+ struct file_handle __user *, handle, void __user *, mnt_id,
int, flag)
{
struct path path;
@@ -100,7 +112,8 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
int fh_flags;
int err;

- if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID))
+ if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID |
+ AT_HANDLE_MNT_ID_UNIQUE))
return -EINVAL;

lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
@@ -109,7 +122,9 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
lookup_flags |= LOOKUP_EMPTY;
err = user_path_at(dfd, name, lookup_flags, &path);
if (!err) {
- err = do_sys_name_to_handle(&path, handle, mnt_id, fh_flags);
+ err = do_sys_name_to_handle(&path, handle, mnt_id,
+ flag & AT_HANDLE_MNT_ID_UNIQUE,
+ fh_flags);
path_put(&path);
}
return err;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index e619ac10cd23..99c5a783a01e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -863,7 +863,7 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags,
const char __user *pathname);
asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name,
struct file_handle __user *handle,
- int __user *mnt_id, int flag);
+ void __user *mnt_id, int flag);
asmlinkage long sys_open_by_handle_at(int mountdirfd,
struct file_handle __user *handle,
int flags);
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index c0bcc185fa48..9ed9d65842c1 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -87,22 +87,24 @@
#define DN_ATTRIB 0x00000020 /* File changed attibutes */
#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */

+#define AT_FDCWD -100 /* Special value used to indicate
+ openat should use the current
+ working directory. */
+#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */
+
/*
- * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is
- * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to
+ * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS
+ * is meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to
* unlinkat. The two functions do completely different things and therefore,
* the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to
* faccessat would be undefined behavior and thus treating it equivalent to
* AT_EACCESS is valid undefined behavior.
*/
-#define AT_FDCWD -100 /* Special value used to indicate
- openat should use the current
- working directory. */
-#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */
#define AT_EACCESS 0x200 /* Test access permitted for
effective IDs, not real IDs. */
#define AT_REMOVEDIR 0x200 /* Remove directory instead of
unlinking file. */
+
#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */
#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
@@ -114,10 +116,22 @@

#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */

-/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */
+/*
+ * All new purely-syscall-specific AT_* flags should consider using bits from
+ * 0xFF, but the bits used by RENAME_* (0x7) should be avoided in case users
+ * decide to pass AT_* flags to renameat2() by accident. These flag bits are
+ * free for re-use by other syscall's syscall-specific flags without worry.
+ */
+
+/*
+ * Flags for name_to_handle_at(2). To save AT_ flag space we re-use the
+ * AT_EACCESS/AT_REMOVEDIR bit for AT_HANDLE_FID.
+ */
#define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to
compare object identity and may not
be usable to open_by_handle_at(2) */
+#define AT_HANDLE_MNT_ID_UNIQUE 0x80 /* return the u64 unique mount id */
+
#if defined(__KERNEL__)
#define AT_GETATTR_NOSEC 0x80000000
#endif
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h
index 282e90aeb163..f671312ca481 100644
--- a/tools/include/uapi/linux/fcntl.h
+++ b/tools/include/uapi/linux/fcntl.h
@@ -85,22 +85,24 @@
#define DN_ATTRIB 0x00000020 /* File changed attibutes */
#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */

+#define AT_FDCWD -100 /* Special value used to indicate
+ openat should use the current
+ working directory. */
+#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */
+
/*
- * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is
- * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to
+ * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS
+ * is meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to
* unlinkat. The two functions do completely different things and therefore,
* the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to
* faccessat would be undefined behavior and thus treating it equivalent to
* AT_EACCESS is valid undefined behavior.
*/
-#define AT_FDCWD -100 /* Special value used to indicate
- openat should use the current
- working directory. */
-#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */
#define AT_EACCESS 0x200 /* Test access permitted for
effective IDs, not real IDs. */
#define AT_REMOVEDIR 0x200 /* Remove directory instead of
unlinking file. */
+
#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */
#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
@@ -112,10 +114,22 @@

#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */

-/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */
+/*
+ * All new purely-syscall-specific AT_* flags should consider using bits from
+ * 0xFF, but the bits used by RENAME_* (0x7) should be avoided in case users
+ * decide to pass AT_* flags to renameat2() by accident. These flag bits are
+ * free for re-use by other syscall's syscall-specific flags without worry.
+ */
+
+/*
+ * Flags for name_to_handle_at(2). To save AT_ flag space we re-use the
+ * AT_EACCESS/AT_REMOVEDIR bit for AT_HANDLE_FID.
+ */
#define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to
compare object identity and may not
be usable to open_by_handle_at(2) */
+#define AT_HANDLE_MNT_ID_UNIQUE 0x80 /* returned mount id is the u64 unique mount id */
+
#if defined(__KERNEL__)
#define AT_GETATTR_NOSEC 0x80000000
#endif

---
base-commit: 584bbf439d0fa83d728ec49f3a38c581bdc828b4
change-id: 20240515-exportfs-u64-mount-id-9ebb5c58b53c

Best regards,
--
Aleksa Sarai <cyphar@xxxxxxxxxx>