Re: [PATCH v9 4/5] proc: Skip the visibility check if subset=pid is used

From: Christian Brauner

Date: Thu Apr 16 2026 - 08:53:10 EST


On Mon, Apr 13, 2026 at 01:19:43PM +0200, Alexey Gladkov wrote:
> When procfs is mounted with the subset=pid option, all system files and
> directories from the root of the filesystem are not accessible in
> userspace. Only dynamic information about processes is available, which
> cannot be hidden with overmount.
>
> For this reason, checking for full visibility is not relevant if mounting
> is performed with the subset=pid option.
>
> Signed-off-by: Alexey Gladkov <legion@xxxxxxxxxx>
> ---
> fs/fs_context.c | 1 +
> fs/namespace.c | 15 +++++++--------
> fs/proc/root.c | 7 +++++++
> include/linux/fs_context.h | 1 +
> 4 files changed, 16 insertions(+), 8 deletions(-)
>
> diff --git a/fs/fs_context.c b/fs/fs_context.c
> index a37b0a093505..2fd3d6422a38 100644
> --- a/fs/fs_context.c
> +++ b/fs/fs_context.c
> @@ -545,6 +545,7 @@ void vfs_clean_context(struct fs_context *fc)
> kfree(fc->source);
> fc->source = NULL;
> fc->exclusive = false;
> + fc->skip_visibility = false;
>
> fc->purpose = FS_CONTEXT_FOR_RECONFIGURE;
> fc->phase = FS_CONTEXT_AWAITING_RECONF;
> diff --git a/fs/namespace.c b/fs/namespace.c
> index 539b74403072..32aaedb020c1 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -3755,7 +3755,7 @@ static int do_add_mount(struct mount *newmnt, const struct pinned_mountpoint *mp
> return graft_tree(newmnt, mp);
> }
>
> -static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags);
> +static bool mount_too_revealing(struct fs_context *fc, int *new_mnt_flags);
>
> /*
> * Create a new mount using a superblock configuration and request it
> @@ -3764,19 +3764,17 @@ static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags
> static int do_new_mount_fc(struct fs_context *fc, const struct path *mountpoint,
> unsigned int mnt_flags)
> {
> - struct super_block *sb;
> struct vfsmount *mnt __free(mntput) = fc_mount(fc);
> int error;
>
> if (IS_ERR(mnt))
> return PTR_ERR(mnt);
>
> - sb = fc->root->d_sb;
> - error = security_sb_kern_mount(sb);
> + error = security_sb_kern_mount(fc->root->d_sb);
> if (unlikely(error))
> return error;
>
> - if (unlikely(mount_too_revealing(sb, &mnt_flags))) {
> + if (unlikely(mount_too_revealing(fc, &mnt_flags))) {
> errorfcp(fc, "VFS", "Mount too revealing");
> return -EPERM;
> }
> @@ -4463,7 +4461,7 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
> return ret;
>
> ret = -EPERM;
> - if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) {
> + if (mount_too_revealing(fc, &mnt_flags)) {
> errorfcp(fc, "VFS", "Mount too revealing");
> return ret;
> }
> @@ -6368,10 +6366,11 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
> return false;
> }
>
> -static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags)
> +static bool mount_too_revealing(struct fs_context *fc, int *new_mnt_flags)
> {
> const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
> struct mnt_namespace *ns = current->nsproxy->mnt_ns;
> + const struct super_block *sb = fc->root->d_sb;
> unsigned long s_iflags;
>
> if (ns->user_ns == &init_user_ns)
> @@ -6388,7 +6387,7 @@ static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags
> return true;
> }
>
> - return !mnt_already_visible(ns, sb, new_mnt_flags);
> + return (!fc->skip_visibility && !mnt_already_visible(ns, sb, new_mnt_flags));
> }
>
> bool mnt_may_suid(struct vfsmount *mnt)
> diff --git a/fs/proc/root.c b/fs/proc/root.c
> index 05558654df31..6dc870b3061b 100644
> --- a/fs/proc/root.c
> +++ b/fs/proc/root.c
> @@ -263,6 +263,13 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
> if (ret)
> return ret;
>
> + /*
> + * The dynamic part of procfs cannot be hidden using overmount.
> + * Therefore, the check for "not fully visible" can be skipped.
> + */
> + if (fs_info->pidonly)
> + fc->skip_visibility = true;
> +
> /* User space would break if executables or devices appear on proc */
> s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;

I think we should move the SB_I_USERNS_VISIBLE check to the fs_type. It
really is something that applies to the filesystem type and isn't a
per-superblock thing. Then we can raise SB_I_USERNS_VISIBLE only on
superblocks that are restricted via pid_only and discount those when
deciding to allow procfs mount without pid_only. Something that Aleksa
had pointed out on an earlier review. Let me see if I can write that up.