Re: [PATCH v7 2/5] proc: subset=pid: Show /proc/self/net only for CAP_NET_ADMIN

From: Alexey Gladkov

Date: Wed Feb 11 2026 - 14:35:31 EST


On Wed, Feb 04, 2026 at 03:39:53PM +0100, Christian Brauner wrote:
> On Tue, Jan 13, 2026 at 10:20:34AM +0100, Alexey Gladkov wrote:
> > Cache the mounters credentials and allow access to the net directories
> > contingent of the permissions of the mounter of proc.
> >
> > Do not show /proc/self/net when proc is mounted with subset=pid option
> > and the mounter does not have CAP_NET_ADMIN.
> >
> > Signed-off-by: Alexey Gladkov <legion@xxxxxxxxxx>
> > ---
> > fs/proc/proc_net.c | 8 ++++++++
> > fs/proc/root.c | 5 +++++
> > include/linux/proc_fs.h | 1 +
> > 3 files changed, 14 insertions(+)
> >
> > diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
> > index 52f0b75cbce2..6e0ccef0169f 100644
> > --- a/fs/proc/proc_net.c
> > +++ b/fs/proc/proc_net.c
> > @@ -23,6 +23,7 @@
> > #include <linux/uidgid.h>
> > #include <net/net_namespace.h>
> > #include <linux/seq_file.h>
> > +#include <linux/security.h>
> >
> > #include "internal.h"
> >
> > @@ -270,6 +271,7 @@ static struct net *get_proc_task_net(struct inode *dir)
> > struct task_struct *task;
> > struct nsproxy *ns;
> > struct net *net = NULL;
> > + struct proc_fs_info *fs_info = proc_sb_info(dir->i_sb);
> >
> > rcu_read_lock();
> > task = pid_task(proc_pid(dir), PIDTYPE_PID);
> > @@ -282,6 +284,12 @@ static struct net *get_proc_task_net(struct inode *dir)
> > }
> > rcu_read_unlock();
> >
> > + if (net && (fs_info->pidonly == PROC_PIDONLY_ON) &&
> > + security_capable(fs_info->mounter_cred, net->user_ns, CAP_NET_ADMIN, CAP_OPT_NONE) < 0) {
> > + put_net(net);
> > + net = NULL;
> > + }
> > +
> > return net;
> > }
> >
> > diff --git a/fs/proc/root.c b/fs/proc/root.c
> > index d8ca41d823e4..ed8a101d09d3 100644
> > --- a/fs/proc/root.c
> > +++ b/fs/proc/root.c
> > @@ -254,6 +254,7 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
> > return -ENOMEM;
> >
> > fs_info->pid_ns = get_pid_ns(ctx->pid_ns);
> > + fs_info->mounter_cred = get_cred(fc->cred);
> > proc_apply_options(fs_info, fc, current_user_ns());
> >
> > /* User space would break if executables or devices appear on proc */
> > @@ -303,6 +304,9 @@ static int proc_reconfigure(struct fs_context *fc)
> >
> > sync_filesystem(sb);
> >
> > + put_cred(fs_info->mounter_cred);
> > + fs_info->mounter_cred = get_cred(fc->cred);
>
> Afaict, this races with get_proc_task_net(). You need a synchronization
> mechanism here so that get_proc_task_net() doesn't risk accessing
> invalid mounter creds while someone concurrently updates the creds.
> Proposal how to fix that below.
>
> But I'm kinda torn here anyway whether we want that credential change on
> remount. The problem is that someone might inadvertently allow access to
> /proc/<pid>/net as a side-effect simply because they remounted procfs.
> But they never had a chance to prevent this.

I think you're right, and there's no need to change credentials on
remount. At least not now.

> I think it's best if mounter_creds stays fixed just as they do for
> overlayfs. So we don't allow them to change on reconfigure. That also
> makes all of the code I hinted at below pointless.

I'll just remove the mounter_cred update from proc_reconfigure.

> If we ever want to change the credentials it's easier to add a mount
> option to procfs like I did for overlayfs.
>
> _Untested_ patches:
>
> First, the preparatory patch diff (no functional changes intended):
>
> diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
> index 52f0b75cbce2..81825e5819b8 100644
> --- a/fs/proc/proc_net.c
> +++ b/fs/proc/proc_net.c
> @@ -268,19 +268,19 @@ EXPORT_SYMBOL_GPL(proc_create_net_single_write);
> static struct net *get_proc_task_net(struct inode *dir)
> {
> struct task_struct *task;
> - struct nsproxy *ns;
> - struct net *net = NULL;
> + struct net *net;
>
> - rcu_read_lock();
> + guard(rcu)();
> task = pid_task(proc_pid(dir), PIDTYPE_PID);
> - if (task != NULL) {
> - task_lock(task);
> - ns = task->nsproxy;
> - if (ns != NULL)
> - net = get_net(ns->net_ns);
> - task_unlock(task);
> + if (!task)
> + return NULL;
> +
> + scoped_guard(task_lock, task) {
> + struct nsproxy *ns = task->nsproxy;
> + if (!ns)
> + return NULL;
> + net = get_net(ns->net_ns);
> }
> - rcu_read_unlock();
>
> return net;
> }
>
> And then on top of it something like:
>
> diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
> index 81825e5819b8..47dc9806395c 100644
> --- a/fs/proc/proc_net.c
> +++ b/fs/proc/proc_net.c
> @@ -269,6 +269,8 @@ static struct net *get_proc_task_net(struct inode *dir)
> {
> struct task_struct *task;
> struct net *net;
> + struct proc_fs_info *fs_info;
> + const struct cred *cred;
>
> guard(rcu)();
> task = pid_task(proc_pid(dir), PIDTYPE_PID);
> @@ -282,6 +284,15 @@ static struct net *get_proc_task_net(struct inode *dir)
> net = get_net(ns->net_ns);
> }
>
> + fs_info = proc_sb_info(dir->i_sb);
> + if (fs_info->pidonly != PROC_PIDONLY_ON)
> + return net;
> +
> + cred = rcu_dereference(fs_info->mounter_cred);
> + if (security_capable(cred, net->user_ns, CAP_NET_ADMIN, CAP_OPT_NONE) != 0) {
> + put_net(net);
> + return NULL;
> + }
> return net;
> }
>
> diff --git a/fs/proc/root.c b/fs/proc/root.c
> index d8ca41d823e4..68397900dab7 100644
> --- a/fs/proc/root.c
> +++ b/fs/proc/root.c
> @@ -300,11 +300,15 @@ static int proc_reconfigure(struct fs_context *fc)
> {
> struct super_block *sb = fc->root->d_sb;
> struct proc_fs_info *fs_info = proc_sb_info(sb);
> + const struct cred *cred;
>
> sync_filesystem(sb);
>
> - proc_apply_options(fs_info, fc, current_user_ns());
> - return 0;
> + cred = rcu_replace_pointer(fs_info->mounter_cred, get_cred(fc->cred),
> + lockdep_is_held(&sb->s_umount));
> + put_cred(cred);
> +
> + return proc_apply_options(sb, fc, current_user_ns());
> }
>
> static int proc_get_tree(struct fs_context *fc)
>

--
Rgrds, legion