!!!! HERE BE DRAGONS - COMPLETELY UNTESTED !!!!

From: Christian Brauner
Date: Fri Dec 10 2021 - 05:47:37 EST


securityfs: only allow access to securityfs from within same namespace

Limit opening of securityfs files to callers located in the same user
namespace as the securityfs superblock.

---
security/inode.c | 33 +++++++++++++++++++++++++++++++--
1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/security/inode.c b/security/inode.c
index eaccba7017d9..9eaf757c08cb 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -80,6 +80,35 @@ static struct file_system_type fs_type = {
.fs_flags = FS_USERNS_MOUNT,
};

+/* Permit access only when current_user_ns() matches the sb's s_user_ns. */
+static int securityfs_permission(struct user_namespace *mnt_userns,
+				 struct inode *inode, int mask)
+{
+	int ret = generic_permission(&init_user_ns, inode, mask);
+
+	if (ret)
+		return ret;
+	if (inode->i_sb->s_user_ns != current_user_ns())
+		return -EACCES;
+
+	return 0;
+}
+
+static const struct inode_operations securityfs_dir_inode_operations = {
+	.permission	= securityfs_permission, /* same-userns callers only */
+	.lookup		= simple_lookup,
+};
+
+/* No ->permission here: that hook exists only in struct inode_operations. */
+static const struct file_operations securityfs_dir_operations = {
+	.open		= dcache_dir_open,
+	.release	= dcache_dir_close,
+	.llseek		= dcache_dir_lseek,
+	.read		= generic_read_dir,
+	.iterate_shared	= dcache_readdir,
+	.fsync		= noop_fsync,
+};
+
/**
* securityfs_create_dentry - create a dentry in the securityfs filesystem
*
@@ -167,8 +196,8 @@ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode,
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
inode->i_private = data;
if (S_ISDIR(mode)) {
- inode->i_op = &simple_dir_inode_operations;
- inode->i_fop = &simple_dir_operations;
+ inode->i_op = &securityfs_dir_inode_operations;
+ inode->i_fop = &securityfs_dir_operations;
inc_nlink(inode);
inc_nlink(dir);
} else if (S_ISLNK(mode)) {
--
2.30.2

>
> Following man page of setns:
>
> "   User namespaces
>               A process reassociating itself with a user namespace must
>               have the CAP_SYS_ADMIN capability in the target user
>               namespace.  (This necessarily implies that it is only
>               possible to join a descendant user namespace.)  Upon
>               successfully joining a user namespace, a process is
>               granted all capabilities in that namespace, regardless of
>               its user and group IDs."
>
>
> So if we choose option 1 maybe we have to test for this capability upon
> every read/write from/to a file?
>

In general, never do permission checking at read/write time unless the
read/write fundamentally depends on what is read or written. Clean
semantics will do permission checking once, at open time. If you really,
really need to do permission checking at .read/.write time, you need to
use the file's f_cred, possibly calling override_creds()/revert_creds()
while doing so -- but simply don't do it.