[RFC v2 PATCH 2/8] VFS:uidshift: add flags and helpers to shift UIDs and GIDs to virtual view

From: Djalal Harouni
Date: Wed May 04 2016 - 10:32:21 EST


Add helpers to allow the VFS to shift UIDs and GIDs according to the
user namespace of the containing mount namespace. All decisions are
taken by VFS.

To allow the VFS to shift UIDs and GIDs, a filesystem must be mounted
with the "vfs_shift_uids" and "vfs_shift_gids" options. Therefore, add
flags to distinguish mounts that support UID/GID shifts from those that
do not. The flags are "SB_I_VFS_SHIFT_UIDS" and "SB_I_VFS_SHIFT_GIDS",
and should be set by filesystems in super_block->s_iflags when they are
mounted.

If a mount that supports VFS UID/GID shifts shows up in a mount
namespace that allows UID/GID shifts, the VFS helpers will handle the
shift and translate it according to that mount namespace. The user
namespace of the containing mount namespace is used to perform the
translation. This has the advantage that only a process with the right
privileges, or an ancestor of the mount namespace, is able to set the
user namespace mappings.

As an example, do the shift to the virtual view when we stat() inodes:
the VFS handles that, and inode->{i_uid|i_gid} will always contain the
on-disk view.

Returned stat()->{uid|gid} example inside user_ns_X
---------------------------------------------------

-------------------------------------------------------------------------
inode->uid on Disk | init_user_ns uid | user_ns_X uid | stat->uid
-------------------------------------------------------------------------
0 | 1000000 | 0 | 65534
-------------------------------------------------------------------------
999 | 1000999 | 999 | 65534
-------------------------------------------------------------------------
1000 | 1001000 | 1000 | 65534
-------------------------------------------------------------------------
1000000 | 1000000 | 0 | 0
-------------------------------------------------------------------------
1000999 | 1000000 | 0 | 999
-------------------------------------------------------------------------
1001000 | 1000999 | 999 | 1000
-------------------------------------------------------------------------

With this patch:
-------------------------------------------------------------------------
inode->uid on Disk | init_user_ns uid | user_ns_X uid | stat->uid
-------------------------------------------------------------------------
0 | 1000000 | 0 | 0
-------------------------------------------------------------------------
999 | 1000999 | 999 | 999
-------------------------------------------------------------------------
1000 | 1001000 | 1000 | 1000
-------------------------------------------------------------------------
1000000 | 1000000 | 0 | 0
-------------------------------------------------------------------------
1000999 | 1000000 | 0 | 999
-------------------------------------------------------------------------
1001000 | 1000999 | 999 | 1000
-------------------------------------------------------------------------

Signed-off-by: Dongsu Park <dongsu@xxxxxxxxxxxx>
Signed-off-by: Djalal Harouni <tixxdz@xxxxxxxxxx>
---
fs/namespace.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/stat.c | 4 ++--
include/linux/fs.h | 11 ++++++++++
3 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 940ecfc..de02b39 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1652,6 +1652,22 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)

#endif

+/* Returns true if the VFS should shift inode's UID */
+static bool vfs_mount_shift_i_uid(struct mnt_namespace *ns,
+ const struct inode *inode)
+{
+ return (ns->flags & CLONE_MNTNS_SHIFT_UIDGID) &&
+ (inode->i_sb->s_iflags & SB_I_VFS_SHIFT_UIDS);
+}
+
+/* Returns true if the VFS should shift inode's GID */
+static bool vfs_mount_shift_i_gid(struct mnt_namespace *ns,
+ const struct inode *inode)
+{
+ return (ns->flags & CLONE_MNTNS_SHIFT_UIDGID) &&
+ (inode->i_sb->s_iflags & SB_I_VFS_SHIFT_GIDS);
+}
+
static bool is_mnt_ns_file(struct dentry *dentry)
{
/* Is this a proxy for a mount namespace? */
@@ -1664,6 +1680,52 @@ struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
return container_of(ns, struct mnt_namespace, ns);
}

+/*
+ * Returns the virtual UID view of the inode's uid
+ * If UID shifts are enabled on the mount namespace and the filesystem,
+ * the VFS will return the shifted UID according to the rules of the
+ * user namespace of the containing mount namespace. If no shift is
+ * performed, inode->i_uid is returned.
+ */
+kuid_t vfs_shift_i_uid_to_virtual(const struct inode *inode)
+{
+ kuid_t i_uid = inode->i_uid;
+ struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+
+ /* Nothing to do */
+ if (!vfs_mount_shift_i_uid(ns, inode))
+ return i_uid;
+
+ /* If there is no mapping construct one in the current mountns */
+ if (!kuid_has_mapping(ns->user_ns, i_uid))
+ return make_kuid(ns->user_ns, i_uid.val);
+
+ return i_uid;
+}
+
+/*
+ * Returns the virtual GID view of the inode's gid
+ * If GID shifts are enabled on the mount namespace and the filesystem,
+ * the VFS will return the shifted GID according to the rules of the
+ * user namespace of the containing mount namespace. If no shift is
+ * performed, inode->i_gid is returned.
+ */
+kgid_t vfs_shift_i_gid_to_virtual(const struct inode *inode)
+{
+ kgid_t i_gid = inode->i_gid;
+ struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+
+ /* Nothing to do */
+ if (!vfs_mount_shift_i_gid(ns, inode))
+ return i_gid;
+
+ /* If there is no mapping construct one in the current mountns */
+ if (!kgid_has_mapping(ns->user_ns, i_gid))
+ return make_kgid(ns->user_ns, i_gid.val);
+
+ return i_gid;
+}
+
static bool mnt_ns_loop(struct dentry *dentry)
{
/* Could bind mounting the mount namespace inode cause a
diff --git a/fs/stat.c b/fs/stat.c
index bc045c7..d00622b 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -24,8 +24,8 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
stat->ino = inode->i_ino;
stat->mode = inode->i_mode;
stat->nlink = inode->i_nlink;
- stat->uid = inode->i_uid;
- stat->gid = inode->i_gid;
+ stat->uid = vfs_shift_i_uid_to_virtual(inode);
+ stat->gid = vfs_shift_i_gid_to_virtual(inode);
stat->rdev = inode->i_rdev;
stat->size = i_size_read(inode);
stat->atime = inode->i_atime;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 70e61b5..a9efc5a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1298,6 +1298,8 @@ struct mm_struct;
/* sb->s_iflags */
#define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */
#define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */
+#define SB_I_VFS_SHIFT_UIDS 0X00000004 /* FS allows VFS to do UID shifts */
+#define SB_I_VFS_SHIFT_GIDS 0X00000008 /* FS allows VFS to do GID shifts */

/* Possible states of 'frozen' field */
enum {
@@ -1563,6 +1565,15 @@ extern int vfs_whiteout(struct inode *, struct dentry *);
*/
extern void inode_init_owner(struct inode *inode, const struct inode *dir,
umode_t mode);
+
+/*
+ * VFS helpers to shift inode's uid/gid and passed values to either virtual
+ * or on-disk view. The shift is done according to rules of the user namespace
+ * of the containing mount namespace.
+ */
+extern kuid_t vfs_shift_i_uid_to_virtual(const struct inode *inode);
+extern kgid_t vfs_shift_i_gid_to_virtual(const struct inode *inode);
+
/*
* VFS FS_IOC_FIEMAP helper definitions.
*/
--
2.5.5