Re: [PATCH linux-next] mqueue: fix IPC namespace use-after-free

From: Al Viro
Date: Tue Dec 19 2017 - 10:32:46 EST


On Tue, Dec 19, 2017 at 11:48:19AM +0000, Al Viro wrote:
> On Tue, Dec 19, 2017 at 11:14:40AM +0100, Giuseppe Scrivano wrote:
> > mqueue_evict_inode() doesn't access the ipc namespace if it was
> > already freed. It can happen if in a new IPC namespace the inode was
> > created without a prior mq_open() which creates the vfsmount used to
> > access the superblock from mq_clear_sbinfo().
> >
> > Keep a direct pointer to the superblock used by the inodes so we can
> > correctly reset the reference to the IPC namespace being destroyed.
> >
> > Bug introduced with 9c583773d03633 ("ipc, mqueue: lazy call
> > kern_mount_data in new namespaces")
>
> And just what will happen in the same scenario if you mount the damn
> thing in userland without ever calling mq_open(), touch a file there,
> then unmount and then leave the ipc namespace?

FWIW, the real solution would be to have userland mounts trigger the creation
of the internal one, the same way mq_open() would. Something along these lines
(completely untested, on top of vfs.git#for-next). Care to give it some
beating?

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 10b82338415b..30327e201571 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -343,18 +343,46 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
return 0;
}

+static struct file_system_type mqueue_fs_type;
+/*
+ * Return value is pinned only by reference in ->mq_mnt; it will
+ * live until ipcns dies. Caller does not need to drop it.
+ */
+static struct vfsmount *mq_internal_mount(void)
+{
+ struct ipc_namespace *ns = current->nsproxy->ipc_ns;
+ struct vfsmount *m = ns->mq_mnt;
+ if (m)
+ return m;
+ m = kern_mount_data(&mqueue_fs_type, ns);
+ spin_lock(&mq_lock);
+ if (unlikely(ns->mq_mnt)) {
+ spin_unlock(&mq_lock);
+ if (!IS_ERR(m))
+ kern_unmount(m);
+ return ns->mq_mnt;
+ }
+ if (!IS_ERR(m))
+ ns->mq_mnt = m;
+ spin_unlock(&mq_lock);
+ return m;
+}
+
static struct dentry *mqueue_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
void *data)
{
- struct ipc_namespace *ns;
- if (flags & MS_KERNMOUNT) {
- ns = data;
- data = NULL;
- } else {
- ns = current->nsproxy->ipc_ns;
- }
- return mount_ns(fs_type, flags, data, ns, ns->user_ns, mqueue_fill_super);
+ struct ipc_namespace *ns = data;
+ struct vfsmount *m;
+ if (flags & MS_KERNMOUNT)
+ return mount_ns(fs_type, flags, NULL, ns, ns->user_ns,
+ mqueue_fill_super);
+ m = mq_internal_mount();
+ if (IS_ERR(m))
+ return ERR_CAST(m);
+ atomic_inc(&m->mnt_sb->s_active);
+ down_write(&m->mnt_sb->s_umount);
+ return dget(m->mnt_root);
}

static void init_once(void *foo)
@@ -743,13 +771,16 @@ static int prepare_open(struct dentry *dentry, int oflag, int ro,
static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
struct mq_attr *attr)
{
- struct vfsmount *mnt = current->nsproxy->ipc_ns->mq_mnt;
- struct dentry *root = mnt->mnt_root;
+ struct vfsmount *mnt = mq_internal_mount();
+ struct dentry *root;
struct filename *name;
struct path path;
int fd, error;
int ro;

+ if (IS_ERR(mnt))
+ return PTR_ERR(mnt);
+
audit_mq_open(oflag, mode, attr);

if (IS_ERR(name = getname(u_name)))
@@ -760,6 +791,7 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
goto out_putname;

ro = mnt_want_write(mnt); /* we'll drop it in any case */
+ root = mnt->mnt_root;
inode_lock(d_inode(root));
path.dentry = lookup_one_len(name->name, root, strlen(name->name));
if (IS_ERR(path.dentry)) {
@@ -1535,27 +1567,24 @@ int mq_init_ns(struct ipc_namespace *ns)
ns->mq_msg_default = DFLT_MSG;
ns->mq_msgsize_default = DFLT_MSGSIZE;

- ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns);
- if (IS_ERR(ns->mq_mnt)) {
- int err = PTR_ERR(ns->mq_mnt);
- ns->mq_mnt = NULL;
- return err;
- }
return 0;
}

void mq_clear_sbinfo(struct ipc_namespace *ns)
{
- ns->mq_mnt->mnt_sb->s_fs_info = NULL;
+ if (ns->mq_mnt)
+ ns->mq_mnt->mnt_sb->s_fs_info = NULL;
}

void mq_put_mnt(struct ipc_namespace *ns)
{
- kern_unmount(ns->mq_mnt);
+ if (ns->mq_mnt)
+ kern_unmount(ns->mq_mnt);
}

static int __init init_mqueue_fs(void)
{
+ struct vfsmount *m;
int error;

mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache",
@@ -1577,6 +1606,10 @@ static int __init init_mqueue_fs(void)
if (error)
goto out_filesystem;

+ m = kern_mount_data(&mqueue_fs_type, &init_ipc_ns);
+ if (IS_ERR(m))
+ goto out_filesystem;
+ init_ipc_ns.mq_mnt = m;
return 0;

out_filesystem: