Re: [PATCH 14/16] vfs: Implement mount_super_once

From: Linus Torvalds
Date: Tue Apr 19 2016 - 21:24:25 EST


On Tue, Apr 19, 2016 at 4:29 PM, Linus Torvalds
<torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:
>
> I _violently_ oppose the stupid DEVPTS_MULTIPLE_INSTANCES config option.

So just to show what I want to actually happen, here's the hacky patch
on top of my (now merged) cleanup patch that actually does what I want
devpts to do.

I say it's hacky, because the "follow_mount()" thing there really is
pretty hacky. Al - suggestions for how to do this *right*?

But this actually forcibly removes the whole "newinstance" thing, and
makes every pts mount a new instance, and just relies on "ptmx" doing
the right thing.

In other words, with this patch, you can *literally* do just this (as
root, obviously):

mkdir test-dir
cd test-dir

mknod ptmx c 5 2
mkdir pts
mount -t devpts pts pts

and after that it all just works. You can do this:

ls -l pts

which shows just the other ptmx noode (that is unused and pointless -
I'd actually like to just remove it, but whatever), and then you can
do

sleep 100 < ptmx &
sleep 100 < ptmx &
ls -l pts

and you will magically see those new 0/1 entries in that pts
subdirectory.. It's entirely independent of /dev/pts/, and there's no
magic connection or any magic dis-connection. It all JustWorks(tm).

Note how this works even *outside* of /dev. But it works inside of
/dev equally well.

Now, a *real* patch would

- solve that "follow_mount()" issue some other way

- not remove the newinstance code immediately (I did it to show that
even the bootup works with a unmodified distro)

- actually remove the whole "DEVPTS_MULTIPLE_INSTANCES" config option

- I'm not happy with devpts_pty_kill(). I would want to clean that up
a bit somehow. I think this is at least partly what Peter Hurley was
talking about. That thing is not pretty.

so this attached patch is by no means meant to be applied as-is. But
it's meant to show what (a) the new organization allows and (b) what I
was going for.

Linus
drivers/tty/pty.c | 2 +-
fs/devpts/inode.c | 107 +++++++++++++++++++++++++++++-----------------
fs/namei.c | 2 +-
include/linux/devpts_fs.h | 2 +-
4 files changed, 70 insertions(+), 43 deletions(-)

diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 0058d9fbf931..2a567d0433a2 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -67,7 +67,7 @@ static void pty_close(struct tty_struct *tty, struct file *filp)
if (tty->driver == ptm_driver) {
mutex_lock(&devpts_mutex);
if (tty->link->driver_data)
- devpts_pty_kill(tty->link->driver_data);
+ devpts_pty_kill(tty->driver_data, tty->link->driver_data);
mutex_unlock(&devpts_mutex);
}
#endif
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 0af8e7d70d27..e36a4721eec2 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -137,17 +137,6 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb)
return sb->s_fs_info;
}

-static inline struct super_block *pts_sb_from_inode(struct inode *inode)
-{
-#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
- if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC)
- return inode->i_sb;
-#endif
- if (!devpts_mnt)
- return NULL;
- return devpts_mnt->mnt_sb;
-}
-
#define PARSE_MOUNT 0
#define PARSE_REMOUNT 1

@@ -411,13 +400,6 @@ fail:
}

#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
-static int compare_init_pts_sb(struct super_block *s, void *p)
-{
- if (devpts_mnt)
- return devpts_mnt->mnt_sb == s;
- return 0;
-}
-
/*
* devpts_mount()
*
@@ -456,17 +438,7 @@ static struct dentry *devpts_mount(struct file_system_type *fs_type,
if (error)
return ERR_PTR(error);

- /* Require newinstance for all user namespace mounts to ensure
- * the mount options are not changed.
- */
- if ((current_user_ns() != &init_user_ns) && !opts.newinstance)
- return ERR_PTR(-EINVAL);
-
- if (opts.newinstance)
- s = sget(fs_type, NULL, set_anon_super, flags, NULL);
- else
- s = sget(fs_type, compare_init_pts_sb, set_anon_super, flags,
- NULL);
+ s = sget(fs_type, NULL, set_anon_super, flags, NULL);

if (IS_ERR(s))
return ERR_CAST(s);
@@ -571,23 +543,78 @@ void devpts_kill_index(struct pts_fs_info *fsi, int idx)
mutex_unlock(&allocated_ptys_lock);
}

+extern void follow_mount(struct path *path);
+
+static struct dentry *ptmx_to_pts(struct path *ptmx)
+{
+ struct dentry *dev = dget_parent(ptmx->dentry);
+
+ if (dev) {
+ struct path path;
+
+ path.dentry = lookup_one_len_unlocked("pts", dev, 3);
+ if (path.dentry) {
+ path.mnt = mntget(ptmx->mnt);
+ follow_mount(&path);
+ mntput(path.mnt);
+ return path.dentry;
+ }
+ }
+ return NULL;
+}
+
+static struct super_block *get_devpts_sb(struct inode *ptmx_inode, struct file *file)
+{
+ struct super_block *sb;
+ struct dentry *pts;
+
+ /* If it's the ptmx node in a devpts filesystem, we're done */
+ sb = ptmx_inode->i_sb;
+ if (sb->s_magic == DEVPTS_SUPER_MAGIC) {
+ atomic_inc(&sb->s_active);
+ return sb;
+ }
+
+ /* Otherwise, try to look up ptmx -> pts/ relationship */
+ pts = ptmx_to_pts(&file->f_path);
+WARN_ON_ONCE(!pts);
+ if (pts) {
+ sb = pts->d_sb;
+WARN_ON_ONCE(sb->s_magic != DEVPTS_SUPER_MAGIC);
+ if (sb->s_magic == DEVPTS_SUPER_MAGIC) {
+ atomic_inc(&sb->s_active);
+ dput(pts);
+ return sb;
+ }
+ }
+
+ /* This is disgusting, old, and wrong. We do it for legacy reasons */
+ if (devpts_mnt) {
+ sb = devpts_mnt->mnt_sb;
+ atomic_inc(&sb->s_active);
+ return sb;
+ }
+ return NULL;
+}
+
+
/*
* pty code needs to hold extra references in case of last /dev/tty close
*/
struct pts_fs_info *devpts_get_ref(struct inode *ptmx_inode, struct file *file)
{
struct super_block *sb;
- struct pts_fs_info *fsi;

- sb = pts_sb_from_inode(ptmx_inode);
- if (!sb)
- return NULL;
- fsi = DEVPTS_SB(sb);
- if (!fsi)
- return NULL;
+ sb = get_devpts_sb(ptmx_inode, file);
+ if (sb) {
+ struct pts_fs_info *fsi;

- atomic_inc(&sb->s_active);
- return fsi;
+ fsi = DEVPTS_SB(sb);
+ if (fsi)
+ return fsi;
+ deactivate_super(sb);
+ }
+ return NULL;
}

void devpts_put_ref(struct pts_fs_info *fsi)
@@ -682,9 +709,9 @@ void *devpts_get_priv(struct inode *pts_inode)
*
* This is an inverse operation of devpts_pty_new.
*/
-void devpts_pty_kill(struct inode *inode)
+void devpts_pty_kill(struct pts_fs_info *fsi, struct inode *inode)
{
- struct super_block *sb = pts_sb_from_inode(inode);
+ struct super_block *sb = fsi->sb;
struct dentry *root = sb->s_root;
struct dentry *dentry;

diff --git a/fs/namei.c b/fs/namei.c
index 1d9ca2d5dff6..5329efd11a3e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1402,7 +1402,7 @@ EXPORT_SYMBOL(follow_down);
/*
* Skip to top of mountpoint pile in refwalk mode for follow_dotdot()
*/
-static void follow_mount(struct path *path)
+void follow_mount(struct path *path)
{
while (d_mountpoint(path->dentry)) {
struct vfsmount *mounted = lookup_mnt(path);
diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h
index 358a4db72a27..95d17243605e 100644
--- a/include/linux/devpts_fs.h
+++ b/include/linux/devpts_fs.h
@@ -31,7 +31,7 @@ struct inode *devpts_pty_new(struct pts_fs_info *, dev_t, int, void *);
/* get private structure */
void *devpts_get_priv(struct inode *pts_inode);
/* unlink */
-void devpts_pty_kill(struct inode *inode);
+void devpts_pty_kill(struct pts_fs_info *, struct inode *inode);

#endif