Re: [PATCH 1/3] vfs: track count of child mounts

From: Ian Kent
Date: Wed Jul 20 2022 - 03:26:15 EST


On 20/7/22 10:17, Ian Kent wrote:

On 20/7/22 09:50, Al Viro wrote:
On Mon, Jul 11, 2022 at 11:37:40AM +0800, Ian Kent wrote:
While the total reference count of a mount is mostly all that's needed
the reference count corresponding to the mounts only is occassionally
also needed (for example, autofs checking if a tree of mounts can be
expired).

To make this reference count avaialble with minimal changes add a
counter to track the number of child mounts under a given mount. This
count can then be used to calculate the mounts only reference count.
No.  This is a wrong approach - instead of keeping track of number of
children, we should just stop having them contribute to refcount of
the parent.  Here's what I've got in my local tree; life gets simpler
that way.

Right, I'll grab this and run some tests.


Ian


commit e99f1f9cc864103f326a5352e6ce1e377613437f
Author: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
Date:   Sat Jul 9 14:45:39 2022 -0400

     namespace: don't keep ->mnt_parent pinned
          makes refcounting more consistent
          Signed-off-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx>

diff --git a/fs/namespace.c b/fs/namespace.c
index 68789f896f08..53c29110a0cd 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -906,7 +906,6 @@ void mnt_set_mountpoint(struct mount *mnt,
              struct mount *child_mnt)
  {
      mp->m_count++;
-    mnt_add_count(mnt, 1);    /* essentially, that's mntget */
      child_mnt->mnt_mountpoint = mp->m_dentry;
      child_mnt->mnt_parent = mnt;
      child_mnt->mnt_mp = mp;
@@ -1429,22 +1428,18 @@ void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor)
  int may_umount_tree(struct vfsmount *m)
  {
      struct mount *mnt = real_mount(m);
-    int actual_refs = 0;
-    int minimum_refs = 0;
-    struct mount *p;
      BUG_ON(!m);
        /* write lock needed for mnt_get_count */
      lock_mount_hash();
-    for (p = mnt; p; p = next_mnt(p, mnt)) {
-        actual_refs += mnt_get_count(p);
-        minimum_refs += 2;
+    for (struct mount *p = mnt; p; p = next_mnt(p, mnt)) {
+        int allowed = p == mnt ? 2 : 1;
+        if (mnt_get_count(p) > allowed) {
+            unlock_mount_hash();
+            return 0;
+        }
      }

One part of the problem I'm trying to fix is when some other

process has created a mount namespace (say with unshare(1))

on an auto-mounted path and holds the mount in use in some way.

This leads to an attempted umount in the automount daemon which

fails because a propagation of the mount is in use.


Given the way may_umount() behaves I thought it sensible that

may_umount_tree() behave that way too, but the above doesn't

check propagated mounts.


I'll see if I can come up with something on that ... unless

I'm missing something and you have different thoughts how

this should be done ...


Ian

      unlock_mount_hash();
-
-    if (actual_refs > minimum_refs)
-        return 0;
-
      return 1;
  }
  @@ -1586,7 +1581,6 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
            disconnect = disconnect_mount(p, how);
          if (mnt_has_parent(p)) {
-            mnt_add_count(p->mnt_parent, -1);
              if (!disconnect) {
                  /* Don't forget about p */
                  list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
@@ -2892,12 +2886,8 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
          put_mountpoint(old_mp);
  out:
      unlock_mount(mp);
-    if (!err) {
-        if (attached)
-            mntput_no_expire(parent);
-        else
-            free_mnt_ns(ns);
-    }
+    if (!err && !attached)
+        free_mnt_ns(ns);
      return err;
  }
  @@ -3869,7 +3859,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
          const char __user *, put_old)
  {
      struct path new, old, root;
-    struct mount *new_mnt, *root_mnt, *old_mnt, *root_parent, *ex_parent;
+    struct mount *new_mnt, *root_mnt, *old_mnt, *root_parent;
      struct mountpoint *old_mp, *root_mp;
      int error;
  @@ -3900,10 +3890,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
      new_mnt = real_mount(new.mnt);
      root_mnt = real_mount(root.mnt);
      old_mnt = real_mount(old.mnt);
-    ex_parent = new_mnt->mnt_parent;
      root_parent = root_mnt->mnt_parent;
      if (IS_MNT_SHARED(old_mnt) ||
-        IS_MNT_SHARED(ex_parent) ||
+        IS_MNT_SHARED(new_mnt->mnt_parent) ||
          IS_MNT_SHARED(root_parent))
          goto out4;
      if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
@@ -3942,7 +3931,6 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
      attach_mnt(root_mnt, old_mnt, old_mp);
      /* mount new_root on / */
      attach_mnt(new_mnt, root_parent, root_mp);
-    mnt_add_count(root_parent, -1);
      touch_mnt_namespace(current->nsproxy->mnt_ns);
      /* A moved mount should not expire automatically */
      list_del_init(&new_mnt->mnt_expire);
@@ -3952,8 +3940,6 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
      error = 0;
  out4:
      unlock_mount(old_mp);
-    if (!error)
-        mntput_no_expire(ex_parent);
  out3:
      path_put(&root);
  out2:
diff --git a/fs/pnode.c b/fs/pnode.c
index 1106137c747a..e2c8a4b18857 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -368,7 +368,7 @@ static inline int do_refcount_check(struct mount *mnt, int count)
   */
  int propagate_mount_busy(struct mount *mnt, int refcnt)
  {
-    struct mount *m, *child, *topper;
+    struct mount *m, *child;
      struct mount *parent = mnt->mnt_parent;
        if (mnt == parent)
@@ -384,7 +384,6 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
        for (m = propagation_next(parent, parent); m;
                   m = propagation_next(m, parent)) {
-        int count = 1;
          child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
          if (!child)
              continue;
@@ -392,13 +391,10 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
          /* Is there exactly one mount on the child that covers
           * it completely whose reference should be ignored?
           */
-        topper = find_topper(child);
-        if (topper)
-            count += 1;
-        else if (!list_empty(&child->mnt_mounts))
+        if (!find_topper(child) && !list_empty(&child->mnt_mounts))
              continue;
  -        if (do_refcount_check(child, count))
+        if (do_refcount_check(child, 1))
              return 1;
      }
      return 0;