Re: [PATCH 03/34] teach move_mount(2) to work with OPEN_TREE_CLONE [ver #12]

From: Alan Jenkins
Date: Sun Oct 07 2018 - 15:20:28 EST


On 07/10/2018 11:48, Alan Jenkins wrote:
On 05/10/2018 19:24, Alan Jenkins wrote:
On 21/09/2018 17:30, David Howells wrote:
From: Al Viro <viro@xxxxxxxxxxxxxxxxxx>

Allow a detached tree created by open_tree(..., OPEN_TREE_CLONE) to be
attached by move_mount(2).

If by the time of final fput() of OPEN_TREE_CLONE-opened file its tree is
not detached anymore, it won't be dissolved. move_mount(2) is adjusted
to handle detached source.

That gives us equivalents of mount --bind and mount --rbind.

Signed-off-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
---

 fs/namespace.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

The lockup seems to be a general problem with the cleanup code. Even if I use this as advertised, i.e. for a simple bind mount.

Ah, I see. The problem is you were expecting me to use the FD from open_tree() directly. But I did fchdir() into the FD, and then "mount --move . /mnt" :-).

If I use the FD directly, it avoids the hang. I used two separate C programs (attached, to avoid my MUA damage)...

(I was suspicious that being able to pass around detached trees as an FD, and re-attach them in any namespace, allows leaking memory by creating a namespace loop. I.e. maybe it gives you enough rope to skip the test in mnt_ns_loop().)

...so here's the memory leak.

# open_tree --help
usage: open_tree 3</source/path FD_NUMBER COMMAND...
# move_mount --help
usage: move_mount 3</from/path 4</to/path

Create a child namespace:

# mount --make-shared /tmp
# cd /tmp
# mkdir private_mnt
# mount -t tmpfs tmp private_mnt
# mount --make-private private_mnt
# touch private_mnt/child_ns
# unshare --mount=private_mnt/child_ns --propagation=shared ls -l /proc/self/ns/mnt
lrwxrwxrwx. 1 root root 0 Oct 7 19:23 /proc/self/ns/mnt -> 'mnt:[4026532334]'
# findmnt | grep /tmp
├─/tmp tmpfs tmpfs rw,nosuid,nodev,seclabel,size=1247640k,nr_inodes=311910
│ └─/tmp/private_mnt tmp tmpfs rw,relatime,seclabel,uid=1000,gid=1000
│   └─/tmp/private_mnt/child_ns nsfs[mnt:[4026532334]] nsfs rw,seclabel


Create a reference cycle:

# ~/test-open_tree 3</tmp/private_mnt 3 \
nsenter --mount=/tmp/private_mnt/child_ns \
sh -c '~/test-move_mount 4</mnt'

Attach 10MB of memory to the cycle:

# grep Shmem: /proc/meminfo
Shmem: 1464 kB
# dd if=/dev/zero of=/tmp/private_mnt/bigfile bs=1M count=10
10+0 records in
10+0 records out
10485760 bytes (10 MB, 10 MiB) copied, 0.00976358 s, 1.1 GB/s
# grep Shmem: /proc/meminfo
Shmem: 11704 kB

Detach the cycle, and leak all the memory:

# umount -l /tmp/private_mnt/
# grep Shmem: /proc/meminfo
Shmem: 11704 kB

diff --git a/samples/vfs/Makefile b/samples/vfs/Makefile
index 4ac9690fb3c4..13a32e125a74 100644
--- a/samples/vfs/Makefile
+++ b/samples/vfs/Makefile
@@ -1,10 +1,14 @@
# List of programs to build
hostprogs-$(CONFIG_SAMPLE_VFS) := \
test-fsmount \
+ open_tree \
+ move_mount \
test-statx

# Tell kbuild to always build the programs
always := $(hostprogs-y)

HOSTCFLAGS_test-fsmount.o += -I$(objtree)/usr/include
+HOSTCFLAGS_open_tree.o += -I$(objtree)/usr/include
+HOSTCFLAGS_move_mount.o += -I$(objtree)/usr/include
HOSTCFLAGS_test-statx.o += -I$(objtree)/usr/include
diff --git a/samples/vfs/open_tree.c b/samples/vfs/open_tree.c
new file mode 100644
index 000000000000..6222e69048f9
--- /dev/null
+++ b/samples/vfs/open_tree.c
@@ -0,0 +1,54 @@
+/* fd-based mount test.
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@xxxxxxxxxx)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <linux/fs.h>
+#include <linux/unistd.h>
+
+#ifndef AT_RECURSIVE
+#define AT_RECURSIVE 0x8000
+#endif
+
+#define E(x) do { if ((x) == -1) { perror(#x); exit(1); } } while(0)
+
+static inline int open_tree(int dfd, const char *pathname, unsigned flags)
+{
+ return syscall(__NR_open_tree, dfd, pathname, flags); /* issue the open_tree system call directly via syscall(), arguments forwarded unchanged; the result of syscall() is returned as-is */
+}
+
+int main(int argc, char *argv[])
+{
+ int fd_number; /* descriptor number requested on the command line (argv[1]) */
+ char **command; /* argv tail: program to exec followed by its arguments */
+ int mfd; /* descriptor returned by open_tree() */
+
+ if (argc < 3 || !isdigit((unsigned char)argv[1][0])) { /* cast: <ctype.h> functions require a value representable as unsigned char */
+ fprintf(stderr, "usage: open_tree 3</source/path FD_NUMBER COMMAND...\n");
+ exit(2);
+ }
+ fd_number = atoi(argv[1]);
+ command = argv + 2;
+
+ E( mfd = open_tree(3, "", AT_EMPTY_PATH | OPEN_TREE_CLONE | AT_RECURSIVE) ); /* empty path: operate on inherited fd 3 itself, with the clone and recursive flags */
+ if (fd_number != mfd) { /* move the new descriptor to the requested number */
+ E( dup2(mfd, fd_number) );
+ E( close(mfd) );
+ }
+ E( execvp(command[0], command) ); /* execvp() only returns on failure, in which case E() reports it and exits */
+}
diff --git a/samples/vfs/move_mount.c b/samples/vfs/move_mount.c
new file mode 100644
index 000000000000..1bd2122245e2
--- /dev/null
+++ b/samples/vfs/move_mount.c
@@ -0,0 +1,47 @@
+/* fd-based mount test.
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@xxxxxxxxxx)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <linux/fs.h>
+#include <linux/unistd.h>
+
+#define E(x) do { if ((x) == -1) { perror(#x); exit(1); } } while(0)
+
+static inline int move_mount(int from_dfd, const char *from_pathname,
+ int to_dfd, const char *to_pathname,
+ unsigned int flags)
+{
+ return syscall(__NR_move_mount, /* issue the move_mount system call directly via syscall() */
+ from_dfd, from_pathname, /* source: directory fd plus path */
+ to_dfd, to_pathname, flags); /* destination: directory fd plus path; flags forwarded unchanged */
+}
+
+int main(int argc, char *argv[])
+{
+ if (argc != 1) { /* no arguments accepted: per the usage text, source and destination arrive as inherited fds 3 and 4 */
+ fprintf(stderr, "usage: move_mount 3</from/path 4</to/path\n");
+ exit(2);
+ }
+
+ if (move_mount(3, "", 4, "", MOVE_MOUNT_F_EMPTY_PATH | /* fd 3 is the "from" side, fd 4 the "to" side; both path strings are empty */
+ MOVE_MOUNT_T_EMPTY_PATH) < 0) {
+ perror("move_mount"); /* report the errno left by the failed syscall */
+ exit(1);
+ }
+
+ exit(0);
+}