[PATCH] intrinsic automount and mountpoint degradation support

From: David Howells
Date: Wed Jun 30 2004 - 14:37:26 EST



Hi Linus, Andrew,

Here's a patch that I worked out with Al Viro that adds support for a
filesystem (such as kAFS) to perform automounting intrinsically without the
need for a userspace daemon. It also adds support for such mountpoints to be
degraded at the filesystem's behest until they've been untouched long enough
that they'll be removed.

I've a patch (to follow) that removes some #ifdef's from fs/afs/* thus
allowing it to make use of this facility.

David


diff -uNr linux-2.6.7/fs/namei.c linux-2.6.7-auto/fs/namei.c
--- linux-2.6.7/fs/namei.c 2004-06-18 13:44:02.000000000 +0100
+++ linux-2.6.7-auto/fs/namei.c 2004-06-25 13:48:28.000000000 +0100
@@ -278,6 +278,12 @@
mntput(nd->mnt);
}

+void path_release_on_umount(struct nameidata *nd)
+{
+ dput(nd->dentry);
+ _mntput(nd->mnt);
+}
+
/*
* Internal lookup() using the new generic dcache.
* SMP-safe
diff -uNr linux-2.6.7/fs/namespace.c linux-2.6.7-auto/fs/namespace.c
--- linux-2.6.7/fs/namespace.c 2004-06-18 13:44:02.000000000 +0100
+++ linux-2.6.7-auto/fs/namespace.c 2004-06-30 19:43:14.565098078 +0100
@@ -60,6 +60,7 @@
INIT_LIST_HEAD(&mnt->mnt_child);
INIT_LIST_HEAD(&mnt->mnt_mounts);
INIT_LIST_HEAD(&mnt->mnt_list);
+ INIT_LIST_HEAD(&mnt->mnt_fslink);
if (name) {
int size = strlen(name)+1;
char *newname = kmalloc(size, GFP_KERNEL);
@@ -106,13 +107,9 @@

EXPORT_SYMBOL(lookup_mnt);

-static int check_mnt(struct vfsmount *mnt)
+static inline int check_mnt(struct vfsmount *mnt)
{
- spin_lock(&vfsmount_lock);
- while (mnt->mnt_parent != mnt)
- mnt = mnt->mnt_parent;
- spin_unlock(&vfsmount_lock);
- return mnt == current->namespace->root;
+ return mnt->mnt_namespace == current->namespace;
}

static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
@@ -164,6 +161,11 @@
mnt->mnt_root = dget(root);
mnt->mnt_mountpoint = mnt->mnt_root;
mnt->mnt_parent = mnt;
+ mnt->mnt_namespace = old->mnt_namespace;
+ spin_lock(&vfsmount_lock);
+ if (!list_empty(&old->mnt_fslink))
+ list_add(&mnt->mnt_fslink, &old->mnt_fslink);
+ spin_unlock(&vfsmount_lock);
}
return mnt;
}
@@ -346,6 +348,7 @@
while (!list_empty(&kill)) {
mnt = list_entry(kill.next, struct vfsmount, mnt_list);
list_del_init(&mnt->mnt_list);
+ list_del_init(&mnt->mnt_fslink);
if (mnt->mnt_parent == mnt) {
spin_unlock(&vfsmount_lock);
} else {
@@ -369,6 +372,24 @@
return retval;

/*
+ * If we've been asked to mark for expiry, we only _actually_ effect
+ * the unmount if:
+ * (1) the mark is already set (the mark is cleared by mntput())
+ * (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
+ */
+ if (flags & MNT_EXPIRE) {
+ if (mnt == current->fs->rootmnt ||
+ flags & (MNT_FORCE | MNT_DETACH))
+ return -EINVAL;
+
+ if (atomic_read(&mnt->mnt_count) != 2)
+ return -EBUSY;
+
+ if (!xchg(&mnt->mnt_expiry_mark, 1))
+ return -EAGAIN;
+ }
+
+ /*
* If we may have to abort operations to get out of this
* mount, and they will themselves hold resources we must
* allow the fs to do things. In the Unix tradition of
@@ -461,7 +482,7 @@

retval = do_umount(nd.mnt, flags);
dput_and_out:
- path_release(&nd);
+ path_release_on_umount(&nd);
out:
return retval;
}
@@ -618,6 +639,10 @@
}

if (mnt) {
+ spin_lock(&vfsmount_lock);
+ list_del_init(&mnt->mnt_fslink);
+ spin_unlock(&vfsmount_lock);
+
err = graft_tree(mnt, nd);
if (err) {
spin_lock(&vfsmount_lock);
@@ -638,7 +663,8 @@
* on it - tough luck.
*/

-static int do_remount(struct nameidata *nd,int flags,int mnt_flags,void *data)
+static int do_remount(struct nameidata *nd, int flags, int mnt_flags,
+ void *data)
{
int err;
struct super_block * sb = nd->mnt->mnt_sb;
@@ -710,6 +736,7 @@

detach_mnt(old_nd.mnt, &parent_nd);
attach_mnt(old_nd.mnt, nd);
+ list_del_init(&old_nd.mnt->mnt_fslink);
out2:
spin_unlock(&vfsmount_lock);
out1:
@@ -722,11 +749,10 @@
return err;
}

-static int do_add_mount(struct nameidata *nd, char *type, int flags,
+static int do_new_mount(struct nameidata *nd, char *type, int flags,
int mnt_flags, char *name, void *data)
{
struct vfsmount *mnt;
- int err;

if (!type || !memchr(type, 0, PAGE_SIZE))
return -EINVAL;
@@ -736,9 +762,16 @@
return -EPERM;

mnt = do_kern_mount(type, flags, name, data);
- err = PTR_ERR(mnt);
if (IS_ERR(mnt))
- goto out;
+ return PTR_ERR(mnt);
+
+ return do_add_mount(mnt, nd, mnt_flags, NULL);
+}
+
+int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
+ int mnt_flags, struct list_head *fslist)
+{
+ int err;

down_write(&current->namespace->sem);
/* Something was mounted here while we slept */
@@ -750,22 +783,111 @@

/* Refuse the same filesystem on the same mount point */
err = -EBUSY;
- if (nd->mnt->mnt_sb == mnt->mnt_sb && nd->mnt->mnt_root == nd->dentry)
+ if (nd->mnt->mnt_sb == newmnt->mnt_sb &&
+ nd->mnt->mnt_root == nd->dentry)
goto unlock;

err = -EINVAL;
- if (S_ISLNK(mnt->mnt_root->d_inode->i_mode))
+ if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
goto unlock;

- mnt->mnt_flags = mnt_flags;
- err = graft_tree(mnt, nd);
+ newmnt->mnt_flags = mnt_flags;
+ err = graft_tree(newmnt, nd);
+
+ if (err == 0 && fslist) {
+ spin_lock(&vfsmount_lock);
+ list_add_tail(&newmnt->mnt_fslink, fslist);
+ spin_unlock(&vfsmount_lock);
+ }
+
unlock:
up_write(&current->namespace->sem);
- mntput(mnt);
-out:
+ mntput(newmnt);
return err;
}

+EXPORT_SYMBOL_GPL(do_add_mount);
+
+void mark_mounts_for_expiry(struct list_head *mounts)
+{
+ struct namespace *namespace;
+ struct list_head graveyard, *_p, *_n;
+ struct vfsmount *mnt;
+
+ INIT_LIST_HEAD(&graveyard);
+
+ spin_lock(&vfsmount_lock);
+
+ list_for_each_safe(_p, _n, mounts) {
+ mnt = list_entry(_p, struct vfsmount, mnt_fslink);
+
+ if (!xchg(&mnt->mnt_expiry_mark, 1) ||
+ atomic_read(&mnt->mnt_count) != 1)
+ continue;
+
+ mntget(mnt);
+ list_move(&mnt->mnt_fslink, &graveyard);
+ }
+
+ while (!list_empty(&graveyard)) {
+ mnt = list_entry(graveyard.next, struct vfsmount, mnt_fslink);
+ list_del_init(&mnt->mnt_fslink);
+
+ namespace = mnt->mnt_namespace;
+ if (!namespace || atomic_read(&namespace->count) <= 0)
+ continue;
+ get_namespace(namespace);
+
+ spin_unlock(&vfsmount_lock);
+ down_write(&namespace->sem);
+ spin_lock(&vfsmount_lock);
+
+ if (atomic_read(&mnt->mnt_count) == 2) {
+ struct vfsmount *xdmnt;
+ struct dentry *xdentry;
+
+ list_del_init(&mnt->mnt_list);
+ list_del_init(&mnt->mnt_child);
+ list_del_init(&mnt->mnt_hash);
+ mnt->mnt_mountpoint->d_mounted--;
+
+ xdentry = mnt->mnt_mountpoint;
+ mnt->mnt_mountpoint = mnt->mnt_root;
+ xdmnt = mnt->mnt_parent;
+ mnt->mnt_parent = mnt;
+ spin_unlock(&vfsmount_lock);
+
+ mntput(xdmnt);
+ dput(xdentry);
+
+ if (atomic_read(&mnt->mnt_sb->s_active) == 1) {
+ /* last instance - try to be smart */
+ lock_kernel();
+ DQUOT_OFF(mnt->mnt_sb);
+ acct_auto_close(mnt->mnt_sb);
+ unlock_kernel();
+ }
+
+ mntput(mnt);
+ }
+ else {
+ list_add_tail(&mnt->mnt_fslink, mounts);
+ spin_unlock(&vfsmount_lock);
+ }
+
+ up_write(&namespace->sem);
+
+ mntput(mnt);
+ put_namespace(namespace);
+
+ spin_lock(&vfsmount_lock);
+ }
+
+ spin_unlock(&vfsmount_lock);
+}
+
+EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
+
int copy_mount_options (const void __user *data, unsigned long *where)
{
int i;
@@ -860,7 +982,7 @@
else if (flags & MS_MOVE)
retval = do_move_mount(&nd, dev_name);
else
- retval = do_add_mount(&nd, type_page, flags, mnt_flags,
+ retval = do_new_mount(&nd, type_page, flags, mnt_flags,
dev_name, data_page);
dput_out:
path_release(&nd);
@@ -1185,6 +1307,7 @@
init_rwsem(&namespace->sem);
list_add(&mnt->mnt_list, &namespace->list);
namespace->root = mnt;
+ mnt->mnt_namespace = namespace;

init_task.namespace = namespace;
read_lock(&tasklist_lock);
@@ -1252,8 +1375,15 @@

void __put_namespace(struct namespace *namespace)
{
+ struct vfsmount *mnt;
+
down_write(&namespace->sem);
spin_lock(&vfsmount_lock);
+
+ list_for_each_entry(mnt, &namespace->list, mnt_list) {
+ mnt->mnt_namespace = NULL;
+ }
+
umount_tree(namespace->root);
spin_unlock(&vfsmount_lock);
up_write(&namespace->sem);
diff -uNr linux-2.6.7/fs/super.c linux-2.6.7-auto/fs/super.c
--- linux-2.6.7/fs/super.c 2004-06-18 13:44:04.000000000 +0100
+++ linux-2.6.7-auto/fs/super.c 2004-06-25 13:50:04.000000000 +0100
@@ -788,6 +788,7 @@
mnt->mnt_root = dget(sb->s_root);
mnt->mnt_mountpoint = sb->s_root;
mnt->mnt_parent = mnt;
+ mnt->mnt_namespace = current->namespace;
up_write(&sb->s_umount);
put_filesystem(type);
return mnt;
@@ -804,6 +805,8 @@
return (struct vfsmount *)sb;
}

+EXPORT_SYMBOL_GPL(do_kern_mount);
+
struct vfsmount *kern_mount(struct file_system_type *type)
{
return do_kern_mount(type->name, 0, type->name, NULL);
diff -uNr linux-2.6.7/include/linux/fs.h linux-2.6.7-auto/include/linux/fs.h
--- linux-2.6.7/include/linux/fs.h 2004-06-18 13:44:05.000000000 +0100
+++ linux-2.6.7-auto/include/linux/fs.h 2004-06-25 13:48:35.000000000 +0100
@@ -714,6 +714,7 @@

#define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */
#define MNT_DETACH 0x00000002 /* Just detach from the tree */
+#define MNT_EXPIRE 0x00000004 /* Mark for expiry */

extern struct list_head super_blocks;
extern spinlock_t sb_lock;
diff -uNr linux-2.6.7/include/linux/mount.h linux-2.6.7-auto/include/linux/mount.h
--- linux-2.6.7/include/linux/mount.h 2004-06-18 13:42:15.000000000 +0100
+++ linux-2.6.7-auto/include/linux/mount.h 2004-06-25 13:48:35.000000000 +0100
@@ -29,8 +29,11 @@
struct list_head mnt_child; /* and going through their mnt_child */
atomic_t mnt_count;
int mnt_flags;
+ int mnt_expiry_mark; /* true if marked for expiry */
char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
struct list_head mnt_list;
+ struct list_head mnt_fslink; /* link in fs-specific expiry list */
+ struct namespace *mnt_namespace; /* containing namespace */
};

static inline struct vfsmount *mntget(struct vfsmount *mnt)
@@ -42,7 +45,7 @@

extern void __mntput(struct vfsmount *mnt);

-static inline void mntput(struct vfsmount *mnt)
+static inline void _mntput(struct vfsmount *mnt)
{
if (mnt) {
if (atomic_dec_and_test(&mnt->mnt_count))
@@ -50,10 +53,26 @@
}
}

+static inline void mntput(struct vfsmount *mnt)
+{
+ if (mnt) {
+ mnt->mnt_expiry_mark = 0;
+ _mntput(mnt);
+ }
+}
+
extern void free_vfsmnt(struct vfsmount *mnt);
extern struct vfsmount *alloc_vfsmnt(const char *name);
extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
const char *name, void *data);
+
+struct nameidata;
+
+extern int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
+ int mnt_flags, struct list_head *fslist);
+
+extern void mark_mounts_for_expiry(struct list_head *mounts);
+
extern spinlock_t vfsmount_lock;

#endif
diff -uNr linux-2.6.7/include/linux/namei.h linux-2.6.7-auto/include/linux/namei.h
--- linux-2.6.7/include/linux/namei.h 2004-06-18 13:42:16.000000000 +0100
+++ linux-2.6.7-auto/include/linux/namei.h 2004-06-25 13:48:35.000000000 +0100
@@ -57,6 +57,7 @@
extern int FASTCALL(path_walk(const char *, struct nameidata *));
extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
extern void path_release(struct nameidata *);
+extern void path_release_on_umount(struct nameidata *);

extern struct dentry * lookup_one_len(const char *, struct dentry *, int);
extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
diff -uNr linux-2.6.7/include/linux/namespace.h linux-2.6.7-auto/include/linux/namespace.h
--- linux-2.6.7/include/linux/namespace.h 2004-06-18 13:42:16.000000000 +0100
+++ linux-2.6.7-auto/include/linux/namespace.h 2004-06-25 13:48:35.000000000 +0100
@@ -14,7 +14,7 @@

extern void umount_tree(struct vfsmount *);
extern int copy_namespace(int, struct task_struct *);
-void __put_namespace(struct namespace *namespace);
+extern void __put_namespace(struct namespace *namespace);

static inline void put_namespace(struct namespace *namespace)
{
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/