[PATCH 27/41] union-mount: in-kernel file copy between union mounted filesystems

From: Valerie Aurora
Date: Wed Oct 21 2009 - 15:24:47 EST


This patch introduces in-kernel file copy between union mounted
filesystems. When a file is opened for writing but resides on a lower (thus
read-only) layer of the union stack it is copied to the topmost union layer
first.

This patch uses the do_splice() for doing the in-kernel file copy.

XXX - Optimize for non-union mounts in union mount enabled kernels
(esp. call to is_unionized() in do_filp_open()).

XXX - "flags" argument to union_copyup() is unused - bug? Leftover
code?

Signed-off-by: Bharata B Rao <bharata@xxxxxxxxxx>
Signed-off-by: Jan Blunck <jblunck@xxxxxxx>
Signed-off-by: Valerie Aurora <vaurora@xxxxxxxxxx>
---
fs/namei.c | 64 +++++++++-
fs/union.c | 316 +++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/union.h | 7 +
3 files changed, 383 insertions(+), 4 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index fb463ac..f7ef769 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1050,7 +1050,7 @@ static int __follow_mount(struct path *path)
return res;
}

-static void follow_mount(struct path *path)
+void follow_mount(struct path *path)
{
while (d_mountpoint(path->dentry)) {
struct vfsmount *mounted = lookup_mnt(path);
@@ -1284,6 +1284,21 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
if (err)
break;

+ if ((nd->flags & LOOKUP_TOPMOST) &&
+ (nd->um_flags & LAST_LOWLEVEL)) {
+ struct dentry *dentry;
+
+ dentry = union_create_topmost(nd, &this, &next);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ goto out_dput;
+ }
+ path_put_conditional(&next, nd);
+ next.mnt = nd->path.mnt;
+ next.dentry = dentry;
+ nd->um_flags &= ~LAST_LOWLEVEL;
+ }
+
err = -ENOENT;
inode = next.dentry->d_inode;
if (!inode)
@@ -1333,6 +1348,22 @@ last_component:
err = do_lookup(nd, &this, &next);
if (err)
break;
+
+ if ((nd->flags & LOOKUP_TOPMOST) &&
+ (nd->um_flags & LAST_LOWLEVEL)) {
+ struct dentry *dentry;
+
+ dentry = union_create_topmost(nd, &this, &next);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ goto out_dput;
+ }
+ path_put_conditional(&next, nd);
+ next.mnt = nd->path.mnt;
+ next.dentry = dentry;
+ nd->um_flags &= ~LAST_LOWLEVEL;
+ }
+
inode = next.dentry->d_inode;
if ((lookup_flags & LOOKUP_FOLLOW)
&& inode && inode->i_op->follow_link) {
@@ -1709,7 +1740,7 @@ out:
return err;
}

-static int hash_lookup_union(struct nameidata *nd, struct qstr *name,
+int hash_lookup_union(struct nameidata *nd, struct qstr *name,
struct path *path)
{
struct path safe = { .dentry = nd->path.dentry, .mnt = nd->path.mnt };
@@ -2208,6 +2239,12 @@ struct file *do_filp_open(int dfd, const char *pathname,
&nd, flag);
if (error)
return ERR_PTR(error);
+ if (unlikely(flag & FMODE_WRITE)) {
+ /* Check for union, etc. in union_copyup */
+ error = union_copyup(&nd, flag /* XXX not used */);
+ if (error)
+ return ERR_PTR(error);
+ }
goto ok;
}

@@ -2311,10 +2348,23 @@ do_last:
if (path.dentry->d_inode->i_op->follow_link)
goto do_link;

- path_to_nameidata(&path, &nd);
error = -EISDIR;
if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
- goto exit;
+ goto exit_dput;
+
+ /*
+ * If this file is on a lower layer of the union stack, copy it to the
+ * topmost layer before opening it
+ */
+ if (path.dentry->d_inode &&
+ (path.dentry->d_parent != dir) &&
+ S_ISREG(path.dentry->d_inode->i_mode)) {
+ error = __union_copyup(&path, &nd, &path);
+ if (error)
+ goto exit_dput;
+ }
+
+ path_to_nameidata(&path, &nd);
ok:
/*
* Consider:
@@ -3472,6 +3522,12 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
error = -ENOTEMPTY;
if (new.dentry == trap)
goto exit5;
+ /* renaming on unions is done by the user-space */
+ error = -EXDEV;
+ if (is_unionized(oldnd.path.dentry, oldnd.path.mnt))
+ goto exit5;
+ if (is_unionized(newnd.path.dentry, newnd.path.mnt))
+ goto exit5;

error = mnt_want_write(oldnd.path.mnt);
if (error)
diff --git a/fs/union.c b/fs/union.c
index 341fc03..de31fc9 100644
--- a/fs/union.c
+++ b/fs/union.c
@@ -21,6 +21,14 @@
#include <linux/mount.h>
#include <linux/fs_struct.h>
#include <linux/union.h>
+#include <linux/namei.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/quotaops.h>
+#include <linux/dnotify.h>
+#include <linux/security.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/splice.h>

/*
* This is borrowed from fs/inode.c. The hashtable for lookups. Somebody
@@ -337,6 +345,314 @@ int follow_union_mount(struct vfsmount **mnt, struct dentry **dentry)
}

/*
+ * Union mount copyup support
+ */
+
+extern int hash_lookup_union(struct nameidata *, struct qstr *, struct path *);
+extern void follow_mount(struct path *path);
+
+/*
+ * union_relookup_topmost - lookup and create the topmost path to dentry
+ * @nd: pointer to nameidata
+ * @flags: lookup flags
+ */
+static int union_relookup_topmost(struct nameidata *nd, int flags)
+{
+ int err;
+ char *kbuf, *name;
+ struct nameidata this;
+
+ kbuf = (char *)__get_free_page(GFP_KERNEL);
+ if (!kbuf)
+ return -ENOMEM;
+
+ name = d_path(&nd->path, kbuf, PAGE_SIZE);
+ err = PTR_ERR(name);
+ if (IS_ERR(name))
+ goto free_page;
+
+ err = path_lookup(name, flags|LOOKUP_CREATE|LOOKUP_TOPMOST, &this);
+ if (err)
+ goto free_page;
+
+ path_put(&nd->path);
+ nd->path.dentry = this.path.dentry;
+ nd->path.mnt = this.path.mnt;
+
+ /*
+ * the nd->flags should be unchanged
+ */
+ BUG_ON(this.um_flags & LAST_LOWLEVEL);
+ nd->um_flags &= ~LAST_LOWLEVEL;
+ free_page:
+ free_page((unsigned long)kbuf);
+ return err;
+}
+
+/*
+ * union_create_topmost - create the topmost path component
+ * @nd: pointer to nameidata of the base directory
+ * @name: pointer to file name
+ * @path: pointer to path of the overlaid file
+ *
+ * This is called by __link_path_walk() to create the directories on a path
+ * when it is called with LOOKUP_TOPMOST.
+ */
+struct dentry *union_create_topmost(struct nameidata *nd, struct qstr *name,
+ struct path *path)
+{
+ struct dentry *dentry, *parent = nd->path.dentry;
+ int res, mode = path->dentry->d_inode->i_mode;
+
+ if (parent->d_sb == path->dentry->d_sb)
+ return ERR_PTR(-EEXIST);
+
+ res = mnt_want_write(nd->path.mnt);
+ if (res)
+ return ERR_PTR(res);
+
+ mutex_lock(&parent->d_inode->i_mutex);
+ dentry = lookup_one_len(name->name, nd->path.dentry, name->len);
+ if (IS_ERR(dentry))
+ goto out_unlock;
+
+ switch (mode & S_IFMT) {
+ case S_IFREG:
+ /*
+ * FIXME: Does this make any sense in this case?
+ * Special case - lookup gave negative, but... we had foo/bar/
+ * From the vfs_mknod() POV we just have a negative dentry -
+ * all is fine. Let's be bastards - you had / on the end,you've
+ * been asking for (non-existent) directory. -ENOENT for you.
+ */
+ if (name->name[name->len] && !dentry->d_inode) {
+ dput(dentry);
+ dentry = ERR_PTR(-ENOENT);
+ goto out_unlock;
+ }
+
+ res = vfs_create(parent->d_inode, dentry, mode, nd);
+ if (res) {
+ dput(dentry);
+ dentry = ERR_PTR(res);
+ goto out_unlock;
+ }
+ break;
+ case S_IFDIR:
+ res = vfs_mkdir(parent->d_inode, dentry, mode);
+ if (res) {
+ dput(dentry);
+ dentry = ERR_PTR(res);
+ goto out_unlock;
+ }
+
+ res = append_to_union(nd->path.mnt, dentry, path->mnt,
+ path->dentry);
+ if (res) {
+ dput(dentry);
+ dentry = ERR_PTR(res);
+ goto out_unlock;
+ }
+ break;
+ default:
+ dput(dentry);
+ dentry = ERR_PTR(-EINVAL);
+ goto out_unlock;
+ }
+
+ out_unlock:
+ mutex_unlock(&parent->d_inode->i_mutex);
+ mnt_drop_write(nd->path.mnt);
+ return dentry;
+}
+
+static int union_copy_file(struct dentry *old_dentry, struct vfsmount *old_mnt,
+ struct dentry *new_dentry, struct vfsmount *new_mnt)
+{
+ int ret;
+ size_t size;
+ loff_t offset;
+ struct file *old_file, *new_file;
+ const struct cred *cred = current_cred();
+
+ dget(old_dentry);
+ mntget(old_mnt);
+ old_file = dentry_open(old_dentry, old_mnt, O_RDONLY, cred);
+ if (IS_ERR(old_file))
+ return PTR_ERR(old_file);
+
+ dget(new_dentry);
+ mntget(new_mnt);
+ new_file = dentry_open(new_dentry, new_mnt, O_WRONLY, cred);
+ ret = PTR_ERR(new_file);
+ if (IS_ERR(new_file))
+ goto fput_old;
+
+ size = i_size_read(old_file->f_path.dentry->d_inode);
+ if (((size_t)size != size) || ((ssize_t)size != size)) {
+ ret = -EFBIG;
+ goto fput_new;
+ }
+
+ offset = 0;
+ ret = do_splice_direct(old_file, &offset, new_file, size,
+ SPLICE_F_MOVE);
+ if (ret >= 0)
+ ret = 0;
+ fput_new:
+ fput(new_file);
+ fput_old:
+ fput(old_file);
+ return ret;
+}
+
+/**
+ * __union_copyup - copy a file to the topmost directory
+ * @old: pointer to path of the old file name
+ * @new_nd: pointer to nameidata of the topmost directory
+ * @new: pointer to path of the new file name
+ *
+ * The topmost directory @new_nd must already be locked. Creates the topmost
+ * file if it doesn't exist yet.
+ */
+int __union_copyup(struct path *old, struct nameidata *new_nd, struct path *new)
+{
+ struct dentry *dentry;
+ int error;
+
+ /* Maybe this should be -EINVAL */
+ if (S_ISDIR(old->dentry->d_inode->i_mode))
+ return -EISDIR;
+
+ if (new_nd->path.dentry != new->dentry->d_parent) {
+ mutex_lock(&new_nd->path.dentry->d_inode->i_mutex);
+ dentry = lookup_one_len(new->dentry->d_name.name,
+ new_nd->path.dentry,
+ new->dentry->d_name.len);
+ mutex_unlock(&new_nd->path.dentry->d_inode->i_mutex);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+ error = -EEXIST;
+ if (dentry->d_inode)
+ goto out_dput;
+ } else
+ dentry = dget(new->dentry);
+
+ error = mnt_want_write(new_nd->path.mnt);
+ if (error)
+ goto out_dput;
+
+ if (!dentry->d_inode) {
+ error = vfs_create(new_nd->path.dentry->d_inode, dentry,
+ old->dentry->d_inode->i_mode, new_nd);
+ if (error)
+ goto out_drop_write;
+ }
+
+ BUG_ON(!S_ISREG(old->dentry->d_inode->i_mode));
+ error = union_copy_file(old->dentry, old->mnt, dentry,
+ new_nd->path.mnt);
+ if (error) {
+ /* FIXME: are there return value we should not
+ * BUG() on ? */
+ BUG_ON(vfs_unlink(new_nd->path.dentry->d_inode,
+ dentry));
+ goto out_drop_write;
+ }
+
+ mnt_drop_write(new_nd->path.mnt);
+ dput(new->dentry);
+ new->dentry = dentry;
+ if (new->mnt != new_nd->path.mnt)
+ mntput(new->mnt);
+ new->mnt = new_nd->path.mnt;
+ return error;
+
+out_drop_write:
+ mnt_drop_write(new_nd->path.mnt);
+out_dput:
+ dput(dentry);
+ return error;
+}
+
+/*
+ * union_copyup - copy a file to the topmost layer of the union stack
+ * @nd: nameidata pointer to the file
+ * @flags: flags given to open_namei
+ */
+int union_copyup(struct nameidata *nd, int flags)
+{
+ struct qstr this;
+ char *name;
+ struct dentry *dir;
+ struct path path;
+ int err;
+
+ if (!is_unionized(nd->path.dentry, nd->path.mnt))
+ return 0;
+ if (!S_ISREG(nd->path.dentry->d_inode->i_mode))
+ return 0;
+
+ /* safe the name for hash_lookup_union() */
+ this.len = nd->path.dentry->d_name.len;
+ this.hash = nd->path.dentry->d_name.hash;
+ name = kmalloc(this.len + 1, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+ this.name = name;
+ memcpy(name, nd->path.dentry->d_name.name, nd->path.dentry->d_name.len);
+ name[this.len] = 0;
+
+ err = union_relookup_topmost(nd, nd->flags|LOOKUP_PARENT);
+ if (err) {
+ kfree(name);
+ return err;
+ }
+ nd->flags &= ~LOOKUP_PARENT;
+
+ dir = nd->path.dentry;
+ mutex_lock(&dir->d_inode->i_mutex);
+ err = hash_lookup_union(nd, &this, &path);
+ mutex_unlock(&dir->d_inode->i_mutex);
+ kfree(name);
+ if (err)
+ return err;
+
+ err = -ENOENT;
+ if (!path.dentry->d_inode)
+ goto exit_dput;
+
+ /* Necessary?! I guess not ... */
+ follow_mount(&path);
+
+ err = -ENOENT;
+ if (!path.dentry->d_inode)
+ goto exit_dput;
+
+ err = -EISDIR;
+ if (!S_ISREG(path.dentry->d_inode->i_mode))
+ goto exit_dput;
+
+ if (path.dentry->d_parent != nd->path.dentry) {
+ err = __union_copyup(&path, nd, &path);
+ if (err)
+ goto exit_dput;
+ }
+
+ dput(nd->path.dentry);
+ if (nd->path.mnt != path.mnt)
+ mntput(nd->path.mnt);
+ nd->path = path;
+ return 0;
+
+exit_dput:
+ dput(path.dentry);
+ if (path.mnt != nd->path.mnt)
+ mntput(path.mnt);
+ return err;
+}
+
+/*
* This must be called when unhashing a dentry. This is called with dcache_lock
* and unhashes all unions this dentry is in.
*/
diff --git a/include/linux/union.h b/include/linux/union.h
index 0b6f356..405baa9 100644
--- a/include/linux/union.h
+++ b/include/linux/union.h
@@ -53,6 +53,10 @@ extern void __shrink_d_unions(struct dentry *, struct list_head *);
extern int attach_mnt_union(struct vfsmount *, struct vfsmount *,
struct dentry *);
extern void detach_mnt_union(struct vfsmount *);
+extern struct dentry *union_create_topmost(struct nameidata *, struct qstr *,
+ struct path *);
+extern int __union_copyup(struct path *, struct nameidata *, struct path *);
+extern int union_copyup(struct nameidata *, int);

#else /* CONFIG_UNION_MOUNT */

@@ -67,6 +71,9 @@ extern void detach_mnt_union(struct vfsmount *);
#define __shrink_d_unions(x,y) do { } while (0)
#define attach_mnt_union(x, y, z) do { } while (0)
#define detach_mnt_union(x) do { } while (0)
+#define union_create_topmost(x, y, z) ({ BUG(); (NULL); })
+#define __union_copyup(x, y, z) ({ BUG(); (0); })
+#define union_copyup(x, y) ({ (0); })

#endif /* CONFIG_UNION_MOUNT */
#endif /* __KERNEL__ */
--
1.6.3.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/