[PATCH 08/19] Unionfs: lookup overhaul using vfs_path_lookup

From: Erez Zadok
Date: Tue Jul 29 2008 - 22:49:18 EST


Rework the lookup code to use vfs_path_lookup as much as possible, to ensure
that we have a vfsmount at this critical stage. This is necessary for the
upcoming VFS API change from vfs_* to path_* methods. This change also
allows unionfs to cross bind mounts and other mounts on lower branches.

Signed-off-by: Erez Zadok <ezk@xxxxxxxxxxxxx>
---
fs/unionfs/commonfops.c | 3 +-
fs/unionfs/inode.c | 18 ++-
fs/unionfs/lookup.c | 348 ++++++++++++++++++++++++++++++++++++++++++++++-
fs/unionfs/rename.c | 1 +
fs/unionfs/union.h | 4 +
5 files changed, 366 insertions(+), 8 deletions(-)

diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
index 5816d41..df002d5 100644
--- a/fs/unionfs/commonfops.c
+++ b/fs/unionfs/commonfops.c
@@ -422,7 +422,8 @@ reval_dentry:
dgen = atomic_read(&UNIONFS_D(dentry)->generation);

if (unlikely(sbgen > dgen)) {
- pr_debug("unionfs: retry dentry revalidation\n");
+ pr_debug("unionfs: retry dentry %s revalidation\n",
+ dentry->d_name.name);
schedule();
goto reval_dentry;
}
diff --git a/fs/unionfs/inode.c b/fs/unionfs/inode.c
index bfebc0c..f81ebb6 100644
--- a/fs/unionfs/inode.c
+++ b/fs/unionfs/inode.c
@@ -178,6 +178,7 @@ static struct dentry *unionfs_lookup(struct inode *parent,
{
struct path path_save = {NULL, NULL};
struct dentry *ret;
+ int err = 0;

unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
if (dentry != dentry->d_parent)
@@ -193,7 +194,14 @@ static struct dentry *unionfs_lookup(struct inode *parent,
* unionfs_lookup_backend returns a locked dentry upon success,
* so we'll have to unlock it below.
*/
- ret = unionfs_lookup_backend(dentry, nd, INTERPOSE_LOOKUP);
+
+ /* allocate dentry private data. We free it in ->d_release */
+ err = new_dentry_private_data(dentry, UNIONFS_DMUTEX_CHILD);
+ if (unlikely(err)) {
+ ret = ERR_PTR(err);
+ goto out;
+ }
+ ret = unionfs_lookup_full(dentry, nd, INTERPOSE_LOOKUP);

/* restore the dentry & vfsmnt in namei */
if (nd) {
@@ -203,6 +211,11 @@ static struct dentry *unionfs_lookup(struct inode *parent,
if (!IS_ERR(ret)) {
if (ret)
dentry = ret;
+ /* lookup_full can return multiple positive dentries */
+ if (dentry->d_inode && !S_ISDIR(dentry->d_inode->i_mode)) {
+ BUG_ON(dbstart(dentry) < 0);
+ unionfs_postcopyup_release(dentry);
+ }
unionfs_copy_attr_times(dentry->d_inode);
/* parent times may have changed */
unionfs_copy_attr_times(dentry->d_parent->d_inode);
@@ -212,9 +225,10 @@ static struct dentry *unionfs_lookup(struct inode *parent,
if (!IS_ERR(ret)) {
unionfs_check_dentry(dentry);
unionfs_check_nd(nd);
- unionfs_unlock_dentry(dentry);
}
+ unionfs_unlock_dentry(dentry);

+out:
if (dentry != dentry->d_parent) {
unionfs_check_dentry(dentry->d_parent);
unionfs_unlock_dentry(dentry->d_parent);
diff --git a/fs/unionfs/lookup.c b/fs/unionfs/lookup.c
index 0f62087..37adf66 100644
--- a/fs/unionfs/lookup.c
+++ b/fs/unionfs/lookup.c
@@ -18,7 +18,49 @@

#include "union.h"

-static int realloc_dentry_private_data(struct dentry *dentry);
+/*
+ * Lookup one path component @name relative to a <base,mnt> path pair.
+ * Behaves nearly the same as lookup_one_len (i.e., return negative dentry
+ * on ENOENT), but uses the @mnt passed, so it can cross bind mounts and
+ * other lower mounts properly. If @new_mnt is non-null, will fill in the
+ * new mnt there. Caller is responsible to dput/mntput/path_put returned
+ * @dentry and @new_mnt.
+ */
+struct dentry *__lookup_one(struct dentry *base, struct vfsmount *mnt,
+ const char *name, struct vfsmount **new_mnt)
+{
+ struct dentry *dentry = NULL;
+ struct nameidata lower_nd;
+ int err;
+
+ /* we use flags=0 to get basic lookup */
+ err = vfs_path_lookup(base, mnt, name, 0, &lower_nd);
+
+ switch (err) {
+ case 0: /* no error */
+ dentry = lower_nd.path.dentry;
+ if (new_mnt)
+ *new_mnt = lower_nd.path.mnt; /* rc already inc'ed */
+ break;
+ case -ENOENT:
+ /*
+ * We don't consider ENOENT an error, and we want to return
+ * a negative dentry (ala lookup_one_len). As we know
+ * there was no inode for this name before (-ENOENT), then
+ * it's safe to call lookup_one_len (which doesn't take a
+ * vfsmount).
+ */
+ dentry = lookup_one_len(name, base, strlen(name));
+ if (new_mnt)
+ *new_mnt = mntget(lower_nd.path.mnt);
+ break;
+ default: /* all other real errors */
+ dentry = ERR_PTR(err);
+ break;
+ }
+
+ return dentry;
+}

/*
* Main (and complex) driver function for Unionfs's lookup
@@ -32,7 +74,8 @@ static int realloc_dentry_private_data(struct dentry *dentry);
* dentry's info, which the caller must unlock.
*/
struct dentry *unionfs_lookup_backend(struct dentry *dentry,
- struct nameidata *nd, int lookupmode)
+ struct nameidata *nd_unused,
+ int lookupmode)
{
int err = 0;
struct dentry *lower_dentry = NULL;
@@ -361,6 +404,7 @@ out:
* Caller must lock this dentry with unionfs_lock_dentry.
*
* Returns: 0 (ok), or -ERRNO if an error occurred.
+ * XXX: get rid of _partial_lookup and make callers call _lookup_full directly
*/
int unionfs_partial_lookup(struct dentry *dentry)
{
@@ -368,7 +412,8 @@ int unionfs_partial_lookup(struct dentry *dentry)
struct nameidata nd = { .flags = 0 };
int err = -ENOSYS;

- tmp = unionfs_lookup_backend(dentry, &nd, INTERPOSE_PARTIAL);
+ tmp = unionfs_lookup_full(dentry, &nd, INTERPOSE_PARTIAL);
+
if (!tmp) {
err = 0;
goto out;
@@ -377,7 +422,7 @@ int unionfs_partial_lookup(struct dentry *dentry)
err = PTR_ERR(tmp);
goto out;
}
- /* need to change the interface */
+ /* XXX: need to change the interface */
BUG_ON(tmp != dentry);
out:
return err;
@@ -437,7 +482,7 @@ static inline int __realloc_dentry_private_data(struct dentry *dentry)
}

/* UNIONFS_D(dentry)->lock must be locked */
-static int realloc_dentry_private_data(struct dentry *dentry)
+int realloc_dentry_private_data(struct dentry *dentry)
{
if (!__realloc_dentry_private_data(dentry))
return 0;
@@ -568,3 +613,296 @@ void release_lower_nd(struct nameidata *nd, int err)
kfree(nd->intent.open.file);
#endif /* ALLOC_LOWER_ND_FILE */
}
+
+/*
+ * Main (and complex) driver function for Unionfs's lookup
+ *
+ * Returns: NULL (ok), ERR_PTR if an error occurred, or a non-null non-error
+ * PTR if d_splice returned a different dentry.
+ *
+ * If lookupmode is INTERPOSE_PARTIAL/REVAL/REVAL_NEG, the passed dentry's
+ * inode info must be locked. If lookupmode is INTERPOSE_LOOKUP (i.e., a
+ * newly looked-up dentry), then unionfs_lookup_backend will return a locked
+ * dentry's info, which the caller must unlock.
+ */
+struct dentry *unionfs_lookup_full(struct dentry *dentry,
+ struct nameidata *nd_unused, int lookupmode)
+{
+ int err = 0;
+ struct dentry *lower_dentry = NULL;
+ struct vfsmount *lower_mnt;
+ struct vfsmount *lower_dir_mnt;
+ struct dentry *wh_lower_dentry = NULL;
+ struct dentry *lower_dir_dentry = NULL;
+ struct dentry *parent_dentry = NULL;
+ struct dentry *d_interposed = NULL;
+ int bindex, bstart, bend, bopaque;
+ int opaque, num_positive = 0;
+ const char *name;
+ int namelen;
+ int pos_start, pos_end;
+
+ /*
+ * We should already have a lock on this dentry in the case of a
+ * partial lookup, or a revalidation. Otherwise it is returned from
+ * new_dentry_private_data already locked.
+ */
+ verify_locked(dentry);
+
+ /* must initialize dentry operations */
+ dentry->d_op = &unionfs_dops;
+
+ /* We never partial lookup the root directory. */
+ if (IS_ROOT(dentry))
+ goto out;
+ parent_dentry = dget_parent(dentry);
+
+ name = dentry->d_name.name;
+ namelen = dentry->d_name.len;
+
+ /* No dentries should get created for possible whiteout names. */
+ if (!is_validname(name)) {
+ err = -EPERM;
+ goto out_free;
+ }
+
+ /* Now start the actual lookup procedure. */
+ bstart = dbstart(parent_dentry);
+ bend = dbend(parent_dentry);
+ bopaque = dbopaque(parent_dentry);
+ BUG_ON(bstart < 0);
+
+ /* adjust bend to bopaque if needed */
+ if ((bopaque >= 0) && (bopaque < bend))
+ bend = bopaque;
+
+ /* lookup all possible dentries */
+ for (bindex = bstart; bindex <= bend; bindex++) {
+
+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
+ lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
+
+ /* skip if we already have a positive lower dentry */
+ if (lower_dentry) {
+ if (dbstart(dentry) < 0)
+ dbstart(dentry) = bindex;
+ if (bindex > dbend(dentry))
+ dbend(dentry) = bindex;
+ if (lower_dentry->d_inode)
+ num_positive++;
+ continue;
+ }
+
+ lower_dir_dentry =
+ unionfs_lower_dentry_idx(parent_dentry, bindex);
+ /* if the lower dentry's parent does not exist, skip this */
+ if (!lower_dir_dentry || !lower_dir_dentry->d_inode)
+ continue;
+
+ /* also skip it if the parent isn't a directory. */
+ if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode))
+ continue; /* XXX: should be BUG_ON */
+
+ /* check for whiteouts: stop lookup if found */
+ wh_lower_dentry = lookup_whiteout(name, lower_dir_dentry);
+ if (IS_ERR(wh_lower_dentry)) {
+ err = PTR_ERR(wh_lower_dentry);
+ goto out_free;
+ }
+ if (wh_lower_dentry->d_inode) {
+ dbend(dentry) = dbopaque(dentry) = bindex;
+ if (dbstart(dentry) < 0)
+ dbstart(dentry) = bindex;
+ dput(wh_lower_dentry);
+ break;
+ }
+ dput(wh_lower_dentry);
+
+ /* Now do regular lookup; lookup @name */
+ lower_dir_mnt = unionfs_lower_mnt_idx(parent_dentry, bindex);
+ lower_mnt = NULL; /* XXX: needed? */
+
+ lower_dentry = __lookup_one(lower_dir_dentry, lower_dir_mnt,
+ name, &lower_mnt);
+
+ if (IS_ERR(lower_dentry)) {
+ err = PTR_ERR(lower_dentry);
+ goto out_free;
+ }
+ unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
+ BUG_ON(!lower_mnt);
+ unionfs_set_lower_mnt_idx(dentry, bindex, lower_mnt);
+
+ /* adjust dbstart/end */
+ if (dbstart(dentry) < 0)
+ dbstart(dentry) = bindex;
+ if (bindex > dbend(dentry))
+ dbend(dentry) = bindex;
+ /*
+ * We always store the lower dentries above, and update
+ * dbstart/dbend, even if the whole unionfs dentry is
+ * negative (i.e., no lower inodes).
+ */
+ if (!lower_dentry->d_inode)
+ continue;
+ num_positive++;
+
+ /*
+ * check if we just found an opaque directory, if so, stop
+ * lookups here.
+ */
+ if (!S_ISDIR(lower_dentry->d_inode->i_mode))
+ continue;
+ opaque = is_opaque_dir(dentry, bindex);
+ if (opaque < 0) {
+ err = opaque;
+ goto out_free;
+ } else if (opaque) {
+ dbend(dentry) = dbopaque(dentry) = bindex;
+ break;
+ }
+ dbend(dentry) = bindex;
+
+ /* update parent directory's atime with the bindex */
+ fsstack_copy_attr_atime(parent_dentry->d_inode,
+ lower_dir_dentry->d_inode);
+ }
+
+ /* sanity checks, then decide if to process a negative dentry */
+ BUG_ON(dbstart(dentry) < 0 && dbend(dentry) >= 0);
+ BUG_ON(dbstart(dentry) >= 0 && dbend(dentry) < 0);
+
+ if (num_positive > 0)
+ goto out_positive;
+
+ /*** handle NEGATIVE dentries ***/
+
+ /*
+ * If negative, keep only first lower negative dentry, to save on
+ * memory.
+ */
+ if (dbstart(dentry) < dbend(dentry)) {
+ path_put_lowers(dentry, dbstart(dentry) + 1,
+ dbend(dentry), false);
+ dbend(dentry) = dbstart(dentry);
+ }
+ if (lookupmode == INTERPOSE_PARTIAL)
+ goto out;
+ if (lookupmode == INTERPOSE_LOOKUP) {
+ /*
+ * If all we found was a whiteout in the first available
+ * branch, then create a negative dentry for a possibly new
+ * file to be created.
+ */
+ if (dbopaque(dentry) < 0)
+ goto out;
+ /* XXX: need to get mnt here */
+ bindex = dbstart(dentry);
+ if (unionfs_lower_dentry_idx(dentry, bindex))
+ goto out;
+ lower_dir_dentry =
+ unionfs_lower_dentry_idx(parent_dentry, bindex);
+ if (!lower_dir_dentry || !lower_dir_dentry->d_inode)
+ goto out;
+ if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode))
+ goto out; /* XXX: should be BUG_ON */
+ /* XXX: do we need to cross bind mounts here? */
+ lower_dentry = lookup_one_len(name, lower_dir_dentry, namelen);
+ if (IS_ERR(lower_dentry)) {
+ err = PTR_ERR(lower_dentry);
+ goto out;
+ }
+ /* XXX: need to mntget/mntput as needed too! */
+ unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
+ /* XXX: wrong mnt for crossing bind mounts! */
+ lower_mnt = unionfs_mntget(dentry->d_sb->s_root, bindex);
+ unionfs_set_lower_mnt_idx(dentry, bindex, lower_mnt);
+
+ goto out;
+ }
+
+ /* if we're revalidating a positive dentry, don't make it negative */
+ if (lookupmode != INTERPOSE_REVAL)
+ d_add(dentry, NULL);
+
+ goto out;
+
+out_positive:
+ /*** handle POSITIVE dentries ***/
+
+ /*
+ * This unionfs dentry is positive (at least one lower inode
+ * exists), so scan entire dentry from beginning to end, and remove
+ * any negative lower dentries, if any. Then, update dbstart/dbend
+ * to reflect the start/end of positive dentries.
+ */
+ pos_start = pos_end = -1;
+ for (bindex = bstart; bindex <= bend; bindex++) {
+ lower_dentry = unionfs_lower_dentry_idx(dentry,
+ bindex);
+ if (lower_dentry && lower_dentry->d_inode) {
+ if (pos_start < 0)
+ pos_start = bindex;
+ if (bindex > pos_end)
+ pos_end = bindex;
+ continue;
+ }
+ path_put_lowers(dentry, bindex, bindex, false);
+ }
+ if (pos_start >= 0)
+ dbstart(dentry) = pos_start;
+ if (pos_end >= 0)
+ dbend(dentry) = pos_end;
+
+ /* Partial lookups need to re-interpose, or throw away older negs. */
+ if (lookupmode == INTERPOSE_PARTIAL) {
+ if (dentry->d_inode) {
+ unionfs_reinterpose(dentry);
+ goto out;
+ }
+
+ /*
+ * This dentry was positive, so it is as if we had a
+ * negative revalidation.
+ */
+ lookupmode = INTERPOSE_REVAL_NEG;
+ update_bstart(dentry);
+ }
+
+ /*
+ * Interpose can return a dentry if d_splice returned a different
+ * dentry.
+ */
+ d_interposed = unionfs_interpose(dentry, dentry->d_sb, lookupmode);
+ if (IS_ERR(d_interposed))
+ err = PTR_ERR(d_interposed);
+ else if (d_interposed)
+ dentry = d_interposed;
+
+ if (!err)
+ goto out;
+ d_drop(dentry);
+
+out_free:
+ /* should dput/mntput all the underlying dentries on error condition */
+ if (dbstart(dentry) >= 0)
+ path_put_lowers_all(dentry, false);
+ /* free lower_paths unconditionally */
+ kfree(UNIONFS_D(dentry)->lower_paths);
+ UNIONFS_D(dentry)->lower_paths = NULL;
+
+out:
+ if (dentry && UNIONFS_D(dentry)) {
+ BUG_ON(dbstart(dentry) < 0 && dbend(dentry) >= 0);
+ BUG_ON(dbstart(dentry) >= 0 && dbend(dentry) < 0);
+ }
+ if (d_interposed && UNIONFS_D(d_interposed)) {
+ BUG_ON(dbstart(d_interposed) < 0 && dbend(d_interposed) >= 0);
+ BUG_ON(dbstart(d_interposed) >= 0 && dbend(d_interposed) < 0);
+ }
+
+ dput(parent_dentry);
+ if (!err && d_interposed)
+ return d_interposed;
+ return ERR_PTR(err);
+}
diff --git a/fs/unionfs/rename.c b/fs/unionfs/rename.c
index 85f96ee..ecce9da 100644
--- a/fs/unionfs/rename.c
+++ b/fs/unionfs/rename.c
@@ -195,6 +195,7 @@ static int do_unionfs_rename(struct inode *old_dir,
struct dentry *unlink_dentry;
struct dentry *unlink_dir_dentry;

+ BUG_ON(bindex < 0);
unlink_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
if (!unlink_dentry)
continue;
diff --git a/fs/unionfs/union.h b/fs/unionfs/union.h
index 9271b3b..81f34c4 100644
--- a/fs/unionfs/union.h
+++ b/fs/unionfs/union.h
@@ -294,6 +294,7 @@ static inline void unionfs_double_lock_dentry(struct dentry *d1,
}

extern int new_dentry_private_data(struct dentry *dentry, int subclass);
+extern int realloc_dentry_private_data(struct dentry *dentry);
extern void free_dentry_private_data(struct dentry *dentry);
extern void update_bstart(struct dentry *dentry);
extern int init_lower_nd(struct nameidata *nd, unsigned int flags);
@@ -309,6 +310,9 @@ extern struct dentry *create_parents(struct inode *dir, struct dentry *dentry,

/* partial lookup */
extern int unionfs_partial_lookup(struct dentry *dentry);
+extern struct dentry *unionfs_lookup_full(struct dentry *dentry,
+ struct nameidata *nd_unused,
+ int lookupmode);

/* copies a file from dbstart to newbindex branch */
extern int copyup_file(struct inode *dir, struct file *file, int bstart,
--
1.5.2.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/