[PATCH 10/29] Unionfs: dentry revalidation

From: Erez Zadok
Date: Thu Jan 10 2008 - 10:09:23 EST


Includes d_release methods and cache-coherency support for dentries.

Signed-off-by: Erez Zadok <ezk@xxxxxxxxxxxxx>
---
fs/unionfs/dentry.c | 548 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 548 insertions(+), 0 deletions(-)
create mode 100644 fs/unionfs/dentry.c

diff --git a/fs/unionfs/dentry.c b/fs/unionfs/dentry.c
new file mode 100644
index 0000000..cd15243
--- /dev/null
+++ b/fs/unionfs/dentry.c
@@ -0,0 +1,548 @@
+/*
+ * Copyright (c) 2003-2007 Erez Zadok
+ * Copyright (c) 2003-2006 Charles P. Wright
+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
+ * Copyright (c) 2005-2006 Junjiro Okajima
+ * Copyright (c) 2005 Arun M. Krishnakumar
+ * Copyright (c) 2004-2006 David P. Quigley
+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
+ * Copyright (c) 2003 Puja Gupta
+ * Copyright (c) 2003 Harikesavan Krishnan
+ * Copyright (c) 2003-2007 Stony Brook University
+ * Copyright (c) 2003-2007 The Research Foundation of SUNY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "union.h"
+
+/*
+ * Revalidate a single dentry.
+ * Assume that dentry's info node is locked.
+ * Assume that parent(s) are all valid already, but
+ * the child may not yet be valid.
+ * Returns true if valid, false otherwise.
+ */
+static bool __unionfs_d_revalidate_one(struct dentry *dentry,
+ struct nameidata *nd)
+{
+ bool valid = true; /* default is valid */
+ struct dentry *lower_dentry;
+ int bindex, bstart, bend;
+ int sbgen, dgen;
+ int positive = 0;
+ int interpose_flag;
+ struct nameidata lowernd; /* TODO: be gentler to the stack */
+
+ if (nd)
+ memcpy(&lowernd, nd, sizeof(struct nameidata));
+ else
+ memset(&lowernd, 0, sizeof(struct nameidata));
+
+ verify_locked(dentry);
+ verify_locked(dentry->d_parent);
+
+ /* if the dentry is unhashed, do NOT revalidate */
+ if (d_deleted(dentry))
+ goto out;
+
+ BUG_ON(dbstart(dentry) == -1);
+ if (dentry->d_inode)
+ positive = 1;
+ dgen = atomic_read(&UNIONFS_D(dentry)->generation);
+ sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
+ /*
+ * If we are working on an unconnected dentry, then there is no
+ * revalidation to be done, because this file does not exist within
+ * the namespace, and Unionfs operates on the namespace, not data.
+ */
+ if (unlikely(sbgen != dgen)) {
+ struct dentry *result;
+ int pdgen;
+
+ /* The root entry should always be valid */
+ BUG_ON(IS_ROOT(dentry));
+
+ /* We can't work correctly if our parent isn't valid. */
+ pdgen = atomic_read(&UNIONFS_D(dentry->d_parent)->generation);
+ BUG_ON(pdgen != sbgen); /* should never happen here */
+
+ /* Free the pointers for our inodes and this dentry. */
+ bstart = dbstart(dentry);
+ bend = dbend(dentry);
+ if (bstart >= 0) {
+ struct dentry *lower_dentry;
+ for (bindex = bstart; bindex <= bend; bindex++) {
+ lower_dentry =
+ unionfs_lower_dentry_idx(dentry,
+ bindex);
+ dput(lower_dentry);
+ }
+ }
+ set_dbstart(dentry, -1);
+ set_dbend(dentry, -1);
+
+ interpose_flag = INTERPOSE_REVAL_NEG;
+ if (positive) {
+ interpose_flag = INTERPOSE_REVAL;
+
+ bstart = ibstart(dentry->d_inode);
+ bend = ibend(dentry->d_inode);
+ if (bstart >= 0) {
+ struct inode *lower_inode;
+ for (bindex = bstart; bindex <= bend;
+ bindex++) {
+ lower_inode =
+ unionfs_lower_inode_idx(
+ dentry->d_inode,
+ bindex);
+ iput(lower_inode);
+ }
+ }
+ kfree(UNIONFS_I(dentry->d_inode)->lower_inodes);
+ UNIONFS_I(dentry->d_inode)->lower_inodes = NULL;
+ ibstart(dentry->d_inode) = -1;
+ ibend(dentry->d_inode) = -1;
+ }
+
+ result = unionfs_lookup_backend(dentry, &lowernd,
+ interpose_flag);
+ if (result) {
+ if (IS_ERR(result)) {
+ valid = false;
+ goto out;
+ }
+ /*
+ * current unionfs_lookup_backend() doesn't return
+ * a valid dentry
+ */
+ dput(dentry);
+ dentry = result;
+ }
+
+ if (unlikely(positive && UNIONFS_I(dentry->d_inode)->stale)) {
+ make_bad_inode(dentry->d_inode);
+ d_drop(dentry);
+ valid = false;
+ goto out;
+ }
+ goto out;
+ }
+
+ /* The revalidation must occur across all branches */
+ bstart = dbstart(dentry);
+ bend = dbend(dentry);
+ BUG_ON(bstart == -1);
+ for (bindex = bstart; bindex <= bend; bindex++) {
+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
+ if (!lower_dentry || !lower_dentry->d_op
+ || !lower_dentry->d_op->d_revalidate)
+ continue;
+ /*
+ * Don't pass nameidata to lower file system, because we
+ * don't want an arbitrary lower file being opened or
+ * returned to us: it may be useless to us because of the
+ * fanout nature of unionfs (cf. file/directory open-file
+ * invariants). We will open lower files as and when needed
+ * later on.
+ */
+ if (!lower_dentry->d_op->d_revalidate(lower_dentry, NULL))
+ valid = false;
+ }
+
+ if (!dentry->d_inode ||
+ ibstart(dentry->d_inode) < 0 ||
+ ibend(dentry->d_inode) < 0) {
+ valid = false;
+ goto out;
+ }
+
+ if (valid) {
+ /*
+ * If we get here, and we copy the meta-data from the lower
+ * inode to our inode, then it is vital that we have already
+ * purged all unionfs-level file data. We do that in the
+ * caller (__unionfs_d_revalidate_chain) by calling
+ * purge_inode_data.
+ */
+ unionfs_copy_attr_all(dentry->d_inode,
+ unionfs_lower_inode(dentry->d_inode));
+ fsstack_copy_inode_size(dentry->d_inode,
+ unionfs_lower_inode(dentry->d_inode));
+ }
+
+out:
+ return valid;
+}
+
+/*
+ * Determine if the lower inode objects have changed from below the unionfs
+ * inode. Return true if changed, false otherwise.
+ *
+ * We check if the mtime or ctime have changed. However, the inode times
+ * can be changed by anyone without much protection, including
+ * asynchronously. This can sometimes cause unionfs to find that the lower
+ * file system doesn't change its inode times quick enough, resulting in a
+ * false positive indication (which is harmless, it just makes unionfs do
+ * extra work in re-validating the objects). To minimize the chances of
+ * these situations, we still consider such small time changes valid, but we
+ * don't print debugging messages unless the time changes are greater than
+ * UNIONFS_MIN_CC_TIME (which defaults to 3 seconds, as with NFS's acregmin)
+ * because significant changes are more likely due to users manually
+ * touching lower files.
+ */
+bool is_newer_lower(const struct dentry *dentry)
+{
+ int bindex;
+ struct inode *inode;
+ struct inode *lower_inode;
+
+ /* ignore if we're called on semi-initialized dentries/inodes */
+ if (!dentry || !UNIONFS_D(dentry))
+ return false;
+ inode = dentry->d_inode;
+ if (!inode || !UNIONFS_I(inode)->lower_inodes ||
+ ibstart(inode) < 0 || ibend(inode) < 0)
+ return false;
+
+ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
+ if (!lower_inode)
+ continue;
+
+ /* check if mtime/ctime have changed */
+ if (unlikely(timespec_compare(&inode->i_mtime,
+ &lower_inode->i_mtime) < 0)) {
+ if ((lower_inode->i_mtime.tv_sec -
+ inode->i_mtime.tv_sec) > UNIONFS_MIN_CC_TIME) {
+ pr_info("unionfs: new lower inode mtime "
+ "(bindex=%d, name=%s)\n", bindex,
+ dentry->d_name.name);
+ show_dinode_times(dentry);
+ }
+ return true;
+ }
+ if (unlikely(timespec_compare(&inode->i_ctime,
+ &lower_inode->i_ctime) < 0)) {
+ if ((lower_inode->i_ctime.tv_sec -
+ inode->i_ctime.tv_sec) > UNIONFS_MIN_CC_TIME) {
+ pr_info("unionfs: new lower inode ctime "
+ "(bindex=%d, name=%s)\n", bindex,
+ dentry->d_name.name);
+ show_dinode_times(dentry);
+ }
+ return true;
+ }
+ }
+ return false; /* default: lower is not newer */
+}
+
+/*
+ * Purge and invalidate as many data pages of a unionfs inode. This is
+ * called when the lower inode has changed, and we want to force processes
+ * to re-get the new data.
+ */
+static inline void purge_inode_data(struct inode *inode)
+{
+ /* remove all non-private mappings */
+ unmap_mapping_range(inode->i_mapping, 0, 0, 0);
+ /* invalidate as many pages as possible */
+ invalidate_mapping_pages(inode->i_mapping, 0, -1);
+ /*
+ * Don't try to truncate_inode_pages here, because this could lead
+ * to a deadlock between some of address_space ops and dentry
+ * revalidation: the address space op is invoked with a lock on our
+ * own page, and truncate_inode_pages will block on locked pages.
+ */
+}
+
+void purge_sb_data(struct super_block *sb)
+{
+ struct inode *inode;
+
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+ if (inode->i_state & (I_FREEING|I_WILL_FREE))
+ continue;
+ purge_inode_data(inode);
+ }
+}
+
+/*
+ * Revalidate a parent chain of dentries, then the actual node.
+ * Assumes that dentry is locked, but will lock all parents if/when needed.
+ *
+ * If 'willwrite' is true, and the lower inode times are not in sync, then
+ * *don't* purge_inode_data, as it could deadlock if ->write calls us and we
+ * try to truncate a locked page. Besides, if unionfs is about to write
+ * data to a file, then there's the data unionfs is about to write is more
+ * authoritative than what's below, therefore we can safely overwrite the
+ * lower inode times and data.
+ */
+bool __unionfs_d_revalidate_chain(struct dentry *dentry, struct nameidata *nd,
+ bool willwrite)
+{
+ bool valid = false; /* default is invalid */
+ struct dentry **chain = NULL; /* chain of dentries to reval */
+ int chain_len = 0;
+ struct dentry *dtmp;
+ int sbgen, dgen, i;
+ int saved_bstart, saved_bend, bindex;
+
+ /* find length of chain needed to revalidate */
+ /* XXX: should I grab some global (dcache?) lock? */
+ chain_len = 0;
+ sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
+ dtmp = dentry->d_parent;
+ if (dentry != dtmp)
+ unionfs_lock_dentry(dtmp, UNIONFS_DMUTEX_REVAL_PARENT);
+ dgen = atomic_read(&UNIONFS_D(dtmp)->generation);
+ /* XXX: should we check if is_newer_lower all the way up? */
+ if (unlikely(is_newer_lower(dtmp))) {
+ /*
+ * Special case: the root dentry's generation number must
+ * always be valid, but its lower inode times don't have to
+ * be, so sync up the times only.
+ */
+ if (IS_ROOT(dtmp)) {
+ unionfs_copy_attr_times(dtmp->d_inode);
+ } else {
+ /*
+ * reset generation number to zero, guaranteed to be
+ * "old"
+ */
+ dgen = 0;
+ atomic_set(&UNIONFS_D(dtmp)->generation, dgen);
+ }
+ purge_inode_data(dtmp->d_inode);
+ }
+ if (dentry != dtmp)
+ unionfs_unlock_dentry(dtmp);
+ while (sbgen != dgen) {
+ /* The root entry should always be valid */
+ BUG_ON(IS_ROOT(dtmp));
+ chain_len++;
+ dtmp = dtmp->d_parent;
+ dgen = atomic_read(&UNIONFS_D(dtmp)->generation);
+ }
+ if (chain_len == 0)
+ goto out_this; /* shortcut if parents are OK */
+
+ /*
+ * Allocate array of dentries to reval. We could use linked lists,
+ * but the number of entries we need to alloc here is often small,
+ * and short lived, so locality will be better.
+ */
+ chain = kzalloc(chain_len * sizeof(struct dentry *), GFP_KERNEL);
+ if (unlikely(!chain)) {
+ printk(KERN_CRIT "unionfs: no more memory in %s\n",
+ __FUNCTION__);
+ goto out;
+ }
+
+ /*
+ * lock all dentries in chain, in child to parent order.
+ * if failed, then sleep for a little, then retry.
+ */
+ dtmp = dentry->d_parent;
+ for (i = chain_len-1; i >= 0; i--) {
+ chain[i] = dget(dtmp);
+ dtmp = dtmp->d_parent;
+ }
+
+ /*
+ * call __unionfs_d_revalidate_one() on each dentry, but in parent
+ * to child order.
+ */
+ for (i = 0; i < chain_len; i++) {
+ unionfs_lock_dentry(chain[i], UNIONFS_DMUTEX_REVAL_CHILD);
+ if (chain[i] != chain[i]->d_parent)
+ unionfs_lock_dentry(chain[i]->d_parent,
+ UNIONFS_DMUTEX_REVAL_PARENT);
+ saved_bstart = dbstart(chain[i]);
+ saved_bend = dbend(chain[i]);
+ sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
+ dgen = atomic_read(&UNIONFS_D(chain[i])->generation);
+
+ valid = __unionfs_d_revalidate_one(chain[i], nd);
+ /* XXX: is this the correct mntput condition?! */
+ if (valid && chain_len > 0 &&
+ sbgen != dgen && chain[i]->d_inode &&
+ S_ISDIR(chain[i]->d_inode->i_mode)) {
+ for (bindex = saved_bstart; bindex <= saved_bend;
+ bindex++)
+ unionfs_mntput(chain[i], bindex);
+ }
+ if (chain[i] != chain[i]->d_parent)
+ unionfs_unlock_dentry(chain[i]->d_parent);
+ unionfs_unlock_dentry(chain[i]);
+
+ if (unlikely(!valid))
+ goto out_free;
+ }
+
+
+out_this:
+ /* finally, lock this dentry and revalidate it */
+ verify_locked(dentry);
+ if (dentry != dentry->d_parent)
+ unionfs_lock_dentry(dentry->d_parent,
+ UNIONFS_DMUTEX_REVAL_PARENT);
+ dgen = atomic_read(&UNIONFS_D(dentry)->generation);
+
+ if (unlikely(is_newer_lower(dentry))) {
+ /* root dentry special case as aforementioned */
+ if (IS_ROOT(dentry)) {
+ unionfs_copy_attr_times(dentry->d_inode);
+ } else {
+ /*
+ * reset generation number to zero, guaranteed to be
+ * "old"
+ */
+ dgen = 0;
+ atomic_set(&UNIONFS_D(dentry)->generation, dgen);
+ }
+ if (!willwrite)
+ purge_inode_data(dentry->d_inode);
+ }
+ valid = __unionfs_d_revalidate_one(dentry, nd);
+ if (dentry != dentry->d_parent)
+ unionfs_unlock_dentry(dentry->d_parent);
+
+ /*
+ * If __unionfs_d_revalidate_one() succeeded above, then it will
+ * have incremented the refcnt of the mnt's, but also the branch
+ * indices of the dentry will have been updated (to take into
+ * account any branch insertions/deletion. So the current
+ * dbstart/dbend match the current, and new, indices of the mnts
+ * which __unionfs_d_revalidate_one has incremented. Note: the "if"
+ * test below does not depend on whether chain_len was 0 or greater.
+ */
+ if (valid && sbgen != dgen)
+ for (bindex = dbstart(dentry);
+ bindex <= dbend(dentry);
+ bindex++)
+ unionfs_mntput(dentry, bindex);
+
+out_free:
+ /* unlock/dput all dentries in chain and return status */
+ if (chain_len > 0) {
+ for (i = 0; i < chain_len; i++)
+ dput(chain[i]);
+ kfree(chain);
+ }
+out:
+ return valid;
+}
+
+static int unionfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ int err;
+
+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
+
+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
+ err = __unionfs_d_revalidate_chain(dentry, nd, false);
+ if (likely(err > 0)) { /* true==1: dentry is valid */
+ unionfs_check_dentry(dentry);
+ unionfs_check_nd(nd);
+ }
+ unionfs_unlock_dentry(dentry);
+
+ unionfs_read_unlock(dentry->d_sb);
+
+ return err;
+}
+
+/*
+ * At this point no one can reference this dentry, so we don't have to be
+ * careful about concurrent access.
+ */
+static void unionfs_d_release(struct dentry *dentry)
+{
+ int bindex, bstart, bend;
+
+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
+
+ unionfs_check_dentry(dentry);
+ /* this could be a negative dentry, so check first */
+ if (unlikely(!UNIONFS_D(dentry))) {
+ printk(KERN_ERR "unionfs: dentry without private data: %.*s\n",
+ dentry->d_name.len, dentry->d_name.name);
+ goto out;
+ } else if (dbstart(dentry) < 0)
+ goto out_free; /* due to a (normal) failed lookup */
+
+ /* Release all the lower dentries */
+ bstart = dbstart(dentry);
+ bend = dbend(dentry);
+ for (bindex = bstart; bindex <= bend; bindex++) {
+ dput(unionfs_lower_dentry_idx(dentry, bindex));
+ unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
+ /* NULL lower mnt is ok if this is a negative dentry */
+ if (!dentry->d_inode && !unionfs_lower_mnt_idx(dentry, bindex))
+ continue;
+ unionfs_mntput(dentry, bindex);
+ unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
+ }
+ /* free private data (unionfs_dentry_info) here */
+ kfree(UNIONFS_D(dentry)->lower_paths);
+ UNIONFS_D(dentry)->lower_paths = NULL;
+
+out_free:
+ /* No need to unlock it, because it is disappeared. */
+ free_dentry_private_data(dentry);
+
+out:
+ unionfs_read_unlock(dentry->d_sb);
+ return;
+}
+
+/*
+ * Called when we're removing the last reference to our dentry. So we
+ * should drop all lower references too.
+ */
+static void unionfs_d_iput(struct dentry *dentry, struct inode *inode)
+{
+ int bindex, rc;
+
+ BUG_ON(!dentry);
+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
+
+ if (!UNIONFS_D(dentry) || dbstart(dentry) < 0)
+ goto drop_lower_inodes;
+ for (bindex = dbstart(dentry); bindex <= dbend(dentry); bindex++) {
+ if (unionfs_lower_mnt_idx(dentry, bindex)) {
+ unionfs_mntput(dentry, bindex);
+ unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
+ }
+ if (unionfs_lower_dentry_idx(dentry, bindex)) {
+ dput(unionfs_lower_dentry_idx(dentry, bindex));
+ unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
+ }
+ }
+ set_dbstart(dentry, -1);
+ set_dbend(dentry, -1);
+
+drop_lower_inodes:
+ rc = atomic_read(&inode->i_count);
+ if (rc == 1 && inode->i_nlink == 1 && ibstart(inode) >= 0) {
+ /* see Documentation/filesystems/unionfs/issues.txt */
+ lockdep_off();
+ iput(unionfs_lower_inode(inode));
+ lockdep_on();
+ unionfs_set_lower_inode(inode, NULL);
+ /* XXX: may need to set start/end to -1? */
+ }
+
+ iput(inode);
+
+ unionfs_read_unlock(dentry->d_sb);
+}
+
+struct dentry_operations unionfs_dops = {
+ .d_revalidate = unionfs_d_revalidate,
+ .d_release = unionfs_d_release,
+ .d_iput = unionfs_d_iput,
+};
--
1.5.2.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/