Re: [PATCH 0/5] hybrid union filesystem prototype
From: Neil Brown
Date: Wed Sep 01 2010 - 00:34:06 EST
On Mon, 30 Aug 2010 21:40:27 +1000
Neil Brown <neilb@xxxxxxx> wrote:
> > > > I think it's best to leave that stuff until someone actually cares.
> > > > The "people might find it useful" argument is not strong enough to put
> > > > nontrivial effort into thinking about all the weird corner cases.
> > >
> > > My thought on this is that in order to describe the behaviour of the
> > > filesystem accurately you need to think about all the weird corner cases.
> > >
> > > Then it becomes a question of: is it easier to document the weird behaviour,
> > > or change the code so the documentation will be easier to understand?
> > > Some cases will go one way, some the other.
> > >
> > > But as you suggest this isn't urgent.
> >
> > You didn't mention one possibility: add limitations that prevent the
> > weird corner cases arising. I believe this is the simplest option.
>
> Val has been following that approach and asking if it is possible to make an
> NFS filesystem really-truly read-only. i.e. no changes.
> I don't believe it is.
>
> But I won't pursue this further without patches.
>
And here is a patch. It isn't really complete, but I have done enough for
today. It at least shows what I am trying to do.
To see a clear indication of the effect, try the following script before and
after the patch..
-----------------------
#!/bin/sh
# L = lower U = upper O = overlay
umount /tmp/O
rm -rf /tmp/[LUO]
mkdir /tmp/{L,U,O}
for i in 1 2 3; do > /tmp/L/$i ; done
mount -t union -o upperdir=/tmp/U,lowerdir=/tmp/L pointlessword /tmp/O
ls -l /tmp/O/foo
ls -l /tmp/O
> /tmp/L/foo
ls -l /tmp/O
-------------------------
The last 'ls -l' report a non-existent 'foo' without the patch. With the
patch it more correctly doesn't report anything at all about foo.
No patch change-log yet because I'm not suggesting it be included, just
reviewed.
NeilBrown
diff --git a/fs/union/union.c b/fs/union/union.c
index e19fe62..434d152 100644
--- a/fs/union/union.c
+++ b/fs/union/union.c
@@ -392,9 +392,160 @@ static void union_dentry_iput(struct dentry *dentry, struct inode *inode)
iput(inode);
}
+static int union_still_valid(struct dentry *dentry,
+ struct dentry *parent, struct dentry *child)
+{
+ /* dentry is in the union filesystem
+ * child is the corresponding dentry in the upper to lower layer
+ * parent is the corresponding dentry of dentry's parent in the same layer
+ *
+ * If child is NULL, the parent must be NULL or negative.
+ * Otherwise child must be hashed, have parent as the d_parent, and
+ * have the same name as dentry
+ *
+ */
+ struct qstr *qstr;
+ int rv;
+
+ if (child == NULL)
+ return (parent == NULL || parent->d_inode == NULL);
+
+ if (child->d_parent != parent)
+ return 0;
+ if (d_unhashed(child))
+ return 0;
+
+ /* Unfortunately we need d_lock to compare names */
+ spin_lock(&dentry->d_lock);
+ spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
+ qstr = &child->d_name;
+ if (parent->d_op && parent->d_op->d_compare)
+ rv = ! (parent->d_op->d_compare(parent, qstr, &dentry->d_name));
+ else
+ rv = (qstr->len == dentry->d_name.len &&
+ memcmp(qstr->name, dentry->d_name.name, qstr->len) == 0);
+
+ spin_unlock(&child->d_lock);
+ spin_unlock(&dentry->d_lock);
+ return rv;
+}
+
+static struct dentry *union_lookup_real(struct dentry *dir, struct qstr *name);
+static struct inode *union_new_inode(struct super_block *sb, umode_t mode);
+
+static struct union_entry do_union_lookup(struct inode *dir, struct dentry *dentry,
+ struct nameidata *nd)
+{
+ struct union_entry *pue = dentry->d_parent->d_fsdata;
+ struct union_entry ue;
+ struct dentry *upperdir = pue->upperpath.dentry;
+ struct dentry *upperdentry = NULL;
+ struct dentry *lowerdir = pue->lowerpath.dentry;
+ struct dentry *lowerdentry = NULL;
+ int err;
+
+ memset(&ue, 0, sizeof(ue));
+
+ if (upperdir) {
+ upperdentry = union_lookup_real(upperdir, &dentry->d_name);
+ err = PTR_ERR(upperdentry);
+ if (IS_ERR(upperdentry))
+ goto out;
+
+ if (upperdentry) {
+ if (union_is_opaquedir(upperdentry))
+ ue.opaque = true;
+ else if (union_is_whiteout(upperdentry)) {
+ dput(upperdentry);
+ upperdentry = NULL;
+ ue.opaque = true;
+ }
+ }
+ }
+ if (lowerdir && !ue.opaque) {
+ lowerdentry = union_lookup_real(lowerdir, &dentry->d_name);
+ if (IS_ERR(lowerdentry)) {
+ err = PTR_ERR(lowerdentry);
+ dput(upperdentry);
+ goto out;
+ }
+ }
+
+ if (lowerdentry && upperdentry &&
+ (!S_ISDIR(upperdentry->d_inode->i_mode) ||
+ !S_ISDIR(lowerdentry->d_inode->i_mode))) {
+ dput(lowerdentry);
+ lowerdentry = NULL;
+ ue.opaque = true;
+ }
+
+ if (upperdentry) {
+ ue.upperpath.mnt = pue->upperpath.mnt;
+ ue.upperpath.dentry = upperdentry;
+ path_get(&ue.upperpath);
+ dput(upperdentry);
+ }
+ if (lowerdentry) {
+ ue.lowerpath.mnt = pue->lowerpath.mnt;
+ ue.lowerpath.dentry = lowerdentry;
+ path_get(&ue.lowerpath);
+ dput(lowerdentry);
+ }
+
+ return ue;
+
+out:
+ ue.upperpath.dentry = ERR_PTR(err);
+ return ue;
+}
+
+static int union_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ /* We need to revalidate this dentry if the upper parent might have
+ * changed.
+ */
+ struct dentry *parent = dget_parent(dentry);
+ struct union_entry *ue = dentry->d_fsdata;
+ struct union_entry *pue = parent->d_fsdata;
+ struct union_entry nue;
+ int rv = 1;
+
+ if (union_still_valid(dentry, pue->upperpath.dentry, ue->upperpath.dentry) &&
+ union_still_valid(dentry, pue->lowerpath.dentry, ue->lowerpath.dentry))
+ goto out;
+
+ /* If dentry is not a directory, it is safe to fail */
+ rv = 0;
+ if (dentry->d_inode == NULL || !S_ISDIR(dentry->d_inode->i_mode))
+ goto out;
+
+ /* If this dentry or a child is in use by someone else, it cannot be
+ * invalidated so the best thing to do is re-lookup the names
+ */
+ mutex_lock(&parent->d_inode->i_mutex);
+ if (dentry->d_parent != parent)
+ goto out_unlock;
+ rv = 1;
+ nue = do_union_lookup(parent->d_inode, dentry, nd);
+ if (IS_ERR(nue.upperpath.dentry))
+ /* Leave the dentry unchanged, but return the error */
+ rv = PTR_ERR(nue.upperpath.dentry);
+ else {
+ path_put(&ue->upperpath);
+ path_put(&ue->lowerpath);
+ *ue = nue;
+ }
+out_unlock:
+ mutex_unlock(&parent->d_inode->i_mutex);
+out:
+ dput(parent);
+ return rv;
+}
+
static const struct dentry_operations union_dentry_operations = {
.d_release = union_dentry_release,
.d_iput = union_dentry_iput,
+ .d_revalidate = union_dentry_revalidate,
};
static struct inode *union_new_inode(struct super_block *sb, umode_t mode)
@@ -459,12 +610,7 @@ static struct dentry *union_lookup_real(struct dentry *dir, struct qstr *name)
static struct dentry *union_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nd)
{
- struct union_entry *pue = dentry->d_parent->d_fsdata;
struct union_entry *ue;
- struct dentry *upperdir = pue->upperpath.dentry;
- struct dentry *upperdentry = NULL;
- struct dentry *lowerdir = pue->lowerpath.dentry;
- struct dentry *lowerdentry = NULL;
struct inode *inode = NULL;
int err;
@@ -472,71 +618,29 @@ static struct dentry *union_lookup(struct inode *dir, struct dentry *dentry,
ue = kzalloc(sizeof(struct union_entry), GFP_KERNEL);
if (!ue)
goto out;
-
- if (upperdir) {
- upperdentry = union_lookup_real(upperdir, &dentry->d_name);
- err = PTR_ERR(upperdentry);
- if (IS_ERR(upperdentry))
- goto out_free;
-
- if (upperdentry) {
- if (union_is_opaquedir(upperdentry))
- ue->opaque = true;
- else if (union_is_whiteout(upperdentry)) {
- dput(upperdentry);
- upperdentry = NULL;
- ue->opaque = true;
- }
- }
- }
- if (lowerdir && !ue->opaque) {
- lowerdentry = union_lookup_real(lowerdir, &dentry->d_name);
- if (IS_ERR(lowerdentry)) {
- err = PTR_ERR(lowerdentry);
- dput(upperdentry);
- goto out_free;
- }
+ *ue = do_union_lookup(dir, dentry, nd);
+ if (IS_ERR(ue->upperpath.dentry)) {
+ err = PTR_ERR(ue->upperpath.dentry);
+ goto out_free;
}
-
- if (lowerdentry && upperdentry &&
- (!S_ISDIR(upperdentry->d_inode->i_mode) ||
- !S_ISDIR(lowerdentry->d_inode->i_mode))) {
- dput(lowerdentry);
- lowerdentry = NULL;
- ue->opaque = true;
- }
-
- if (lowerdentry || upperdentry) {
+ if (ue->upperpath.dentry || ue->lowerpath.dentry) {
struct dentry *realdentry;
- realdentry = upperdentry ? upperdentry : lowerdentry;
+ realdentry = ue->upperpath.dentry ? : ue->lowerpath.dentry;
inode = union_new_inode(dir->i_sb, realdentry->d_inode->i_mode);
- if (!inode)
- goto out_dput;
- }
-
- if (upperdentry) {
- ue->upperpath.mnt = pue->upperpath.mnt;
- ue->upperpath.dentry = upperdentry;
- path_get(&ue->upperpath);
- dput(upperdentry);
- }
- if (lowerdentry) {
- ue->lowerpath.mnt = pue->lowerpath.mnt;
- ue->lowerpath.dentry = lowerdentry;
- path_get(&ue->lowerpath);
- dput(lowerdentry);
+ err = -ENOMEM;
+ if (!inode) {
+ path_put(&ue->upperpath);
+ path_put(&ue->lowerpath);
+ goto out_free;
+ }
}
-
d_add(dentry, inode);
dentry->d_fsdata = ue;
dentry->d_op = &union_dentry_operations;
return NULL;
-out_dput:
- dput(upperdentry);
- dput(lowerdentry);
out_free:
kfree(ue);
out:
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/