[AUFS PATCH v2.6.26-rc2-mm1 29/39] aufs lower inode and internal inotify
From: hooanon05
Date: Tue May 20 2008 - 23:44:14 EST
From: Junjiro Okajima <hooanon05@xxxxxxxxxxx>
initial commit
handles inodes on lower/branch filesystem, including internal
inotify-watch
Signed-off-by: Junjiro Okajima <hooanon05@xxxxxxxxxxx>
---
fs/aufs/hinode.h | 184 +++++++++
fs/aufs/hinotify.c | 1059 ++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 1243 insertions(+), 0 deletions(-)
create mode 100644 fs/aufs/hinode.h
create mode 100644 fs/aufs/hinotify.c
diff --git a/fs/aufs/hinode.h b/fs/aufs/hinode.h
new file mode 100644
index 0000000..73933da
--- /dev/null
+++ b/fs/aufs/hinode.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2005-2008 Junjiro Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * lower (branch filesystem) inode and setting inotify
+ */
+
+#ifndef __AUFS_HINODE_H__
+#define __AUFS_HINODE_H__
+
+#ifdef __KERNEL__
+
+#include <linux/fs.h>
+#include <linux/inotify.h>
+#include <linux/aufs_type.h>
+#include "super.h"
+#include "vfsub.h"
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * the data of a single watch set on a lower inode.
+ * all members exist only when CONFIG_AUFS_HINOTIFY is enabled, otherwise
+ * the struct is empty.
+ */
+struct au_hinotify {
+#ifdef CONFIG_AUFS_HINOTIFY
+ /* the watch handed to inotify, the handler gets back to us from it */
+ struct inotify_watch hin_watch;
+ struct inode *hin_aufs_inode; /* no get/put */
+
+ /*
+ * ignore counters, indexed by the bit number of an inotify event.
+ * an array of atomic_t X au_hin_nignore
+ */
+ atomic_t hin_ignore[0];
+#endif
+};
+
+/* an inode on a lower (branch) filesystem, as aufs refers to it */
+struct au_hinode {
+ struct inode *hi_inode;
+ aufs_bindex_t hi_id;
+#ifdef CONFIG_AUFS_HINOTIFY
+ /* the watch data set by au_hin_alloc(), NULL while it is not watched */
+ struct au_hinotify *hi_notify;
+#endif
+
+ /* reference to the copied-up whiteout with get/put */
+ struct dentry *hi_whdentry;
+};
+
+/*
+ * a pair of a lower inode and the inotify events to be ignored on it.
+ * empty unless CONFIG_AUFS_HINOTIFY is enabled.
+ */
+struct au_hin_ignore {
+#ifdef CONFIG_AUFS_HINOTIFY
+ __u32 ign_events; /* a bitmask of IN_xxx */
+ struct au_hinode *ign_hinode;
+#endif
+};
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef CONFIG_AUFS_HINOTIFY
+/* set the watch data pointer of a lower inode, NULL is acceptable */
+static inline void au_hin_init(struct au_hinode *hinode,
+ struct au_hinotify *val)
+{
+ hinode->hi_notify = val;
+}
+
+/* hinotify.c */
+/* start/stop watching a lower inode */
+int au_hin_alloc(struct au_hinode *hinode, struct inode *inode,
+ struct inode *h_inode);
+void au_hin_free(struct au_hinode *hinode);
+/* lock/unlock a lower dir, suspending/resuming its watch */
+void au_do_hdir_lock(struct inode *h_dir, struct inode *dir,
+ aufs_bindex_t bindex, unsigned int lsc);
+void au_hdir_unlock(struct inode *h_dir, struct inode *dir,
+ aufs_bindex_t bindex);
+/* the rename(2) flavor of the above, for two lower parent dirs */
+struct dentry *au_hdir_lock_rename(struct dentry **h_parents,
+ struct inode **dirs, aufs_bindex_t bindex,
+ int issamedir);
+void au_hdir_unlock_rename(struct dentry **h_parents, struct inode **dirs,
+ aufs_bindex_t bindex, int issamedir);
+void au_reset_hinotify(struct inode *inode, unsigned int flags);
+
+/* increment/decrement the ignore counters for the given events */
+void au_hin_ignore(struct au_hinode *hinode, __u32 events);
+void au_hin_unignore(struct au_hinode *hinode, __u32 events);
+
+/* module-wide setup and cleanup */
+int __init au_inotify_init(void);
+void au_inotify_fin(void);
+
+#else
+
+/* hinotify is disabled, au_hinode has no watch data pointer to set */
+static inline void au_hin_init(struct au_hinode *hinode,
+ struct au_hinotify *val)
+{
+ /* nothing to do */
+}
+
+/* watching a lower inode is unsupported when hinotify is disabled */
+static inline int au_hin_alloc(struct au_hinode *hinode, struct inode *inode,
+ struct inode *h_inode)
+{
+ return -EOPNOTSUPP;
+}
+
+/* hinotify is disabled, there is no watch to remove */
+static inline
+void au_hin_free(struct au_hinode *hinode)
+{
+ /* nothing to do */
+}
+
+/* simply acquire i_mutex of the lower dir, there is no watch to suspend */
+static inline void au_do_hdir_lock(struct inode *h_dir, struct inode *dir,
+ aufs_bindex_t bindex, unsigned int lsc)
+{
+ mutex_lock_nested(&h_dir->i_mutex, lsc);
+}
+
+/* simply release i_mutex of the lower dir, there is no watch to resume */
+static inline void au_hdir_unlock(struct inode *h_dir, struct inode *dir,
+ aufs_bindex_t bindex)
+{
+ mutex_unlock(&h_dir->i_mutex);
+}
+
+/* lock the two lower parent dirs for rename, no watch is involved */
+static inline struct dentry *
+au_hdir_lock_rename(struct dentry **h_parents, struct inode **dirs,
+ aufs_bindex_t bindex, int issamedir)
+{
+ return vfsub_lock_rename(h_parents[0], h_parents[1]);
+}
+
+/* unlock the two lower parent dirs after rename, no watch is involved */
+static inline void
+au_hdir_unlock_rename(struct dentry **h_parents, struct inode **dirs,
+ aufs_bindex_t bindex, int issamedir)
+{
+ vfsub_unlock_rename(h_parents[0], h_parents[1]);
+}
+
+/* hinotify is disabled, there is no watch to set again */
+static inline
+void au_reset_hinotify(struct inode *inode, unsigned int flags)
+{
+ /* nothing to do */
+}
+
+/*
+ * hinotify is disabled, there is no event to ignore.
+ * the parameter type is the same as the prototype used when hinotify is
+ * enabled (struct au_hinode *), so that a caller compiles unchanged in
+ * both configurations.
+ */
+static inline void au_hin_ignore(struct au_hinode *hinode, __u32 events)
+{
+ /* nothing */
+}
+
+/* the counterpart of au_hin_ignore(), which does nothing here too */
+static inline void au_hin_unignore(struct au_hinode *hinode, __u32 events)
+{
+ /* nothing */
+}
+
+/* hinotify is disabled, nothing to set up and it always succeeds */
+static inline
+int au_inotify_init(void)
+{
+ return 0;
+}
+
+/* hinotify is disabled, the cleanup expands to an empty statement */
+#define au_inotify_fin() do {} while (0)
+#endif /* CONFIG_AUFS_HINOTIFY */
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * au_hdir_lock, au_hdir2_lock
+ * AuLockFunc(name, lsc) defines name##_lock() which calls
+ * au_do_hdir_lock() with the lock subclass AuLsc_I_##lsc.
+ * the macro is undefined after these two are generated.
+ */
+#define AuLockFunc(name, lsc) \
+static inline \
+void name##_lock(struct inode *h_dir, struct inode *dir, aufs_bindex_t bindex) \
+{ au_do_hdir_lock(h_dir, dir, bindex, AuLsc_I_##lsc); }
+
+AuLockFunc(au_hdir, PARENT);
+AuLockFunc(au_hdir2, PARENT2);
+
+#undef AuLockFunc
+
+/* ---------------------------------------------------------------------- */
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_HINODE_H__ */
diff --git a/fs/aufs/hinotify.c b/fs/aufs/hinotify.c
new file mode 100644
index 0000000..90578b0
--- /dev/null
+++ b/fs/aufs/hinotify.c
@@ -0,0 +1,1059 @@
+/*
+ * Copyright (C) 2006-2008 Junjiro Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * internal/hidden inotify handler
+ */
+
+#include "aufs.h"
+
+/*
+ * inotify events.
+ * the mask given to inotify_add_watch(), also set again when a suspended
+ * watch is resumed.
+ */
+static const __u32 AuInMask = (IN_MOVE | IN_DELETE | IN_CREATE
+ /* | IN_ACCESS */
+ | IN_MODIFY | IN_ATTRIB
+ /* | IN_DELETE_SELF | IN_MOVE_SELF */
+ );
+/* the inotify handle shared by all watches, set in au_inotify_init() */
+static struct inotify_handle *in_handle;
+
+/* the size of an array for ignore counter */
+static int au_hin_nignore;
+
+/* presumably generates au_cache_alloc_hinotify()/au_cache_free_hinotify() */
+AuCacheFuncs(hinotify, AuCache_HINOTIFY);
+
+/*
+ * allocate the watch data for @hinode and begin watching @hidden_inode for
+ * the events in AuInMask, on behalf of the aufs inode @inode.
+ * returns zero when succeeded, -ENOMEM when the allocation failed, or
+ * the negative value returned by inotify_add_watch().
+ */
+int au_hin_alloc(struct au_hinode *hinode, struct inode *inode,
+ struct inode *hidden_inode)
+{
+ int err, n;
+ s32 wd;
+ struct au_hinotify *hin;
+
+ LKTRTrace("i%lu, hi%lu\n", inode->i_ino, hidden_inode->i_ino);
+
+ err = -ENOMEM;
+ hin = au_cache_alloc_hinotify();
+ if (!hin)
+ goto out;
+
+ AuDebugOn(hinode->hi_notify);
+ hinode->hi_notify = hin;
+ hin->hin_aufs_inode = inode;
+ /* no event is ignored at the beginning */
+ for (n = au_hin_nignore - 1; n >= 0; n--)
+ atomic_set(&hin->hin_ignore[n], 0);
+
+ inotify_init_watch(&hin->hin_watch);
+ wd = inotify_add_watch(in_handle, &hin->hin_watch, hidden_inode,
+ AuInMask);
+ if (wd >= 0)
+ return 0; /* success */
+
+ /* revert all the above */
+ err = wd;
+ put_inotify_watch(&hin->hin_watch);
+ au_cache_free_hinotify(hin);
+ hinode->hi_notify = NULL;
+
+ out:
+ AuTraceErr(err);
+ return err;
+}
+
+/*
+ * stop watching the lower inode of @hinode and free the watch data.
+ * does nothing when it is not watched.
+ */
+void au_hin_free(struct au_hinode *hinode)
+{
+ int err;
+ struct au_hinotify *hin;
+
+ AuTraceEnter();
+
+ hin = hinode->hi_notify;
+ if (!hin)
+ return;
+
+ /* the watch is removed only while someone holds its reference */
+ err = atomic_read(&hin->hin_watch.count)
+ ? inotify_rm_watch(in_handle, &hin->hin_watch) : 0;
+ if (unlikely(err))
+ /* it means the watch is already removed */
+ LKTRTrace("failed inotify_rm_watch() %d\n", err);
+ au_cache_free_hinotify(hin);
+ hinode->hi_notify = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * replace the event mask of the watch on the lower inode of @hinode by @mask.
+ * the lower inode must be locked by the caller (IMustLock).
+ * does nothing when the lower inode is not watched.
+ */
+static void ctl_hinotify(struct au_hinode *hinode, const __u32 mask)
+{
+ struct inode *h_inode;
+ struct inotify_watch *watch;
+
+ h_inode = hinode->hi_inode;
+ LKTRTrace("hi%lu, sb %p, 0x%x\n", h_inode->i_ino, h_inode->i_sb, mask);
+ IMustLock(h_inode);
+ if (!hinode->hi_notify)
+ return;
+
+ watch = &hinode->hi_notify->hin_watch;
+#if 0 /* reserved for future use */
+ {
+ u32 wd;
+ wd = inotify_find_update_watch(in_handle, h_inode, mask);
+ AuTraceErr(wd);
+ /* ignore an err; */
+ }
+#else
+ /* struct inotify_handle is hidden */
+ /* so the mask is written directly, under inotify_mutex of the inode */
+ mutex_lock(&h_inode->inotify_mutex);
+ /* mutex_lock(&watch->ih->mutex); */
+ watch->mask = mask;
+ /* mutex_unlock(&watch->ih->mutex); */
+ mutex_unlock(&h_inode->inotify_mutex);
+#endif
+ LKTRTrace("watch %p, mask %u\n", watch, watch->mask);
+}
+
+/* suspend: watch no event at all, resume: watch AuInMask again */
+#define suspend_hinotify(hi) ctl_hinotify(hi, 0)
+#define resume_hinotify(hi) ctl_hinotify(hi, AuInMask)
+
+/*
+ * lock i_mutex of the lower dir @h_dir with the lockdep subclass @lsc, and
+ * then suspend the watch on it.
+ * @h_dir must be the lower inode of the aufs dir @dir on the branch @bindex.
+ */
+void au_do_hdir_lock(struct inode *h_dir, struct inode *dir,
+ aufs_bindex_t bindex, unsigned int lsc)
+{
+ struct au_hinode *hinode;
+
+ LKTRTrace("i%lu, b%d, lsc %d\n", dir->i_ino, bindex, lsc);
+ AuDebugOn(!S_ISDIR(dir->i_mode));
+ hinode = au_ii(dir)->ii_hinode + bindex;
+ AuDebugOn(h_dir != hinode->hi_inode);
+
+ /* ctl_hinotify() requires the lower inode to be locked */
+ mutex_lock_nested(&h_dir->i_mutex, lsc);
+ suspend_hinotify(hinode);
+}
+
+/*
+ * the reverse of au_do_hdir_lock().
+ * resume the watch on the lower dir @h_dir, and then unlock its i_mutex.
+ */
+void au_hdir_unlock(struct inode *h_dir, struct inode *dir,
+ aufs_bindex_t bindex)
+{
+ struct au_hinode *hinode;
+
+ LKTRTrace("i%lu, b%d\n", dir->i_ino, bindex);
+ AuDebugOn(!S_ISDIR(dir->i_mode));
+ hinode = au_ii(dir)->ii_hinode + bindex;
+ AuDebugOn(h_dir != hinode->hi_inode);
+
+ /* resume while the lower inode is still locked */
+ resume_hinotify(hinode);
+ mutex_unlock(&h_dir->i_mutex);
+}
+
+/*
+ * lock the two lower parent dirs @h_parents[0] and [1] for rename, and
+ * suspend the watches on them.
+ * @dirs are the aufs dirs corresponding to @h_parents on the branch @bindex.
+ * when @issamedir is true, only the watch of the first dir is suspended.
+ * returns the value of vfsub_lock_rename().
+ */
+struct dentry *au_hdir_lock_rename(struct dentry **h_parents,
+ struct inode **dirs, aufs_bindex_t bindex,
+ int issamedir)
+{
+ struct dentry *h_trap;
+ struct au_hinode *hinode;
+
+ LKTRTrace("%.*s, %.*s\n",
+ AuDLNPair(h_parents[0]), AuDLNPair(h_parents[1]));
+
+ h_trap = vfsub_lock_rename(h_parents[0], h_parents[1]);
+ hinode = au_ii(dirs[0])->ii_hinode + bindex;
+ AuDebugOn(h_parents[0]->d_inode != hinode->hi_inode);
+ suspend_hinotify(hinode);
+ if (!issamedir) {
+ hinode = au_ii(dirs[1])->ii_hinode + bindex;
+ AuDebugOn(h_parents[1]->d_inode != hinode->hi_inode);
+ suspend_hinotify(hinode);
+ }
+
+ return h_trap;
+}
+
+/*
+ * the reverse of au_hdir_lock_rename().
+ * resume the watches on the lower parent dirs (only the first one when
+ * @issamedir is true), and then unlock them.
+ */
+void au_hdir_unlock_rename(struct dentry **h_parents, struct inode **dirs,
+ aufs_bindex_t bindex, int issamedir)
+{
+ struct au_hinode *hinode;
+
+ LKTRTrace("%.*s, %.*s\n",
+ AuDLNPair(h_parents[0]), AuDLNPair(h_parents[1]));
+
+ hinode = au_ii(dirs[0])->ii_hinode + bindex;
+ AuDebugOn(h_parents[0]->d_inode != hinode->hi_inode);
+ resume_hinotify(hinode);
+ if (!issamedir) {
+ hinode = au_ii(dirs[1])->ii_hinode + bindex;
+ AuDebugOn(h_parents[1]->d_inode != hinode->hi_inode);
+ resume_hinotify(hinode);
+ }
+ vfsub_unlock_rename(h_parents[0], h_parents[1]);
+}
+
+/*
+ * for every branch where @inode has a lower inode, unset the lower inode
+ * and set the same one again with @flags, except AuHi_XINO which is
+ * always dropped.
+ * NOTE(review): presumably au_set_h_iptr() removes/adds the watch according
+ * to the flags -- confirm against its definition.
+ */
+void au_reset_hinotify(struct inode *inode, unsigned int flags)
+{
+ aufs_bindex_t bindex, bend;
+ struct inode *hi;
+ struct dentry *iwhdentry;
+
+ LKTRTrace("i%lu, 0x%x\n", inode->i_ino, flags);
+
+ bend = au_ibend(inode);
+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
+ hi = au_h_iptr(inode, bindex);
+ if (hi) {
+ /* mutex_lock_nested(&hi->i_mutex, AuLsc_I_CHILD); */
+ iwhdentry = au_hi_wh(inode, bindex);
+ if (unlikely(iwhdentry))
+ dget(iwhdentry);
+ /* keep the lower inode alive while it is unset */
+ igrab(hi);
+ au_set_h_iptr(inode, bindex, NULL, 0);
+ au_set_h_iptr(inode, bindex, igrab(hi),
+ flags & ~AuHi_XINO);
+ /* drop the temporary references taken above */
+ iput(hi);
+ dput(iwhdentry);
+ /* mutex_unlock(&hi->i_mutex); */
+ }
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * convert the attributes to be changed (ia_valid) into the inotify events,
+ * IN_ATTRIB and/or IN_MODIFY.
+ * cf. fsnotify_change()
+ */
+__u32 vfsub_events_notify_change(struct iattr *ia)
+{
+ const unsigned int valid = ia->ia_valid;
+ const unsigned int times = valid & (ATTR_ATIME | ATTR_MTIME);
+ __u32 events = 0;
+
+ /* the owner, the mode, or both of atime and mtime */
+ if ((valid & (ATTR_UID | ATTR_GID | ATTR_MODE))
+ || times == (ATTR_ATIME | ATTR_MTIME))
+ events |= IN_ATTRIB;
+ /* the size, or mtime solely */
+ if ((valid & ATTR_SIZE) || times == ATTR_MTIME)
+ events |= IN_MODIFY;
+ return events;
+}
+
+/*
+ * append the pair of @hinode and @events to the ignore list in @vargs.
+ * the capacity of the list is not checked here.
+ */
+void vfsub_ign_hinode(struct vfsub_args *vargs, __u32 events,
+ struct au_hinode *hinode)
+{
+ struct au_hin_ignore *slot;
+
+ AuDebugOn(!hinode);
+
+ slot = &vargs->ignore[vargs->nignore];
+ vargs->nignore++;
+ slot->ign_hinode = hinode;
+ slot->ign_events = events;
+}
+
+/* call au_hin_ignore() for all entries in the ignore list of @vargs */
+void vfsub_ignore(struct vfsub_args *vargs)
+{
+ int i, n;
+ struct au_hin_ignore *ign;
+
+ n = vargs->nignore;
+ for (i = 0; i < n; i++) {
+ ign = vargs->ignore + i;
+ au_hin_ignore(ign->ign_hinode, ign->ign_events);
+ }
+}
+
+/* call au_hin_unignore() for all entries in the ignore list of @vargs */
+void vfsub_unignore(struct vfsub_args *vargs)
+{
+ int i, n;
+ struct au_hin_ignore *ign;
+
+ n = vargs->nignore;
+ for (i = 0; i < n; i++) {
+ ign = vargs->ignore + i;
+ au_hin_unignore(ign->ign_hinode, ign->ign_events);
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * increment the ignore counter of every event bit set in @events, for the
+ * watch on the lower inode of @hinode.
+ * does nothing when @hinode has no lower inode or no watch.
+ */
+void au_hin_ignore(struct au_hinode *hinode, __u32 events)
+{
+ int bit;
+ atomic_t *counters;
+
+ LKTRTrace("0x%x\n", events);
+ AuDebugOn(!hinode || !events);
+ if (unlikely(!hinode->hi_inode || !hinode->hi_notify)) {
+ /*
+ * this case is possible, for example,
+ * - a file and its parent dir exist on two branches
+ * - the file on the upper branch is opened
+ * - udba removes the parent dir and the file
+ * - the parent is accessed again, and aufs generates a new
+ * dentry/inode for it, based upon the one on the lower
+ * branch
+ * - the opened file is accessed and revalidated again, and
+ * it may be connected to the new parent dentry
+ * as a result, the file in aufs cannot reach the parent dir
+ * which was actually removed on the branch.
+ */
+ return;
+ }
+ LKTRTrace("hi%lu\n", hinode->hi_inode->i_ino);
+#ifdef DbgInotify
+ AuDbg("hi%lu, 0x%x\n", hinode->hi_inode->i_ino, events);
+#endif
+ AuDebugOn(!hinode->hi_notify);
+
+ counters = hinode->hi_notify->hin_ignore;
+ for (bit = 0; bit < au_hin_nignore; bit++) {
+ if (!(events & (1U << bit)))
+ continue;
+ atomic_inc_return(counters + bit);
+ }
+}
+
+/*
+ * the reverse of au_hin_ignore().
+ * decrement the ignore counter of every event bit set in @events.
+ * does nothing when @hinode has no lower inode or no watch.
+ */
+void au_hin_unignore(struct au_hinode *hinode, __u32 events)
+{
+ int bit;
+ atomic_t *counters;
+
+ LKTRTrace("0x%x\n", events);
+ AuDebugOn(!hinode || !events);
+ if (unlikely(!hinode->hi_inode || !hinode->hi_notify))
+ return;
+ LKTRTrace("hi%lu\n", hinode->hi_inode->i_ino);
+#ifdef DbgInotify
+ AuDbg("hi%lu, 0x%x\n", hinode->hi_inode->i_ino, events);
+#endif
+ AuDebugOn(!hinode->hi_notify);
+
+ counters = hinode->hi_notify->hin_ignore;
+ for (bit = 0; bit < au_hin_nignore; bit++) {
+ if (!(events & (1U << bit)))
+ continue;
+ atomic_dec_return(counters + bit);
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * for debug prints, return the name of an inotify event in @mask.
+ * with CONFIG_AUFS_DEBUG, the flags are tested in the order listed below and
+ * the name of the first one set is returned, or "" when none is set.
+ * without it, always "??".
+ */
+static char *in_name(u32 mask)
+{
+#ifdef CONFIG_AUFS_DEBUG
+/* the trailing ';' at every use below makes a harmless empty statement */
+#define test_ret(flag) if (mask & flag) return #flag;
+ test_ret(IN_ACCESS);
+ test_ret(IN_MODIFY);
+ test_ret(IN_ATTRIB);
+ test_ret(IN_CLOSE_WRITE);
+ test_ret(IN_CLOSE_NOWRITE);
+ test_ret(IN_OPEN);
+ test_ret(IN_MOVED_FROM);
+ test_ret(IN_MOVED_TO);
+ test_ret(IN_CREATE);
+ test_ret(IN_DELETE);
+ test_ret(IN_DELETE_SELF);
+ test_ret(IN_MOVE_SELF);
+ test_ret(IN_UNMOUNT);
+ test_ret(IN_Q_OVERFLOW);
+ test_ret(IN_IGNORED);
+ return "";
+#undef test_ret
+#else
+ return "??";
+#endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * search the cached children of an alias of @dir for a dentry whose name is
+ * @name (@nlen bytes), without any lookup on the filesystem.
+ * a matching but unreferenced dentry is dropped from the hash and skipped.
+ * returns the found dentry, referenced and write-locked by
+ * di_write_lock_child(), or NULL.
+ */
+static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
+ struct inode *dir)
+{
+ struct dentry *dentry, *d, *parent;
+ struct qstr *dname;
+
+ LKTRTrace("%.*s, dir%lu\n", nlen, name, dir->i_ino);
+
+ parent = d_find_alias(dir);
+ if (!parent)
+ return NULL;
+
+ dentry = NULL;
+ spin_lock(&dcache_lock);
+ list_for_each_entry(d, &parent->d_subdirs, d_u.d_child) {
+ LKTRTrace("%.*s\n", AuDLNPair(d));
+ dname = &d->d_name;
+ if (dname->len != nlen || memcmp(dname->name, name, nlen))
+ continue;
+ if (!atomic_read(&d->d_count)) {
+ /* nobody uses it, simply unhash it and search more */
+ spin_lock(&d->d_lock);
+ __d_drop(d);
+ spin_unlock(&d->d_lock);
+ continue;
+ }
+
+ dentry = dget(d);
+ break;
+ }
+ spin_unlock(&dcache_lock);
+ dput(parent);
+
+ /* lock it after dcache_lock is released */
+ if (dentry)
+ di_write_lock_child(dentry);
+ return dentry;
+}
+
+/*
+ * find the cached aufs inode which corresponds to the lower inode number
+ * @h_ino on the branch @bindex, by the entry au_xino_read() gives.
+ * the aufs root inode is refused with a warning.
+ * returns the inode, referenced and write-locked by ii_write_lock_child(),
+ * or NULL.
+ */
+static struct inode *lookup_wlock_by_ino(struct super_block *sb,
+ aufs_bindex_t bindex, ino_t h_ino)
+{
+ int err;
+ struct au_xino_entry xinoe;
+ struct inode *found;
+
+ LKTRTrace("b%d, hi%lu\n", bindex, h_ino);
+ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
+
+ found = NULL;
+ err = au_xino_read(sb, bindex, h_ino, &xinoe);
+ if (!err && xinoe.ino)
+ found = ilookup(sb, xinoe.ino);
+ if (!found)
+ return NULL;
+
+ if (unlikely(found->i_ino == AUFS_ROOT_INO)) {
+ AuWarn("wrong root branch\n");
+ iput(found);
+ return NULL;
+ }
+
+ ii_write_lock_child(found);
+ return found;
+}
+
+/*
+ * when @h_inode is one of the lower inodes of @inode, call au_xino_write0()
+ * with zero for all the lower inodes of @inode.
+ * NOTE(review): presumably this clears their inode number entries in xino --
+ * confirm against au_xino_write0().
+ * the aufs root inode is left untouched with a warning.
+ * returns the value of the last au_xino_write0() call, or zero when nothing
+ * is written.
+ */
+static int hin_xino(struct inode *inode, struct inode *h_inode)
+{
+ int err;
+ aufs_bindex_t bindex, bend, bfound, bstart;
+ struct inode *h_i;
+
+ LKTRTrace("i%lu, hi%lu\n", inode->i_ino, h_inode->i_ino);
+
+ err = 0;
+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
+ AuWarn("branch root dir was changed\n");
+ goto out;
+ }
+
+ bfound = -1;
+ bend = au_ibend(inode);
+ bstart = au_ibstart(inode);
+#if 0 /* reserved for future use */
+ if (bindex == bend) {
+ /* keep this ino in rename case */
+ goto out;
+ }
+#endif
+ /* make sure that h_inode still belongs to this aufs inode */
+ for (bindex = bstart; bindex <= bend; bindex++) {
+ if (au_h_iptr(inode, bindex) == h_inode) {
+ bfound = bindex;
+ break;
+ }
+ }
+ if (bfound < 0)
+ goto out;
+
+ for (bindex = bstart; bindex <= bend; bindex++) {
+ h_i = au_h_iptr(inode, bindex);
+ if (h_i)
+ err = au_xino_write0(inode->i_sb, bindex, h_i->i_ino,
+ 0);
+ /* ignore this error */
+ /* bad action? */
+ }
+
+ /* children inode number will be broken */
+
+ out:
+ AuTraceErr(err);
+ return err;
+}
+
+/*
+ * make the generation obsolete for the dentries which au_dcsub_pages()
+ * collects for @dentry, and for their inodes.
+ * every collected dentry, except a root one, is dropped from the hash too.
+ * finally dentry_unhash() is called for @dentry, followed by dput().
+ * returns zero or the error from au_dpages_init()/au_dcsub_pages().
+ */
+static int hin_gen_tree(struct dentry *dentry)
+{
+ int err, i, j, ndentry;
+ struct au_dcsub_pages dpages;
+ struct au_dpage *dpage;
+ struct dentry **dentries;
+
+ LKTRTrace("%.*s\n", AuDLNPair(dentry));
+
+ err = au_dpages_init(&dpages, GFP_TEMPORARY);
+ if (unlikely(err))
+ goto out;
+ err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
+ if (unlikely(err))
+ goto out_dpages;
+
+ for (i = 0; i < dpages.ndpage; i++) {
+ dpage = dpages.dpages + i;
+ dentries = dpage->dentries;
+ ndentry = dpage->ndentry;
+ for (j = 0; j < ndentry; j++) {
+ struct dentry *d;
+ d = dentries[j];
+ LKTRTrace("%.*s\n", AuDLNPair(d));
+ if (IS_ROOT(d))
+ continue;
+
+ d_drop(d);
+ au_digen_dec(d);
+ if (d->d_inode)
+ /* todo: reset children xino?
+ cached children only? */
+ au_iigen_dec(d->d_inode);
+ }
+ }
+
+ out_dpages:
+ au_dpages_free(&dpages);
+
+ /* discard children */
+ /* this part runs even when au_dcsub_pages() failed */
+ dentry_unhash(dentry);
+ dput(dentry);
+ out:
+ AuTraceErr(err);
+ return err;
+}
+
+/*
+ * make the generation of @inode and of its dentry named @name (@nlen bytes)
+ * obsolete.
+ * - non-dir: the generation of the inode is always decremented, and the
+ * alias whose name is exactly @name is dropped from the hash.
+ * - dir: the FAILED_REFRESH_DIRS flag is set in the superblock info, and
+ * hin_gen_tree() handles the tree when the name of the alias is @name.
+ * the aufs root inode is left untouched with a warning.
+ *
+ * return 0 if processed.
+ * a non-zero value (1, or the error from hin_gen_tree()) means the caller
+ * may try again by the dentry.
+ */
+static int hin_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
+ const unsigned int isdir)
+{
+ int err;
+ struct dentry *d;
+ struct qstr *dname;
+
+ LKTRTrace("%.*s, i%lu\n", nlen, name, inode->i_ino);
+
+ err = 1;
+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
+ AuWarn("branch root dir was changed\n");
+ err = 0;
+ goto out;
+ }
+
+ if (!isdir) {
+ AuDebugOn(!name);
+ au_iigen_dec(inode);
+ spin_lock(&dcache_lock);
+ list_for_each_entry(d, &inode->i_dentry, d_alias) {
+ dname = &d->d_name;
+ /*
+ * skip unless both of the length and the bytes match.
+ * the length is tested first, so that memcmp() never
+ * runs over a shorter name.
+ */
+ if (dname->len != nlen
+ || memcmp(dname->name, name, nlen))
+ continue;
+ err = 0;
+ spin_lock(&d->d_lock);
+ __d_drop(d);
+ au_digen_dec(d);
+ spin_unlock(&d->d_lock);
+ break;
+ }
+ spin_unlock(&dcache_lock);
+ } else {
+ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIRS);
+ d = d_find_alias(inode);
+ if (!d) {
+ /* no dentry, the inode generation is all we can do */
+ au_iigen_dec(inode);
+ goto out;
+ }
+
+ dname = &d->d_name;
+ if (dname->len == nlen && !memcmp(dname->name, name, nlen))
+ err = hin_gen_tree(d);
+ dput(d);
+ }
+
+ out:
+ AuTraceErr(err);
+ return err;
+}
+
+/*
+ * make the generation of @dentry and its inode obsolete.
+ * - non-dir: drop the dentry from the hash and decrement the generations.
+ * - dir: set the FAILED_REFRESH_DIRS flag in the superblock info, and
+ * call hin_gen_tree() when the dentry is positive.
+ * a root dentry is left untouched with a warning.
+ * returns zero or the error from hin_gen_tree().
+ */
+static int hin_gen_by_name(struct dentry *dentry, const unsigned int isdir)
+{
+ int err;
+ struct inode *inode;
+
+ LKTRTrace("%.*s\n", AuDLNPair(dentry));
+
+ inode = dentry->d_inode;
+ if (IS_ROOT(dentry)
+ /* || (inode && inode->i_ino == AUFS_ROOT_INO) */
+ ) {
+ AuWarn("branch root dir was changed\n");
+ return 0;
+ }
+
+ err = 0;
+ if (!isdir) {
+ d_drop(dentry);
+ au_digen_dec(dentry);
+ if (inode)
+ au_iigen_dec(inode);
+ } else {
+ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIRS);
+ if (inode)
+ err = hin_gen_tree(dentry);
+ }
+
+ AuTraceErr(err);
+ return err;
+}
+
+/*
+ * copy the attributes up to @inode by au_cpup_attr_all(), only when
+ * @h_inode is its lower inode at the branch index au_ibstart().
+ */
+static void hin_attr(struct inode *inode, struct inode *h_inode)
+{
+ struct dentry *h_alias;
+
+ LKTRTrace("i%lu, hi%lu\n", inode->i_ino, h_inode->i_ino);
+
+ if (h_inode != au_h_iptr(inode, au_ibstart(inode)))
+ return;
+
+ h_alias = d_find_alias(h_inode);
+ if (h_alias) {
+ /* the returned value is not checked intentionally */
+ au_update_fuse_h_inode(NULL, h_alias);
+ dput(h_alias);
+ }
+
+ au_cpup_attr_all(inode);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * hinotify job flags
+ * they select the jobs which hin_job() runs, the event handler sets them
+ * for the child and the parent individually.
+ */
+#define AuHinJob_XINO0 1
+#define AuHinJob_GEN (1 << 1)
+#define AuHinJob_DIRENT (1 << 2)
+#define AuHinJob_ATTR (1 << 3)
+#define AuHinJob_ISDIR (1 << 4)
+#define AuHinJob_TRYXINO0 (1 << 5)
+#define AuHinJob_MNTPNT (1 << 6)
+/* test, set and clear a flag by its short name */
+#define au_ftest_hinjob(flags, name) ((flags) & AuHinJob_##name)
+#define au_fset_hinjob(flags, name) { (flags) |= AuHinJob_##name; }
+#define au_fclr_hinjob(flags, name) { (flags) &= ~AuHinJob_##name; }
+
+/* the arguments for a single run of hin_job() */
+struct hin_job_args {
+ unsigned int flags; /* AuHinJob_xxx */
+ /* the aufs inodes and the lower (h_) ones, some of them may be NULL */
+ struct inode *inode, *h_inode, *dir, *h_dir;
+ struct dentry *dentry;
+ /* the name of the child on the branch, and its length */
+ char *h_name;
+ int h_nlen;
+};
+
+/*
+ * run the jobs selected by a->flags, in a fixed order: XINO0, TRYXINO0, GEN,
+ * DIRENT, ATTR and MNTPNT.
+ * a job is skipped silently when the object it needs is NULL, and the
+ * errors in the jobs are ignored.
+ * always returns zero.
+ */
+static int hin_job(struct hin_job_args *a)
+{
+ const unsigned int isdir = au_ftest_hinjob(a->flags, ISDIR);
+
+ /* reset xino */
+ if (au_ftest_hinjob(a->flags, XINO0) && a->inode)
+ hin_xino(a->inode, a->h_inode);
+ /* ignore this error */
+
+ /* reset xino only when the lower inode has no link anymore */
+ if (au_ftest_hinjob(a->flags, TRYXINO0)
+ && a->inode
+ && a->h_inode) {
+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
+ if (!a->h_inode->i_nlink)
+ hin_xino(a->inode, a->h_inode);
+ /* ignore this error */
+ mutex_unlock(&a->h_inode->i_mutex);
+ }
+
+ /* make the generation obsolete */
+ if (au_ftest_hinjob(a->flags, GEN)) {
+ int err = -1;
+ if (a->inode)
+ err = hin_gen_by_inode(a->h_name, a->h_nlen, a->inode,
+ isdir);
+ /* try the dentry when the inode was absent or unprocessed */
+ if (err && a->dentry)
+ hin_gen_by_name(a->dentry, isdir);
+ /* ignore this error */
+ }
+
+ /* make dir entries obsolete */
+ if (au_ftest_hinjob(a->flags, DIRENT) && a->inode) {
+ struct au_vdir *vdir;
+ IiMustWriteLock(a->inode);
+ vdir = au_ivdir(a->inode);
+ if (vdir)
+ vdir->vd_jiffy = 0;
+ /* IMustLock(a->inode); */
+ /* a->inode->i_version++; */
+ }
+
+ /* update the attr */
+ if (au_ftest_hinjob(a->flags, ATTR) && a->inode && a->h_inode)
+ hin_attr(a->inode, a->h_inode);
+
+ /* can do nothing but warn */
+ if (au_ftest_hinjob(a->flags, MNTPNT)
+ && a->dentry
+ && d_mountpoint(a->dentry))
+ AuWarn("mount-point %.*s is removed or renamed\n",
+ AuDLNPair(a->dentry));
+
+ return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* the indices of flags[], for the child and for the parent dir */
+enum { CHILD, PARENT };
+/*
+ * the arguments handed from the event handler to postproc().
+ * the handler takes the inode references, and postproc() puts them and
+ * frees the struct.
+ */
+struct postproc_args {
+ struct inode *h_dir, *dir, *h_child_inode;
+ u32 mask;
+ unsigned int flags[2];
+ /* the length of h_child_name without the terminating NUL */
+ unsigned int h_child_nlen;
+ char h_child_name[];
+};
+
+/*
+ * the deferred part of the event handling, queued by aufs_inotify() via
+ * au_wkq_nowait().
+ * - find the branch index where a->h_dir is a lower inode of a->dir
+ * - find the aufs dentry/inode of the child, by the name first and then by
+ * the inode number when xino is enabled, and run the jobs for the child
+ * - run the jobs for the parent dir
+ * - put the inode references and free @_args
+ */
+static void postproc(void *_args)
+{
+ struct postproc_args *a = _args;
+ struct super_block *sb;
+ aufs_bindex_t bindex, bend, bfound;
+ int xino, err;
+ struct inode *inode;
+ ino_t h_ino;
+ struct hin_job_args args;
+ struct dentry *dentry;
+ struct au_sbinfo *sbinfo;
+
+ AuDebugOn(!_args);
+ AuDebugOn(!a->h_dir);
+ AuDebugOn(!a->dir);
+ AuDebugOn(!a->mask);
+ LKTRTrace("mask 0x%x %s, i%lu, hi%lu, hci%lu\n",
+ a->mask, in_name(a->mask), a->dir->i_ino, a->h_dir->i_ino,
+ a->h_child_inode ? a->h_child_inode->i_ino : 0);
+
+ inode = NULL;
+ dentry = NULL;
+ /*
+ * do not lock a->dir->i_mutex here
+ * because of d_revalidate() may cause a deadlock.
+ */
+ sb = a->dir->i_sb;
+ AuDebugOn(!sb);
+ sbinfo = au_sbi(sb);
+ AuDebugOn(!sbinfo);
+ /* big aufs lock */
+ si_noflush_write_lock(sb);
+
+ /* is h_dir still a lower inode of dir? */
+ ii_read_lock_parent(a->dir);
+ bfound = -1;
+ bend = au_ibend(a->dir);
+ for (bindex = au_ibstart(a->dir); bindex <= bend; bindex++)
+ if (au_h_iptr(a->dir, bindex) == a->h_dir) {
+ bfound = bindex;
+ break;
+ }
+ ii_read_unlock(a->dir);
+ if (unlikely(bfound < 0))
+ goto out;
+
+ xino = !!au_opt_test(au_mntflags(sb), XINO);
+ h_ino = 0;
+ if (a->h_child_inode)
+ h_ino = a->h_child_inode->i_ino;
+
+ /* the dentry is necessary for the GEN and MNTPNT jobs only */
+ if (a->h_child_nlen
+ && (au_ftest_hinjob(a->flags[CHILD], GEN)
+ || au_ftest_hinjob(a->flags[CHILD], MNTPNT)))
+ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
+ a->dir);
+ if (dentry)
+ inode = dentry->d_inode;
+ /* no inode by the name, try the inode number */
+ if (xino && !inode && h_ino
+ && (au_ftest_hinjob(a->flags[CHILD], XINO0)
+ || au_ftest_hinjob(a->flags[CHILD], TRYXINO0)
+ || au_ftest_hinjob(a->flags[CHILD], GEN)
+ || au_ftest_hinjob(a->flags[CHILD], ATTR)))
+ inode = lookup_wlock_by_ino(sb, bfound, h_ino);
+
+ /* the jobs for the child */
+ args.flags = a->flags[CHILD];
+ args.dentry = dentry;
+ args.inode = inode;
+ args.h_inode = a->h_child_inode;
+ args.dir = a->dir;
+ args.h_dir = a->h_dir;
+ args.h_name = a->h_child_name;
+ args.h_nlen = a->h_child_nlen;
+ err = hin_job(&args);
+ /* release what the successful lookup locked and referenced */
+ if (dentry) {
+ di_write_unlock(dentry);
+ dput(dentry);
+ } else if (inode) {
+ ii_write_unlock(inode);
+ iput(inode);
+ }
+
+ /* the jobs for the parent dir */
+ ii_write_lock_parent(a->dir);
+ args.flags = a->flags[PARENT];
+ args.dentry = NULL;
+ args.inode = a->dir;
+ args.h_inode = a->h_dir;
+ args.dir = NULL;
+ args.h_dir = NULL;
+ args.h_name = NULL;
+ args.h_nlen = 0;
+ err = hin_job(&args);
+ ii_write_unlock(a->dir);
+
+ out:
+ si_write_unlock(sb);
+ /* pairs with au_nwt_inc() in aufs_inotify() */
+ au_nwt_dec(&sbinfo->si_nowait);
+
+ /* pair with igrab() in aufs_inotify(), iput(NULL) is harmless */
+ iput(a->h_child_inode);
+ iput(a->h_dir);
+ iput(a->dir);
+ kfree(a);
+}
+
+/* todo: endian? */
+/* a fallback used only when ilog2 is not defined as a macro already */
+#ifndef ilog2
+#define ilog2(n) ffz(~(n))
+#endif
+
+/*
+ * the event handler registered to inotify (aufs_inotify_ops.handle_event).
+ * @watch: the watch in struct au_hinotify which received the event
+ * @mask: the event, IN_xxx
+ * @h_child_name: the name of the child on the branch, may be NULL
+ * @h_child_inode: the lower inode of the child, may be NULL
+ *
+ * an event whose ignore counter is positive is consumed here silently.
+ * otherwise the jobs for the child and the parent dir are decided by the
+ * event, and handed to postproc() via au_wkq_nowait().
+ * the references of the inodes taken here and the allocated arguments are
+ * released by postproc(), or here when queueing fails.
+ */
+static void aufs_inotify(struct inotify_watch *watch, u32 wd, u32 mask,
+ u32 cookie, const char *h_child_name,
+ struct inode *h_child_inode)
+{
+ struct au_hinotify *hinotify;
+ struct postproc_args *args;
+ int len, wkq_err, isdir, isroot, wh, idx;
+ char *p;
+ struct inode *dir;
+ unsigned int flags[2];
+ struct super_block *sb;
+ atomic_t *cnt;
+
+ LKTRTrace("i%lu, wd %d, mask 0x%x %s, cookie 0x%x, hcname %s, hi%lu\n",
+ watch->inode->i_ino, wd, mask, in_name(mask), cookie,
+ h_child_name ? h_child_name : "",
+ h_child_inode ? h_child_inode->i_ino : 0);
+
+ /* if IN_UNMOUNT happens, there must be another bug */
+ if (mask & (IN_IGNORED | IN_UNMOUNT)) {
+ put_inotify_watch(watch);
+ return;
+ }
+
+#ifdef DbgInotify
+ if (!h_child_name || strcmp(h_child_name, AUFS_XINO_FNAME))
+ AuDbg("i%lu, wd %d, mask 0x%x %s, cookie 0x%x, hcname %s,"
+ " hi%lu\n",
+ watch->inode->i_ino, wd, mask, in_name(mask), cookie,
+ h_child_name ? h_child_name : "",
+ h_child_inode ? h_child_inode->i_ino : 0);
+#endif
+
+ hinotify = container_of(watch, struct au_hinotify, hin_watch);
+ AuDebugOn(!hinotify || !hinotify->hin_aufs_inode);
+ /* the ignore counter is indexed by the bit number of the event */
+ idx = ilog2(mask & IN_ALL_EVENTS);
+ AuDebugOn(au_hin_nignore <= idx);
+ cnt = hinotify->hin_ignore + idx;
+ /* a positive counter means this event is to be ignored, consume one */
+ if (0 <= atomic_dec_return(cnt))
+ return;
+ /* it was not ignored, revert the decrement */
+ atomic_inc_return(cnt);
+ dir = igrab(hinotify->hin_aufs_inode);
+ if (!dir)
+ return;
+ isroot = (dir->i_ino == AUFS_ROOT_INO);
+ len = 0;
+ wh = 0;
+ if (h_child_name) {
+ len = strlen(h_child_name);
+ /*
+ * strip the whiteout prefix.
+ * the length is tested first, so that memcmp() never reads
+ * beyond the end of a name shorter than the prefix.
+ */
+ if (len >= AUFS_WH_PFX_LEN
+ && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
+ h_child_name += AUFS_WH_PFX_LEN;
+ len -= AUFS_WH_PFX_LEN;
+ wh = 1;
+ }
+ }
+
+ isdir = 0;
+ if (h_child_inode)
+ isdir = !!S_ISDIR(h_child_inode->i_mode);
+ flags[PARENT] = AuHinJob_ISDIR;
+ flags[CHILD] = 0;
+ if (isdir)
+ flags[CHILD] = AuHinJob_ISDIR;
+ switch (mask & IN_ALL_EVENTS) {
+ case IN_MODIFY:
+ /*FALLTHROUGH*/
+ case IN_ATTRIB:
+ if (h_child_inode) {
+ if (!wh)
+ au_fset_hinjob(flags[CHILD], ATTR);
+ } else
+ au_fset_hinjob(flags[PARENT], ATTR);
+ break;
+
+ /* IN_MOVED_FROM is the first event in rename(2) */
+ case IN_MOVED_FROM:
+ case IN_MOVED_TO:
+ AuDebugOn(!h_child_name || !h_child_inode);
+ au_fset_hinjob(flags[CHILD], GEN);
+ au_fset_hinjob(flags[CHILD], ATTR);
+ if (1 || isdir)
+ au_fset_hinjob(flags[CHILD], XINO0);
+ au_fset_hinjob(flags[CHILD], MNTPNT);
+
+ au_fset_hinjob(flags[PARENT], ATTR);
+ au_fset_hinjob(flags[PARENT], DIRENT);
+ break;
+
+ case IN_CREATE:
+ AuDebugOn(!h_child_name || !h_child_inode);
+ au_fset_hinjob(flags[PARENT], ATTR);
+ au_fset_hinjob(flags[PARENT], DIRENT);
+ au_fset_hinjob(flags[CHILD], GEN);
+ /* hard link */
+ if (!isdir && h_child_inode->i_nlink > 1)
+ au_fset_hinjob(flags[CHILD], ATTR);
+ break;
+
+ case IN_DELETE:
+ /*
+ * aufs is never able to get this child inode.
+ * revalidation should be in d_revalidate()
+ * by checking i_nlink, i_generation or d_unhashed().
+ */
+ AuDebugOn(!h_child_name);
+ au_fset_hinjob(flags[PARENT], ATTR);
+ au_fset_hinjob(flags[PARENT], DIRENT);
+ au_fset_hinjob(flags[CHILD], GEN);
+ au_fset_hinjob(flags[CHILD], TRYXINO0);
+ au_fset_hinjob(flags[CHILD], MNTPNT);
+ break;
+
+ case IN_DELETE_SELF:
+ if (!isroot)
+ au_fset_hinjob(flags[PARENT], GEN);
+ /*FALLTHROUGH*/
+
+ case IN_MOVE_SELF:
+ /*
+ * when an inotify is set to an aufs inode,
+ * such inode can be isolated and this event can be fired
+ * solely.
+ */
+ AuDebugOn(h_child_name || h_child_inode);
+ if (unlikely(isroot)) {
+ AuWarn("root branch was moved\n");
+ iput(dir);
+ return;
+ }
+ au_fset_hinjob(flags[PARENT], XINO0);
+ au_fset_hinjob(flags[PARENT], GEN);
+ au_fset_hinjob(flags[PARENT], ATTR);
+ au_fset_hinjob(flags[PARENT], DIRENT);
+ /* au_fset_hinjob(flags[PARENT], MNTPNT); */
+ break;
+
+ case IN_ACCESS:
+ default:
+ AuDebugOn(1);
+ }
+
+ /* the lower inode of a whiteout is not the one of the child in aufs */
+ if (wh)
+ h_child_inode = NULL;
+
+ /* iput() and kfree() will be called in postproc() */
+ /*
+ * inotify_mutex is already acquired and kmalloc/prune_icache may lock
+ * iprune_mutex. strange.
+ */
+ lockdep_off();
+ args = kmalloc(sizeof(*args) + len + 1, GFP_TEMPORARY);
+ lockdep_on();
+ if (unlikely(!args)) {
+ AuErr1("no memory\n");
+ iput(dir);
+ return;
+ }
+ args->flags[PARENT] = flags[PARENT];
+ args->flags[CHILD] = flags[CHILD];
+ args->mask = mask;
+ args->dir = dir;
+ args->h_dir = igrab(watch->inode);
+ if (h_child_inode)
+ igrab(h_child_inode);
+ args->h_child_inode = h_child_inode;
+ args->h_child_nlen = len;
+ if (len) {
+ /* the name follows the struct, including the terminating NUL */
+ p = (void *)args;
+ p += sizeof(*args);
+ memcpy(p, h_child_name, len + 1);
+ }
+
+ sb = dir->i_sb;
+ au_nwt_inc(&au_sbi(sb)->si_nowait);
+ lockdep_off();
+ wkq_err = au_wkq_nowait(postproc, args, sb, /*dlgt*/0);
+ lockdep_on();
+ if (unlikely(wkq_err)) {
+ AuErr("wkq %d\n", wkq_err);
+ au_nwt_dec(&au_sbi(sb)->si_nowait);
+ /*
+ * postproc() will never run for these arguments, release
+ * here everything it was expected to release.
+ * iput(NULL) is harmless.
+ */
+ iput(args->h_child_inode);
+ iput(args->h_dir);
+ iput(args->dir);
+ kfree(args);
+ }
+}
+
+/*
+ * the destroy_watch callback of aufs_inotify_ops.
+ * this callback releases nothing, the watch data is freed by au_hin_free().
+ */
+static void aufs_inotify_destroy(struct inotify_watch *watch)
+{
+ /* nothing to do */
+}
+
+/* the callbacks passed to inotify_init() in au_inotify_init() */
+static struct inotify_operations aufs_inotify_ops = {
+ .handle_event = aufs_inotify,
+ .destroy_watch = aufs_inotify_destroy
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* destroy the cache for struct au_hinotify, and forget its pointer */
+static void au_hin_destroy_cache(void)
+{
+ kmem_cache_destroy(au_cachep[AuCache_HINOTIFY]);
+ /* au_inotify_fin() tests this pointer */
+ au_cachep[AuCache_HINOTIFY] = NULL;
+}
+
+/*
+ * module-wide initialization.
+ * - decide the number of the ignore counters, which is the smallest number
+ * of bits, six at least, covering AuInMask
+ * - create the cache for struct au_hinotify
+ * NOTE(review): presumably AuCacheX() adds its second argument to the
+ * object size, as the room for the counters -- confirm
+ * - get the inotify handle
+ * returns zero, -ENOMEM when the cache creation failed, or the error from
+ * inotify_init().
+ */
+int __init au_inotify_init(void)
+{
+ au_hin_nignore = 6;
+ while (1U << au_hin_nignore < AuInMask)
+ au_hin_nignore++;
+ AuDebugOn(au_hin_nignore != 10);
+
+ /* the value to be returned when the cache creation fails */
+ in_handle = ERR_PTR(-ENOMEM);
+ au_cachep[AuCache_HINOTIFY]
+ = AuCacheX(au_hinotify, sizeof(atomic_t) * au_hin_nignore);
+ if (unlikely(!au_cachep[AuCache_HINOTIFY]))
+ goto out;
+
+ in_handle = inotify_init(&aufs_inotify_ops);
+ if (!IS_ERR(in_handle))
+ return 0;
+
+ au_hin_destroy_cache();
+ out:
+ AuTraceErrPtr(in_handle);
+ return PTR_ERR(in_handle);
+}
+
+/*
+ * module-wide cleanup.
+ * destroy the inotify handle, and the cache for struct au_hinotify if it
+ * still exists.
+ */
+void au_inotify_fin(void)
+{
+ inotify_destroy(in_handle);
+ if (au_cachep[AuCache_HINOTIFY])
+ au_hin_destroy_cache();
+}
--
1.5.5.1.308.g1fbb5.dirty
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/