Re: [RFC 1/2] fanotify: new event FAN_MODIFY_DIR

From: Amir Goldstein
Date: Tue Mar 14 2017 - 06:41:08 EST


On Tue, Mar 14, 2017 at 1:16 AM, Filip ÅtÄdronskà <r.lklm@xxxxxxxxxx> wrote:
> An example userspace program that uses FAN_MODIFY_DIR to reliably keep
> an up-to-date internal representation of the file system. It uses some
> filehandle trickery to identify inodes, other heuristics could be also
> used.
>


An I am very happy that you used filehandles to keep track of objects
in your example, because it fits my proposal really well.

See, if you used my proposed API, you would have had

fan_fd = CHK(fanotify_init(FAN_UNLIMITED_QUEUE| \
FAN_EVENT_INFO_PARENT | FAN_EVENT_INFO_FH,
O_RDONLY));

And then you would have gotten the filhandle of parent with the event
and you would need find_inode().

Furthermore, my proposal records the filehandle at the time of the event
and therefore, does not have to keep an elevated refcount of the object
in the events queue.

Again, this needs some work, but I will try to post a testing branch
which demonstrates how my patches should be used with this test
program.

Thanks again for sharing it.

Amir.

> ---
>
> //#define _GNU_SOURCE
>
> #include <stdio.h>
> #include <stdlib.h>
> #include <unistd.h>
> #include <sys/types.h>
> #include <sys/stat.h>
> #include <fcntl.h>
> #include <sys/fanotify.h>
> #include <stdint.h>
> #include <dirent.h>
> #include <assert.h>
> #include <string.h>
>
> #include <map>
> #include <set>
> #include <list>
> using namespace std;
>
> #ifndef FAN_MODIFY_DIR
> #define FAN_MODIFY_DIR 0x00040000
> #endif
>
> // die-on-error helpers
> #define CHK(x) ({ __typeof__(x) r = x; if (r == -1) { perror(#x); abort(); } r; })
> #define CHKN(x) ({ __typeof__(x) r = x; if (r == NULL) { perror(#x); abort(); } r; })
> struct inode_info;
> struct dentry_info;
>
> struct inode_info {
> ino_t ino;
> mode_t mode;
> char handle[MAX_HANDLE_SZ];
> set<struct dentry_info *> links;
> map<string, struct dentry_info *> children; // for directory inodes
> };
>
> struct dentry_info {
> struct inode_info *parent, *inode;
> string name;
> };
>
>
> map<ino_t, inode_info*> inodes;
>
> int root_fd;
> int fan_fd;
>
> bool compare_handles(const void *h1, const void *h2) {
> const struct file_handle *fh1 = (const struct file_handle*) h1;
> const struct file_handle *fh2 = (const struct file_handle*) h2;
> return (fh1->handle_bytes == fh2->handle_bytes
> && memcmp(h1, h2, fh1->handle_bytes) == 0);
> }
>
> bool handle_valid(void *handle) {
> int check_fd = open_by_handle_at(root_fd, (struct file_handle*)handle, O_PATH);
> if (check_fd >= 0) {
> CHK(close(check_fd));
> return true;
> } else if (errno == ESTALE) {
> return false;
> } else {
> perror("open_by_handle_at");
> exit(1);
> }
> }
>
> // Get the path corresponding to an inode (one of its paths, in the presence of
> // hardlinks).
> void inode_path(const struct inode_info *inode, char *buf, size_t bufsiz) {
> list<string> components;
> while (true) {
> if (inode->links.empty()) break;
> struct dentry_info *dentry = *inode->links.begin();
> components.push_front(dentry->name);
> inode = dentry->parent;
> }
> buf[0] = '\0';
> for (auto name: components) {
> int len = snprintf(buf, bufsiz, "/%s", name.c_str());
> buf += len;
> bufsiz -= len;
> }
> }
>
>
> void delete_dentry(struct dentry_info *dentry) {
> assert(dentry->parent->children[dentry->name] == dentry);
>
> char path_buf[4096];
> inode_path(dentry->parent, path_buf, sizeof(path_buf));
> printf("unlinked %s/%s (ino %lu, parent %lu)\n", path_buf, dentry->name.c_str(),
> dentry->inode->ino, dentry->parent->ino);
>
> dentry->parent->children.erase(dentry->name.c_str());
> dentry->inode->links.erase(dentry);
> // TODO: If this was the last dentry pointing to an inode, schedule removing
> // the inode after a timeout (we cannot remove it immediately because
> // the zero-link situation might occur during a rename when the source
> // directory has been processed but the target directory hasn't).
> delete dentry;
> }
>
> struct dentry_info *add_dentry(struct inode_info *parent, const char *name,
> struct inode_info *child) {
> struct dentry_info *dentry = new dentry_info();
> dentry->parent = parent;
> dentry->name = name;
> dentry->inode = child;
> parent->children[name] = dentry;
> child->links.insert(dentry);
>
> char path_buf[4096] = "\0";
> inode_path(parent, path_buf, sizeof(path_buf));
> printf("linked %s/%s (ino %lu, parent %lu)\n", path_buf, name, child->ino, parent->ino);
>
> return dentry;
> }
>
> void delete_inode(struct inode_info *inode) {
> for (auto dentry: inode->links) {
> delete_dentry(dentry);
> }
> delete inode;
> }
>
> // Given a file descriptor, find the corresponding inode object in our database,
> // or create a new one if it does not exist. An O_PATH fd suffices.
> struct inode_info *find_inode(int fd) {
> struct stat st;
> CHK(fstat(fd, &st));
> char handle[sizeof(struct file_handle) + MAX_HANDLE_SZ];
> struct file_handle *fh = (struct file_handle*)handle;
> fh->handle_bytes = sizeof(handle);
> int mntid;
> CHK(name_to_handle_at(fd, "", (struct file_handle*)handle, &mntid,
> AT_EMPTY_PATH));
>
> struct inode_info *info = inodes[st.st_ino];
> if (info) {
> // Handles can refer to the same file despite not being equal.
> // If the old handle can still be opened, we can be assured
> // that the inode number has not been recycled.
> if (compare_handles(handle, info->handle) || handle_valid(info->handle)) {
> return info;
> } else {
> delete_inode(info);
> info = NULL;
> }
> }
>
> inodes[st.st_ino] = info = new inode_info();
> info->ino = st.st_ino;
> info->mode = st.st_mode;
> memcpy(info->handle, handle, fh->handle_bytes);
> return info;
> }
>
> // Scan directory and update internal filesystem representation accordingly.
> // Closes `dirfd`.
> void scan(int dirfd, bool recursive) {
> struct inode_info *dir = find_inode(dirfd);
>
> char path_buf[4096] = "\0";
> inode_path(dir, path_buf, sizeof(path_buf));
> printf("scan %s (%lu)\n", path_buf, dir->ino);
>
> DIR *dp = CHKN(fdopendir(dirfd));
> set<string> seen;
> while (struct dirent *ent = readdir(dp)) {
> if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
> seen.insert(ent->d_name);
> if (dir->children.find(ent->d_name) != dir->children.end()
> && dir->children[ent->d_name]->inode->ino == ent->d_ino) {
> // Heuristic: It is massively unlikely that an inode number
> // would be recylced at the same path as before. So if we
> // see the same inode for the same child, we skip the more
> // expensive checks altogether. This saves us a buttload of
> // syscalls, especially given that most directory entries
> // will be unchanged after a FAN_MODIFY_DIR.
> //
> // This can be skipped if strict correctness is preferred
> // over speed.
> continue;
> }
> int fd = openat(dirfd, ent->d_name, O_PATH|O_NOFOLLOW);
> if (fd < 0) continue;
> struct inode_info *child = find_inode(fd);
> if (dir->children.find(ent->d_name) != dir->children.end()) {
> struct dentry_info *old_dentry = dir->children[ent->d_name];
> if (child != old_dentry->inode) {
> delete_dentry(old_dentry);
> add_dentry(dir, ent->d_name, child);
> }
> } else {
> add_dentry(dir, ent->d_name, child);
> }
> if (recursive && S_ISDIR(child->mode)) {
> // `fd' is just an O_PATH fd. For scanning we need O_RDONLY.
> int scan_fd = CHK(openat(fd, ".", O_RDONLY|O_DIRECTORY));
> scan(scan_fd, true); // closes scan_fd
> }
> close(fd);
> }
> for (auto it: dir->children) {
> if (seen.find(it.second->name) == seen.end()) delete_dentry(it.second);
> }
> closedir(dp);
> }
>
> void event_loop() {
> while (true) {
> char buf[4096];
> ssize_t len = CHK(read(fan_fd, buf, sizeof(buf)));
> const struct fanotify_event_metadata *event;
> event = (const struct fanotify_event_metadata*) buf;
> while (FAN_EVENT_OK(event, len)) {
> if (event->vers != FANOTIFY_METADATA_VERSION) abort();
> if (event->mask & FAN_MODIFY_DIR) {
> scan(event->fd, false);
> } else if (event->mask & FAN_Q_OVERFLOW) {
> abort(); // TODO: full rescan needed
> } else {
> close(event->fd);
> }
> event = FAN_EVENT_NEXT(event, len);
> }
> }
> }
>
> int main(int argc, char **argv) {
> if (argc != 2) { fprintf(stderr, "Usage: %s MOUNTPOINT\n", argv[0]); return 1; }
>
> root_fd = CHK(open(argv[1], O_RDONLY|O_DIRECTORY));
> // In a real application, FAN_UNLIMITED_QUEUE would be replaced with a secondary
> // userspace queue filled during scanning.
> fan_fd = CHK(fanotify_init(FAN_UNLIMITED_QUEUE, O_RDONLY));
> CHK(fanotify_mark(fan_fd, FAN_MARK_ADD|FAN_MARK_MOUNT, FAN_MODIFY_DIR|FAN_ONDIR,
> root_fd, NULL));
>
> scan(dup(root_fd), true);
>
> event_loop();
>
> return 0;
> }