Re: [RFC PATCH 3/7] FUSE,Inotify,Fsnotify,VFS: Add the fuse_fsnotify_update_mark inode operation

From: Amir Goldstein
Date: Tue Oct 26 2021 - 11:06:34 EST


On Mon, Oct 25, 2021 at 11:47 PM Ioannis Angelakopoulos
<iangelak@xxxxxxxxxx> wrote:
>
> Every time a local watch is placed/modified/removed on/from an inode the
> same operation has to take place in the FUSE server.
>
> Thus add the inode operation "fuse_fsnotify_update_mark", which is
> specific to FUSE inodes. This operation is called from the
> "inotify_add_watch" system call in the inotify subsystem.
>
> Specifically, the operation is called when a process tries to add, modify
> or remove a watch from a FUSE inode and the remote fsnotify support is
> enabled both in the guest kernel and the FUSE server (virtiofsd).
>
> Essentially, when the kernel adds/modifies a watch locally, also send a
> fsnotify request to the FUSE server to do the same. We keep the local watch
> placement since it is essential for the functionality of the fsnotify
> notification subsystem. However, the local events generated by the guest
> kernel will be suppressed if they affect FUSE inodes and the remote
> fsnotify support is enabled.
>
> Also modify the "fsnotify_detach_mark" function in fs/notify/mark.c to add
> support for the remote deletion of watches for FUSE inodes. In contrast to
> the add/modify operation we do not modify the inotify subsystem, but the
> fsnotify subsystem. That is because there are two ways of deleting a watch
> from an inode. The first is by manually calling the "inotify_rm_watch"
> system call and the second is automatically by the kernel when the process
> that has created an inotify instance exits. In that case the kernel is
> responsible for deleting all the watches corresponding to said inotify
> instance.
>
> Thus we send the fsnotify request for the deletion of the remote watch at
> the lowest level within "fsnotify_detach_mark" to catch both watch removal
> cases.
>
> The "fuse_fsnotify_update_mark" function in turn calls the
> "fuse_fsnotify_send_request" function, to send an fsnotify request to the
> FUSE server related to an inode watch.
>
>
> Signed-off-by: Ioannis Angelakopoulos <iangelak@xxxxxxxxxx>
> ---
> fs/fuse/dir.c | 29 +++++++++++++++++++++++++++++
> fs/notify/inotify/inotify_user.c | 11 +++++++++++
> fs/notify/mark.c | 10 ++++++++++
> include/linux/fs.h | 2 ++
> 4 files changed, 52 insertions(+)
>
> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> index d9b977c0f38d..f666aafc8d3f 100644
> --- a/fs/fuse/dir.c
> +++ b/fs/fuse/dir.c
> @@ -17,6 +17,8 @@
> #include <linux/xattr.h>
> #include <linux/iversion.h>
> #include <linux/posix_acl.h>
> +#include <linux/fsnotify_backend.h>
> +#include <linux/inotify.h>
>
> static void fuse_advise_use_readdirplus(struct inode *dir)
> {
> @@ -1805,6 +1807,30 @@ static int fuse_getattr(struct user_namespace *mnt_userns,
> return fuse_update_get_attr(inode, NULL, stat, request_mask, flags);
> }
>
> +static int fuse_fsnotify_update_mark(struct inode *inode, uint32_t action,
> + uint64_t group, uint32_t mask)
> +{
> + /*
> + * We have to remove the bits added to the mask before being attached
> + * or detached to the inode, since these bits are going to be
> + * added by the "remote" host kernel. If these bits were still enabled
> + * in the mask that was sent to the "remote" kernel then the watch would
> + * be rejected as an unsupported value. These bits are added by the
> + * fsnotify subsystem thus we use the corresponding fsnotify bits here.
> + */
> + mask = mask & ~(FS_IN_IGNORED | FS_UNMOUNT | FS_IN_ONESHOT |
> + FS_EXCL_UNLINK | FS_EVENT_ON_CHILD);
> +
> + if (!(mask & IN_ALL_EVENTS))
> + return -EINVAL;
> +
> + /*
> + * Action 0: Remove a watch
> + * Action 1: Add/Modify watch
> + */
> + return fuse_fsnotify_send_request(inode, mask, action, group);
> +}
> +
> static const struct inode_operations fuse_dir_inode_operations = {
> .lookup = fuse_lookup,
> .mkdir = fuse_mkdir,
> @@ -1824,6 +1850,7 @@ static const struct inode_operations fuse_dir_inode_operations = {
> .set_acl = fuse_set_acl,
> .fileattr_get = fuse_fileattr_get,
> .fileattr_set = fuse_fileattr_set,
> + .fsnotify_update = fuse_fsnotify_update_mark,
> };
>
> static const struct file_operations fuse_dir_operations = {
> @@ -1846,6 +1873,7 @@ static const struct inode_operations fuse_common_inode_operations = {
> .set_acl = fuse_set_acl,
> .fileattr_get = fuse_fileattr_get,
> .fileattr_set = fuse_fileattr_set,
> + .fsnotify_update = fuse_fsnotify_update_mark,
> };
>
> static const struct inode_operations fuse_symlink_inode_operations = {
> @@ -1853,6 +1881,7 @@ static const struct inode_operations fuse_symlink_inode_operations = {
> .get_link = fuse_get_link,
> .getattr = fuse_getattr,
> .listxattr = fuse_listxattr,
> + .fsnotify_update = fuse_fsnotify_update_mark,
> };
>
> void fuse_init_common(struct inode *inode)
> diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
> index 62051247f6d2..3a0fee09a7c3 100644
> --- a/fs/notify/inotify/inotify_user.c
> +++ b/fs/notify/inotify/inotify_user.c
> @@ -46,6 +46,8 @@
> #define INOTIFY_WATCH_COST (sizeof(struct inotify_inode_mark) + \
> 2 * sizeof(struct inode))
>
> +#define FSNOTIFY_ADD_MODIFY_MARK 1
> +
> /* configurable via /proc/sys/fs/inotify/ */
> static int inotify_max_queued_events __read_mostly;
>
> @@ -764,6 +766,15 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
>
> /* create/update an inode mark */
> ret = inotify_update_watch(group, inode, mask);
> + /*
> + * If the inode belongs to a remote filesystem/server that supports
> + * remote inotify events then send the mark to the remote server
> + */
> + if (ret >= 0 && inode->i_op->fsnotify_update) {
> + inode->i_op->fsnotify_update(inode,
> + FSNOTIFY_ADD_MODIFY_MARK,
> + (uint64_t)group, mask);
> + }
> path_put(&path);
> fput_and_out:
> fdput(f);
> diff --git a/fs/notify/mark.c b/fs/notify/mark.c
> index fa1d99101f89..f0d37276afcb 100644
> --- a/fs/notify/mark.c
> +++ b/fs/notify/mark.c
> @@ -77,6 +77,7 @@
> #include "fsnotify.h"
>
> #define FSNOTIFY_REAPER_DELAY (1) /* 1 jiffy */
> +#define FSNOTIFY_DELETE_MARK 0 /* Delete a mark in remote fsnotify */

This define is part of the vfs API it should be in an include file along side
FSNOTIFY_ADD_MODIFY_MARK (if we keep them in the API).

>
> struct srcu_struct fsnotify_mark_srcu;
> struct kmem_cache *fsnotify_mark_connector_cachep;
> @@ -399,6 +400,7 @@ void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
> void fsnotify_detach_mark(struct fsnotify_mark *mark)
> {
> struct fsnotify_group *group = mark->group;
> + struct inode *inode = NULL;
>
> WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
> WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) &&
> @@ -411,6 +413,14 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark)
> spin_unlock(&mark->lock);
> return;
> }
> +
> + /* Only if the object is an inode send a request to FUSE server */
> + inode = fsnotify_conn_inode(mark->connector);
> + if (inode && inode->i_op->fsnotify_update) {
> + inode->i_op->fsnotify_update(inode, FSNOTIFY_DELETE_MARK,
> + (uint64_t)group, mark->mask);
> + }
> +
> mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED;
> list_del_init(&mark->g_list);
> spin_unlock(&mark->lock);
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index e7a633353fd2..86bcc44e3ab8 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2149,6 +2149,8 @@ struct inode_operations {
> int (*fileattr_set)(struct user_namespace *mnt_userns,
> struct dentry *dentry, struct fileattr *fa);
> int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
> + int (*fsnotify_update)(struct inode *inode, uint32_t action,
> + uint64_t group, uint32_t mask);
> } ____cacheline_aligned;
>

Please split the patch that introduces the API from the FUSE implementation.

Regarding the API, group does not belong in this interface.
The inode object has an "aggregated mask" at i_fsnotify_mask
indicating an interest for an event from any group.
Remote servers should be notified when the aggregated mask changes.

Hence, Miklos has proposed a "remote fsnotify update" API which does
not carry the mask nor the action, only the watched object:
https://lore.kernel.org/linux-fsdevel/20190501205541.GC30899@xxxxxxxxxxxxxxxxxxxxxxxxxx/

On that same thread, you will see that I also proposed the API to support
full filesystem watch (by passing sb).
I am not requiring that you implement sb watch for FUSE/virtiofs, but the
API should take this future extension into account.

Thanks,
Amir.