Re: [PATCH 1/4] fsnotify: fix pinning of marks and groups

From: Amir Goldstein
Date: Fri Oct 20 2017 - 07:37:33 EST


On Thu, Oct 19, 2017 at 4:58 PM, Miklos Szeredi <mszeredi@xxxxxxxxxx> wrote:
> We may fail to pin one of the marks in fsnotify_prepare_user_wait() when
> dropping the srcu read lock, resulting in use after free at the next
> iteration.
>
> Solution is to store both marks in iter_info instead of just the one we'll
> be sending the event for, as well as pinning the group for both (if they
> have the same group: fine, pinnig it twice doesn't hurt).
>
> Also blind increment of group's user_waits is not enough, we could be far
> enough in the group's destruction that it isn't taken into account.
> Instead we need to check (under lock) that the mark is still attached to
> the group after having obtained a ref to the mark. If not, skip it.
>
> In the process of fixing the above, clean up fsnotify_prepare_user_wait()/
> fsnotify_finish_user_wait().
>

Change looks correct, but it was so hard to follow this patch.
The moving of helpers around created a completely garbled diff
where it is impossible to see the logic changed.

Suggest to separate to 3 patches: create helpers (move them around),
pass the 2 marks in iter_info and fix increment of group's user_waits.

> Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxxxxx>
> Fixes: 9385a84d7e1f ("fsnotify: Pass fsnotify_iter_info into handle_event handler")
> Cc: <stable@xxxxxxxxxxxxxxx> # v4.12
> ---
> fs/notify/fsnotify.c | 6 ++--
> fs/notify/mark.c | 100 +++++++++++++++++++++++----------------------------
> 2 files changed, 48 insertions(+), 58 deletions(-)
>
> diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
> index 0c4583b61717..48ec61f4c4d5 100644
> --- a/fs/notify/fsnotify.c
> +++ b/fs/notify/fsnotify.c
> @@ -336,6 +336,9 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
> vfsmount_group = vfsmount_mark->group;
> }
>
> + iter_info.inode_mark = inode_mark;
> + iter_info.vfsmount_mark = vfsmount_mark;
> +
> if (inode_group && vfsmount_group) {
> int cmp = fsnotify_compare_groups(inode_group,
> vfsmount_group);
> @@ -348,9 +351,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
> }
> }
>
> - iter_info.inode_mark = inode_mark;
> - iter_info.vfsmount_mark = vfsmount_mark;
> -
> ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
> data, data_is, cookie, file_name,
> &iter_info);
> diff --git a/fs/notify/mark.c b/fs/notify/mark.c
> index 9991f8826734..9a7c8dbeeb59 100644
> --- a/fs/notify/mark.c
> +++ b/fs/notify/mark.c
> @@ -109,16 +109,6 @@ void fsnotify_get_mark(struct fsnotify_mark *mark)
> atomic_inc(&mark->refcnt);
> }
>
> -/*
> - * Get mark reference when we found the mark via lockless traversal of object
> - * list. Mark can be already removed from the list by now and on its way to be
> - * destroyed once SRCU period ends.
> - */
> -static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
> -{
> - return atomic_inc_not_zero(&mark->refcnt);
> -}
> -
> static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
> {
> u32 new_mask = 0;
> @@ -256,32 +246,53 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
> FSNOTIFY_REAPER_DELAY);
> }
>
> -bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
> +/*
> + * Get mark reference when we found the mark via lockless traversal of object
> + * list. Mark can be already removed from the list by now and on its way to be
> + * destroyed once SRCU period ends.
> + */
> +static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
> {
> - struct fsnotify_group *group;
> -
> - if (WARN_ON_ONCE(!iter_info->inode_mark && !iter_info->vfsmount_mark))
> - return false;
> -
> - if (iter_info->inode_mark)
> - group = iter_info->inode_mark->group;
> - else
> - group = iter_info->vfsmount_mark->group;
> + if (!mark)
> + return true;
> +
> + if (atomic_inc_not_zero(&mark->refcnt)) {
> + spin_lock(&mark->lock);
> + if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) {
> + /* mark is attached, group is still alive then */
> + atomic_inc(&mark->group->user_waits);
> + spin_unlock(&mark->lock);
> + return true;
> + }
> + spin_unlock(&mark->lock);
> + fsnotify_put_mark(mark);
> + }
> + return false;
> +}
>
> - /*
> - * Since acquisition of mark reference is an atomic op as well, we can
> - * be sure this inc is seen before any effect of refcount increment.
> - */
> - atomic_inc(&group->user_waits);
> +static void fsnotify_put_mark_wait(struct fsnotify_mark *mark)
> +{
> + if (mark) {
> + struct fsnotify_group *group = mark->group;
>
> - if (iter_info->inode_mark) {
> - /* This can fail if mark is being removed */
> - if (!fsnotify_get_mark_safe(iter_info->inode_mark))
> - goto out_wait;
> + fsnotify_put_mark(mark);
> + /*
> + * We abuse notification_waitq on group shutdown for waiting for all
> + * marks pinned when waiting for userspace.
> + */
> + if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
> + wake_up(&group->notification_waitq);
> }
> - if (iter_info->vfsmount_mark) {
> - if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark))
> - goto out_inode;
> +}
> +
> +bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
> +{
> + /* This can fail if mark is being removed */
> + if (!fsnotify_get_mark_safe(iter_info->inode_mark))
> + return false;
> + if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) {
> + fsnotify_put_mark_wait(iter_info->inode_mark);
> + return false;
> }
>
> /*
> @@ -292,34 +303,13 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
> srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx);
>
> return true;
> -out_inode:
> - if (iter_info->inode_mark)
> - fsnotify_put_mark(iter_info->inode_mark);
> -out_wait:
> - if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
> - wake_up(&group->notification_waitq);
> - return false;
> }
>
> void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
> {
> - struct fsnotify_group *group = NULL;
> -
> iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
> - if (iter_info->inode_mark) {
> - group = iter_info->inode_mark->group;
> - fsnotify_put_mark(iter_info->inode_mark);
> - }
> - if (iter_info->vfsmount_mark) {
> - group = iter_info->vfsmount_mark->group;
> - fsnotify_put_mark(iter_info->vfsmount_mark);
> - }
> - /*
> - * We abuse notification_waitq on group shutdown for waiting for all
> - * marks pinned when waiting for userspace.
> - */
> - if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
> - wake_up(&group->notification_waitq);
> + fsnotify_put_mark_wait(iter_info->inode_mark);
> + fsnotify_put_mark_wait(iter_info->vfsmount_mark);
> }
>
> /*
> --
> 2.5.5
>