[PATCH 1/4] fsnotify: fix pinning of marks and groups

From: Miklos Szeredi
Date: Thu Oct 19 2017 - 10:00:07 EST


We may fail to pin one of the marks in fsnotify_prepare_user_wait() when
dropping the srcu read lock, resulting in use after free at the next
iteration.

Solution is to store both marks in iter_info instead of just the one we'll
be sending the event for, as well as pinning the group for both (if they
have the same group: fine, pinnig it twice doesn't hurt).

Also blind increment of group's user_waits is not enough, we could be far
enough in the group's destruction that it isn't taken into account.
Instead we need to check (under lock) that the mark is still attached to
the group after having obtained a ref to the mark. If not, skip it.

In the process of fixing the above, clean up fsnotify_prepare_user_wait()/
fsnotify_finish_user_wait().

Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxxxxx>
Fixes: 9385a84d7e1f ("fsnotify: Pass fsnotify_iter_info into handle_event handler")
Cc: <stable@xxxxxxxxxxxxxxx> # v4.12
---
fs/notify/fsnotify.c | 6 ++--
fs/notify/mark.c | 100 +++++++++++++++++++++++----------------------------
2 files changed, 48 insertions(+), 58 deletions(-)

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 0c4583b61717..48ec61f4c4d5 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -336,6 +336,9 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
vfsmount_group = vfsmount_mark->group;
}

+ iter_info.inode_mark = inode_mark;
+ iter_info.vfsmount_mark = vfsmount_mark;
+
if (inode_group && vfsmount_group) {
int cmp = fsnotify_compare_groups(inode_group,
vfsmount_group);
@@ -348,9 +351,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
}
}

- iter_info.inode_mark = inode_mark;
- iter_info.vfsmount_mark = vfsmount_mark;
-
ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
data, data_is, cookie, file_name,
&iter_info);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 9991f8826734..9a7c8dbeeb59 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -109,16 +109,6 @@ void fsnotify_get_mark(struct fsnotify_mark *mark)
atomic_inc(&mark->refcnt);
}

-/*
- * Get mark reference when we found the mark via lockless traversal of object
- * list. Mark can be already removed from the list by now and on its way to be
- * destroyed once SRCU period ends.
- */
-static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
-{
- return atomic_inc_not_zero(&mark->refcnt);
-}
-
static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
u32 new_mask = 0;
@@ -256,32 +246,53 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
FSNOTIFY_REAPER_DELAY);
}

-bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
+/*
+ * Get mark reference when we found the mark via lockless traversal of object
+ * list. Mark can be already removed from the list by now and on its way to be
+ * destroyed once SRCU period ends.
+ */
+static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
{
- struct fsnotify_group *group;
-
- if (WARN_ON_ONCE(!iter_info->inode_mark && !iter_info->vfsmount_mark))
- return false;
-
- if (iter_info->inode_mark)
- group = iter_info->inode_mark->group;
- else
- group = iter_info->vfsmount_mark->group;
+ if (!mark)
+ return true;
+
+ if (atomic_inc_not_zero(&mark->refcnt)) {
+ spin_lock(&mark->lock);
+ if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) {
+ /* mark is attached, group is still alive then */
+ atomic_inc(&mark->group->user_waits);
+ spin_unlock(&mark->lock);
+ return true;
+ }
+ spin_unlock(&mark->lock);
+ fsnotify_put_mark(mark);
+ }
+ return false;
+}

- /*
- * Since acquisition of mark reference is an atomic op as well, we can
- * be sure this inc is seen before any effect of refcount increment.
- */
- atomic_inc(&group->user_waits);
+static void fsnotify_put_mark_wait(struct fsnotify_mark *mark)
+{
+ if (mark) {
+ struct fsnotify_group *group = mark->group;

- if (iter_info->inode_mark) {
- /* This can fail if mark is being removed */
- if (!fsnotify_get_mark_safe(iter_info->inode_mark))
- goto out_wait;
+ fsnotify_put_mark(mark);
+ /*
+ * We abuse notification_waitq on group shutdown for waiting for all
+ * marks pinned when waiting for userspace.
+ */
+ if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
+ wake_up(&group->notification_waitq);
}
- if (iter_info->vfsmount_mark) {
- if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark))
- goto out_inode;
+}
+
+bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
+{
+ /* This can fail if mark is being removed */
+ if (!fsnotify_get_mark_safe(iter_info->inode_mark))
+ return false;
+ if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) {
+ fsnotify_put_mark_wait(iter_info->inode_mark);
+ return false;
}

/*
@@ -292,34 +303,13 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx);

return true;
-out_inode:
- if (iter_info->inode_mark)
- fsnotify_put_mark(iter_info->inode_mark);
-out_wait:
- if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
- wake_up(&group->notification_waitq);
- return false;
}

void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
{
- struct fsnotify_group *group = NULL;
-
iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
- if (iter_info->inode_mark) {
- group = iter_info->inode_mark->group;
- fsnotify_put_mark(iter_info->inode_mark);
- }
- if (iter_info->vfsmount_mark) {
- group = iter_info->vfsmount_mark->group;
- fsnotify_put_mark(iter_info->vfsmount_mark);
- }
- /*
- * We abuse notification_waitq on group shutdown for waiting for all
- * marks pinned when waiting for userspace.
- */
- if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
- wake_up(&group->notification_waitq);
+ fsnotify_put_mark_wait(iter_info->inode_mark);
+ fsnotify_put_mark_wait(iter_info->vfsmount_mark);
}

/*
--
2.5.5