[PATCH 3.16 103/370] fsnotify: Fix possible use-after-free in inode iteration on umount

From: Ben Hutchings
Date: Fri Mar 10 2017 - 09:03:31 EST


3.16.42-rc1 review patch. If anyone has any objections, please let me know.

------------------

From: Jan Kara <jack@xxxxxxx>

commit 5716863e0f8251d3360d4cbfc0e44e08007075df upstream.

fsnotify_unmount_inodes() plays complex tricks to pin next inode in the
sb->s_inodes list when iterating over all inodes. Furthermore the code has a
bug that if the current inode is the last on i_sb_list that does not have e.g.
I_FREEING set, then we leave next_i pointing to inode which may get removed
from the i_sb_list once we drop s_inode_list_lock thus resulting in
use-after-free issues (usually manifesting as infinite looping in
fsnotify_unmount_inodes()).

Fix the problem by keeping current inode pinned somewhat longer. Then we can
make the code much simpler and standard.

Signed-off-by: Jan Kara <jack@xxxxxxx>
[bwh: Backported to 3.16: adjust context]
Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx>
---
fs/notify/inode_mark.c | 45 +++++++++------------------------------------
1 file changed, 9 insertions(+), 36 deletions(-)

--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -249,12 +249,10 @@ out:
*/
void fsnotify_unmount_inodes(struct list_head *list)
{
- struct inode *inode, *next_i, *need_iput = NULL;
+ struct inode *inode, *iput_inode = NULL;

spin_lock(&inode_sb_list_lock);
- list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
- struct inode *need_iput_tmp;
-
+ list_for_each_entry(inode, list, i_sb_list) {
/*
* We cannot __iget() an inode in state I_FREEING,
* I_WILL_FREE, or I_NEW which is fine because by that point
@@ -277,49 +275,24 @@ void fsnotify_unmount_inodes(struct list
continue;
}

- need_iput_tmp = need_iput;
- need_iput = NULL;
-
- /* In case fsnotify_inode_delete() drops a reference. */
- if (inode != need_iput_tmp)
- __iget(inode);
- else
- need_iput_tmp = NULL;
+ __iget(inode);
spin_unlock(&inode->i_lock);
-
- /* In case the dropping of a reference would nuke next_i. */
- while (&next_i->i_sb_list != list) {
- spin_lock(&next_i->i_lock);
- if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) &&
- atomic_read(&next_i->i_count)) {
- __iget(next_i);
- need_iput = next_i;
- spin_unlock(&next_i->i_lock);
- break;
- }
- spin_unlock(&next_i->i_lock);
- next_i = list_next_entry(next_i, i_sb_list);
- }
-
- /*
- * We can safely drop inode_sb_list_lock here because either
- * we actually hold references on both inode and next_i or
- * end of list. Also no new inodes will be added since the
- * umount has begun.
- */
spin_unlock(&inode_sb_list_lock);

- if (need_iput_tmp)
- iput(need_iput_tmp);
+ if (iput_inode)
+ iput(iput_inode);

/* for each watch, send FS_UNMOUNT and then remove it */
fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);

fsnotify_inode_delete(inode);

- iput(inode);
+ iput_inode = inode;

spin_lock(&inode_sb_list_lock);
}
spin_unlock(&inode_sb_list_lock);
+
+ if (iput_inode)
+ iput(iput_inode);
}