[PATCH v2] shmem: move spinlock to the front of iput
From: Gang Li
Date:  Tue Nov 23 2021 - 22:08:52 EST
This patch fixes a data race introduced by commit 779750d20b93 ("shmem: split
huge pages beyond i_size under memory pressure").
Call Trace 1:
 shmem_unused_huge_shrink+0x3ae/0x410
 ? __list_lru_walk_one.isra.5+0x33/0x160
 super_cache_scan+0x17c/0x190
 shrink_slab.part.55+0x1ef/0x3f0
 shrink_node+0x10e/0x330
 kswapd+0x380/0x740
 kthread+0xfc/0x130
 ? mem_cgroup_shrink_node+0x170/0x170
 ? kthread_create_on_node+0x70/0x70
 ret_from_fork+0x1f/0x30
Call Trace 2:
 shmem_evict_inode+0xd8/0x190
 evict+0xbe/0x1c0
 do_unlinkat+0x137/0x330
 do_syscall_64+0x76/0x120
 entry_SYSCALL_64_after_hwframe+0x3d/0xa2
Calling iput() outside of sbinfo->shrinklist_lock lets shmem_evict_inode()
grab and delete the inode, which breaks the consistency between
shrinklist_len and shrinklist. The simultaneous deletion of adjacent
elements in the local list "list" by shmem_unused_huge_shrink() and
shmem_evict_inode() will also corrupt the list.
Fix it by updating shrinklist and shrinklist_len under shrinklist_lock
before calling iput(), instead of after it.
Fixes: 779750d20b93 ("shmem: split huge pages beyond i_size under memory pressure")
Signed-off-by: Gang Li <ligang.bdlg@xxxxxxxxxxxxx>
---
Changes in v2:
- Move the spinlock to the front of iput() instead of changing the lock
  type, since iput() may call evict(), which can deadlock when it requests
  shrinklist_lock.
- Add call trace in commit message.
v1: https://lore.kernel.org/lkml/20211122064126.76734-1-ligang.bdlg@xxxxxxxxxxxxx/
---
 mm/shmem.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 9023103ee7d8..2f70a16fc588 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -569,7 +569,6 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 		/* inode is about to be evicted */
 		if (!inode) {
 			list_del_init(&info->shrinklist);
-			removed++;
 			goto next;
 		}
 
@@ -577,15 +576,16 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 		if (round_up(inode->i_size, PAGE_SIZE) ==
 				round_up(inode->i_size, HPAGE_PMD_SIZE)) {
 			list_move(&info->shrinklist, &to_remove);
-			removed++;
 			goto next;
 		}
 
 		list_move(&info->shrinklist, &list);
 next:
+		removed++;
 		if (!--batch)
 			break;
 	}
+	sbinfo->shrinklist_len -= removed;
 	spin_unlock(&sbinfo->shrinklist_lock);
 
 	list_for_each_safe(pos, next, &to_remove) {
@@ -602,7 +602,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 		inode = &info->vfs_inode;
 
 		if (nr_to_split && split >= nr_to_split)
-			goto leave;
+			goto move_back;
 
 		page = find_get_page(inode->i_mapping,
 				(inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
@@ -616,38 +616,38 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 		}
 
 		/*
-		 * Leave the inode on the list if we failed to lock
-		 * the page at this time.
+		 * Move the inode on the list back to shrinklist if we failed
+		 * to lock the page at this time.
 		 *
 		 * Waiting for the lock may lead to deadlock in the
 		 * reclaim path.
 		 */
 		if (!trylock_page(page)) {
 			put_page(page);
-			goto leave;
+			goto move_back;
 		}
 
 		ret = split_huge_page(page);
 		unlock_page(page);
 		put_page(page);
 
-		/* If split failed leave the inode on the list */
+		/* If split failed move the inode on the list back to shrinklist */
 		if (ret)
-			goto leave;
+			goto move_back;
 
 		split++;
 drop:
 		list_del_init(&info->shrinklist);
-		removed++;
-leave:
+		goto put;
+move_back:
+		spin_lock(&sbinfo->shrinklist_lock);
+		list_move(pos, &sbinfo->shrinklist);
+		sbinfo->shrinklist_len++;
+		spin_unlock(&sbinfo->shrinklist_lock);
+put:
 		iput(inode);
 	}
 
-	spin_lock(&sbinfo->shrinklist_lock);
-	list_splice_tail(&list, &sbinfo->shrinklist);
-	sbinfo->shrinklist_len -= removed;
-	spin_unlock(&sbinfo->shrinklist_lock);
-
 	return split;
 }
 
-- 
2.20.1