include/linux/wait.h |  2 +-
 kernel/sched/wait.c  |  9 +++++++--
 mm/filemap.c         | 51 ++++++++++++++++++++++++++++++++++++++-------------
 3 files changed, 46 insertions(+), 16 deletions(-)
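The idea in one sentence: instead of clearing PG_locked and letting woken waiters race to re-take it, unlock_page() now asks the wakeup path whether an exclusive waiter consumed the wakeup, and if so it leaves PG_locked set, handing the lock to that waiter directly. A toy pthread model of the hand-over protocol (illustration only, not kernel code; the struct and function names are made up, and FIFO fairness is not modeled):

	#include <pthread.h>
	#include <stdbool.h>

	/* 'locked' stands in for PG_locked, the mutex for the
	 * waitqueue spinlock, 'waiters' for the page's wait list. */
	struct toy_lock {
		pthread_mutex_t q_lock;
		pthread_cond_t q_cond;
		bool locked;
		bool handed_off;
		int waiters;
	};

	static struct toy_lock tl = {
		.q_lock = PTHREAD_MUTEX_INITIALIZER,
		.q_cond = PTHREAD_COND_INITIALIZER,
	};

	static void toy_lock_acquire(struct toy_lock *l)
	{
		pthread_mutex_lock(&l->q_lock);
		if (l->locked) {
			l->waiters++;
			while (!l->handed_off)
				pthread_cond_wait(&l->q_cond, &l->q_lock);
			/* 'locked' was never cleared: ownership moved to us. */
			l->handed_off = false;
			l->waiters--;
		} else {
			l->locked = true;
		}
		pthread_mutex_unlock(&l->q_lock);
	}

	static void toy_lock_release(struct toy_lock *l)
	{
		pthread_mutex_lock(&l->q_lock);
		if (l->waiters) {
			/* Hand over: leave 'locked' set, wake one waiter. */
			l->handed_off = true;
			pthread_cond_signal(&l->q_cond);
		} else {
			l->locked = false;
		}
		pthread_mutex_unlock(&l->q_lock);
	}

Builds with -pthread; the only point is that 'locked' is never cleared when a waiter takes over, which is exactly what the PG_locked handling below does.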

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 898c890fc153..5ab3df535f39 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -200,7 +200,7 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq
 
 void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
 void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
-void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
+int __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
 		unsigned int mode, void *key, wait_queue_entry_t *bookmark);
 void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
 void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 01f5d3020589..578f4f4a400d 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -158,10 +158,15 @@ void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, vo
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked_key);
 
-void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
+/*
+ * This returns true if it woke up an exclusive waiter (i.e.
+ * 'nr_exclusive' dropped from 1 to 0), which the caller can
+ * use for lock handoff.
+ */
+int __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
 		unsigned int mode, void *key, wait_queue_entry_t *bookmark)
 {
-	__wake_up_common(wq_head, mode, 1, 0, key, bookmark);
+	return !__wake_up_common(wq_head, mode, 1, 0, key, bookmark);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark);
 
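For reference: __wake_up_common() returns how many of the requested exclusive wakeups are still outstanding, so with nr_exclusive == 1 a return value of 0 means exactly one exclusive waiter consumed the wakeup, and the '!' turns that into the boolean described above. A hypothetical caller (illustration, not part of this patch) would use it like so:

	spin_lock_irqsave(&q->lock, flags);
	if (__wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark)) {
		/* An exclusive waiter took the wakeup: hand the
		 * resource over instead of releasing it. */
		handoff = true;
	}
	spin_unlock_irqrestore(&q->lock, flags);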
diff --git a/mm/filemap.c b/mm/filemap.c
index 9f131f1cfde3..1e2536b98000 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -998,8 +998,9 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
 		return 0;
 
 	/*
-	 * If it's an exclusive wait, we get the bit for it, and
-	 * stop walking if we can't.
+	 * If it's an exclusive wait, we just tell the waker that
+	 * an exclusive waiter consumed the wakeup. It then knows
+	 * not to clear the bit at all: the lock is handed over.
 	 *
 	 * If it's a non-exclusive wait, then the fact that this
 	 * wake function was called means that the bit already
@@ -1007,11 +1008,8 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
 	 * re-took it.
 	 */
 	ret = 0;
-	if (wait->flags & WQ_FLAG_EXCLUSIVE) {
-		if (test_and_set_bit(key->bit_nr, &key->page->flags))
-			return -1;
+	if (wait->flags & WQ_FLAG_EXCLUSIVE)
 		ret = 1;
-	}
 	wait->flags |= WQ_FLAG_WOKEN;
 
 	wake_up_state(wait->private, mode);
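With the test_and_set_bit() gone, a woken exclusive waiter now owns PG_locked without ever touching page->flags: the waker never cleared the bit, so there is nothing to re-take. That only works if the sleeping side treats the wakeup itself as lock acquisition; the wait loop in wait_on_page_bit_common() (not shown in this diff) has to take roughly the shape sketched below. This is the expected counterpart, not the patched function:

	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		/* Set by wake_page_function() before the wakeup; for an
		 * exclusive wait it means PG_locked is still set and is
		 * now ours. */
		if (wait->flags & WQ_FLAG_WOKEN)
			break;
		io_schedule();
	}
	__set_current_state(TASK_RUNNING);
	/* Note: no test_and_set_bit(PG_locked, &page->flags) retry. */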
@@ -1029,12 +1027,13 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
 	return ret;
 }
 
-static void wake_up_page_bit(struct page *page, int bit_nr)
+static int wake_up_page_bit(struct page *page, int bit_nr)
 {
 	wait_queue_head_t *q = page_waitqueue(page);
 	struct wait_page_key key;
 	unsigned long flags;
 	wait_queue_entry_t bookmark;
+	int exclusive;
 
 	key.page = page;
 	key.bit_nr = bit_nr;
@@ -1046,7 +1045,7 @@ static void wake_up_page_bit(struct page *page, int bit_nr)
 	INIT_LIST_HEAD(&bookmark.entry);
 
 	spin_lock_irqsave(&q->lock, flags);
-	__wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
+	exclusive = __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
 
 	while (bookmark.flags & WQ_FLAG_BOOKMARK) {
 		/*
@@ -1058,7 +1057,7 @@ static void wake_up_page_bit(struct page *page, int bit_nr)
 		spin_unlock_irqrestore(&q->lock, flags);
 		cpu_relax();
 		spin_lock_irqsave(&q->lock, flags);
-		__wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
+		exclusive |= __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
 	}
 
 	/*
@@ -1081,6 +1080,7 @@ static void wake_up_page_bit(struct page *page, int bit_nr)
 		 */
 	}
 	spin_unlock_irqrestore(&q->lock, flags);
+	return exclusive;
 }
 
 static void wake_up_page(struct page *page, int bit)
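Note the '|=' in the retry loop above: long wait lists are walked in bounded chunks (WAITQUEUE_WALK_BREAK_CNT entries at a time), with the queue lock dropped between chunks, and the one exclusive waiter can be woken in any of them. Reduced to its essentials the loop looks like this, where wake_chunk() is a stand-in for __wake_up_locked_key_bookmark():

	exclusive = wake_chunk(q, &bookmark);
	while (bookmark.flags & WQ_FLAG_BOOKMARK)	/* walk not finished */
		exclusive |= wake_chunk(q, &bookmark);

Using '=' instead of '|=' in the loop body would lose a hand-over done in an earlier chunk, and unlock_page() would then clear a PG_locked bit that a woken waiter already owns.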
@@ -1339,11 +1339,36 @@ static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem
  */
 void unlock_page(struct page *page)
 {
-	BUILD_BUG_ON(PG_waiters != 7);
+	unsigned long flags;
+
 	page = compound_head(page);
-	VM_BUG_ON_PAGE(!PageLocked(page), page);
-	if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags))
-		wake_up_page_bit(page, PG_locked);
+
+	flags = READ_ONCE(page->flags);
+	VM_BUG_ON_PAGE(!(flags & (1 << PG_locked)), page);
+
+	for (;;) {
+		unsigned long cur;
+
+		/*
+		 * If wake_up_page_bit() wakes an exclusive
+		 * waiter, it will have handed the lock over
+		 * directly.
+		 */
+		if (flags & (1 << PG_waiters)) {
+			/*
+			 * Lock hand-over serialization: the "atomic" here is
+			 * the spinlock acquisition wake_up_page_bit() will do.
+			 */
+			smp_mb__before_atomic();
+			if (wake_up_page_bit(page, PG_locked))
+				return;
+		}
+		cur = cmpxchg_release(&page->flags, flags, flags & ~(1 << PG_locked));
+		if (likely(cur == flags))
+			return;
+
+		flags = cur;
+	}
 }
 EXPORT_SYMBOL(unlock_page);
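Two details in the new unlock_page() are worth spelling out. First, cmpxchg_release() returns the value that was in page->flags before the operation, so comparing it with the snapshot both detects success and yields the fresh value for the retry. The generic shape of such a loop, with 'p' and 'mask' as placeholders:

	unsigned long old, seen;

	old = READ_ONCE(*p);
	for (;;) {
		seen = cmpxchg_release(p, old, old & ~mask);
		if (seen == old)
			break;		/* cleared 'mask'; the store is a release */
		old = seen;		/* raced with another update: retry */
	}

Second, the release ordering on the cmpxchg only covers the no-handoff path. On the hand-over path PG_locked is never written at all, and the critical section is published by smp_mb__before_atomic() paired with the spinlock that wake_up_page_bit() takes, which is what the "Lock hand-over serialization" comment refers to.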