 include/linux/wait.h |  2 +-
 kernel/sched/wait.c  |  9 +++++++--
 mm/filemap.c         | 51 ++++++++++++++++++++++++++++++++++++++-------------
 3 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 898c890fc153..5ab3df535f39 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -200,7 +200,7 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq
 
 void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
 void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
-void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
+int __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
 		unsigned int mode, void *key, wait_queue_entry_t *bookmark);
 void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
 void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 01f5d3020589..578f4f4a400d 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -158,10 +158,15 @@ void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, vo
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked_key);
 
-void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
+/*
+ * This returns true if it woke up an exclusive waiter (ie
+ * 'nr_exclusive' dropped from 1 to 0). May be useful for
+ * lock handoff.
+ */
+int __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
 		unsigned int mode, void *key, wait_queue_entry_t *bookmark)
 {
-	__wake_up_common(wq_head, mode, 1, 0, key, bookmark);
+	return !__wake_up_common(wq_head, mode, 1, 0, key, bookmark);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark);
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 9f131f1cfde3..1e2536b98000 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -998,8 +998,9 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
 		return 0;
 
 	/*
-	 * If it's an exclusive wait, we get the bit for it, and
-	 * stop walking if we can't.
+	 * If it's an exclusive wait, we just tell the waker that
+	 * we have done the exclusive wait. It will know never to
+	 * actually even clear the bit.
 	 *
 	 * If it's a non-exclusive wait, then the fact that this
 	 * wake function was called means that the bit already
@@ -1007,11 +1008,8 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
 	 * re-took it.
 	 */
 	ret = 0;
-	if (wait->flags & WQ_FLAG_EXCLUSIVE) {
-		if (test_and_set_bit(key->bit_nr, &key->page->flags))
-			return -1;
+	if (wait->flags & WQ_FLAG_EXCLUSIVE)
 		ret = 1;
-	}
 	wait->flags |= WQ_FLAG_WOKEN;
 	wake_up_state(wait->private, mode);
 
@@ -1029,12 +1027,13 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
 	return ret;
 }
 
-static void wake_up_page_bit(struct page *page, int bit_nr)
+static int wake_up_page_bit(struct page *page, int bit_nr)
 {
 	wait_queue_head_t *q = page_waitqueue(page);
 	struct wait_page_key key;
 	unsigned long flags;
 	wait_queue_entry_t bookmark;
+	int exclusive;
 
 	key.page = page;
 	key.bit_nr = bit_nr;
@@ -1046,7 +1045,7 @@ static void wake_up_page_bit(struct page *page, int bit_nr)
 	INIT_LIST_HEAD(&bookmark.entry);
 
 	spin_lock_irqsave(&q->lock, flags);
-	__wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
+	exclusive = __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
 
 	while (bookmark.flags & WQ_FLAG_BOOKMARK) {
 		/*
@@ -1058,7 +1057,7 @@ static void wake_up_page_bit(struct page *page, int bit_nr)
 		spin_unlock_irqrestore(&q->lock, flags);
 		cpu_relax();
 		spin_lock_irqsave(&q->lock, flags);
-		__wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
+		exclusive |= __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
 	}
 
 	/*
@@ -1081,6 +1080,7 @@ static void wake_up_page_bit(struct page *page, int bit_nr)
 		 */
 	}
 	spin_unlock_irqrestore(&q->lock, flags);
+	return exclusive;
 }
 
 static void wake_up_page(struct page *page, int bit)
@@ -1339,11 +1339,36 @@ static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem
  */
 void unlock_page(struct page *page)
 {
-	BUILD_BUG_ON(PG_waiters != 7);
+	unsigned long flags;
+
 	page = compound_head(page);
-	VM_BUG_ON_PAGE(!PageLocked(page), page);
-	if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags))
-		wake_up_page_bit(page, PG_locked);
+
+	flags = READ_ONCE(page->flags);
+	VM_BUG_ON_PAGE(!(flags & (1 << PG_locked)), page);
+
+	for (;;) {
+		unsigned long new;
+
+		/*
+		 * If wake_up_page_bit() wakes an exclusive
+		 * waiter, it will have handed the lock over
+		 * directly.
+		 */
+		if (flags & (1 << PG_waiters)) {
+			/*
+			 * Lock hand-over serialization. The atomic is the
+			 * spinlock wake_up_page_bit() will do.
+			 */
+			smp_mb__before_atomic();
+			if (wake_up_page_bit(page, PG_locked))
+				return;
+		}
+		new = cmpxchg_release(&page->flags, flags, flags & ~(1 << PG_locked));
+		if (likely(new == flags))
+			return;
+
+		flags = new;
+	}
 }
 EXPORT_SYMBOL(unlock_page);
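
As a sanity check on the handoff logic above, here is a minimal user-space
sketch of the same unlock loop, using C11 atomics in place of the kernel's
page flags and wait queues. Everything in it is a made-up stand-in, not the
patch's code: LOCKED/WAITERS mirror PG_locked/PG_waiters, and
handoff_one_waiter() plays the role of wake_up_page_bit(), returning true
when an exclusive waiter took over the lock, in which case the LOCKED bit
is deliberately never cleared.

/*
 * Toy model of the unlock_page() slow/fast paths in the patch above.
 * Not kernel code: the wait queue is faked with a waiter counter.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define LOCKED  (1ul << 0)	/* stand-in for PG_locked */
#define WAITERS (1ul << 7)	/* stand-in for PG_waiters */

static atomic_ulong word;		/* stand-in for page->flags */
static atomic_int exclusive_waiters;	/* fake wait queue */

/*
 * Stand-in for wake_up_page_bit(): "wake" one queued exclusive
 * waiter, telling the caller the lock was handed over directly.
 */
static bool handoff_one_waiter(void)
{
	int n = atomic_load(&exclusive_waiters);
	while (n > 0) {
		if (atomic_compare_exchange_weak(&exclusive_waiters, &n, n - 1))
			return true;	/* that waiter now owns the lock */
	}
	return false;			/* no exclusive waiter queued */
}

static void toy_unlock(void)
{
	unsigned long flags = atomic_load(&word);

	for (;;) {
		/*
		 * Slow path: someone is queued; try to hand the lock
		 * over without ever clearing the LOCKED bit.
		 */
		if ((flags & WAITERS) && handoff_one_waiter())
			return;

		/*
		 * Fast path: drop the lock bit. On failure the C11
		 * cmpxchg reloads 'flags' for us (the kernel version
		 * re-reads it from the cmpxchg return value) and we
		 * retry, e.g. because WAITERS got set concurrently.
		 */
		unsigned long new = flags & ~LOCKED;
		if (atomic_compare_exchange_strong_explicit(&word, &flags, new,
				memory_order_release, memory_order_relaxed))
			return;
	}
}

int main(void)
{
	atomic_store(&word, LOCKED | WAITERS);
	atomic_store(&exclusive_waiters, 1);

	toy_unlock();	/* hands over: LOCKED stays set */
	printf("after handoff: locked=%lu\n", atomic_load(&word) & LOCKED);

	toy_unlock();	/* no waiters left: clears LOCKED */
	printf("after unlock:  locked=%lu\n", atomic_load(&word) & LOCKED);
	return 0;
}

The structural point the sketch tries to show: on a handoff the lock bit
never passes through an unlocked state, so a third party spinning on a
trylock cannot barge in ahead of the queued waiter, and the cmpxchg fast
path only runs when no exclusive waiter accepted the handoff.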