[PATCH 1/1] fs: pipe: wakeup readers everytime new data written is to pipe

From: Sandeep Patil
Date: Thu Jul 29 2021 - 18:27:36 EST


commit '1b6b26ae7053 ("pipe: fix and clarify pipe write wakeup logic")'
changed pipe_write() to wakeup readers only if the pipe was empty.
Prior to this change, threads waiting in epoll_wait(EPOLLET | EPOLLIN)
on non-empty pipes would get woken up on new data.

It meant an applications that,
1. used pipe + epoll for notifications between threads / processes
2. Didn't drain the pipe on each epoll wakeup unless the pipe was full
started to experience hang / timeouts in threads stuck in epoll_wait()

So restore the old behavior to wakeup all readers if any new data is
written to the pipe.

Fixes: 1b6b26ae7053 ("pipe: fix and clarify pipe write wakeup logic")
Signed-off-by: Sandeep Patil <sspatil@xxxxxxxxxxx>
---
fs/pipe.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index bfd946a9ad01..dda22a316bb3 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -406,7 +406,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
ssize_t ret = 0;
size_t total_len = iov_iter_count(from);
ssize_t chars;
- bool was_empty = false;
+ bool do_wakeup = false;
bool wake_next_writer = false;

/* Null write succeeds. */
@@ -429,10 +429,11 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
#endif

/*
- * Only wake up if the pipe started out empty, since
- * otherwise there should be no readers waiting.
+ * Wake up readers if the pipe was written to. Regardless
+ * of whether it was empty or not. Otherwise, threads
+ * waiting with EPOLLET will hang until the pipe is emptied.
*
- * If it wasn't empty we try to merge new data into
+ * If pipe wasn't empty we try to merge new data into
* the last buffer.
*
* That naturally merges small writes, but it also
@@ -440,9 +441,8 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
* spanning multiple pages.
*/
head = pipe->head;
- was_empty = pipe_empty(head, pipe->tail);
chars = total_len & (PAGE_SIZE-1);
- if (chars && !was_empty) {
+ if (chars && !pipe_empty(head, pipe->tail)) {
unsigned int mask = pipe->ring_size - 1;
struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
int offset = buf->offset + buf->len;
@@ -460,6 +460,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
}

buf->len += ret;
+ do_wakeup = true;
if (!iov_iter_count(from))
goto out;
}
@@ -526,6 +527,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
ret += copied;
buf->offset = 0;
buf->len = copied;
+ do_wakeup = true;

if (!iov_iter_count(from))
break;
@@ -553,13 +555,12 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
* become empty while we dropped the lock.
*/
__pipe_unlock(pipe);
- if (was_empty) {
+ if (do_wakeup) {
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
}
wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
__pipe_lock(pipe);
- was_empty = pipe_empty(pipe->head, pipe->tail);
wake_next_writer = true;
}
out:
@@ -576,7 +577,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
* how (for example) the GNU make jobserver uses small writes to
* wake up pending jobs
*/
- if (was_empty) {
+ if (do_wakeup) {
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
}
--
2.32.0.554.ge1b32706d8-goog