Re: [PATCH] pipe_read: don't wake up the writer if the pipe is still full

From: K Prateek Nayak
Date: Mon Mar 03 2025 - 13:33:28 EST


Hello Mateusz,

On 3/3/2025 11:24 PM, Mateusz Guzik wrote:
Can you guys try out the patch below?

It changes things up so that there is no need to read 2 different vars.

It is not the final version and I don't claim to be able to fully
justify the thing at the moment either, but I would like to know if it
fixes the problem.

Happy to help! We've queued the below patch for an overnight run, will
report back once it is done.

Full disclaimer: We're testing on top of commit aaec5a95d596
("pipe_read: don't wake up the writer if the pipe is still full") where the
issue is more reproducible. I've replaced the VFS_BUG_ON() with a plain
BUG_ON() based on [1] since v6.14-rc1 did not include the CONFIG_DEBUG_VFS
bits. Hope that is alright.

[1] https://lore.kernel.org/lkml/20250209185523.745956-2-mjguzik@xxxxxxxxx/

/off to get some shut eyes/

--
Thanks and Regards,
Prateek


If you don't have time that's fine, this is a quick jab. While I can't
reproduce the bug myself even after inserting a delay by hand with
msleep between the loads, I verified it does not outright break either.
:P

diff --git a/fs/pipe.c b/fs/pipe.c
index 19a7948ab234..e61ad589fc2c 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -210,11 +210,21 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
static inline bool pipe_readable(const struct pipe_inode_info *pipe)
{
- unsigned int head = READ_ONCE(pipe->head);
- unsigned int tail = READ_ONCE(pipe->tail);
- unsigned int writers = READ_ONCE(pipe->writers);
+ return !READ_ONCE(pipe->isempty) || !READ_ONCE(pipe->writers);
+}
+
+static inline void pipe_recalc_state(struct pipe_inode_info *pipe)
+{
+ pipe->isempty = pipe_empty(pipe->head, pipe->tail);
+ pipe->isfull = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
+ VFS_BUG_ON(pipe->isempty && pipe->isfull);
+}
- return !pipe_empty(head, tail) || !writers;
+static inline void pipe_update_head(struct pipe_inode_info *pipe,
+ unsigned int head)
+{
+ pipe->head = ++head;
+ pipe_recalc_state(pipe);
}
static inline unsigned int pipe_update_tail(struct pipe_inode_info *pipe,
@@ -244,6 +254,7 @@ static inline unsigned int pipe_update_tail(struct pipe_inode_info *pipe,
* without the spinlock - the mutex is enough.
*/
pipe->tail = ++tail;
+ pipe_recalc_state(pipe);
return tail;
}
@@ -403,12 +414,7 @@ static inline int is_packetized(struct file *file)
/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
static inline bool pipe_writable(const struct pipe_inode_info *pipe)
{
- unsigned int head = READ_ONCE(pipe->head);
- unsigned int tail = READ_ONCE(pipe->tail);
- unsigned int max_usage = READ_ONCE(pipe->max_usage);
-
- return !pipe_full(head, tail, max_usage) ||
- !READ_ONCE(pipe->readers);
+ return !READ_ONCE(pipe->isfull) || !READ_ONCE(pipe->readers);
}
static ssize_t
@@ -512,7 +518,7 @@ anon_pipe_write(struct kiocb *iocb, struct iov_iter *from)
break;
}
- pipe->head = head + 1;
+ pipe_update_head(pipe, head);
pipe->tmp_page = NULL;
/* Insert it into the buffer array */
buf = &pipe->bufs[head & mask];
@@ -529,10 +535,9 @@ anon_pipe_write(struct kiocb *iocb, struct iov_iter *from)
if (!iov_iter_count(from))
break;
- }
- if (!pipe_full(head, pipe->tail, pipe->max_usage))
continue;
+ }
/* Wait for buffer space to become available. */
if ((filp->f_flags & O_NONBLOCK) ||
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 8ff23bf5a819..d4b7539399b5 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -69,6 +69,8 @@ struct pipe_inode_info {
unsigned int r_counter;
unsigned int w_counter;
bool poll_usage;
+ bool isempty;
+ bool isfull;
#ifdef CONFIG_WATCH_QUEUE
bool note_loss;
#endif