Re: [PATCH] pipe_read: don't wake up the writer if the pipe is still full
From: Linus Torvalds
Date: Mon Mar 03 2025 - 15:48:51 EST
On Mon, 3 Mar 2025 at 10:28, Oleg Nesterov <oleg@xxxxxxxxxx> wrote:
>
> Stupid question... but do we really need to change the code which update
> tail/head if we pack them into a single word?
No. It's only the READ_ONCE() parts that need changing.
See this suggested patch, which does something very similar to what
you were thinking of.
ENTIRELY UNTESTED, but it seems to generate ok code. It might even
generate better code than what we have now.
Linus
fs/pipe.c | 19 ++++++++-----------
include/linux/pipe_fs_i.h | 39 +++++++++++++++++++++++++++++++++++++--
2 files changed, 45 insertions(+), 13 deletions(-)
diff --git a/fs/pipe.c b/fs/pipe.c
index ce1af7592780..97cc70572606 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -210,11 +210,10 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
static inline bool pipe_readable(const struct pipe_inode_info *pipe)
{
- unsigned int head = READ_ONCE(pipe->head);
- unsigned int tail = READ_ONCE(pipe->tail);
+ union pipe_index idx = { READ_ONCE(pipe->head_tail) };
unsigned int writers = READ_ONCE(pipe->writers);
- return !pipe_empty(head, tail) || !writers;
+ return !pipe_empty(idx.head, idx.tail) || !writers;
}
static inline unsigned int pipe_update_tail(struct pipe_inode_info *pipe,
@@ -417,11 +416,10 @@ static inline int is_packetized(struct file *file)
/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
static inline bool pipe_writable(const struct pipe_inode_info *pipe)
{
- unsigned int head = READ_ONCE(pipe->head);
- unsigned int tail = READ_ONCE(pipe->tail);
+ union pipe_index idx = { READ_ONCE(pipe->head_tail) };
unsigned int max_usage = READ_ONCE(pipe->max_usage);
- return !pipe_full(head, tail, max_usage) ||
+ return !pipe_full(idx.head, idx.tail, max_usage) ||
!READ_ONCE(pipe->readers);
}
@@ -659,7 +657,7 @@ pipe_poll(struct file *filp, poll_table *wait)
{
__poll_t mask;
struct pipe_inode_info *pipe = filp->private_data;
- unsigned int head, tail;
+ union pipe_index idx;
/* Epoll has some historical nasty semantics, this enables them */
WRITE_ONCE(pipe->poll_usage, true);
@@ -680,19 +678,18 @@ pipe_poll(struct file *filp, poll_table *wait)
* if something changes and you got it wrong, the poll
* table entry will wake you up and fix it.
*/
- head = READ_ONCE(pipe->head);
- tail = READ_ONCE(pipe->tail);
+ idx.head_tail = READ_ONCE(pipe->head_tail);
mask = 0;
if (filp->f_mode & FMODE_READ) {
- if (!pipe_empty(head, tail))
+ if (!pipe_empty(idx.head, idx.tail))
mask |= EPOLLIN | EPOLLRDNORM;
if (!pipe->writers && filp->f_pipe != pipe->w_counter)
mask |= EPOLLHUP;
}
if (filp->f_mode & FMODE_WRITE) {
- if (!pipe_full(head, tail, pipe->max_usage))
+ if (!pipe_full(idx.head, idx.tail, pipe->max_usage))
mask |= EPOLLOUT | EPOLLWRNORM;
/*
* Most Unices do not set EPOLLERR for FIFOs but on Linux they
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 8ff23bf5a819..b1a3b99f9ff8 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -31,6 +31,33 @@ struct pipe_buffer {
unsigned long private;
};
+/*
+ * Really only alpha needs 32-bit fields, but
+ * might as well do it for 64-bit architectures
+ * since that's what we've historically done,
+ * and it makes 'head_tail' always be a simple
+ * 'unsigned long'.
+ */
+#ifdef CONFIG_64BIT
+ typedef unsigned int pipe_index_t;
+#else
+ typedef unsigned short pipe_index_t;
+#endif
+
+/*
+ * We have to declare this outside 'struct pipe_inode_info',
+ * but then we can't use 'union pipe_index' for an anonymous
+ * union, so we end up having to duplicate this declaration
+ * below. Annoying.
+ */
+union pipe_index {
+ unsigned long head_tail;
+ struct {
+ pipe_index_t head;
+ pipe_index_t tail;
+ };
+};
+
/**
* struct pipe_inode_info - a linux kernel pipe
* @mutex: mutex protecting the whole thing
@@ -58,8 +85,16 @@ struct pipe_buffer {
struct pipe_inode_info {
struct mutex mutex;
wait_queue_head_t rd_wait, wr_wait;
- unsigned int head;
- unsigned int tail;
+
+ /* This has to match the 'union pipe_index' above */
+ union {
+ unsigned long head_tail;
+ struct {
+ pipe_index_t head;
+ pipe_index_t tail;
+ };
+ };
+
unsigned int max_usage;
unsigned int ring_size;
unsigned int nr_accounted;