[RFC PATCH 2/2] pipe: use pipe busy wait

From: subhra mazumdar
Date: Thu Aug 30 2018 - 16:25:08 EST


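Enable busy waiting for pipes. pipe_busy_wait() is called when the pipe is
empty (for a reader) or full (for a writer); it spins for up to the
configured number of microseconds and, if nothing was signalled in that
time, falls back to sleeping in pipe_wait(). wake_up_busy_poll() is called
when data is written or read, to signal any busy-waiting threads. A tunable,
pipe_busy_poll (exposed as /proc/sys/fs/pipe-busy-poll), is introduced to
enable or disable busy waiting. Its value specifies the spin time in
microseconds; 0 disables busy waiting.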

Signed-off-by: subhra mazumdar <subhra.mazumdar@xxxxxxxxxx>
---
fs/pipe.c | 58 +++++++++++++++++++++++++++++++++++++++++++++--
include/linux/pipe_fs_i.h | 1 +
kernel/sysctl.c | 7 ++++++
3 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 97e5be8..03ce76a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -44,6 +44,7 @@ unsigned int pipe_min_size = PAGE_SIZE;
*/
unsigned long pipe_user_pages_hard;
unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
+unsigned int pipe_busy_poll;

/*
* We use a start+len construction, which provides full use of the
@@ -122,6 +123,35 @@ void pipe_wait(struct pipe_inode_info *pipe)
pipe_lock(pipe);
}

+/*
+ * Busy-wait variant of pipe_wait(): drop the pipe lock and spin until
+ * wake_up_busy_poll() changes pipe->pipe_wait_flag, a reschedule is due or
+ * pipe_busy_loop_timeout() returns true.  If nothing was signalled, fall
+ * back to sleeping in pipe_wait().  Called and returns with the lock held.
+ */
+void pipe_busy_wait(struct pipe_inode_info *pipe)
+{
+	unsigned long wait_flag = pipe->pipe_wait_flag;
+	unsigned long start_time = pipe_busy_loop_current_time();
+
+	pipe_unlock(pipe);
+	preempt_disable();
+	/* Test for inequality, not '>': the counter may wrap around. */
+	while (READ_ONCE(pipe->pipe_wait_flag) == wait_flag &&
+	       !need_resched() && !pipe_busy_loop_timeout(pipe, start_time))
+		cpu_relax();
+	preempt_enable();
+	pipe_lock(pipe);
+	if (pipe->pipe_wait_flag == wait_flag)
+		pipe_wait(pipe);
+}
+
+/* Signal busy-waiting tasks by advancing the counter they poll. */
+void wake_up_busy_poll(struct pipe_inode_info *pipe)
+{
+	WRITE_ONCE(pipe->pipe_wait_flag, pipe->pipe_wait_flag + 1);
+}
+
static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
@@ -254,6 +284,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
struct pipe_inode_info *pipe = filp->private_data;
int do_wakeup;
ssize_t ret;
+ unsigned int poll = pipe->pipe_ll_usec;

/* Null read succeeds. */
if (unlikely(total_len == 0))
@@ -331,11 +362,18 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
break;
}
if (do_wakeup) {
+ if (poll)
+ wake_up_busy_poll(pipe);
wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
- pipe_wait(pipe);
+ if (poll)
+ pipe_busy_wait(pipe);
+ else
+ pipe_wait(pipe);
}
+ if (poll && do_wakeup)
+ wake_up_busy_poll(pipe);
__pipe_unlock(pipe);

/* Signal writers asynchronously that there is more room. */
@@ -362,6 +400,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
int do_wakeup = 0;
size_t total_len = iov_iter_count(from);
ssize_t chars;
+ unsigned int poll = pipe->pipe_ll_usec;

/* Null write succeeds. */
if (unlikely(total_len == 0))
@@ -467,15 +506,22 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
break;
}
if (do_wakeup) {
+ if (poll)
+ wake_up_busy_poll(pipe);
wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
do_wakeup = 0;
}
pipe->waiting_writers++;
- pipe_wait(pipe);
+ if (poll)
+ pipe_busy_wait(pipe);
+ else
+ pipe_wait(pipe);
pipe->waiting_writers--;
}
out:
+ if (poll && do_wakeup)
+ wake_up_busy_poll(pipe);
__pipe_unlock(pipe);
if (do_wakeup) {
wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
@@ -564,6 +610,7 @@ static int
pipe_release(struct inode *inode, struct file *file)
{
struct pipe_inode_info *pipe = file->private_data;
+ unsigned int poll = pipe->pipe_ll_usec;

__pipe_lock(pipe);
if (file->f_mode & FMODE_READ)
@@ -572,6 +619,8 @@ pipe_release(struct inode *inode, struct file *file)
pipe->writers--;

if (pipe->readers || pipe->writers) {
+ if (poll)
+ wake_up_busy_poll(pipe);
wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
@@ -840,6 +889,7 @@ SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
struct file *files[2];
int fd[2];
int error;
+ struct pipe_inode_info *pipe;

error = __do_pipe_flags(fd, files, flags);
if (!error) {
@@ -853,6 +903,10 @@ SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
fd_install(fd[0], files[0]);
fd_install(fd[1], files[1]);
}
+ pipe = files[0]->private_data;
+ pipe->pipe_ll_usec = pipe_busy_poll;
+ pipe = files[1]->private_data;
+ pipe->pipe_ll_usec = pipe_busy_poll;
}
return error;
}
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index fdfd2a2..3b96b05 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -188,6 +188,7 @@ void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
extern unsigned int pipe_max_size, pipe_min_size;
extern unsigned long pipe_user_pages_hard;
extern unsigned long pipe_user_pages_soft;
+extern unsigned int pipe_busy_poll;
int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *);

/* Drop the inode semaphore and wait for a pipe event, atomically */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d9c31bc..823bde1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1842,6 +1842,13 @@ static struct ctl_table fs_table[] = {
.proc_handler = proc_doulongvec_minmax,
},
{
+ .procname = "pipe-busy-poll",
+ .data = &pipe_busy_poll,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ },
+ {
.procname = "mount-max",
.data = &sysctl_mount_max,
.maxlen = sizeof(unsigned int),
--
2.9.3