[PATCH 3/7] eventpoll: split out wait handling
From: Jens Axboe
Date: Thu Dec 01 2022 - 13:12:21 EST
In preparation for making changes to how wakeups and sleeps are done,
move the timeout scheduling into a helper and manage the hrtimer there
directly rather than relying on schedule_hrtimeout_range().
Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
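For reference, the flow ep_poll() ends up with after this change looks
roughly like the sketch below. This is illustrative only: example_wait()
is a made-up name and the snippet just stitches together the pieces from
the hunks that follow (init_wait() supplying the task pointer that
ep_timer() wakes, the waitqueue add under ep->lock, ep_schedule(), and
the removal of the on-stack wait entry before it goes out of scope).

/*
 * Illustrative sketch only, not part of the patch. example_wait() is a
 * made-up name; everything it calls exists in fs/eventpoll.c.
 */
static bool example_wait(struct eventpoll *ep, ktime_t *to, u64 slack)
{
	struct epoll_wq ewq;
	int eavail;

	ewq.timed_out = false;

	/* init_wait() sets ewq.wait.private = current, which is the task
	 * pointer that ep_timer() wakes when the timeout expires. */
	init_wait(&ewq.wait);
	ewq.wait.func = ep_autoremove_wake_function;

	write_lock_irq(&ep->lock);
	__set_current_state(TASK_INTERRUPTIBLE);
	eavail = ep_events_available(ep);
	if (!eavail)
		__add_wait_queue_exclusive(&ep->wq, &ewq.wait);
	write_unlock_irq(&ep->lock);

	/* sleeps until an event wakeup, a signal, or ep_timer() firing;
	 * ep_schedule() returns immediately for the *to == 0 case */
	if (!eavail)
		ep_schedule(ep, &ewq, to, slack);
	__set_current_state(TASK_RUNNING);

	/* the on-stack wait entry must be off ep->wq before returning */
	if (!list_empty_careful(&ewq.wait.entry)) {
		write_lock_irq(&ep->lock);
		__remove_wait_queue(&ep->wq, &ewq.wait);
		write_unlock_irq(&ep->lock);
	}

	return ewq.timed_out;
}

Owning the hrtimer here, instead of going through
schedule_hrtimeout_range(), is what gives the later patches in this
series a place to hook into the sleep and wakeup handling.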
fs/eventpoll.c | 68 ++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 55 insertions(+), 13 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 64d7331353dd..888f565d0c5f 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1762,6 +1762,47 @@ static int ep_autoremove_wake_function(struct wait_queue_entry *wq_entry,
return ret;
}
+struct epoll_wq {
+ wait_queue_entry_t wait;
+ struct hrtimer timer;
+ bool timed_out;
+};
+
+static enum hrtimer_restart ep_timer(struct hrtimer *timer)
+{
+ struct epoll_wq *ewq = container_of(timer, struct epoll_wq, timer);
+ struct task_struct *task = ewq->wait.private;
+
+ ewq->timed_out = true;
+ wake_up_process(task);
+ return HRTIMER_NORESTART;
+}
+
+static void ep_schedule(struct eventpoll *ep, struct epoll_wq *ewq, ktime_t *to,
+ u64 slack)
+{
+ if (ewq->timed_out)
+ return;
+ if (to && *to == 0) {
+ ewq->timed_out = true;
+ return;
+ }
+ if (!to) {
+ schedule();
+ return;
+ }
+
+ hrtimer_init_on_stack(&ewq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ ewq->timer.function = ep_timer;
+ hrtimer_set_expires_range_ns(&ewq->timer, *to, slack);
+ hrtimer_start_expires(&ewq->timer, HRTIMER_MODE_ABS);
+
+ schedule();
+
+ hrtimer_cancel(&ewq->timer);
+ destroy_hrtimer_on_stack(&ewq->timer);
+}
+
/**
* ep_poll - Retrieves ready events, and delivers them to the caller-supplied
* event buffer.
@@ -1782,13 +1823,15 @@ static int ep_autoremove_wake_function(struct wait_queue_entry *wq_entry,
static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
int maxevents, struct timespec64 *timeout)
{
- int res, eavail, timed_out = 0;
+ int res, eavail;
u64 slack = 0;
- wait_queue_entry_t wait;
ktime_t expires, *to = NULL;
+ struct epoll_wq ewq;
lockdep_assert_irqs_enabled();
+ ewq.timed_out = false;
+
if (timeout && (timeout->tv_sec | timeout->tv_nsec)) {
slack = select_estimate_accuracy(timeout);
to = &expires;
@@ -1798,7 +1841,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
* Avoid the unnecessary trip to the wait queue loop, if the
* caller specified a non blocking operation.
*/
- timed_out = 1;
+ ewq.timed_out = true;
}
/*
@@ -1823,7 +1866,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
return res;
}
- if (timed_out)
+ if (ewq.timed_out)
return 0;
eavail = ep_busy_loop(ep);
@@ -1850,8 +1893,8 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
* performance issue if a process is killed, causing all of its
* threads to wake up without being removed normally.
*/
- init_wait(&wait);
- wait.func = ep_autoremove_wake_function;
+ init_wait(&ewq.wait);
+ ewq.wait.func = ep_autoremove_wake_function;
write_lock_irq(&ep->lock);
/*
@@ -1870,10 +1913,9 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
*/
eavail = ep_events_available(ep);
if (!eavail) {
- __add_wait_queue_exclusive(&ep->wq, &wait);
+ __add_wait_queue_exclusive(&ep->wq, &ewq.wait);
write_unlock_irq(&ep->lock);
- timed_out = !schedule_hrtimeout_range(to, slack,
- HRTIMER_MODE_ABS);
+ ep_schedule(ep, &ewq, to, slack);
} else {
write_unlock_irq(&ep->lock);
}
@@ -1887,7 +1929,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
*/
eavail = 1;
- if (!list_empty_careful(&wait.entry)) {
+ if (!list_empty_careful(&ewq.wait.entry)) {
write_lock_irq(&ep->lock);
/*
* If the thread timed out and is not on the wait queue,
@@ -1896,9 +1938,9 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
* Thus, when wait.entry is empty, it needs to harvest
* events.
*/
- if (timed_out)
- eavail = list_empty(&wait.entry);
- __remove_wait_queue(&ep->wq, &wait);
+ if (ewq.timed_out)
+ eavail = list_empty(&ewq.wait.entry);
+ __remove_wait_queue(&ep->wq, &ewq.wait);
write_unlock_irq(&ep->lock);
}
}
--
2.35.1