Re: [PATCH RESEND v4] fs/epoll: Remove unnecessary wakeups of nested epoll that in ET mode

From: Roman Penyaev
Date: Tue Oct 08 2019 - 05:56:04 EST


On 2019-10-07 20:43, Jason Baron wrote:

[...]

But what if we make this wakeup explicit when we have more events to process?
(Nothing is tested, this is just a guess.)

@@ -255,6 +255,7 @@ struct ep_pqueue {
 struct ep_send_events_data {
        int maxevents;
        struct epoll_event __user *events;
+       bool have_more;
        int res;
 };
@@ -1783,14 +1768,17 @@ static __poll_t ep_send_events_proc(struct eventpoll *ep, struct list_head *head
 }

 static int ep_send_events(struct eventpoll *ep,
-                         struct epoll_event __user *events, int maxevents)
+                         struct epoll_event __user *events, int maxevents,
+                         bool *have_more)
 {
-       struct ep_send_events_data esed;
-
-       esed.maxevents = maxevents;
-       esed.events = events;
+       struct ep_send_events_data esed = {
+               .maxevents = maxevents,
+               .events = events,
+       };

        ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0, false);
+       *have_more = esed.have_more;
+
        return esed.res;
 }

@@ -1827,7 +1815,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 {
        int res = 0, eavail, timed_out = 0;
        u64 slack = 0;
-       bool waiter = false;
+       bool waiter = false, have_more;
        wait_queue_entry_t wait;
        ktime_t expires, *to = NULL;

@@ -1927,7 +1915,8 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
         * more luck.
         */
        if (!res && eavail &&
-           !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
+           !(res = ep_send_events(ep, events, maxevents, &have_more)) &&
+           !timed_out)
                goto fetch_events;

        if (waiter) {
@@ -1935,6 +1924,12 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
                __remove_wait_queue(&ep->wq, &wait);
                spin_unlock_irq(&ep->wq.lock);
        }
+       /*
+        * We were not able to process all the events, so immediately
+        * wakeup other waiter.
+        */
+       if (res > 0 && have_more && waitqueue_active(&ep->wq))
+               wake_up(&ep->wq);

        return res;
 }




[...]

And I think the above change can go in separately (if we decide we want it).

Hi Jason,

I did measurements using Eric's test http://yhbt.net/eponeshotmt.c
(8 writers, 8 waiters; 1 writer, 8 waiters) and tested the impact
of the outrunning wakeup: I do not see any difference. Since write
events are constantly coming in, the next waiter will be woken up
anyway by the following write event. To get some perf gain, writes
would probably have to happen at intervals: produce a bunch of events,
sleep, produce a bunch of events, sleep, etc. That, it seems, can only
help if the writer is accidentally synchronized with the waiters. Not
a clean way to improve performance.

--
Roman