[PATCH v4 10/14] epoll: support polling from userspace for ep_modify()

From: Roman Penyaev
Date: Tue Jun 11 2019 - 10:59:57 EST


When the epfd is polled from userspace and an item is being modified:

1. Update the user item with the new data pointer and poll flags.
2. Add the event to the user ring if ep_item_poll() reports ready events.

Signed-off-by: Roman Penyaev <rpenyaev@xxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: linux-fsdevel@xxxxxxxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
---
fs/eventpoll.c | 32 +++++++++++++++++++++++++++-----
1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index edf7ba28bce0..9f0d48eb360e 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -2239,6 +2239,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
static int ep_modify(struct eventpoll *ep, struct epitem *epi,
const struct epoll_event *event)
{
+ __poll_t revents;
int pwake = 0;
poll_table pt;

@@ -2250,10 +2251,24 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
* Set the new event interest mask before calling f_op->poll();
* otherwise we might miss an event that happens between the
* f_op->poll() call and the new event set registering.
+ *
+ * Use xchg() here because we can race with ep_clear_public_event_bits()
+ * for the case when events are polled from userspace. Internally
+ * ep_clear_public_event_bits() uses cmpxchg(), thus on some archs
+ * we can't mix normal writes and cmpxchg().
*/
- epi->event.events = event->events; /* need barrier below */
+ xchg(&epi->event.events, event->events);
epi->event.data = event->data; /* protected by mtx */
- if (epi->event.events & EPOLLWAKEUP) {
+
+ /* Update user item, barrier is below */
+ if (ep_polled_by_user(ep)) {
+ struct uepitem *uepi = uep_item_from_epi(epi);
+ struct epoll_uitem *uitem;
+
+ uitem = &ep->user_header->items[uepi->bit];
+ WRITE_ONCE(uitem->events, event->events);
+ WRITE_ONCE(uitem->data, event->data);
+ } else if (epi->event.events & EPOLLWAKEUP) {
if (!ep_has_wakeup_source(epi))
ep_create_wakeup_source(epi);
} else if (ep_has_wakeup_source(epi)) {
@@ -2286,12 +2301,19 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
* If the item is "hot" and it is not registered inside the ready
* list, push it inside.
*/
- if (ep_item_poll(epi, &pt, 1)) {
+ revents = ep_item_poll(epi, &pt, 1);
+ if (revents) {
+ bool added = false;
+
write_lock_irq(&ep->lock);
- if (!ep_is_linked(epi)) {
+ if (ep_polled_by_user(ep))
+ added = ep_add_event_to_uring(epi, revents);
+ else if (!ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
-
+ added = true;
+ }
+ if (added) {
/* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq))
wake_up(&ep->wq);
--
2.21.0