Detect a concurrent reader or writer by reading the event counter before
and after poll_wait(), and compute the returned event mask with the case
of an unstable counter (one that changes across the waitqueue
registration) taken into account.
Cut the big comment, as the added barriers speak for themselves.
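
For illustration, a minimal userspace sketch of the same "sample the
counter before and after the registration point" idea (the names below
are made up for the example, and acquire loads stand in for the
smp_rmb() pairs; this is not the kernel code):

    #include <stdatomic.h>
    #include <stdint.h>

    static _Atomic uint64_t counter;        /* stands in for ctx->count */

    static void register_waiter(void)
    {
        /* stands in for poll_wait(): registers on the waitqueue and
         * may take its lock before this function returns */
    }

    static unsigned int poll_sketch(void)
    {
        uint64_t c0, count;

        c0 = atomic_load_explicit(&counter, memory_order_acquire);
        register_waiter();
        count = atomic_load_explicit(&counter, memory_order_acquire);

        if (c0 < count)
            return 1;               /* writer ran concurrently: readable */
        if (c0 > count)
            return 2;               /* reader ran concurrently: writable */
        return count > 0 ? 1 : 0;   /* counter stable: usual checks apply */
    }

If the two samples are equal, the code simply falls through to the usual
checks on count, as in the hunk below.
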
+++ x/fs/eventfd.c
@@ -131,49 +131,20 @@ static __poll_t eventfd_poll(struct file
{
struct eventfd_ctx *ctx = file->private_data;
__poll_t events = 0;
- u64 count;
+ u64 c0, count;
+
+ c0 = ctx->count;
+ smp_rmb();
poll_wait(file, &ctx->wqh, wait);
- /*
- * All writes to ctx->count occur within ctx->wqh.lock. This read
- * can be done outside ctx->wqh.lock because we know that poll_wait
- * takes that lock (through add_wait_queue) if our caller will sleep.
- *
- * The read _can_ therefore seep into add_wait_queue's critical
- * section, but cannot move above it! add_wait_queue's spin_lock acts
- * as an acquire barrier and ensures that the read be ordered properly
- * against the writes. The following CAN happen and is safe:
- *
- * poll write
- * ----------------- ------------
- * lock ctx->wqh.lock (in poll_wait)
- * count = ctx->count
- * __add_wait_queue
- * unlock ctx->wqh.lock
- * lock ctx->qwh.lock
- * ctx->count += n
- * if (waitqueue_active)
- * wake_up_locked_poll
- * unlock ctx->qwh.lock
- * eventfd_poll returns 0
- *
- * but the following, which would miss a wakeup, cannot happen:
- *
- * poll write
- * ----------------- ------------
- * count = ctx->count (INVALID!)
- * lock ctx->qwh.lock
- * ctx->count += n
- * **waitqueue_active is false**
- * **no wake_up_locked_poll!**
- * unlock ctx->qwh.lock
- * lock ctx->wqh.lock (in poll_wait)
- * __add_wait_queue
- * unlock ctx->wqh.lock
- * eventfd_poll returns 0
- */
- count = READ_ONCE(ctx->count);
+ smp_rmb();
+ count = ctx->count;
+
+ if (c0 < count)
+ return EPOLLIN;
+ if (c0 > count)
+ return EPOLLOUT;
if (count > 0)
events |= EPOLLIN;