Re: [PATCH 5/9] io_uring: get next req on subm ref drop

From: Pavel Begunkov
Date: Sun Mar 01 2020 - 16:32:33 EST


On 01/03/2020 19:18, Pavel Begunkov wrote:
> Get next request when dropping the submission reference. However, if
> there is an asynchronous counterpart (i.e. read/write, timeout, etc.),
> doing that would be dangerous, so ignore such requests using the new
> REQ_F_DONT_STEAL_NEXT flag.
>
> Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
> ---
> fs/io_uring.c | 121 ++++++++++++++++++++++++++------------------------
> 1 file changed, 62 insertions(+), 59 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index daf7c2095523..d456b0ff6835 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -488,6 +488,7 @@ enum {
> REQ_F_NEED_CLEANUP_BIT,
> REQ_F_OVERFLOW_BIT,
> REQ_F_POLLED_BIT,
> + REQ_F_DONT_STEAL_NEXT_BIT,
> };
>
> enum {
> @@ -532,6 +533,8 @@ enum {
> REQ_F_OVERFLOW = BIT(REQ_F_OVERFLOW_BIT),
> /* already went through poll handler */
> REQ_F_POLLED = BIT(REQ_F_POLLED_BIT),
> + /* don't try to get next req, it's async and racy */
> + REQ_F_DONT_STEAL_NEXT = BIT(REQ_F_DONT_STEAL_NEXT_BIT),
> };
>
> struct async_poll {
> @@ -1218,6 +1221,27 @@ static void io_cqring_add_event(struct io_kiocb *req, long res)
> io_cqring_ev_posted(ctx);
> }
>
> +static void io_link_work_cb(struct io_wq_work **workptr)
> +{
> + struct io_wq_work *work = *workptr;
> + struct io_kiocb *link = work->data;
> +
> + io_queue_linked_timeout(link);
> + io_wq_submit_work(workptr);
> +}
> +
> +static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt)
> +{
> + struct io_kiocb *link;
> +
> + *workptr = &nxt->work;
> + link = io_prep_linked_timeout(nxt);
> + if (link) {
> + nxt->work.func = io_link_work_cb;
> + nxt->work.data = link;
> + }
> +}
> +
> static inline bool io_is_fallback_req(struct io_kiocb *req)
> {
> return req == (struct io_kiocb *)
> @@ -1518,17 +1542,28 @@ static void io_free_req(struct io_kiocb *req)
> io_queue_async_work(nxt);
> }
>
> -/*
> - * Drop reference to request, return next in chain (if there is one) if this
> - * was the last reference to this request.
> - */
> -__attribute__((nonnull))
> -static void io_put_req_find_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
> +__attribute__((warn_unused_result))
> +static struct io_kiocb *io_put_req_submission(struct io_kiocb *req)
> {
> - if (refcount_dec_and_test(&req->refs)) {
> - io_req_find_next(req, nxtptr);
> + bool last_ref = refcount_dec_and_test(&req->refs);
> + struct io_kiocb *nxt = NULL;
> +
> + if (!(req->flags & REQ_F_DONT_STEAL_NEXT) || last_ref)

This is a bit racy: @nxt needs to be taken first, and only then the
atomic ref drop done. The fix is trivial, so it still makes sense to
review the rest.
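
Something like this, as a minimal (untested) sketch of that reordering,
reusing only the helpers already in the patch:

static struct io_kiocb *io_put_req_submission(struct io_kiocb *req)
{
	struct io_kiocb *nxt = NULL;

	/* take @nxt while we still hold the submission reference */
	if (!(req->flags & REQ_F_DONT_STEAL_NEXT))
		io_req_find_next(req, &nxt);

	if (refcount_dec_and_test(&req->refs)) {
		/* last ref, stealing is safe even for async requests */
		if (!nxt)
			io_req_find_next(req, &nxt);
		__io_free_req(req);
	}
	return nxt;
}

That keeps io_req_find_next() ahead of the refcount drop, so the request
can't be freed under us by a concurrent completion.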

> + io_req_find_next(req, &nxt);
> + if (last_ref)
> __io_free_req(req);
> - }
> +
> + return nxt;
> +}
> +
> +static void io_put_req_async_submission(struct io_kiocb *req,
> + struct io_wq_work **workptr)
> +{
> + struct io_kiocb *nxt;
> +
> + nxt = io_put_req_submission(req);
> + if (nxt)
> + io_wq_assign_next(workptr, nxt);
> }
>
> static void io_put_req(struct io_kiocb *req)
> @@ -1979,8 +2014,11 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
>
> static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
> {
> + struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
> +
> switch (ret) {
> case -EIOCBQUEUED:
> + req->flags |= REQ_F_DONT_STEAL_NEXT;
> break;
> case -ERESTARTSYS:
> case -ERESTARTNOINTR:
> @@ -2526,6 +2564,7 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> if (unlikely(req->sync.flags & ~IORING_FSYNC_DATASYNC))
> return -EINVAL;
>
> + req->flags |= REQ_F_DONT_STEAL_NEXT;
> req->sync.off = READ_ONCE(sqe->off);
> req->sync.len = READ_ONCE(sqe->len);
> return 0;
> @@ -2543,27 +2582,6 @@ static bool io_req_cancelled(struct io_kiocb *req)
> return false;
> }
>
> -static void io_link_work_cb(struct io_wq_work **workptr)
> -{
> - struct io_wq_work *work = *workptr;
> - struct io_kiocb *link = work->data;
> -
> - io_queue_linked_timeout(link);
> - io_wq_submit_work(workptr);
> -}
> -
> -static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt)
> -{
> - struct io_kiocb *link;
> -
> - *workptr = &nxt->work;
> - link = io_prep_linked_timeout(nxt);
> - if (link) {
> - nxt->work.func = io_link_work_cb;
> - nxt->work.data = link;
> - }
> -}
> -
> static void __io_fsync(struct io_kiocb *req)
> {
> loff_t end = req->sync.off + req->sync.len;
> @@ -2581,14 +2599,11 @@ static void __io_fsync(struct io_kiocb *req)
> static void io_fsync_finish(struct io_wq_work **workptr)
> {
> struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
> - struct io_kiocb *nxt = NULL;
>
> if (io_req_cancelled(req))
> return;
> __io_fsync(req);
> - io_put_req(req); /* drop submission reference */
> - if (nxt)
> - io_wq_assign_next(workptr, nxt);
> + io_put_req_async_submission(req, workptr);
> }
>
> static int io_fsync(struct io_kiocb *req, bool force_nonblock)
> @@ -2617,14 +2632,11 @@ static void __io_fallocate(struct io_kiocb *req)
> static void io_fallocate_finish(struct io_wq_work **workptr)
> {
> struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
> - struct io_kiocb *nxt = NULL;
>
> if (io_req_cancelled(req))
> return;
> __io_fallocate(req);
> - io_put_req(req); /* drop submission reference */
> - if (nxt)
> - io_wq_assign_next(workptr, nxt);
> + io_put_req_async_submission(req, workptr);
> }
>
> static int io_fallocate_prep(struct io_kiocb *req,
> @@ -2988,13 +3000,10 @@ static void __io_close_finish(struct io_kiocb *req)
> static void io_close_finish(struct io_wq_work **workptr)
> {
> struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
> - struct io_kiocb *nxt = NULL;
>
> /* not cancellable, don't do io_req_cancelled() */
> __io_close_finish(req);
> - io_put_req(req); /* drop submission reference */
> - if (nxt)
> - io_wq_assign_next(workptr, nxt);
> + io_put_req_async_submission(req, workptr);
> }
>
> static int io_close(struct io_kiocb *req, bool force_nonblock)
> @@ -3016,6 +3025,7 @@ static int io_close(struct io_kiocb *req, bool force_nonblock)
> */
> io_queue_async_work(req);
> /* submission ref will be dropped, take it for async */
> + req->flags |= REQ_F_DONT_STEAL_NEXT;
> refcount_inc_not_zero(&req->refs);
> return 0;
> }
> @@ -3062,14 +3072,11 @@ static void __io_sync_file_range(struct io_kiocb *req)
> static void io_sync_file_range_finish(struct io_wq_work **workptr)
> {
> struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
> - struct io_kiocb *nxt = NULL;
>
> if (io_req_cancelled(req))
> return;
> __io_sync_file_range(req);
> - io_put_req(req); /* put submission ref */
> - if (nxt)
> - io_wq_assign_next(workptr, nxt);
> + io_put_req_async_submission(req, workptr);
> }
>
> static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock)
> @@ -3435,14 +3442,11 @@ static int __io_accept(struct io_kiocb *req, bool force_nonblock)
> static void io_accept_finish(struct io_wq_work **workptr)
> {
> struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
> - struct io_kiocb *nxt = NULL;
>
> if (io_req_cancelled(req))
> return;
> __io_accept(req, false);
> - io_put_req(req); /* drop submission reference */
> - if (nxt)
> - io_wq_assign_next(workptr, nxt);
> + io_put_req_async_submission(req, workptr);
> }
> #endif
>
> @@ -3859,9 +3863,10 @@ static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt)
> hash_del(&req->hash_node);
> io_poll_complete(req, req->result, 0);
> req->flags |= REQ_F_COMP_LOCKED;
> - io_put_req_find_next(req, nxt);
> spin_unlock_irq(&ctx->completion_lock);
>
> + req->flags &= ~REQ_F_DONT_STEAL_NEXT;
> + *nxt = io_put_req_submission(req);
> io_cqring_ev_posted(ctx);
> }
>
> @@ -3944,6 +3949,8 @@ static int io_poll_add(struct io_kiocb *req)
> io_cqring_ev_posted(ctx);
> io_put_req(req);
> }
> +
> + req->flags |= REQ_F_DONT_STEAL_NEXT;
> return ipt.error;
> }
>
> @@ -4066,6 +4073,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
> if (flags & ~IORING_TIMEOUT_ABS)
> return -EINVAL;
>
> + req->flags |= REQ_F_DONT_STEAL_NEXT;
> req->timeout.count = READ_ONCE(sqe->off);
>
> if (!req->io && io_alloc_async_ctx(req))
> @@ -4680,7 +4688,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
> {
> struct io_wq_work *work = *workptr;
> struct io_kiocb *req = container_of(work, struct io_kiocb, work);
> - struct io_kiocb *nxt = NULL;
> int ret = 0;
>
> /* if NO_CANCEL is set, we must still run the work */
> @@ -4709,9 +4716,7 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
> io_put_req(req);
> }
>
> - io_put_req(req); /* drop submission reference */
> - if (nxt)
> - io_wq_assign_next(workptr, nxt);
> + io_put_req_async_submission(req, workptr);
> }
>
> static int io_req_needs_file(struct io_kiocb *req, int fd)
> @@ -4935,10 +4940,6 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> }
>
> err:
> - nxt = NULL;
> - /* drop submission reference */
> - io_put_req_find_next(req, &nxt);
> -
> if (linked_timeout) {
> if (!ret)
> io_queue_linked_timeout(linked_timeout);
> @@ -4952,6 +4953,8 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> req_set_fail_links(req);
> io_put_req(req);
> }
> +
> + nxt = io_put_req_submission(req);
> if (nxt) {
> req = nxt;
>
>

--
Pavel Begunkov