Re: [PATCH] io_uring: Add to traces the req pointer when available

From: Pavel Begunkov
Date: Tue May 25 2021 - 04:21:47 EST


On 5/25/21 6:54 AM, Olivier Langlois wrote:
> The req pointer uniquely identify a specific request.
> Having it in traces can provide valuable insights that is not possible
> to have if the calling process is reusing the same user_data value.

How about hashing kernel pointers per discussed? Even if it's better
to have it done by tracing or something as you mentioned, there is no
such a thing at the moment, so should be done by hand.

Btw, I'd incline you to split it in two patches, a cleanup and one
adding req, because it's unreadable and hides the real change

> ---
> fs/io_uring.c | 8 +--
> include/trace/events/io_uring.h | 95 ++++++++++++++++++++++-----------
> 2 files changed, 67 insertions(+), 36 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 5f82954004f6..496588ca984c 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -5059,7 +5059,7 @@ static void io_async_task_func(struct callback_head *cb)
> struct async_poll *apoll = req->apoll;
> struct io_ring_ctx *ctx = req->ctx;
>
> - trace_io_uring_task_run(req->ctx, req->opcode, req->user_data);
> + trace_io_uring_task_run(req->ctx, req, req->opcode, req->user_data);
>
> if (io_poll_rewait(req, &apoll->poll)) {
> spin_unlock_irq(&ctx->completion_lock);
> @@ -5192,8 +5192,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
> return false;
> }
> spin_unlock_irq(&ctx->completion_lock);
> - trace_io_uring_poll_arm(ctx, req->opcode, req->user_data, mask,
> - apoll->poll.events);
> + trace_io_uring_poll_arm(ctx, req, req->opcode, req->user_data,
> + mask, apoll->poll.events);
> return true;
> }
>
> @@ -6578,7 +6578,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
> goto fail_req;
>
> /* don't need @sqe from now on */
> - trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
> + trace_io_uring_submit_sqe(ctx, req, req->opcode, req->user_data,
> true, ctx->flags & IORING_SETUP_SQPOLL);
>
> /*
> diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
> index abb8b24744fd..12861e98c08d 100644
> --- a/include/trace/events/io_uring.h
> +++ b/include/trace/events/io_uring.h
> @@ -129,7 +129,7 @@ TRACE_EVENT(io_uring_file_get,
> *
> * @ctx: pointer to a ring context structure
> * @hashed: type of workqueue, hashed or normal
> - * @req: pointer to a submitted request
> + * @req: pointer to a queued request
> * @work: pointer to a submitted io_wq_work
> *
> * Allows to trace asynchronous work submission.
> @@ -142,9 +142,9 @@ TRACE_EVENT(io_uring_queue_async_work,
> TP_ARGS(ctx, rw, req, work, flags),
>
> TP_STRUCT__entry (
> - __field( void *, ctx )
> - __field( int, rw )
> - __field( void *, req )
> + __field( void *, ctx )
> + __field( int, rw )
> + __field( void *, req )
> __field( struct io_wq_work *, work )
> __field( unsigned int, flags )
> ),
> @@ -196,10 +196,10 @@ TRACE_EVENT(io_uring_defer,
>
> /**
> * io_uring_link - called before the io_uring request added into link_list of
> - * another request
> + * another request
> *
> - * @ctx: pointer to a ring context structure
> - * @req: pointer to a linked request
> + * @ctx: pointer to a ring context structure
> + * @req: pointer to a linked request
> * @target_req: pointer to a previous request, that would contain @req
> *
> * Allows to track linked requests, to understand dependencies between requests
> @@ -212,8 +212,8 @@ TRACE_EVENT(io_uring_link,
> TP_ARGS(ctx, req, target_req),
>
> TP_STRUCT__entry (
> - __field( void *, ctx )
> - __field( void *, req )
> + __field( void *, ctx )
> + __field( void *, req )
> __field( void *, target_req )
> ),
>
> @@ -244,7 +244,7 @@ TRACE_EVENT(io_uring_cqring_wait,
> TP_ARGS(ctx, min_events),
>
> TP_STRUCT__entry (
> - __field( void *, ctx )
> + __field( void *, ctx )
> __field( int, min_events )
> ),
>
> @@ -272,7 +272,7 @@ TRACE_EVENT(io_uring_fail_link,
> TP_ARGS(req, link),
>
> TP_STRUCT__entry (
> - __field( void *, req )
> + __field( void *, req )
> __field( void *, link )
> ),
>
> @@ -314,15 +314,15 @@ TRACE_EVENT(io_uring_complete,
> ),
>
> TP_printk("ring %p, user_data 0x%llx, result %ld, cflags %x",
> - __entry->ctx, (unsigned long long)__entry->user_data,
> - __entry->res, __entry->cflags)
> + __entry->ctx, (unsigned long long)__entry->user_data,
> + __entry->res, __entry->cflags)
> );
>
> -
> /**
> * io_uring_submit_sqe - called before submitting one SQE
> *
> * @ctx: pointer to a ring context structure
> + * @req: pointer to a submitted request
> * @opcode: opcode of request
> * @user_data: user data associated with the request
> * @force_nonblock: whether a context blocking or not
> @@ -333,13 +333,14 @@ TRACE_EVENT(io_uring_complete,
> */
> TRACE_EVENT(io_uring_submit_sqe,
>
> - TP_PROTO(void *ctx, u8 opcode, u64 user_data, bool force_nonblock,
> - bool sq_thread),
> + TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data,
> + bool force_nonblock, bool sq_thread),
>
> - TP_ARGS(ctx, opcode, user_data, force_nonblock, sq_thread),
> + TP_ARGS(ctx, req, opcode, user_data, force_nonblock, sq_thread),
>
> TP_STRUCT__entry (
> __field( void *, ctx )
> + __field( void *, req )
> __field( u8, opcode )
> __field( u64, user_data )
> __field( bool, force_nonblock )
> @@ -348,26 +349,42 @@ TRACE_EVENT(io_uring_submit_sqe,
>
> TP_fast_assign(
> __entry->ctx = ctx;
> + __entry->req = req;
> __entry->opcode = opcode;
> __entry->user_data = user_data;
> __entry->force_nonblock = force_nonblock;
> __entry->sq_thread = sq_thread;
> ),
>
> - TP_printk("ring %p, op %d, data 0x%llx, non block %d, sq_thread %d",
> - __entry->ctx, __entry->opcode,
> - (unsigned long long) __entry->user_data,
> - __entry->force_nonblock, __entry->sq_thread)
> + TP_printk("ring %p, req %p, op %d, data 0x%llx, non block %d, "
> + "sq_thread %d", __entry->ctx, __entry->req,
> + __entry->opcode, (unsigned long long)__entry->user_data,
> + __entry->force_nonblock, __entry->sq_thread)
> );
>
> +/*
> + * io_uring_poll_arm - called after arming a poll wait if successful
> + *
> + * @ctx: pointer to a ring context structure
> + * @req: pointer to the armed request
> + * @opcode: opcode of request
> + * @user_data: user data associated with the request
> + * @mask: request poll events mask
> + * @events: registered events of interest
> + *
> + * Allows to track which fds are waiting for and what are the events of
> + * interest.
> + */
> TRACE_EVENT(io_uring_poll_arm,
>
> - TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask, int events),
> + TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data,
> + int mask, int events),
>
> - TP_ARGS(ctx, opcode, user_data, mask, events),
> + TP_ARGS(ctx, req, opcode, user_data, events, fd),
>
> TP_STRUCT__entry (
> __field( void *, ctx )
> + __field( void *, req )
> __field( u8, opcode )
> __field( u64, user_data )
> __field( int, mask )
> @@ -376,16 +393,17 @@ TRACE_EVENT(io_uring_poll_arm,
>
> TP_fast_assign(
> __entry->ctx = ctx;
> + __entry->req = req;
> __entry->opcode = opcode;
> __entry->user_data = user_data;
> __entry->mask = mask;
> __entry->events = events;
> ),
>
> - TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
> - __entry->ctx, __entry->opcode,
> - (unsigned long long) __entry->user_data,
> - __entry->mask, __entry->events)
> + TP_printk("ring %p, req %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
> + __entry->ctx, __entry->req, __entry->opcode,
> + (unsigned long long) __entry->user_data,
> + __entry->mask, __entry->events)
> );
>
> TRACE_EVENT(io_uring_poll_wake,
> @@ -440,27 +458,40 @@ TRACE_EVENT(io_uring_task_add,
> __entry->mask)
> );
>
> +/*
> + * io_uring_task_run - called when task_work_run() executes the poll events
> + * notification callbacks
> + *
> + * @ctx: pointer to a ring context structure
> + * @req: pointer to the armed request
> + * @opcode: opcode of request
> + * @user_data: user data associated with the request
> + *
> + * Allows to track when notified poll events are processed
> + */
> TRACE_EVENT(io_uring_task_run,
>
> - TP_PROTO(void *ctx, u8 opcode, u64 user_data),
> + TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data),
>
> - TP_ARGS(ctx, opcode, user_data),
> + TP_ARGS(ctx, req, opcode, user_data),
>
> TP_STRUCT__entry (
> __field( void *, ctx )
> + __field( void *, req )
> __field( u8, opcode )
> __field( u64, user_data )
> ),
>
> TP_fast_assign(
> __entry->ctx = ctx;
> + __entry->req = req;
> __entry->opcode = opcode;
> __entry->user_data = user_data;
> ),
>
> - TP_printk("ring %p, op %d, data 0x%llx",
> - __entry->ctx, __entry->opcode,
> - (unsigned long long) __entry->user_data)
> + TP_printk("ring %p, req %p, op %d, data 0x%llx",
> + __entry->ctx, __entry->req, __entry->opcode,
> + (unsigned long long) __entry->user_data)
> );
>
> #endif /* _TRACE_IO_URING_H */
>

--
Pavel Begunkov