Re: [PATCH] io_uring: Add to traces the req pointer when available
From: Pavel Begunkov
Date: Tue May 25 2021 - 04:34:43 EST
On 5/25/21 9:21 AM, Pavel Begunkov wrote:
> On 5/25/21 6:54 AM, Olivier Langlois wrote:
>> The req pointer uniquely identify a specific request.
>> Having it in traces can provide valuable insights that is not possible
>> to have if the calling process is reusing the same user_data value.
>
> How about hashing kernel pointers per discussed? Even if it's better
> to have it done by tracing or something as you mentioned, there is no
> such a thing at the moment, so should be done by hand.
Or do you mean that it's already the case? Can anyone
confirm if so?
> Btw, I'd incline you to split it in two patches, a cleanup and one
> adding req, because it's unreadable and hides the real change
>
>> ---
>> fs/io_uring.c | 8 +--
>> include/trace/events/io_uring.h | 95 ++++++++++++++++++++++-----------
>> 2 files changed, 67 insertions(+), 36 deletions(-)
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 5f82954004f6..496588ca984c 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -5059,7 +5059,7 @@ static void io_async_task_func(struct callback_head *cb)
>> struct async_poll *apoll = req->apoll;
>> struct io_ring_ctx *ctx = req->ctx;
>>
>> - trace_io_uring_task_run(req->ctx, req->opcode, req->user_data);
>> + trace_io_uring_task_run(req->ctx, req, req->opcode, req->user_data);
>>
>> if (io_poll_rewait(req, &apoll->poll)) {
>> spin_unlock_irq(&ctx->completion_lock);
>> @@ -5192,8 +5192,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
>> return false;
>> }
>> spin_unlock_irq(&ctx->completion_lock);
>> - trace_io_uring_poll_arm(ctx, req->opcode, req->user_data, mask,
>> - apoll->poll.events);
>> + trace_io_uring_poll_arm(ctx, req, req->opcode, req->user_data,
>> + mask, apoll->poll.events);
>> return true;
>> }
>>
>> @@ -6578,7 +6578,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>> goto fail_req;
>>
>> /* don't need @sqe from now on */
>> - trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
>> + trace_io_uring_submit_sqe(ctx, req, req->opcode, req->user_data,
>> true, ctx->flags & IORING_SETUP_SQPOLL);
>>
>> /*
>> diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
>> index abb8b24744fd..12861e98c08d 100644
>> --- a/include/trace/events/io_uring.h
>> +++ b/include/trace/events/io_uring.h
>> @@ -129,7 +129,7 @@ TRACE_EVENT(io_uring_file_get,
>> *
>> * @ctx: pointer to a ring context structure
>> * @hashed: type of workqueue, hashed or normal
>> - * @req: pointer to a submitted request
>> + * @req: pointer to a queued request
>> * @work: pointer to a submitted io_wq_work
>> *
>> * Allows to trace asynchronous work submission.
>> @@ -142,9 +142,9 @@ TRACE_EVENT(io_uring_queue_async_work,
>> TP_ARGS(ctx, rw, req, work, flags),
>>
>> TP_STRUCT__entry (
>> - __field( void *, ctx )
>> - __field( int, rw )
>> - __field( void *, req )
>> + __field( void *, ctx )
>> + __field( int, rw )
>> + __field( void *, req )
>> __field( struct io_wq_work *, work )
>> __field( unsigned int, flags )
>> ),
>> @@ -196,10 +196,10 @@ TRACE_EVENT(io_uring_defer,
>>
>> /**
>> * io_uring_link - called before the io_uring request added into link_list of
>> - * another request
>> + * another request
>> *
>> - * @ctx: pointer to a ring context structure
>> - * @req: pointer to a linked request
>> + * @ctx: pointer to a ring context structure
>> + * @req: pointer to a linked request
>> * @target_req: pointer to a previous request, that would contain @req
>> *
>> * Allows to track linked requests, to understand dependencies between requests
>> @@ -212,8 +212,8 @@ TRACE_EVENT(io_uring_link,
>> TP_ARGS(ctx, req, target_req),
>>
>> TP_STRUCT__entry (
>> - __field( void *, ctx )
>> - __field( void *, req )
>> + __field( void *, ctx )
>> + __field( void *, req )
>> __field( void *, target_req )
>> ),
>>
>> @@ -244,7 +244,7 @@ TRACE_EVENT(io_uring_cqring_wait,
>> TP_ARGS(ctx, min_events),
>>
>> TP_STRUCT__entry (
>> - __field( void *, ctx )
>> + __field( void *, ctx )
>> __field( int, min_events )
>> ),
>>
>> @@ -272,7 +272,7 @@ TRACE_EVENT(io_uring_fail_link,
>> TP_ARGS(req, link),
>>
>> TP_STRUCT__entry (
>> - __field( void *, req )
>> + __field( void *, req )
>> __field( void *, link )
>> ),
>>
>> @@ -314,15 +314,15 @@ TRACE_EVENT(io_uring_complete,
>> ),
>>
>> TP_printk("ring %p, user_data 0x%llx, result %ld, cflags %x",
>> - __entry->ctx, (unsigned long long)__entry->user_data,
>> - __entry->res, __entry->cflags)
>> + __entry->ctx, (unsigned long long)__entry->user_data,
>> + __entry->res, __entry->cflags)
>> );
>>
>> -
>> /**
>> * io_uring_submit_sqe - called before submitting one SQE
>> *
>> * @ctx: pointer to a ring context structure
>> + * @req: pointer to a submitted request
>> * @opcode: opcode of request
>> * @user_data: user data associated with the request
>> * @force_nonblock: whether a context blocking or not
>> @@ -333,13 +333,14 @@ TRACE_EVENT(io_uring_complete,
>> */
>> TRACE_EVENT(io_uring_submit_sqe,
>>
>> - TP_PROTO(void *ctx, u8 opcode, u64 user_data, bool force_nonblock,
>> - bool sq_thread),
>> + TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data,
>> + bool force_nonblock, bool sq_thread),
>>
>> - TP_ARGS(ctx, opcode, user_data, force_nonblock, sq_thread),
>> + TP_ARGS(ctx, req, opcode, user_data, force_nonblock, sq_thread),
>>
>> TP_STRUCT__entry (
>> __field( void *, ctx )
>> + __field( void *, req )
>> __field( u8, opcode )
>> __field( u64, user_data )
>> __field( bool, force_nonblock )
>> @@ -348,26 +349,42 @@ TRACE_EVENT(io_uring_submit_sqe,
>>
>> TP_fast_assign(
>> __entry->ctx = ctx;
>> + __entry->req = req;
>> __entry->opcode = opcode;
>> __entry->user_data = user_data;
>> __entry->force_nonblock = force_nonblock;
>> __entry->sq_thread = sq_thread;
>> ),
>>
>> - TP_printk("ring %p, op %d, data 0x%llx, non block %d, sq_thread %d",
>> - __entry->ctx, __entry->opcode,
>> - (unsigned long long) __entry->user_data,
>> - __entry->force_nonblock, __entry->sq_thread)
>> + TP_printk("ring %p, req %p, op %d, data 0x%llx, non block %d, "
>> + "sq_thread %d", __entry->ctx, __entry->req,
>> + __entry->opcode, (unsigned long long)__entry->user_data,
>> + __entry->force_nonblock, __entry->sq_thread)
>> );
>>
>> +/*
>> + * io_uring_poll_arm - called after arming a poll wait if successful
>> + *
>> + * @ctx: pointer to a ring context structure
>> + * @req: pointer to the armed request
>> + * @opcode: opcode of request
>> + * @user_data: user data associated with the request
>> + * @mask: request poll events mask
>> + * @events: registered events of interest
>> + *
>> + * Allows to track which fds are waiting for and what are the events of
>> + * interest.
>> + */
>> TRACE_EVENT(io_uring_poll_arm,
>>
>> - TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask, int events),
>> + TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data,
>> + int mask, int events),
>>
>> - TP_ARGS(ctx, opcode, user_data, mask, events),
>> + TP_ARGS(ctx, req, opcode, user_data, events, fd),
>>
>> TP_STRUCT__entry (
>> __field( void *, ctx )
>> + __field( void *, req )
>> __field( u8, opcode )
>> __field( u64, user_data )
>> __field( int, mask )
>> @@ -376,16 +393,17 @@ TRACE_EVENT(io_uring_poll_arm,
>>
>> TP_fast_assign(
>> __entry->ctx = ctx;
>> + __entry->req = req;
>> __entry->opcode = opcode;
>> __entry->user_data = user_data;
>> __entry->mask = mask;
>> __entry->events = events;
>> ),
>>
>> - TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
>> - __entry->ctx, __entry->opcode,
>> - (unsigned long long) __entry->user_data,
>> - __entry->mask, __entry->events)
>> + TP_printk("ring %p, req %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
>> + __entry->ctx, __entry->req, __entry->opcode,
>> + (unsigned long long) __entry->user_data,
>> + __entry->mask, __entry->events)
>> );
>>
>> TRACE_EVENT(io_uring_poll_wake,
>> @@ -440,27 +458,40 @@ TRACE_EVENT(io_uring_task_add,
>> __entry->mask)
>> );
>>
>> +/*
>> + * io_uring_task_run - called when task_work_run() executes the poll events
>> + * notification callbacks
>> + *
>> + * @ctx: pointer to a ring context structure
>> + * @req: pointer to the armed request
>> + * @opcode: opcode of request
>> + * @user_data: user data associated with the request
>> + *
>> + * Allows to track when notified poll events are processed
>> + */
>> TRACE_EVENT(io_uring_task_run,
>>
>> - TP_PROTO(void *ctx, u8 opcode, u64 user_data),
>> + TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data),
>>
>> - TP_ARGS(ctx, opcode, user_data),
>> + TP_ARGS(ctx, req, opcode, user_data),
>>
>> TP_STRUCT__entry (
>> __field( void *, ctx )
>> + __field( void *, req )
>> __field( u8, opcode )
>> __field( u64, user_data )
>> ),
>>
>> TP_fast_assign(
>> __entry->ctx = ctx;
>> + __entry->req = req;
>> __entry->opcode = opcode;
>> __entry->user_data = user_data;
>> ),
>>
>> - TP_printk("ring %p, op %d, data 0x%llx",
>> - __entry->ctx, __entry->opcode,
>> - (unsigned long long) __entry->user_data)
>> + TP_printk("ring %p, req %p, op %d, data 0x%llx",
>> + __entry->ctx, __entry->req, __entry->opcode,
>> + (unsigned long long) __entry->user_data)
>> );
>>
>> #endif /* _TRACE_IO_URING_H */
>>
>
--
Pavel Begunkov