Re: [PATCH v10] io_uring: Statistics of the true utilization of sq threads.

From: Xiaobing Li
Date: Wed Feb 28 2024 - 04:10:14 EST


On 2/28/24 16:51, Xiaobing Li wrote:
>diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
>index 976e9500f651..42b449e53535 100644
>--- a/io_uring/fdinfo.c
>+++ b/io_uring/fdinfo.c
>@@ -64,6 +64,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
> unsigned int sq_shift = 0;
> unsigned int sq_entries, cq_entries;
> int sq_pid = -1, sq_cpu = -1;
>+ u64 sq_total_time = 0, sq_work_time = 0;
> bool has_lock;
> unsigned int i;
>
>@@ -147,10 +148,15 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
>
> sq_pid = sq->task_pid;
> sq_cpu = sq->sq_cpu;
>+ getrusage(sq->thread, RUSAGE_SELF, &sq_usage);
>+ sq_total_time = sq_usage.ru_stime.tv_sec * 1000000 + sq_usage.ru_stime.tv_usec;
>+ sq_work_time = sq->work_time;
> }
>
> seq_printf(m, "SqThread:\t%d\n", sq_pid);
> seq_printf(m, "SqThreadCpu:\t%d\n", sq_cpu);
>+ seq_printf(m, "SqTotalTime:\t%llu\n", sq_total_time);
>+ seq_printf(m, "SqWorkTime:\t%llu\n", sq_work_time);
> seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
> for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
> struct file *f = io_file_from_index(&ctx->file_table, i);
>diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
>index 82672eaaee81..363052b4ea76 100644
>--- a/io_uring/sqpoll.c
>+++ b/io_uring/sqpoll.c
>@@ -253,11 +253,23 @@ static bool io_sq_tw_pending(struct llist_node *retry_list)
> return retry_list || !llist_empty(&tctx->task_list);
> }
>
>+static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start)
>+{
>+ struct rusage end;
>+
>+ getrusage(current, RUSAGE_SELF, &end);
>+ end.ru_stime.tv_sec -= start->ru_stime.tv_sec;
>+ end.ru_stime.tv_usec -= start->ru_stime.tv_usec;
>+
>+ sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000;
>+}
>+
> static int io_sq_thread(void *data)
> {
> struct llist_node *retry_list = NULL;
> struct io_sq_data *sqd = data;
> struct io_ring_ctx *ctx;
>+ struct rusage start;
> unsigned long timeout = 0;
> char buf[TASK_COMM_LEN];
> DEFINE_WAIT(wait);
>@@ -286,6 +298,7 @@ static int io_sq_thread(void *data)
> }
>
> cap_entries = !list_is_singular(&sqd->ctx_list);
>+ getrusage(current, RUSAGE_SELF, &start);
> list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
> int ret = __io_sq_thread(ctx, cap_entries);
>
>@@ -296,8 +309,10 @@ static int io_sq_thread(void *data)
> sqt_spin = true;
>
> if (sqt_spin || !time_after(jiffies, timeout)) {
>- if (sqt_spin)
>+ if (sqt_spin) {
>+ io_sq_update_worktime(sqd, &start);
> timeout = jiffies + sqd->sq_thread_idle;
>+ }
> if (unlikely(need_resched())) {
> mutex_unlock(&sqd->lock);
> cond_resched();
>diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h
>index 8df37e8c9149..4171666b1cf4 100644
>--- a/io_uring/sqpoll.h
>+++ b/io_uring/sqpoll.h
>@@ -16,6 +16,7 @@ struct io_sq_data {
> pid_t task_pid;
> pid_t task_tgid;
>
>+ u64 work_time;
> unsigned long state;
> struct completion exited;
> };

Sorry, please ignore this patch, I will resend a v11.

--
Xiaobing Li