Re: [PATCH 4/4] trace_workqueue: Add worklet information

From: Frederic Weisbecker
Date: Mon Apr 20 2009 - 07:36:22 EST


On Mon, Apr 20, 2009 at 03:09:13PM +0800, Zhaolei wrote:
> Now workqueue tracer can display per-worklet information as following:
> # cat [debugfs]/tracing/trace_stat/workqueues
> # CPU INSERTED EXECUTED TASKNAME:PID
> # | | | `-WORKFUNC
> # | | | |
> CPU INSERTED EXECUTED TASKNAME:PID
> 0 157 157 events/0:5
> 0 1 1 |-reg_todo+0x0/0x648
> 0 3 3 |-check_corruption+0x0/0x2c
> 0 1 1 |-power_supply_changed_work+0x0/0x3c
> 0 2 2 |-console_callback+0x0/0x145
> 0 9 9 |-do_cache_clean+0x0/0x6a
> 0 1 1 |-wq_barrier_func+0x0/0x12
> 0 129 129 |-vmstat_update+0x0/0x33
> 0 8 8 |-mce_work_fn+0x0/0x31
> 0 1 1 |-grace_ender+0x0/0x14
> 0 2 2 `-rt_worker_func+0x0/0x2de
> 0 0 0 cpuset:6
> 0 656 656 khelper:7
> 0 656 656 `-__call_usermodehelper+0x0/0xa0
> 0 0 0 netns:11
> 0 592 592 kblockd/0:13
> 0 39 39 |-blk_unplug_work+0x0/0x1e
> 0 553 553 `-cfq_kick_queue+0x0/0x2f
> 0 0 0 kacpid:412
> 0 0 0 kacpi_notify:415
> 0 0 0 ata/0:463
> 0 0 0 ata_aux:509
> 0 0 0 ksuspend_usbd:515
> 0 0 0 aio/0:717
> 0 0 0 nfsiod:718
> 0 0 0 crypto/0:719
> 0 0 0 kpsmoused:1141
> 0 0 0 kstriped:1248
> 0 0 0 kondemand/0:1259
> 0 0 0 usbhid_resumer:1260
> 0 21 21 rpciod/0:1333
> 0 1 1 |-xs_tcp_connect_worker4+0x0/0x1b
> 0 19 19 |-xs_udp_connect_worker4+0x0/0x130
> 0 1 1 `-xprt_autoclose+0x0/0x2c
>
> Signed-off-by: Zhao Lei <zhaolei@xxxxxxxxxxxxxx>
> ---
> kernel/trace/trace_workqueue.c | 219 ++++++++++++++++++++++++++++++++--------
> 1 files changed, 175 insertions(+), 44 deletions(-)
>
> diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
> index 3393c95..c465dbc 100644
> --- a/kernel/trace/trace_workqueue.c
> +++ b/kernel/trace/trace_workqueue.c
> @@ -12,19 +12,33 @@
> #include "trace_stat.h"
> #include "trace.h"
>
> +/* A callback function saved in struct work_struct->func */
> +struct cpu_workqueue_stats;
> +struct workfunc_stats {
> + struct list_head list;
> + struct cpu_workqueue_stats *parent;
> +
> + /* Addr of worklet's callback function */
> + work_func_t func;
> + /* Can be inserted from interrupt or user context, need to be atomic */
> + atomic_t inserted;
> + /*
> + * Don't need to be atomic, works are serialized in a single workqueue
> + * thread on a single CPU.
> + */
> + unsigned int executed;
> +};
>
> /* A cpu workqueue thread */
> struct cpu_workqueue_stats {
> struct list_head list;
> int cpu;
> pid_t pid;
> -/* Can be inserted from interrupt or user context, need to be atomic */
> + /* Refer to comments of struct workfunc_stats for detail */
> atomic_t inserted;
> -/*
> - * Don't need to be atomic, works are serialized in a single workqueue thread
> - * on a single CPU.
> - */
> unsigned int executed;
> + /* list of struct workfunc_stats in this workqueue */
> + struct list_head workfunclist;
> };
>
> /* List of workqueue threads on one cpu */
> @@ -39,6 +53,38 @@ struct workqueue_global_stats {
> static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
> #define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))
>
> +/*
> + * Update record when insert a work into workqueue
> + * Caller need to hold cpu_workqueue_stats spin_lock
> + */
> +int do_workqueue_insertion(struct cpu_workqueue_stats *cws,
> + struct work_struct *work)
> +{
> + struct workfunc_stats *wfstat;
> +
> + atomic_inc(&cws->inserted);
> +
> + list_for_each_entry(wfstat, &cws->workfunclist, list)
> + if (wfstat->func == work->func)
> + goto found;
> +
> + /* not found, first insert */
> +
> + /* work can be inserted from interrupt */
> + wfstat = kzalloc(sizeof(struct workfunc_stats), GFP_ATOMIC);
> + if (!wfstat)
> + return -ENOMEM;
> +
> + wfstat->parent = cws;
> + wfstat->func = work->func;
> +
> + list_add_tail(&wfstat->list, &cws->workfunclist);
> +found:
> + atomic_inc(&wfstat->inserted);
> +
> + return 0;
> +}
> +
> /* Insertion of a work */
> static void
> probe_workqueue_insertion(struct task_struct *wq_thread,
> @@ -51,7 +97,8 @@ probe_workqueue_insertion(struct task_struct *wq_thread,
> spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
> if (node->pid == wq_thread->pid) {
> - atomic_inc(&node->inserted);
> + /* we ignore error of do_workqueue_insertion */
> + do_workqueue_insertion(node, work);
> goto found;
> }
> }
> @@ -67,17 +114,30 @@ probe_workqueue_execution(struct task_struct *wq_thread,
> {
> int cpu = cpumask_first(&wq_thread->cpus_allowed);
> struct cpu_workqueue_stats *node;
> + struct workfunc_stats *wfnode;
> unsigned long flags;
>
> spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> - list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
> +
> + list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list)
> if (node->pid == wq_thread->pid) {
> node->executed++;
> - goto found;
> + goto found_wq;
> }
> - }
> - pr_debug("trace_workqueue: entry not found\n");
> -found:
> + pr_debug("trace_workqueue: workqueue not found\n");
> + goto end;
> +
> +found_wq:
> + list_for_each_entry(wfnode, &node->workfunclist, list)
> + if (wfnode->func == work->func) {
> + wfnode->executed++;
> + goto found_wf;
> + }
> + pr_debug("trace_workqueue: worklet not found\n");
> + goto end;
> +
> +found_wf:
> +end:
> spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
> }
>
> @@ -85,6 +145,7 @@ found:
> static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
> {
> struct cpu_workqueue_stats *cws;
> + struct workfunc_stats *wfstat;
> unsigned long flags;
>
> WARN_ON(cpu < 0);
> @@ -93,16 +154,37 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
> cws = kzalloc(sizeof(struct cpu_workqueue_stats), GFP_ATOMIC);
> if (!cws) {
> pr_warning("trace_workqueue: not enough memory\n");
> - return;
> + goto err_alloc_cws;
> }
> INIT_LIST_HEAD(&cws->list);
> cws->cpu = cpu;
>
> cws->pid = wq_thread->pid;
> + INIT_LIST_HEAD(&cws->workfunclist);
> +
> + /*
> + * add a dummy node to cpu_workqueue_stats->workfunclist to:
> + * 1: help print function to print header of work(ex, workqueue info)
> + * 2: used as a seq iter if no work exist in workqueue
> + */
> + wfstat = kzalloc(sizeof(struct workfunc_stats), GFP_ATOMIC);
> + if (!wfstat) {
> + pr_warning("trace_workqueue: not enough memory\n");
> + goto err_alloc_wfstat;
> + }
> + wfstat->parent = cws;
> + list_add_tail(&wfstat->list, &cws->workfunclist);
>
> spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
> spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
> +
> + return;
> +
> +err_alloc_wfstat:
> + kfree(cws);
> +err_alloc_cws:
> + return;
> }
>
> /* Destruction of a cpu workqueue thread */
> @@ -115,12 +197,22 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread)
>
> spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
> - list) {
> - if (node->pid == wq_thread->pid) {
> - list_del(&node->list);
> - kfree(node);
> - goto found;
> + list) {
> + struct workfunc_stats *wfstat, *wfstatnext;
> +
> + if (node->pid != wq_thread->pid)
> + continue;
> +
> + list_for_each_entry_safe(wfstat, wfstatnext,
> + &node->workfunclist, list) {
> + list_del(&wfstat->list);
> + kfree(wfstat);
> }
> +
> + list_del(&node->list);
> + kfree(node);



Sidenote to me: I have to provide a way for a stat
tracer to wait for the end of a pending statistic
output session before the tracer comes to free any
of its exposed entries. Otherwise we could end up
with freed memory dereference.

May be waitqueue.



> +
> + goto found;
> }
>
> pr_debug("trace_workqueue: don't find workqueue to destroy\n");
> @@ -129,17 +221,23 @@ found:
>
> }
>
> -static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
> +static struct workfunc_stats *workqueue_stat_start_cpu(int cpu)
> {
> unsigned long flags;
> - struct cpu_workqueue_stats *ret = NULL;
> -
> + struct workfunc_stats *ret = NULL;
>
> spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
>
> - if (!list_empty(&workqueue_cpu_stat(cpu)->list))
> - ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
> - struct cpu_workqueue_stats, list);
> + if (!list_empty(&workqueue_cpu_stat(cpu)->list)) {
> + struct cpu_workqueue_stats *cws;
> + cws = list_entry(workqueue_cpu_stat(cpu)->list.next,
> + struct cpu_workqueue_stats, list);
> + /*
> + * cpu_workqueue_stats->workfunclist at least have a dummy node
> + */
> + ret = list_entry(cws->workfunclist.next, struct workfunc_stats,
> + list);
> + }
>
> spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>
> @@ -161,43 +259,75 @@ static void *workqueue_stat_start(struct tracer_stat *trace)
>
> static void *workqueue_stat_next(void *prev, int idx)
> {
> - struct cpu_workqueue_stats *prev_cws = prev;
> - int cpu = prev_cws->cpu;
> + struct workfunc_stats *prev_wfstat = prev;
> + int cpu = prev_wfstat->parent->cpu;
> unsigned long flags;
> void *ret = NULL;
>
> spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> - if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
> +
> + if (!list_is_last(&prev_wfstat->list,
> + &prev_wfstat->parent->workfunclist)) {
> + ret = list_entry(prev_wfstat->list.next, struct workfunc_stats,
> + list);
> spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
> - do {
> - cpu = cpumask_next(cpu, cpu_possible_mask);
> - if (cpu >= nr_cpu_ids)
> - return NULL;
> - } while (!(ret = workqueue_stat_start_cpu(cpu)));
> return ret;
> }
> +
> + if (!list_is_last(&prev_wfstat->parent->list,
> + &workqueue_cpu_stat(cpu)->list)) {
> + struct cpu_workqueue_stats *cws = list_entry(
> + prev_wfstat->parent->list.next,
> + struct cpu_workqueue_stats, list);
> + ret = list_entry(cws->workfunclist.next, struct workfunc_stats,
> + list);
> + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
> + return ret;
> + }
> +
> spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>
> - return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
> - list);
> + do {
> + cpu = cpumask_next(cpu, cpu_possible_mask);
> + if (cpu >= nr_cpu_ids)


The above test will take the total number of cpus as
the last cpu number. This assumption may be false
if the possible cpus are not contiguous.

Perhaps you'd better use:


for_each_possible_cpu(cpu) {
ret = workqueue_stat_start_cpu(cpu);
if (ret)
break;
}

After a quick look in cpumask.h it seems it will start
from the cpu which follows the one you give in parameter.
So it should be fine.



> + return NULL;
> + } while (!(ret = workqueue_stat_start_cpu(cpu)));
> +
> + return ret;
> }
>
> static int workqueue_stat_show(struct seq_file *s, void *p)
> {
> - struct cpu_workqueue_stats *cws = p;
> + struct workfunc_stats *wfstat = p;
> + struct cpu_workqueue_stats *cws = wfstat->parent;
> struct pid *pid;
> struct task_struct *tsk;
>
> - pid = find_get_pid(cws->pid);
> - if (pid) {
> - tsk = get_pid_task(pid, PIDTYPE_PID);
> - if (tsk) {
> - seq_printf(s, "%3d %6d %6u %s\n", cws->cpu,
> - atomic_read(&cws->inserted), cws->executed,
> - tsk->comm);
> - put_task_struct(tsk);
> + if (!wfstat->func) {
> + /* It is first dummy node, need to print workqueue info */
> + pid = find_get_pid(cws->pid);
> + if (pid) {
> + tsk = get_pid_task(pid, PIDTYPE_PID);
> + if (tsk) {
> + seq_printf(s, "%3d %6d %6u %s:%d\n",
> + cws->cpu,
> + atomic_read(&cws->inserted),
> + cws->executed,
> + tsk->comm,
> + cws->pid);
> + put_task_struct(tsk);
> + }
> + put_pid(pid);
> }
> - put_pid(pid);
> + } else {
> + /* It is effect node, need to print workfunc info */
> + int lastwf = list_is_last(&wfstat->list, &cws->workfunclist);
> + seq_printf(s, "%3d %6d %6u %c-%pF\n",
> + cws->cpu,
> + atomic_read(&wfstat->inserted),
> + wfstat->executed,
> + lastwf ? '`' : '|',
> + wfstat->func);
> }
>
> return 0;
> @@ -205,7 +335,8 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
>
> static int workqueue_stat_headers(struct seq_file *s)
> {
> - seq_printf(s, "# CPU INSERTED EXECUTED NAME\n");
> + seq_printf(s, "# CPU INSERTED EXECUTED TASKNAME:PID\n");
> + seq_printf(s, "# | | | `-WORKFUNC\n");
> seq_printf(s, "# | | | |\n");
> return 0;
> }
> --



Looks very nice, like the rest of the series.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/