Re: [RFC 5/5] workqueue: Print backtraces from CPUs with hung CPU bound workqueues
From: Tejun Heo
Date: Thu Feb 02 2023 - 18:45:17 EST
Hello,
I generally really like it.
> +static bool show_pool_suspicious_workers(struct worker_pool *pool, bool shown_title)
Maybe the name can be a bit more specific? show_cpu_pool_hog()?
> +{
> + bool shown_any = false;
> + struct worker *worker;
> + unsigned long flags;
> + int bkt;
> +
> + raw_spin_lock_irqsave(&pool->lock, flags);
> +
> + if (pool->cpu < 0)
> + goto out;
This can be tested before grabbling the lock.
> + if (!per_cpu(wq_watchdog_hung_cpu, pool->cpu))
> + goto out;
Given that the state is per-pool, would it make sense to mark this on the
pool instead?
> + if (list_empty(&pool->worklist))
> + goto out;
This condition isn't really necessary, right?
> + hash_for_each(pool->busy_hash, bkt, worker, hentry) {
> + if (!task_is_running(worker->task))
> + continue;
> +
> + if (!shown_title) {
> + pr_info("Showing backtraces of running workers in stuck CPU-bound worker pools:\n");
> + shown_title = true;
> + }
> +
> + shown_any = true;
> + pr_info("pool %d:\n", pool->id);
> + sched_show_task(worker->task);
> + }
> +out:
> + raw_spin_unlock_irqrestore(&pool->lock, flags);
> + return shown_any;
> +}
> +
> +static void show_suspicious_workers(void)
> +{
> + struct worker_pool *pool;
> + bool shown_title = false;
> + int pi;
> +
> + rcu_read_lock();
> +
> + for_each_pool(pool, pi) {
> + bool shown;
> +
> + shown = show_pool_suspicious_workers(pool, shown_title);
> + if (shown)
> + shown_title = shown;
Maybe, move shown to the outer scope and:
shown |= show_pool_suspicious_workers(pool, show);
> + }
> +
> + rcu_read_unlock();
> +}
>
> static void wq_watchdog_reset_touched(void)
> {
Thanks.
--
tejun