workqueue: dump workqueues on sysrq-t
author Tejun Heo <tj@kernel.org>
Mon, 9 Mar 2015 13:22:28 +0000 (09:22 -0400)
committer Tejun Heo <tj@kernel.org>
Mon, 9 Mar 2015 13:22:28 +0000 (09:22 -0400)
Workqueues are used extensively throughout the kernel but sometimes
it's difficult to debug stalls involving work items because visibility
into their inner workings is fairly limited.  Although sysrq-t task dump
annotates each active worker task with the information on the work
item being executed, it is challenging to find out which work items
are pending or delayed on which queues and how pools are being
managed.

This patch implements show_workqueue_state() which dumps all busy
workqueues and pools and is called from the sysrq-t handler.  At the
end of sysrq-t dump, something like the following is printed.

 Showing busy workqueues and worker pools:
 ...
 workqueue filler_wq: flags=0x0
   pwq 2: cpus=1 node=0 flags=0x0 nice=0 active=2/256
     in-flight: 491:filler_workfn, 507:filler_workfn
   pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=2/256
     in-flight: 501:filler_workfn
     pending: filler_workfn
 ...
 workqueue test_wq: flags=0x8
   pwq 2: cpus=1 node=0 flags=0x0 nice=0 active=1/1
     in-flight: 510(RESCUER):test_workfn BAR(69) BAR(500)
     delayed: test_workfn1 BAR(492), test_workfn2
 ...
 pool 0: cpus=0 node=0 flags=0x0 nice=0 workers=2 manager: 137
 pool 2: cpus=1 node=0 flags=0x0 nice=0 workers=3 manager: 469
 pool 3: cpus=1 node=0 flags=0x0 nice=-20 workers=2 idle: 16
 pool 8: cpus=0-3 flags=0x4 nice=0 workers=2 manager: 62

The above shows that test_wq is executing test_workfn() on pid 510
which is the rescuer and also that there are two tasks 69 and 500
waiting for the work item to finish in flush_work().  As test_wq has
max_active of 1, there are two work items for test_workfn1() and
test_workfn2() which are delayed till the current work item is
finished.  In addition, pid 492 is flushing test_workfn1().

The work item for test_workfn() is being executed on pwq of pool 2
which is the normal priority per-cpu pool for CPU 1.  The pool has
three workers, two of which are executing filler_workfn() for
filler_wq and the last one is assuming the manager role trying to
create more workers.

This extra workqueue state dump will hopefully help chasing down hangs
involving workqueues.

v3: cpulist_pr_cont() replaced with "%*pbl" printf formatting.

v2: As suggested by Andrew, minor formatting change in pr_cont_work(),
    printk()'s replaced with pr_info()'s, and cpumask printing now
    uses cpulist_pr_cont().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
CC: Ingo Molnar <mingo@redhat.com>
drivers/tty/sysrq.c
include/linux/workqueue.h
kernel/workqueue.c

index 259a4d5a4e8f4ca4ce305a69ca8ed469106c1d16..843f2cdc280b9d178d09469eb0363658d6a69805 100644 (file)
@@ -275,6 +275,7 @@ static struct sysrq_key_op sysrq_showregs_op = {
 static void sysrq_handle_showstate(int key)
 {
        show_state();
+       show_workqueue_state();
 }
 static struct sysrq_key_op sysrq_showstate_op = {
        .handler        = sysrq_handle_showstate,
index f597846ff605cccaaf36d3cc4e6d603b630af156..deee212af8e0939a6d97189569cf8199fb840124 100644 (file)
@@ -454,6 +454,7 @@ extern bool workqueue_congested(int cpu, struct workqueue_struct *wq);
 extern unsigned int work_busy(struct work_struct *work);
 extern __printf(1, 2) void set_worker_desc(const char *fmt, ...);
 extern void print_worker_info(const char *log_lvl, struct task_struct *task);
+extern void show_workqueue_state(void);
 
 /**
  * queue_work - queue work on a workqueue
index 0c329a6f0c5187d10f460d0c98d8f8325c427190..1ca0b1d54e701644f0c714bc01fa999aab304a97 100644 (file)
@@ -4457,6 +4457,166 @@ void print_worker_info(const char *log_lvl, struct task_struct *task)
        }
 }
 
+static void pr_cont_pool_info(struct worker_pool *pool)
+{
+       pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
+       if (pool->node != NUMA_NO_NODE)
+               pr_cont(" node=%d", pool->node);
+       pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice);
+}
+
+static void pr_cont_work(bool comma, struct work_struct *work)
+{
+       if (work->func == wq_barrier_func) {
+               struct wq_barrier *barr;
+
+               barr = container_of(work, struct wq_barrier, work);
+
+               pr_cont("%s BAR(%d)", comma ? "," : "",
+                       task_pid_nr(barr->task));
+       } else {
+               pr_cont("%s %pf", comma ? "," : "", work->func);
+       }
+}
+
+static void show_pwq(struct pool_workqueue *pwq)
+{
+       struct worker_pool *pool = pwq->pool;
+       struct work_struct *work;
+       struct worker *worker;
+       bool has_in_flight = false, has_pending = false;
+       int bkt;
+
+       pr_info("  pwq %d:", pool->id);
+       pr_cont_pool_info(pool);
+
+       pr_cont(" active=%d/%d%s\n", pwq->nr_active, pwq->max_active,
+               !list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
+
+       hash_for_each(pool->busy_hash, bkt, worker, hentry) {
+               if (worker->current_pwq == pwq) {
+                       has_in_flight = true;
+                       break;
+               }
+       }
+       if (has_in_flight) {
+               bool comma = false;
+
+               pr_info("    in-flight:");
+               hash_for_each(pool->busy_hash, bkt, worker, hentry) {
+                       if (worker->current_pwq != pwq)
+                               continue;
+
+                       pr_cont("%s %d%s:%pf", comma ? "," : "",
+                               task_pid_nr(worker->task),
+                               worker == pwq->wq->rescuer ? "(RESCUER)" : "",
+                               worker->current_func);
+                       list_for_each_entry(work, &worker->scheduled, entry)
+                               pr_cont_work(false, work);
+                       comma = true;
+               }
+               pr_cont("\n");
+       }
+
+       list_for_each_entry(work, &pool->worklist, entry) {
+               if (get_work_pwq(work) == pwq) {
+                       has_pending = true;
+                       break;
+               }
+       }
+       if (has_pending) {
+               bool comma = false;
+
+               pr_info("    pending:");
+               list_for_each_entry(work, &pool->worklist, entry) {
+                       if (get_work_pwq(work) != pwq)
+                               continue;
+
+                       pr_cont_work(comma, work);
+                       comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
+               }
+               pr_cont("\n");
+       }
+
+       if (!list_empty(&pwq->delayed_works)) {
+               bool comma = false;
+
+               pr_info("    delayed:");
+               list_for_each_entry(work, &pwq->delayed_works, entry) {
+                       pr_cont_work(comma, work);
+                       comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
+               }
+               pr_cont("\n");
+       }
+}
+
+/**
+ * show_workqueue_state - dump workqueue state
+ *
+ * Called from a sysrq handler and prints out all busy workqueues and
+ * pools.
+ */
+void show_workqueue_state(void)
+{
+       struct workqueue_struct *wq;
+       struct worker_pool *pool;
+       unsigned long flags;
+       int pi;
+
+       rcu_read_lock_sched();
+
+       pr_info("Showing busy workqueues and worker pools:\n");
+
+       list_for_each_entry_rcu(wq, &workqueues, list) {
+               struct pool_workqueue *pwq;
+               bool idle = true;
+
+               for_each_pwq(pwq, wq) {
+                       if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
+                               idle = false;
+                               break;
+                       }
+               }
+               if (idle)
+                       continue;
+
+               pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
+
+               for_each_pwq(pwq, wq) {
+                       spin_lock_irqsave(&pwq->pool->lock, flags);
+                       if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+                               show_pwq(pwq);
+                       spin_unlock_irqrestore(&pwq->pool->lock, flags);
+               }
+       }
+
+       for_each_pool(pool, pi) {
+               struct worker *worker;
+               bool first = true;
+
+               spin_lock_irqsave(&pool->lock, flags);
+               if (pool->nr_workers == pool->nr_idle)
+                       goto next_pool;
+
+               pr_info("pool %d:", pool->id);
+               pr_cont_pool_info(pool);
+               pr_cont(" workers=%d", pool->nr_workers);
+               if (pool->manager)
+                       pr_cont(" manager: %d",
+                               task_pid_nr(pool->manager->task));
+               list_for_each_entry(worker, &pool->idle_list, entry) {
+                       pr_cont(" %s%d", first ? "idle: " : "",
+                               task_pid_nr(worker->task));
+                       first = false;
+               }
+               pr_cont("\n");
+       next_pool:
+               spin_unlock_irqrestore(&pool->lock, flags);
+       }
+
+       rcu_read_unlock_sched();
+}
+
 /*
  * CPU hotplug.
  *