workqueue: dump workqueues on sysrq-t
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index beeeac9e0e3e6829faa3fe16304cb3e7adbe8850..1ca0b1d54e701644f0c714bc01fa999aab304a97 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
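
This diff adds show_workqueue_state(), which walks every workqueue and worker pool and prints the busy ones, for use from the sysrq-t handler. To make that walk safe from sysrq context, workqueue_struct destruction becomes sched-RCU deferred, and two purely informational fields (pool->manager and wq_barrier->task) are recorded so the dump can name the managing task and the flusher behind a barrier work. The range shown here also picks up two adjacent changes to the same file: a waitqueue-based fix for concurrent cancel_work_sync() calls under PREEMPT_NONE, and a conversion of wq_cpumask_show() to the %*pb bitmap format.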
@@ -159,6 +159,7 @@ struct worker_pool {
 
        /* see manage_workers() for details on the two manager mutexes */
        struct mutex            manager_arb;    /* manager arbitration */
+       struct worker           *manager;       /* L: purely informational */
        struct mutex            attach_mutex;   /* attach/detach exclusion */
        struct list_head        workers;        /* A: attached workers */
        struct completion       *detach_completion; /* all workers detached */
@@ -230,7 +231,7 @@ struct wq_device;
  */
 struct workqueue_struct {
        struct list_head        pwqs;           /* WR: all pwqs of this wq */
-       struct list_head        list;           /* PL: list of all workqueues */
+       struct list_head        list;           /* PR: list of all workqueues */
 
        struct mutex            mutex;          /* protects this wq */
        int                     work_color;     /* WQ: current work color */
@@ -257,6 +258,13 @@ struct workqueue_struct {
 #endif
        char                    name[WQ_NAME_LEN]; /* I: workqueue name */
 
+       /*
+        * Destruction of workqueue_struct is sched-RCU protected to allow
+        * walking the workqueues list without grabbing wq_pool_mutex.
+        * This is used to dump all workqueues from sysrq.
+        */
+       struct rcu_head         rcu;
+
        /* hot fields used during command issue, aligned to cacheline */
        unsigned int            flags ____cacheline_aligned; /* WQ: WQ_* flags */
        struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
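
The comment just added to workqueue_struct is the key enabler: once frees are deferred through sched-RCU, any preemption-disabled context may traverse the workqueues list without taking wq_pool_mutex. A minimal sketch of the reader side this buys (dump_wq_names() is a hypothetical helper living in workqueue.c; the real walker added by this patch is show_workqueue_state() further down):

    /*
     * Sketch only: walk all workqueues without wq_pool_mutex.  Safe
     * because destruction is deferred via call_rcu_sched(), so every
     * wq reachable from the list stays valid while preemption is off.
     */
    static void dump_wq_names(void)
    {
            struct workqueue_struct *wq;

            rcu_read_lock_sched();  /* holds off the sched-RCU grace period */
            list_for_each_entry_rcu(wq, &workqueues, list)
                    pr_info("wq: %s\n", wq->name);
            rcu_read_unlock_sched();
    }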
@@ -288,7 +296,7 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
 static DEFINE_MUTEX(wq_pool_mutex);    /* protects pools and workqueues list */
 static DEFINE_SPINLOCK(wq_mayday_lock);        /* protects wq->maydays list */
 
-static LIST_HEAD(workqueues);          /* PL: list of all workqueues */
+static LIST_HEAD(workqueues);          /* PR: list of all workqueues */
 static bool workqueue_freezing;                /* PL: have wqs started freezing? */
 
 /* the per-cpu worker pools */
@@ -1911,9 +1919,11 @@ static bool manage_workers(struct worker *worker)
         */
        if (!mutex_trylock(&pool->manager_arb))
                return false;
+       pool->manager = worker;
 
        maybe_create_worker(pool);
 
+       pool->manager = NULL;
        mutex_unlock(&pool->manager_arb);
        return true;
 }
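
pool->manager exists purely so the dump can report who is managing: it is only ever written inside the manager_arb critical section above, and show_workqueue_state() below merely reads it under pool->lock for printing, which is all the "purely informational" annotation on the field promises.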
@@ -2303,6 +2313,7 @@ repeat:
 struct wq_barrier {
        struct work_struct      work;
        struct completion       done;
+       struct task_struct      *task;  /* purely informational */
 };
 
 static void wq_barrier_func(struct work_struct *work)
@@ -2351,6 +2362,7 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
        INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
        __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
        init_completion(&barr->done);
+       barr->task = current;
 
        /*
         * If @target is currently being executed, schedule the
@@ -2728,19 +2740,57 @@ bool flush_work(struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(flush_work);
 
+struct cwt_wait {
+       wait_queue_t            wait;
+       struct work_struct      *work;
+};
+
+static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+       struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
+
+       if (cwait->work != key)
+               return 0;
+       return autoremove_wake_function(wait, mode, sync, key);
+}
+
 static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 {
+       static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
        unsigned long flags;
        int ret;
 
        do {
                ret = try_to_grab_pending(work, is_dwork, &flags);
                /*
-                * If someone else is canceling, wait for the same event it
-                * would be waiting for before retrying.
+                * If someone else is already canceling, wait for it to
+                * finish.  flush_work() doesn't work for PREEMPT_NONE
+                * because we may get scheduled between @work's completion
+                * and the other canceling task resuming and clearing
+                * CANCELING - flush_work() will return false immediately
+                * as @work is no longer busy, try_to_grab_pending() will
+                * return -ENOENT as @work is still being canceled and the
+                * other canceling task won't be able to clear CANCELING as
+                * we're hogging the CPU.
+                *
+                * Let's wait for completion using a waitqueue.  As this
+                * may lead to the thundering herd problem, use a custom
+                * wake function which matches @work along with exclusive
+                * wait and wakeup.
                 */
-               if (unlikely(ret == -ENOENT))
-                       flush_work(work);
+               if (unlikely(ret == -ENOENT)) {
+                       struct cwt_wait cwait;
+
+                       init_wait(&cwait.wait);
+                       cwait.wait.func = cwt_wakefn;
+                       cwait.work = work;
+
+                       prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
+                                                 TASK_UNINTERRUPTIBLE);
+                       if (work_is_canceling(work))
+                               schedule();
+                       finish_wait(&cancel_waitq, &cwait.wait);
+               }
        } while (unlikely(ret < 0));
 
        /* tell other tasks trying to grab @work to back off */
@@ -2749,6 +2799,16 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 
        flush_work(work);
        clear_work_data(work);
+
+       /*
+        * Paired with prepare_to_wait() above so that either
+        * waitqueue_active() is visible here or !work_is_canceling() is
+        * visible there.
+        */
+       smp_mb();
+       if (waitqueue_active(&cancel_waitq))
+               __wake_up(&cancel_waitq, TASK_NORMAL, 1, work);
+
        return ret;
 }
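
The cancel_waitq machinery is an instance of a reusable pattern: exclusive waiters carrying a custom wake function that filters on the wake key, so a wakeup aimed at one @work neither wakes cancellers of unrelated works nor thunders through the whole queue. A condensed sketch with hypothetical names (keyed_wait and keyed_wakefn mirror cwt_wait and cwt_wakefn above):

    struct keyed_wait {
            wait_queue_t    wait;
            void            *object;        /* what this waiter waits on */
    };

    static int keyed_wakefn(wait_queue_t *wait, unsigned mode, int sync,
                            void *key)
    {
            struct keyed_wait *kw = container_of(wait, struct keyed_wait, wait);

            if (kw->object != key)
                    return 0;       /* not ours: stay queued, keep scanning */
            return autoremove_wake_function(wait, mode, sync, key);
    }

The waker passes the object as the key, as in __wake_up(&waitq, TASK_NORMAL, 1, object), so together with prepare_to_wait_exclusive() at most one matching waiter is taken off the queue per event.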
 
@@ -3083,10 +3143,9 @@ static ssize_t wq_cpumask_show(struct device *dev,
        int written;
 
        mutex_lock(&wq->mutex);
-       written = cpumask_scnprintf(buf, PAGE_SIZE, wq->unbound_attrs->cpumask);
+       written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
+                           cpumask_pr_args(wq->unbound_attrs->cpumask));
        mutex_unlock(&wq->mutex);
-
-       written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
        return written;
 }
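
%*pb and %*pbl are the kernel printf extensions for bitmaps; cpumask_pr_args() expands to the (nr_cpu_ids, bits pointer) pair they expect, and the "\n" now rides along in the same format string, which is why the separate newline-appending scnprintf() call could be dropped. Illustrative use:

    /* Print a cpumask as a hex bitmap and as a CPU range list. */
    pr_info("mask=%*pb list=%*pbl\n",
            cpumask_pr_args(mask), cpumask_pr_args(mask));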
 
@@ -3377,6 +3436,20 @@ static int init_worker_pool(struct worker_pool *pool)
        return 0;
 }
 
+static void rcu_free_wq(struct rcu_head *rcu)
+{
+       struct workqueue_struct *wq =
+               container_of(rcu, struct workqueue_struct, rcu);
+
+       if (!(wq->flags & WQ_UNBOUND))
+               free_percpu(wq->cpu_pwqs);
+       else
+               free_workqueue_attrs(wq->unbound_attrs);
+
+       kfree(wq->rescuer);
+       kfree(wq);
+}
+
 static void rcu_free_pool(struct rcu_head *rcu)
 {
        struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
@@ -3554,12 +3627,10 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
 
        /*
         * If we're the last pwq going away, @wq is already dead and no one
-        * is gonna access it anymore.  Free it.
+        * is gonna access it anymore.  Schedule RCU free.
         */
-       if (is_last) {
-               free_workqueue_attrs(wq->unbound_attrs);
-               kfree(wq);
-       }
+       if (is_last)
+               call_rcu_sched(&wq->rcu, rcu_free_wq);
 }
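
With this, teardown follows the usual RCU discipline: unlink with list_del_rcu() under wq_pool_mutex, then hand the struct to call_rcu_sched() so it is freed only after all rcu_read_lock_sched() sections that might still see it have finished. Note that rcu_free_wq() also takes over freeing wq->rescuer; destroy_workqueue() below accordingly stops kfree()ing the rescuer directly, since show_pwq() may still read pwq->wq->rescuer while the dump walks the list.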
 
 /**
@@ -4096,7 +4167,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
                pwq_adjust_max_active(pwq);
        mutex_unlock(&wq->mutex);
 
-       list_add(&wq->list, &workqueues);
+       list_add_tail_rcu(&wq->list, &workqueues);
 
        mutex_unlock(&wq_pool_mutex);
 
@@ -4152,24 +4223,20 @@ void destroy_workqueue(struct workqueue_struct *wq)
         * flushing is complete in case freeze races us.
         */
        mutex_lock(&wq_pool_mutex);
-       list_del_init(&wq->list);
+       list_del_rcu(&wq->list);
        mutex_unlock(&wq_pool_mutex);
 
        workqueue_sysfs_unregister(wq);
 
-       if (wq->rescuer) {
+       if (wq->rescuer)
                kthread_stop(wq->rescuer->task);
-               kfree(wq->rescuer);
-               wq->rescuer = NULL;
-       }
 
        if (!(wq->flags & WQ_UNBOUND)) {
                /*
                 * The base ref is never dropped on per-cpu pwqs.  Directly
-                * free the pwqs and wq.
+                * schedule RCU free.
                 */
-               free_percpu(wq->cpu_pwqs);
-               kfree(wq);
+               call_rcu_sched(&wq->rcu, rcu_free_wq);
        } else {
                /*
                 * We're the sole accessor of @wq at this point.  Directly
@@ -4390,6 +4457,166 @@ void print_worker_info(const char *log_lvl, struct task_struct *task)
        }
 }
 
+static void pr_cont_pool_info(struct worker_pool *pool)
+{
+       pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
+       if (pool->node != NUMA_NO_NODE)
+               pr_cont(" node=%d", pool->node);
+       pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice);
+}
+
+static void pr_cont_work(bool comma, struct work_struct *work)
+{
+       if (work->func == wq_barrier_func) {
+               struct wq_barrier *barr;
+
+               barr = container_of(work, struct wq_barrier, work);
+
+               pr_cont("%s BAR(%d)", comma ? "," : "",
+                       task_pid_nr(barr->task));
+       } else {
+               pr_cont("%s %pf", comma ? "," : "", work->func);
+       }
+}
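
So in the dump a flush barrier appears as BAR(<pid>), naming the flusher recorded in barr->task, instead of as an anonymous wq_barrier_func entry; that is the sole reason the task field was added to struct wq_barrier above.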
+
+static void show_pwq(struct pool_workqueue *pwq)
+{
+       struct worker_pool *pool = pwq->pool;
+       struct work_struct *work;
+       struct worker *worker;
+       bool has_in_flight = false, has_pending = false;
+       int bkt;
+
+       pr_info("  pwq %d:", pool->id);
+       pr_cont_pool_info(pool);
+
+       pr_cont(" active=%d/%d%s\n", pwq->nr_active, pwq->max_active,
+               !list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
+
+       hash_for_each(pool->busy_hash, bkt, worker, hentry) {
+               if (worker->current_pwq == pwq) {
+                       has_in_flight = true;
+                       break;
+               }
+       }
+       if (has_in_flight) {
+               bool comma = false;
+
+               pr_info("    in-flight:");
+               hash_for_each(pool->busy_hash, bkt, worker, hentry) {
+                       if (worker->current_pwq != pwq)
+                               continue;
+
+                       pr_cont("%s %d%s:%pf", comma ? "," : "",
+                               task_pid_nr(worker->task),
+                               worker == pwq->wq->rescuer ? "(RESCUER)" : "",
+                               worker->current_func);
+                       list_for_each_entry(work, &worker->scheduled, entry)
+                               pr_cont_work(false, work);
+                       comma = true;
+               }
+               pr_cont("\n");
+       }
+
+       list_for_each_entry(work, &pool->worklist, entry) {
+               if (get_work_pwq(work) == pwq) {
+                       has_pending = true;
+                       break;
+               }
+       }
+       if (has_pending) {
+               bool comma = false;
+
+               pr_info("    pending:");
+               list_for_each_entry(work, &pool->worklist, entry) {
+                       if (get_work_pwq(work) != pwq)
+                               continue;
+
+                       pr_cont_work(comma, work);
+                       comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
+               }
+               pr_cont("\n");
+       }
+
+       if (!list_empty(&pwq->delayed_works)) {
+               bool comma = false;
+
+               pr_info("    delayed:");
+               list_for_each_entry(work, &pwq->delayed_works, entry) {
+                       pr_cont_work(comma, work);
+                       comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
+               }
+               pr_cont("\n");
+       }
+}
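
show_pwq() touches pool->busy_hash, pool->worklist and pwq->delayed_works, all of which are protected by pool->lock; it therefore relies on its caller, show_workqueue_state() below, wrapping each invocation in spin_lock_irqsave(&pwq->pool->lock, ...).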
+
+/**
+ * show_workqueue_state - dump workqueue state
+ *
+ * Called from a sysrq handler and prints out all busy workqueues and
+ * pools.
+ */
+void show_workqueue_state(void)
+{
+       struct workqueue_struct *wq;
+       struct worker_pool *pool;
+       unsigned long flags;
+       int pi;
+
+       rcu_read_lock_sched();
+
+       pr_info("Showing busy workqueues and worker pools:\n");
+
+       list_for_each_entry_rcu(wq, &workqueues, list) {
+               struct pool_workqueue *pwq;
+               bool idle = true;
+
+               for_each_pwq(pwq, wq) {
+                       if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
+                               idle = false;
+                               break;
+                       }
+               }
+               if (idle)
+                       continue;
+
+               pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
+
+               for_each_pwq(pwq, wq) {
+                       spin_lock_irqsave(&pwq->pool->lock, flags);
+                       if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+                               show_pwq(pwq);
+                       spin_unlock_irqrestore(&pwq->pool->lock, flags);
+               }
+       }
+
+       for_each_pool(pool, pi) {
+               struct worker *worker;
+               bool first = true;
+
+               spin_lock_irqsave(&pool->lock, flags);
+               if (pool->nr_workers == pool->nr_idle)
+                       goto next_pool;
+
+               pr_info("pool %d:", pool->id);
+               pr_cont_pool_info(pool);
+               pr_cont(" workers=%d", pool->nr_workers);
+               if (pool->manager)
+                       pr_cont(" manager: %d",
+                               task_pid_nr(pool->manager->task));
+               list_for_each_entry(worker, &pool->idle_list, entry) {
+                       pr_cont(" %s%d", first ? "idle: " : "",
+                               task_pid_nr(worker->task));
+                       first = false;
+               }
+               pr_cont("\n");
+       next_pool:
+               spin_unlock_irqrestore(&pool->lock, flags);
+       }
+
+       rcu_read_unlock_sched();
+}
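
Pieced together from the format strings above, the output could look roughly like this (pool ids, PIDs and work functions are invented for illustration):

    Showing busy workqueues and worker pools:
    workqueue events: flags=0x0
      pwq 4: cpus=2 node=0 flags=0x0 nice=0 active=1/256
        in-flight: 339:example_work_fn
        pending: other_work_fn
    pool 4: cpus=2 node=0 flags=0x0 nice=0 workers=3 manager: 340 idle: 341 342

Everything runs under rcu_read_lock_sched() plus per-pool spin_lock_irqsave(), so the dump is safe to trigger from interrupt context even while workqueues are being created or destroyed.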
+
 /*
  * CPU hotplug.
  *