workqueue: use std_ prefix for the standard per-cpu pools
[linux-2.6-block.git] kernel/workqueue.c
index 1dae900df798d1c4e1be722567fd2906041f08f5..634251572fddb71d5709cdac03a5908717364241 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -41,8 +41,9 @@
 #include <linux/debug_locks.h>
 #include <linux/lockdep.h>
 #include <linux/idr.h>
+#include <linux/hashtable.h>
 
-#include "workqueue_sched.h"
+#include "workqueue_internal.h"
 
 enum {
        /*
@@ -79,11 +80,9 @@ enum {
        WORKER_NOT_RUNNING      = WORKER_PREP | WORKER_UNBOUND |
                                  WORKER_CPU_INTENSIVE,
 
-       NR_WORKER_POOLS         = 2,            /* # worker pools per gcwq */
+       NR_STD_WORKER_POOLS     = 2,            /* # standard pools per cpu */
 
        BUSY_WORKER_HASH_ORDER  = 6,            /* 64 pointers */
-       BUSY_WORKER_HASH_SIZE   = 1 << BUSY_WORKER_HASH_ORDER,
-       BUSY_WORKER_HASH_MASK   = BUSY_WORKER_HASH_SIZE - 1,
 
        MAX_IDLE_WORKERS_RATIO  = 4,            /* 1/4 of busy can be idle */
        IDLE_WORKER_TIMEOUT     = 300 * HZ,     /* keep idle ones for 5 mins */
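
The BUSY_WORKER_HASH_SIZE/MASK constants can go because <linux/hashtable.h> derives the bucket count from the order. Roughly, from memory of this era's header (a sketch, not copied from the tree):

	#define DECLARE_HASHTABLE(name, bits)				\
		struct hlist_head name[1 << (bits)]

	#define HASH_SIZE(name)	(ARRAY_SIZE(name))
	#define HASH_BITS(name)	ilog2(HASH_SIZE(name))

so DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER) below still yields 64 buckets, and the mask is computed internally from the array size.
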
@@ -123,33 +122,7 @@ enum {
  * W: workqueue_lock protected.
  */
 
-struct global_cwq;
-struct worker_pool;
-
-/*
- * The poor guys doing the actual heavy lifting.  All on-duty workers
- * are either serving the manager role, on idle list or on busy hash.
- */
-struct worker {
-       /* on idle list while idle, on busy hash table while busy */
-       union {
-               struct list_head        entry;  /* L: while idle */
-               struct hlist_node       hentry; /* L: while busy */
-       };
-
-       struct work_struct      *current_work;  /* L: work being processed */
-       struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */
-       struct list_head        scheduled;      /* L: scheduled works */
-       struct task_struct      *task;          /* I: worker task */
-       struct worker_pool      *pool;          /* I: the associated pool */
-       /* 64 bytes boundary on 64bit, 32 on 32bit */
-       unsigned long           last_active;    /* L: last active timestamp */
-       unsigned int            flags;          /* X: flags */
-       int                     id;             /* I: worker id */
-
-       /* for rebinding worker to CPU */
-       struct work_struct      rebind_work;    /* L: for busy worker */
-};
+/* struct worker is defined in workqueue_internal.h */
 
 struct worker_pool {
        struct global_cwq       *gcwq;          /* I: the owning gcwq */
@@ -180,10 +153,10 @@ struct global_cwq {
        unsigned int            flags;          /* L: GCWQ_* flags */
 
        /* workers are chained either in busy_hash or pool idle_list */
-       struct hlist_head       busy_hash[BUSY_WORKER_HASH_SIZE];
+       DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
                                                /* L: hash of busy workers */
 
-       struct worker_pool      pools[NR_WORKER_POOLS];
+       struct worker_pool      pools[NR_STD_WORKER_POOLS];
                                                /* normal and highpri pools */
 } ____cacheline_aligned_in_smp;
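
For orientation, the busy_hash call sites in the rest of this diff follow the standard lifecycle of the then-new hashtable API; a condensed sketch using this tree's iterator signatures (which still thread a struct hlist_node cursor through the loops):

	DECLARE_HASHTABLE(busy_hash, 6);		/* 2^6 = 64 buckets */

	hash_init(busy_hash);				/* empty every bucket */
	hash_add(busy_hash, &worker->hentry,		/* insert, keyed by */
		 (unsigned long)work);			/* the work address */
	hash_for_each_possible(busy_hash, worker, pos,	/* walk one bucket */
			       hentry, (unsigned long)work)
		;
	hash_for_each(busy_hash, i, pos, worker, hentry) /* walk all buckets */
		;
	hash_del(&worker->hentry);			/* unlink one entry */
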
 
@@ -282,11 +255,10 @@ EXPORT_SYMBOL_GPL(system_freezable_wq);
 
 #define for_each_worker_pool(pool, gcwq)                               \
        for ((pool) = &(gcwq)->pools[0];                                \
-            (pool) < &(gcwq)->pools[NR_WORKER_POOLS]; (pool)++)
+            (pool) < &(gcwq)->pools[NR_STD_WORKER_POOLS]; (pool)++)
 
 #define for_each_busy_worker(worker, i, pos, gcwq)                     \
-       for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)                     \
-               hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry)
+       hash_for_each(gcwq->busy_hash, i, pos, worker, hentry)
 
 static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask,
                                  unsigned int sw)
@@ -464,7 +436,7 @@ static bool workqueue_freezing;             /* W: have wqs started freezing? */
  * try_to_wake_up().  Put it in a separate cacheline.
  */
 static DEFINE_PER_CPU(struct global_cwq, global_cwq);
-static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, pool_nr_running[NR_WORKER_POOLS]);
+static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, pool_nr_running[NR_STD_WORKER_POOLS]);
 
 /*
  * Global cpu workqueue and nr_running counter for unbound gcwq.  The
@@ -472,13 +444,14 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, pool_nr_running[NR_WORKER_POOLS])
  * workers have WORKER_UNBOUND set.
  */
 static struct global_cwq unbound_global_cwq;
-static atomic_t unbound_pool_nr_running[NR_WORKER_POOLS] = {
-       [0 ... NR_WORKER_POOLS - 1]     = ATOMIC_INIT(0),       /* always 0 */
+static atomic_t unbound_pool_nr_running[NR_STD_WORKER_POOLS] = {
+       [0 ... NR_STD_WORKER_POOLS - 1] = ATOMIC_INIT(0),       /* always 0 */
 };
 
 static int worker_thread(void *__worker);
+static unsigned int work_cpu(struct work_struct *work);
 
-static int worker_pool_pri(struct worker_pool *pool)
+static int std_worker_pool_pri(struct worker_pool *pool)
 {
        return pool - pool->gcwq->pools;
 }
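
std_worker_pool_pri() leans on C pointer arithmetic: subtracting the base of the gcwq's pools[] array from a pool pointer yields its element index (0 for the normal pool, 1 for highpri), so the index doubles as the priority. A self-contained illustration of the idiom, with hypothetical userspace names:

	#include <assert.h>

	struct pool { int nice; };
	struct owner { struct pool pools[2]; };

	static int pool_pri(struct owner *o, struct pool *p)
	{
		return p - o->pools;	/* counts elements, not bytes */
	}

	int main(void)
	{
		struct owner o;

		assert(pool_pri(&o, &o.pools[0]) == 0);	/* normal */
		assert(pool_pri(&o, &o.pools[1]) == 1);	/* highpri */
		return 0;
	}
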
@@ -494,7 +467,7 @@ static struct global_cwq *get_gcwq(unsigned int cpu)
 static atomic_t *get_pool_nr_running(struct worker_pool *pool)
 {
        int cpu = pool->gcwq->cpu;
-       int idx = worker_pool_pri(pool);
+       int idx = std_worker_pool_pri(pool);
 
        if (cpu != WORK_CPU_UNBOUND)
                return &per_cpu(pool_nr_running, cpu)[idx];
@@ -739,8 +712,10 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)
 {
        struct worker *worker = kthread_data(task);
 
-       if (!(worker->flags & WORKER_NOT_RUNNING))
+       if (!(worker->flags & WORKER_NOT_RUNNING)) {
+               WARN_ON_ONCE(worker->pool->gcwq->cpu != cpu);
                atomic_inc(get_pool_nr_running(worker->pool));
+       }
 }
 
 /**
@@ -762,12 +737,20 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task,
                                       unsigned int cpu)
 {
        struct worker *worker = kthread_data(task), *to_wakeup = NULL;
-       struct worker_pool *pool = worker->pool;
-       atomic_t *nr_running = get_pool_nr_running(pool);
+       struct worker_pool *pool;
+       atomic_t *nr_running;
 
+       /*
+        * Rescuers, which may not have all the fields set up like normal
+        * workers, also reach here; let's not access anything before
+        * checking NOT_RUNNING.
+        */
        if (worker->flags & WORKER_NOT_RUNNING)
                return NULL;
 
+       pool = worker->pool;
+       nr_running = get_pool_nr_running(pool);
+
        /* this can only happen on the local cpu */
        BUG_ON(cpu != raw_smp_processor_id());
 
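
The reordering matters: a rescuer can enter this scheduler hook without worker->pool set up, so nothing may be dereferenced until the NOT_RUNNING test has filtered rescuers out. The filtering follows from the flag definitions at the top of this file together with the PF_WQ_WORKER change further down:

	/* WORKER_NOT_RUNNING = WORKER_PREP | WORKER_UNBOUND |
	 *			WORKER_CPU_INTENSIVE
	 *
	 * A rescuer never clears WORKER_PREP, so for a rescuer the
	 * NOT_RUNNING test above is always true and it returns NULL
	 * before worker->pool is ever touched.
	 */
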
@@ -857,41 +840,31 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
 }
 
 /**
- * busy_worker_head - return the busy hash head for a work
- * @gcwq: gcwq of interest
- * @work: work to be hashed
- *
- * Return hash head of @gcwq for @work.
- *
- * CONTEXT:
- * spin_lock_irq(gcwq->lock).
- *
- * RETURNS:
- * Pointer to the hash head.
- */
-static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
-                                          struct work_struct *work)
-{
-       const int base_shift = ilog2(sizeof(struct work_struct));
-       unsigned long v = (unsigned long)work;
-
-       /* simple shift and fold hash, do we need something better? */
-       v >>= base_shift;
-       v += v >> BUSY_WORKER_HASH_ORDER;
-       v &= BUSY_WORKER_HASH_MASK;
-
-       return &gcwq->busy_hash[v];
-}
-
-/**
- * __find_worker_executing_work - find worker which is executing a work
+ * find_worker_executing_work - find worker which is executing a work
  * @gcwq: gcwq of interest
- * @bwh: hash head as returned by busy_worker_head()
  * @work: work to find worker for
  *
- * Find a worker which is executing @work on @gcwq.  @bwh should be
- * the hash head obtained by calling busy_worker_head() with the same
- * work.
+ * Find a worker which is executing @work on @gcwq by searching
+ * @gcwq->busy_hash which is keyed by the address of @work.  For a worker
+ * to match, its current execution should match the address of @work and
+ * its work function.  This is to avoid unwanted dependency between
+ * unrelated work executions through a work item being recycled while still
+ * being executed.
+ *
+ * This is a bit tricky.  A work item may be freed once its execution
+ * starts and nothing prevents the freed area from being recycled for
+ * another work item.  If the same work item address ends up being reused
+ * before the original execution finishes, workqueue will identify the
+ * recycled work item as currently executing and make it wait until the
+ * current execution finishes, introducing an unwanted dependency.
+ *
+ * This function checks the work item address and work function to avoid
+ * false positives.  Note that this isn't complete as one may
+ * construct a work function which can introduce dependency onto itself
+ * through a recycled work item.  Well, if somebody wants to shoot oneself
+ * in the foot that badly, there's only so much we can do, and if such
+ * deadlock actually occurs, it should be easy to locate the culprit work
+ * function.
  *
  * CONTEXT:
  * spin_lock_irq(gcwq->lock).
@@ -900,40 +873,19 @@ static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
  * Pointer to worker which is executing @work if found, NULL
  * otherwise.
  */
-static struct worker *__find_worker_executing_work(struct global_cwq *gcwq,
-                                                  struct hlist_head *bwh,
-                                                  struct work_struct *work)
+static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
+                                                struct work_struct *work)
 {
        struct worker *worker;
        struct hlist_node *tmp;
 
-       hlist_for_each_entry(worker, tmp, bwh, hentry)
-               if (worker->current_work == work)
+       hash_for_each_possible(gcwq->busy_hash, worker, tmp, hentry,
+                              (unsigned long)work)
+               if (worker->current_work == work &&
+                   worker->current_func == work->func)
                        return worker;
-       return NULL;
-}
 
-/**
- * find_worker_executing_work - find worker which is executing a work
- * @gcwq: gcwq of interest
- * @work: work to find worker for
- *
- * Find a worker which is executing @work on @gcwq.  This function is
- * identical to __find_worker_executing_work() except that this
- * function calculates @bwh itself.
- *
- * CONTEXT:
- * spin_lock_irq(gcwq->lock).
- *
- * RETURNS:
- * Pointer to worker which is executing @work if found, NULL
- * otherwise.
- */
-static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
-                                                struct work_struct *work)
-{
-       return __find_worker_executing_work(gcwq, busy_worker_head(gcwq, work),
-                                           work);
+       return NULL;
 }
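
To make the recycling hazard described above concrete, a hypothetical sequence (fn_a/fn_b are illustrative only):

	struct work_struct *w = kmalloc(sizeof(*w), GFP_KERNEL);

	INIT_WORK(w, fn_a);
	queue_work(system_wq, w);
	/* ... fn_a() starts executing and kfree()s its own work item ... */

	w = kmalloc(sizeof(*w), GFP_KERNEL);	/* may return the same address */
	INIT_WORK(w, fn_b);
	queue_work(system_wq, w);

	/* Matching on the address alone would park fn_b behind the still
	 * running fn_a.  Comparing worker->current_func as well lets the
	 * two unrelated executions proceed independently. */
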
 
 /**
@@ -1736,7 +1688,7 @@ static void rebind_workers(struct global_cwq *gcwq)
                 * wq doesn't really matter but let's keep @worker->pool
                 * and @cwq->pool consistent for sanity.
                 */
-               if (worker_pool_pri(worker->pool))
+               if (std_worker_pool_pri(worker->pool))
                        wq = system_highpri_wq;
                else
                        wq = system_wq;
@@ -1779,7 +1731,7 @@ static struct worker *alloc_worker(void)
 static struct worker *create_worker(struct worker_pool *pool)
 {
        struct global_cwq *gcwq = pool->gcwq;
-       const char *pri = worker_pool_pri(pool) ? "H" : "";
+       const char *pri = std_worker_pool_pri(pool) ? "H" : "";
        struct worker *worker = NULL;
        int id = -1;
 
@@ -1809,7 +1761,7 @@ static struct worker *create_worker(struct worker_pool *pool)
        if (IS_ERR(worker->task))
                goto fail;
 
-       if (worker_pool_pri(pool))
+       if (std_worker_pool_pri(pool))
                set_user_nice(worker->task, HIGHPRI_NICE_LEVEL);
 
        /*
@@ -2164,9 +2116,7 @@ __acquires(&gcwq->lock)
        struct cpu_workqueue_struct *cwq = get_work_cwq(work);
        struct worker_pool *pool = worker->pool;
        struct global_cwq *gcwq = pool->gcwq;
-       struct hlist_head *bwh = busy_worker_head(gcwq, work);
        bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE;
-       work_func_t f = work->func;
        int work_color;
        struct worker *collision;
 #ifdef CONFIG_LOCKDEP
@@ -2196,7 +2146,7 @@ __acquires(&gcwq->lock)
         * already processing the work.  If so, defer the work to the
         * currently executing one.
         */
-       collision = __find_worker_executing_work(gcwq, bwh, work);
+       collision = find_worker_executing_work(gcwq, work);
        if (unlikely(collision)) {
                move_linked_works(work, &collision->scheduled, NULL);
                return;
@@ -2204,8 +2154,9 @@ __acquires(&gcwq->lock)
 
        /* claim and dequeue */
        debug_work_deactivate(work);
-       hlist_add_head(&worker->hentry, bwh);
+       hash_add(gcwq->busy_hash, &worker->hentry, (unsigned long)work);
        worker->current_work = work;
+       worker->current_func = work->func;
        worker->current_cwq = cwq;
        work_color = get_work_color(work);
 
@@ -2238,7 +2189,7 @@ __acquires(&gcwq->lock)
        lock_map_acquire_read(&cwq->wq->lockdep_map);
        lock_map_acquire(&lockdep_map);
        trace_workqueue_execute_start(work);
-       f(work);
+       worker->current_func(work);
        /*
         * While we must be careful to not use "work" after this, the trace
         * point will only record its address.
@@ -2250,7 +2201,8 @@ __acquires(&gcwq->lock)
        if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
                pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
                       "     last function: %pf\n",
-                      current->comm, preempt_count(), task_pid_nr(current), f);
+                      current->comm, preempt_count(), task_pid_nr(current),
+                      worker->current_func);
                debug_show_held_locks(current);
                dump_stack();
        }
@@ -2262,8 +2214,9 @@ __acquires(&gcwq->lock)
                worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
 
        /* we're done with it, release */
-       hlist_del_init(&worker->hentry);
+       hash_del(&worker->hentry);
        worker->current_work = NULL;
+       worker->current_func = NULL;
        worker->current_cwq = NULL;
        cwq_dec_nr_in_flight(cwq, work_color);
 }
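
Stashing work->func in worker->current_func instead of a local serves two purposes: find_worker_executing_work() can compare it under gcwq->lock while the item runs, and the leaked-lock report above can still name the function even though *work may already have been freed by its own callback. The lifetime rule, restated as a sketch:

	worker->current_func = work->func;	/* sampled before the call */
	...
	worker->current_func(work);		/* may free or recycle *work */
	/* From here on only the cached worker->current_func and the bare
	 * address of @work are safe; *work must not be dereferenced. */
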
@@ -2386,7 +2339,7 @@ sleep:
 
 /**
  * rescuer_thread - the rescuer thread function
- * @__wq: the associated workqueue
+ * @__rescuer: self
  *
  * Workqueue rescuer thread function.  There's one rescuer for each
  * workqueue which has WQ_RESCUER set.
@@ -2403,20 +2356,27 @@ sleep:
  *
  * This should happen rarely.
  */
-static int rescuer_thread(void *__wq)
+static int rescuer_thread(void *__rescuer)
 {
-       struct workqueue_struct *wq = __wq;
-       struct worker *rescuer = wq->rescuer;
+       struct worker *rescuer = __rescuer;
+       struct workqueue_struct *wq = rescuer->rescue_wq;
        struct list_head *scheduled = &rescuer->scheduled;
        bool is_unbound = wq->flags & WQ_UNBOUND;
        unsigned int cpu;
 
        set_user_nice(current, RESCUER_NICE_LEVEL);
+
+       /*
+        * Mark rescuer as worker too.  As WORKER_PREP is never cleared, it
+        * doesn't participate in concurrency management.
+        */
+       rescuer->task->flags |= PF_WQ_WORKER;
 repeat:
        set_current_state(TASK_INTERRUPTIBLE);
 
        if (kthread_should_stop()) {
                __set_current_state(TASK_RUNNING);
+               rescuer->task->flags &= ~PF_WQ_WORKER;
                return 0;
        }
 
@@ -2460,6 +2420,8 @@ repeat:
                spin_unlock_irq(&gcwq->lock);
        }
 
+       /* rescuers should never participate in concurrency management */
+       WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
        schedule();
        goto repeat;
 }
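
PF_WQ_WORKER is what opts a task into the wq_worker_sleeping()/wq_worker_waking_up() callbacks, which is why setting it on rescuers is only safe in combination with the NOT_RUNNING bail-out added to wq_worker_sleeping() earlier in this diff. The hook site looks roughly like this (paraphrased from this era's kernel/sched/core.c, from memory):

	/* in __schedule(), as a task blocks: */
	if (prev->flags & PF_WQ_WORKER) {
		struct task_struct *to_wakeup;

		to_wakeup = wq_worker_sleeping(prev, cpu);
		if (to_wakeup)
			try_to_wake_up_local(to_wakeup);
	}
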
@@ -3295,7 +3257,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
                if (!rescuer)
                        goto err;
 
-               rescuer->task = kthread_create(rescuer_thread, wq, "%s",
+               rescuer->rescue_wq = wq;
+               rescuer->task = kthread_create(rescuer_thread, rescuer, "%s",
                                               wq->name);
                if (IS_ERR(rescuer->task))
                        goto err;
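
Handing the worker rather than the workqueue to kthread_create() means kthread_data() now returns a struct worker for rescuers just as it does for regular workers, which both scheduler hooks above depend on; the owning workqueue is recovered through the new rescue_wq back-pointer instead:

	/* uniform for regular workers and rescuers alike: */
	struct worker *worker = kthread_data(task);
	/* and, for a rescuer only: */
	struct workqueue_struct *wq = worker->rescue_wq;
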
@@ -3457,13 +3420,12 @@ EXPORT_SYMBOL_GPL(workqueue_congested);
  * RETURNS:
  * CPU number if @work was ever queued.  WORK_CPU_NONE otherwise.
  */
-unsigned int work_cpu(struct work_struct *work)
+static unsigned int work_cpu(struct work_struct *work)
 {
        struct global_cwq *gcwq = get_work_gcwq(work);
 
        return gcwq ? gcwq->cpu : WORK_CPU_NONE;
 }
-EXPORT_SYMBOL_GPL(work_cpu);
 
 /**
  * work_busy - test whether a work is currently pending or running
@@ -3485,7 +3447,7 @@ unsigned int work_busy(struct work_struct *work)
        unsigned int ret = 0;
 
        if (!gcwq)
-               return false;
+               return 0;
 
        spin_lock_irqsave(&gcwq->lock, flags);
 
@@ -3829,7 +3791,6 @@ out_unlock:
 static int __init init_workqueues(void)
 {
        unsigned int cpu;
-       int i;
 
        /* make sure we have enough bits for OFFQ CPU number */
        BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT)) <
@@ -3847,8 +3808,7 @@ static int __init init_workqueues(void)
                gcwq->cpu = cpu;
                gcwq->flags |= GCWQ_DISASSOCIATED;
 
-               for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
-                       INIT_HLIST_HEAD(&gcwq->busy_hash[i]);
+               hash_init(gcwq->busy_hash);
 
                for_each_worker_pool(pool, gcwq) {
                        pool->gcwq = gcwq;
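
hash_init() replaces the open-coded bucket loop with an equivalent helper; a sketch of how it looked in <linux/hashtable.h> at the time (from memory):

	static inline void __hash_init(struct hlist_head *ht, unsigned int sz)
	{
		unsigned int i;

		for (i = 0; i < sz; i++)
			INIT_HLIST_HEAD(&ht[i]);
	}

	#define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable))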