implement flush_work()
[linux-block.git] kernel/workqueue.c
index db49886bfae17271ee19709a984c57c208c5065b..918d55267a12a98c43aa1c9ee43e17644712af8d 100644
 /*
  * The per-CPU workqueue (if single thread, we always use the first
  * possible cpu).
- *
- * The sequence counters are for flush_scheduled_work().  It wants to wait
- * until all currently-scheduled works are completed, but it doesn't
- * want to be livelocked by new, incoming ones.  So it waits until
- * remove_sequence is >= the insert_sequence which pertained when
- * flush_scheduled_work() was called.
  */
 struct cpu_workqueue_struct {
 
        spinlock_t lock;
 
-       long remove_sequence;   /* Least-recently added (next to run) */
-       long insert_sequence;   /* Next to add */
-
        struct list_head worklist;
        wait_queue_head_t more_work;
-       wait_queue_head_t work_done;
 
        struct workqueue_struct *wq;
        struct task_struct *thread;
+       struct work_struct *current_work;
 
        int run_depth;          /* Detect run_workqueue() recursion depth */
 
@@ -96,13 +87,13 @@ static inline void set_wq_data(struct work_struct *work, void *wq)
        BUG_ON(!work_pending(work));
 
        new = (unsigned long) wq | (1UL << WORK_STRUCT_PENDING);
-       new |= work->management & WORK_STRUCT_FLAG_MASK;
-       work->management = new;
+       new |= WORK_STRUCT_FLAG_MASK & *work_data_bits(work);
+       atomic_long_set(&work->data, new);
 }
 
 static inline void *get_wq_data(struct work_struct *work)
 {
-       return (void *) (work->management & WORK_STRUCT_WQ_DATA_MASK);
+       return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
 }
 
 static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work)
@@ -130,16 +121,16 @@ static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work
            && work_pending(work)
            && !list_empty(&work->entry)) {
                work_func_t f = work->func;
+               cwq->current_work = work;
                list_del_init(&work->entry);
                spin_unlock_irqrestore(&cwq->lock, flags);
 
-               if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management))
+               if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work)))
                        work_release(work);
                f(work);
 
                spin_lock_irqsave(&cwq->lock, flags);
-               cwq->remove_sequence++;
-               wake_up(&cwq->work_done);
+               cwq->current_work = NULL;
                ret = 1;
        }
        spin_unlock_irqrestore(&cwq->lock, flags);
@@ -178,6 +169,17 @@ int fastcall run_scheduled_work(struct work_struct *work)
 }
 EXPORT_SYMBOL(run_scheduled_work);
 
+static void insert_work(struct cpu_workqueue_struct *cwq,
+                               struct work_struct *work, int tail)
+{
+       set_wq_data(work, cwq);
+       if (tail)
+               list_add_tail(&work->entry, &cwq->worklist);
+       else
+               list_add(&work->entry, &cwq->worklist);
+       wake_up(&cwq->more_work);
+}
+
 /* Preempt must be disabled. */
 static void __queue_work(struct cpu_workqueue_struct *cwq,
                         struct work_struct *work)
@@ -185,10 +187,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
        unsigned long flags;
 
        spin_lock_irqsave(&cwq->lock, flags);
-       set_wq_data(work, cwq);
-       list_add_tail(&work->entry, &cwq->worklist);
-       cwq->insert_sequence++;
-       wake_up(&cwq->more_work);
+       insert_work(cwq, work, 1);
        spin_unlock_irqrestore(&cwq->lock, flags);
 }
 
@@ -206,7 +205,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
 {
        int ret = 0, cpu = get_cpu();
 
-       if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) {
+       if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
                if (unlikely(is_single_threaded(wq)))
                        cpu = singlethread_cpu;
                BUG_ON(!list_empty(&work->entry));
@@ -218,7 +217,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(queue_work);
 
-static void delayed_work_timer_fn(unsigned long __data)
+void delayed_work_timer_fn(unsigned long __data)
 {
        struct delayed_work *dwork = (struct delayed_work *)__data;
        struct workqueue_struct *wq = get_wq_data(&dwork->work);
@@ -233,7 +232,7 @@ static void delayed_work_timer_fn(unsigned long __data)
 /**
  * queue_delayed_work - queue work on a workqueue after delay
  * @wq: workqueue to use
- * @work: delayable work to queue
+ * @dwork: delayable work to queue
  * @delay: number of jiffies to wait before queueing
  *
  * Returns 0 if @work was already on a queue, non-zero otherwise.
@@ -245,10 +244,11 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq,
        struct timer_list *timer = &dwork->timer;
        struct work_struct *work = &dwork->work;
 
+       timer_stats_timer_set_start_info(timer);
        if (delay == 0)
                return queue_work(wq, work);
 
-       if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) {
+       if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
                BUG_ON(timer_pending(timer));
                BUG_ON(!list_empty(&work->entry));
 
@@ -268,7 +268,7 @@ EXPORT_SYMBOL_GPL(queue_delayed_work);
  * queue_delayed_work_on - queue work on specific CPU after delay
  * @cpu: CPU number to execute work on
  * @wq: workqueue to use
- * @work: work to queue
+ * @dwork: work to queue
  * @delay: number of jiffies to wait before queueing
  *
  * Returns 0 if @work was already on a queue, non-zero otherwise.
@@ -280,7 +280,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
        struct timer_list *timer = &dwork->timer;
        struct work_struct *work = &dwork->work;
 
-       if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) {
+       if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
                BUG_ON(timer_pending(timer));
                BUG_ON(!list_empty(&work->entry));
 
@@ -317,11 +317,12 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
                                                struct work_struct, entry);
                work_func_t f = work->func;
 
+               cwq->current_work = work;
                list_del_init(cwq->worklist.next);
                spin_unlock_irqrestore(&cwq->lock, flags);
 
                BUG_ON(get_wq_data(work) != cwq);
-               if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management))
+               if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work)))
                        work_release(work);
                f(work);
 
@@ -337,8 +338,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
                }
 
                spin_lock_irqsave(&cwq->lock, flags);
-               cwq->remove_sequence++;
-               wake_up(&cwq->work_done);
+               cwq->current_work = NULL;
        }
        cwq->run_depth--;
        spin_unlock_irqrestore(&cwq->lock, flags);
@@ -393,6 +393,25 @@ static int worker_thread(void *__cwq)
        return 0;
 }
 
+struct wq_barrier {
+       struct work_struct      work;
+       struct completion       done;
+};
+
+static void wq_barrier_func(struct work_struct *work)
+{
+       struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
+       complete(&barr->done);
+}
+
+static inline void init_wq_barrier(struct wq_barrier *barr)
+{
+       INIT_WORK(&barr->work, wq_barrier_func);
+       __set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
+
+       init_completion(&barr->done);
+}
+
 static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
 {
        if (cwq->thread == current) {
@@ -400,23 +419,18 @@ static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
                 * Probably keventd trying to flush its own queue. So simply run
                 * it by hand rather than deadlocking.
                 */
+               mutex_unlock(&workqueue_mutex);
                run_workqueue(cwq);
+               mutex_lock(&workqueue_mutex);
        } else {
-               DEFINE_WAIT(wait);
-               long sequence_needed;
+               struct wq_barrier barr;
 
-               spin_lock_irq(&cwq->lock);
-               sequence_needed = cwq->insert_sequence;
+               init_wq_barrier(&barr);
+               __queue_work(cwq, &barr.work);
 
-               while (sequence_needed - cwq->remove_sequence > 0) {
-                       prepare_to_wait(&cwq->work_done, &wait,
-                                       TASK_UNINTERRUPTIBLE);
-                       spin_unlock_irq(&cwq->lock);
-                       schedule();
-                       spin_lock_irq(&cwq->lock);
-               }
-               finish_wait(&cwq->work_done, &wait);
-               spin_unlock_irq(&cwq->lock);
+               mutex_unlock(&workqueue_mutex);
+               wait_for_completion(&barr.done);
+               mutex_lock(&workqueue_mutex);
        }
 }
 
@@ -427,32 +441,97 @@ static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
  * Forces execution of the workqueue and blocks until its completion.
  * This is typically used in driver shutdown handlers.
  *
- * This function will sample each workqueue's current insert_sequence number and
- * will sleep until the head sequence is greater than or equal to that.  This
- * means that we sleep until all works which were queued on entry have been
- * handled, but we are not livelocked by new incoming ones.
+ * We sleep until all works which were queued on entry have been handled,
+ * but we are not livelocked by new incoming ones.
  *
  * This function used to run the workqueues itself.  Now we just wait for the
  * helper threads to do it.
  */
 void fastcall flush_workqueue(struct workqueue_struct *wq)
 {
-       might_sleep();
-
+       mutex_lock(&workqueue_mutex);
        if (is_single_threaded(wq)) {
                /* Always use first cpu's area. */
                flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, singlethread_cpu));
        } else {
                int cpu;
 
-               mutex_lock(&workqueue_mutex);
                for_each_online_cpu(cpu)
                        flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
-               mutex_unlock(&workqueue_mutex);
        }
+       mutex_unlock(&workqueue_mutex);
 }
 EXPORT_SYMBOL_GPL(flush_workqueue);
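As a usage illustration of the semantics documented above, a minimal, hypothetical module might drain its private workqueue on unload as sketched below; the names (example_wq, example_work_fn, ...) are invented for this sketch and are not part of the patch.

#include <linux/module.h>
#include <linux/init.h>
#include <linux/workqueue.h>

/* Illustrative only: a private workqueue owned by one driver. */
static struct workqueue_struct *example_wq;
static struct work_struct example_work;

static void example_work_fn(struct work_struct *work)
{
        /* ... operate on driver state ... */
}

static int __init example_init(void)
{
        example_wq = create_singlethread_workqueue("example");
        if (!example_wq)
                return -ENOMEM;
        INIT_WORK(&example_work, example_work_fn);
        queue_work(example_wq, &example_work);
        return 0;
}

static void __exit example_exit(void)
{
        /*
         * Sleep until everything queued before this call has run;
         * new work queued afterwards is not waited for.
         */
        flush_workqueue(example_wq);
        destroy_workqueue(example_wq);
}

module_init(example_init);
module_exit(example_exit);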
 
+static void wait_on_work(struct cpu_workqueue_struct *cwq,
+                               struct work_struct *work)
+{
+       struct wq_barrier barr;
+       int running = 0;
+
+       spin_lock_irq(&cwq->lock);
+       if (unlikely(cwq->current_work == work)) {
+               init_wq_barrier(&barr);
+               insert_work(cwq, &barr.work, 0);
+               running = 1;
+       }
+       spin_unlock_irq(&cwq->lock);
+
+       if (unlikely(running)) {
+               mutex_unlock(&workqueue_mutex);
+               wait_for_completion(&barr.done);
+               mutex_lock(&workqueue_mutex);
+       }
+}
+
+/**
+ * flush_work - block until a work_struct's callback has terminated
+ * @wq: the workqueue on which the work is queued
+ * @work: the work which is to be flushed
+ *
+ * flush_work() will attempt to cancel the work if it is queued.  If the work's
+ * callback appears to be running, flush_work() will block until it has
+ * completed.
+ *
+ * flush_work() is designed to be used when the caller is tearing down data
+ * structures which the callback function operates upon.  It is expected that,
+ * prior to calling flush_work(), the caller has arranged for the work to not
+ * be requeued.
+ */
+void flush_work(struct workqueue_struct *wq, struct work_struct *work)
+{
+       struct cpu_workqueue_struct *cwq;
+
+       mutex_lock(&workqueue_mutex);
+       cwq = get_wq_data(work);
+       /* Was it ever queued ? */
+       if (!cwq)
+               goto out;
+
+       /*
+        * This work can't be re-queued, and the lock above protects us
+        * from take_over_work(), no need to re-check that get_wq_data()
+        * is still the same when we take cwq->lock.
+        */
+       spin_lock_irq(&cwq->lock);
+       list_del_init(&work->entry);
+       work_release(work);
+       spin_unlock_irq(&cwq->lock);
+
+       if (is_single_threaded(wq)) {
+               /* Always use first cpu's area. */
+               wait_on_work(per_cpu_ptr(wq->cpu_wq, singlethread_cpu), work);
+       } else {
+               int cpu;
+
+               for_each_online_cpu(cpu)
+                       wait_on_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
+       }
+out:
+       mutex_unlock(&workqueue_mutex);
+}
+EXPORT_SYMBOL_GPL(flush_work);
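A rough sketch of how a caller might use the interface added here, under the assumption stated in the comment above that the work can no longer be requeued; struct example_dev and its fields are hypothetical, not part of this patch.

#include <linux/workqueue.h>
#include <linux/slab.h>

/* Illustrative only: per-device state owned by some driver. */
struct example_dev {
        struct workqueue_struct *wq;
        struct work_struct      reset_work;
        /* ... */
};

static void example_teardown(struct example_dev *dev)
{
        /*
         * By this point the caller has stopped the paths (irq handler,
         * timer, ...) that could queue_work(dev->wq, &dev->reset_work)
         * again.
         */
        flush_work(dev->wq, &dev->reset_work);

        /* reset_work is now neither pending nor running. */
        destroy_workqueue(dev->wq);
        kfree(dev);
}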
+
 static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq,
                                                   int cpu, int freezeable)
 {
@@ -462,12 +541,9 @@ static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq,
        spin_lock_init(&cwq->lock);
        cwq->wq = wq;
        cwq->thread = NULL;
-       cwq->insert_sequence = 0;
-       cwq->remove_sequence = 0;
        cwq->freezeable = freezeable;
        INIT_LIST_HEAD(&cwq->worklist);
        init_waitqueue_head(&cwq->more_work);
-       init_waitqueue_head(&cwq->work_done);
 
        if (is_single_threaded(wq))
                p = kthread_create(worker_thread, cwq, "%s", wq->name);
@@ -593,8 +669,10 @@ EXPORT_SYMBOL(schedule_work);
  * After waiting for a given time this puts a job in the kernel-global
  * workqueue.
  */
-int fastcall schedule_delayed_work(struct delayed_work *dwork, unsigned long delay)
+int fastcall schedule_delayed_work(struct delayed_work *dwork,
+                                       unsigned long delay)
 {
+       timer_stats_timer_set_start_info(&dwork->timer);
        return queue_delayed_work(keventd_wq, dwork, delay);
 }
 EXPORT_SYMBOL(schedule_delayed_work);
@@ -635,13 +713,15 @@ int schedule_on_each_cpu(work_func_t func)
        if (!works)
                return -ENOMEM;
 
-       mutex_lock(&workqueue_mutex);
+       preempt_disable();              /* CPU hotplug */
        for_each_online_cpu(cpu) {
-               INIT_WORK(per_cpu_ptr(works, cpu), func);
-               __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu),
-                               per_cpu_ptr(works, cpu));
+               struct work_struct *work = per_cpu_ptr(works, cpu);
+
+               INIT_WORK(work, func);
+               set_bit(WORK_STRUCT_PENDING, work_data_bits(work));
+               __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work);
        }
-       mutex_unlock(&workqueue_mutex);
+       preempt_enable();
        flush_workqueue(keventd_wq);
        free_percpu(works);
        return 0;
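For context, a hedged sketch of a schedule_on_each_cpu() caller (the per-CPU counter below is made up): the function queues the callback on every online CPU's keventd thread and, through flush_workqueue() above, returns only once all of them have run.

#include <linux/workqueue.h>
#include <linux/percpu.h>

/* Illustrative only: a per-CPU cache some subsystem wants drained. */
static DEFINE_PER_CPU(int, example_cache);

static void example_drain_cpu(struct work_struct *unused)
{
        /* keventd threads are bound to their CPU, so this clears the local copy. */
        get_cpu_var(example_cache) = 0;
        put_cpu_var(example_cache);
}

static int example_drain_all(void)
{
        /* Returns 0 after every online CPU has executed the callback. */
        return schedule_on_each_cpu(example_drain_cpu);
}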
@@ -653,9 +733,14 @@ void flush_scheduled_work(void)
 }
 EXPORT_SYMBOL(flush_scheduled_work);
 
+void flush_work_keventd(struct work_struct *work)
+{
+       flush_work(keventd_wq, work);
+}
+EXPORT_SYMBOL(flush_work_keventd);
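A minimal sketch of the wrapper's intended use (the work item below is hypothetical): work queued on keventd via schedule_work() can be flushed on the teardown path without the caller having to know about keventd_wq.

#include <linux/workqueue.h>

/* Illustrative only: a work item some driver runs off keventd. */
static void example_event_fn(struct work_struct *work);
static DECLARE_WORK(example_event_work, example_event_fn);

static void example_event_fn(struct work_struct *work)
{
        /* ... */
}

static void example_remove(void)
{
        /*
         * The interrupt handler that called schedule_work() is already
         * gone, so the work cannot be requeued; wait out any in-flight
         * invocation and drop a still-pending one.
         */
        flush_work_keventd(&example_event_work);
}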
+
 /**
- * cancel_rearming_delayed_workqueue - reliably kill off a delayed
- *                     work whose handler rearms the delayed work.
+ * cancel_rearming_delayed_workqueue - reliably kill off a delayed work whose handler rearms the delayed work.
  * @wq:   the controlling workqueue structure
  * @dwork: the delayed work struct
  */
@@ -668,8 +753,7 @@ void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq,
 EXPORT_SYMBOL(cancel_rearming_delayed_workqueue);
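For illustration (the polling work below is invented, and creation of example_poll_wq is omitted), the self-rearming pattern this helper targets looks roughly like:

#include <linux/workqueue.h>
#include <linux/jiffies.h>

/* Illustrative only: a poll that rearms itself on a private workqueue. */
static struct workqueue_struct *example_poll_wq;
static struct delayed_work example_poll_work;

static void example_poll_fn(struct work_struct *work)
{
        /* ... poll the hardware ... */

        /* Rearm: run again in roughly one second. */
        queue_delayed_work(example_poll_wq, &example_poll_work, HZ);
}

static void example_start_polling(void)
{
        INIT_DELAYED_WORK(&example_poll_work, example_poll_fn);
        queue_delayed_work(example_poll_wq, &example_poll_work, HZ);
}

static void example_stop_polling(void)
{
        /*
         * A plain cancel could race with the handler requeueing itself;
         * this helper loops until the work is neither pending nor about
         * to rearm.
         */
        cancel_rearming_delayed_workqueue(example_poll_wq, &example_poll_work);
}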
 
 /**
- * cancel_rearming_delayed_work - reliably kill off a delayed keventd
- *                     work whose handler rearms the delayed work.
+ * cancel_rearming_delayed_work - reliably kill off a delayed keventd work whose handler rearms the delayed work.
  * @dwork: the delayed work struct
  */
 void cancel_rearming_delayed_work(struct delayed_work *dwork)