implement flush_work()
[linux-block.git] kernel/workqueue.c
index db49886bfae17271ee19709a984c57c208c5065b..918d55267a12a98c43aa1c9ee43e17644712af8d 100644
 /*
  * The per-CPU workqueue (if single thread, we always use the first
  * possible cpu).
- *
- * The sequence counters are for flush_scheduled_work().  It wants to wait
- * until all currently-scheduled works are completed, but it doesn't
- * want to be livelocked by new, incoming ones.  So it waits until
- * remove_sequence is >= the insert_sequence which pertained when
- * flush_scheduled_work() was called.
  */
 struct cpu_workqueue_struct {
 
        spinlock_t lock;
 
-       long remove_sequence;   /* Least-recently added (next to run) */
-       long insert_sequence;   /* Next to add */
-
        struct list_head worklist;
        wait_queue_head_t more_work;
-       wait_queue_head_t work_done;
 
        struct workqueue_struct *wq;
        struct task_struct *thread;
+       struct work_struct *current_work;
 
        int run_depth;          /* Detect run_workqueue() recursion depth */
 
@@ -96,13 +87,13 @@ static inline void set_wq_data(struct work_struct *work, void *wq)
        BUG_ON(!work_pending(work));
 
        new = (unsigned long) wq | (1UL << WORK_STRUCT_PENDING);
-       new |= work->management & WORK_STRUCT_FLAG_MASK;
-       work->management = new;
+       new |= WORK_STRUCT_FLAG_MASK & *work_data_bits(work);
+       atomic_long_set(&work->data, new);
 }
 
 static inline void *get_wq_data(struct work_struct *work)
 {
-       return (void *) (work->management & WORK_STRUCT_WQ_DATA_MASK);
+       return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
 }
 
 static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work)
@@ -130,16 +121,16 @@ static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work
            && work_pending(work)
            && !list_empty(&work->entry)) {
                work_func_t f = work->func;
+               cwq->current_work = work;
                list_del_init(&work->entry);
                spin_unlock_irqrestore(&cwq->lock, flags);
 
-               if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management))
+               if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work)))
                        work_release(work);
                f(work);
 
                spin_lock_irqsave(&cwq->lock, flags);
-               cwq->remove_sequence++;
-               wake_up(&cwq->work_done);
+               cwq->current_work = NULL;
                ret = 1;
        }
        spin_unlock_irqrestore(&cwq->lock, flags);
@@ -178,6 +169,17 @@ int fastcall run_scheduled_work(struct work_struct *work)
 }
 EXPORT_SYMBOL(run_scheduled_work);
 
+static void insert_work(struct cpu_workqueue_struct *cwq,
+                               struct work_struct *work, int tail)
+{
+       set_wq_data(work, cwq);
+       if (tail)
+               list_add_tail(&work->entry, &cwq->worklist);
+       else
+               list_add(&work->entry, &cwq->worklist);
+       wake_up(&cwq->more_work);
+}
+
 /* Preempt must be disabled. */
 static void __queue_work(struct cpu_workqueue_struct *cwq,
                         struct work_struct *work)
@@ -185,10 +187,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
        unsigned long flags;
 
        spin_lock_irqsave(&cwq->lock, flags);
-       set_wq_data(work, cwq);
-       list_add_tail(&work->entry, &cwq->worklist);
-       cwq->insert_sequence++;
-       wake_up(&cwq->more_work);
+       insert_work(cwq, work, 1);
        spin_unlock_irqrestore(&cwq->lock, flags);
 }
 
@@ -206,7 +205,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
 {
        int ret = 0, cpu = get_cpu();
 
-       if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) {
+       if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
                if (unlikely(is_single_threaded(wq)))
                        cpu = singlethread_cpu;
                BUG_ON(!list_empty(&work->entry));
@@ -218,7 +217,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(queue_work);
 
-static void delayed_work_timer_fn(unsigned long __data)
+void delayed_work_timer_fn(unsigned long __data)
 {
        struct delayed_work *dwork = (struct delayed_work *)__data;
        struct workqueue_struct *wq = get_wq_data(&dwork->work);
@@ -233,7 +232,7 @@ static void delayed_work_timer_fn(unsigned long __data)
 /**
  * queue_delayed_work - queue work on a workqueue after delay
  * @wq: workqueue to use
- * @work: delayable work to queue
+ * @dwork: delayable work to queue
  * @delay: number of jiffies to wait before queueing
  *
  * Returns 0 if @work was already on a queue, non-zero otherwise.
@@ -245,10 +244,11 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq,
        struct timer_list *timer = &dwork->timer;
        struct work_struct *work = &dwork->work;
 
+       timer_stats_timer_set_start_info(timer);
        if (delay == 0)
                return queue_work(wq, work);
 
-       if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) {
+       if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
                BUG_ON(timer_pending(timer));
                BUG_ON(!list_empty(&work->entry));
 
@@ -268,7 +268,7 @@ EXPORT_SYMBOL_GPL(queue_delayed_work);
  * queue_delayed_work_on - queue work on specific CPU after delay
  * @cpu: CPU number to execute work on
  * @wq: workqueue to use
- * @work: work to queue
+ * @dwork: work to queue
  * @delay: number of jiffies to wait before queueing
  *
  * Returns 0 if @work was already on a queue, non-zero otherwise.
@@ -280,7 +280,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
        struct timer_list *timer = &dwork->timer;
        struct work_struct *work = &dwork->work;
 
-       if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) {
+       if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
                BUG_ON(timer_pending(timer));
                BUG_ON(!list_empty(&work->entry));
 
@@ -317,11 +317,12 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
                                                struct work_struct, entry);
                work_func_t f = work->func;
 
+               cwq->current_work = work;
                list_del_init(cwq->worklist.next);
                spin_unlock_irqrestore(&cwq->lock, flags);
 
                BUG_ON(get_wq_data(work) != cwq);
-               if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management))
+               if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work)))
                        work_release(work);
                f(work);
 
@@ -337,8 +338,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
                }
 
                spin_lock_irqsave(&cwq->lock, flags);
-               cwq->remove_sequence++;
-               wake_up(&cwq->work_done);
+               cwq->current_work = NULL;
        }
        cwq->run_depth--;
        spin_unlock_irqrestore(&cwq->lock, flags);
@@ -393,6 +393,25 @@ static int worker_thread(void *__cwq)
        return 0;
 }
 
+struct wq_barrier {
+       struct work_struct      work;
+       struct completion       done;
+};
+
+static void wq_barrier_func(struct work_struct *work)
+{
+       struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
+       complete(&barr->done);
+}
+
+static inline void init_wq_barrier(struct wq_barrier *barr)
+{
+       INIT_WORK(&barr->work, wq_barrier_func);
+       __set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
+
+       init_completion(&barr->done);
+}
+
 static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
 {
        if (cwq->thread == current) {
@@ -400,23 +419,18 @@ static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
                 * Probably keventd trying to flush its own queue. So simply run
                 * it by hand rather than deadlocking.
                 */
+               mutex_unlock(&workqueue_mutex);
                run_workqueue(cwq);
+               mutex_lock(&workqueue_mutex);
        } else {
-               DEFINE_WAIT(wait);
-               long sequence_needed;
+               struct wq_barrier barr;
 
-               spin_lock_irq(&cwq->lock);
-               sequence_needed = cwq->insert_sequence;
+               init_wq_barrier(&barr);
+               __queue_work(cwq, &barr.work);
 
-               while (sequence_needed - cwq->remove_sequence > 0) {
-                       prepare_to_wait(&cwq->work_done, &wait,
-                                       TASK_UNINTERRUPTIBLE);
-                       spin_unlock_irq(&cwq->lock);
-                       schedule();
-                       spin_lock_irq(&cwq->lock);
-               }
-               finish_wait(&cwq->work_done, &wait);
-               spin_unlock_irq(&cwq->lock);
+               mutex_unlock(&workqueue_mutex);
+               wait_for_completion(&barr.done);
+               mutex_lock(&workqueue_mutex);
        }
 }
 
@@ -427,32 +441,97 @@ static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
  * Forces execution of the workqueue and blocks until its completion.
  * This is typically used in driver shutdown handlers.
  *
- * This function will sample each workqueue's current insert_sequence number and
- * will sleep until the head sequence is greater than or equal to that.  This
- * means that we sleep until all works which were queued on entry have been
- * handled, but we are not livelocked by new incoming ones.
+ * We sleep until all works which were queued on entry have been handled,
+ * but we are not livelocked by new incoming ones.
  *
  * This function used to run the workqueues itself.  Now we just wait for the
  * helper threads to do it.
  */
 void fastcall flush_workqueue(struct workqueue_struct *wq)
 {
-       might_sleep();
-
+       mutex_lock(&workqueue_mutex);
        if (is_single_threaded(wq)) {
                /* Always use first cpu's area. */
                flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, singlethread_cpu));
        } else {
                int cpu;
 
-               mutex_lock(&workqueue_mutex);
                for_each_online_cpu(cpu)
                        flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
-               mutex_unlock(&workqueue_mutex);
        }
+       mutex_unlock(&workqueue_mutex);
 }
 EXPORT_SYMBOL_GPL(flush_workqueue);
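As a usage illustration of the semantics documented above, a minimal, hypothetical module might drain its private workqueue on unload as sketched below; the names (example_wq, example_work_fn, ...) are invented for this sketch and are not part of the patch.

#include <linux/module.h>
#include <linux/init.h>
#include <linux/workqueue.h>

/* Illustrative only: a private workqueue owned by one driver. */
static struct workqueue_struct *example_wq;
static struct work_struct example_work;

static void example_work_fn(struct work_struct *work)
{
        /* ... operate on driver state ... */
}

static int __init example_init(void)
{
        example_wq = create_singlethread_workqueue("example");
        if (!example_wq)
                return -ENOMEM;
        INIT_WORK(&example_work, example_work_fn);
        queue_work(example_wq, &example_work);
        return 0;
}

static void __exit example_exit(void)
{
        /*
         * Sleep until everything queued before this call has run;
         * new work queued afterwards is not waited for.
         */
        flush_workqueue(example_wq);
        destroy_workqueue(example_wq);
}

module_init(example_init);
module_exit(example_exit);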
 
+static void wait_on_work(struct cpu_workqueue_struct *cwq,
+                               struct work_struct *work)
+{
+       struct wq_barrier barr;
+       int running = 0;
+
+       spin_lock_irq(&cwq->lock);
+       if (unlikely(cwq->current_work == work)) {
+               init_wq_barrier(&barr);
+               insert_work(cwq, &barr.work, 0);
+               running = 1;
+       }
+       spin_unlock_irq(&cwq->lock);
+
+       if (unlikely(running)) {
+               mutex_unlock(&workqueue_mutex);
+               wait_for_completion(&barr.done);
+               mutex_lock(&workqueue_mutex);
+       }
+}
+
+/**
+ * flush_work - block until a work_struct's callback has terminated
+ * @wq: the workqueue on which the work is queued
+ * @work: the work which is to be flushed
+ *
+ * flush_work() will attempt to cancel the work if it is queued.  If the work's
+ * callback appears to be running, flush_work() will block until it has
+ * completed.
+ *
+ * flush_work() is designed to be used when the caller is tearing down data
+ * structures which the callback function operates upon.  It is expected that,
+ * prior to calling flush_work(), the caller has arranged for the work to not
+ * be requeued.
+ */
+void flush_work(struct workqueue_struct *wq, struct work_struct *work)
+{
+       struct cpu_workqueue_struct *cwq;
+
+       mutex_lock(&workqueue_mutex);
+       cwq = get_wq_data(work);
+       /* Was it ever queued ? */
+       if (!cwq)
+               goto out;
+
+       /*
+        * This work can't be re-queued, and the lock above protects us
+        * from take_over_work(), no need to re-check that get_wq_data()
+        * is still the same when we take cwq->lock.
+        */
+       spin_lock_irq(&cwq->lock);
+       list_del_init(&work->entry);
+       work_release(work);
+       spin_unlock_irq(&cwq->lock);
+
+       if (is_single_threaded(wq)) {
+               /* Always use first cpu's area. */
+               wait_on_work(per_cpu_ptr(wq->cpu_wq, singlethread_cpu), work);
+       } else {
+               int cpu;
+
+               for_each_online_cpu(cpu)
+                       wait_on_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
+       }
+out:
+       mutex_unlock(&workqueue_mutex);
+}
+EXPORT_SYMBOL_GPL(flush_work);
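A rough sketch of how a caller might use the interface added here, under the assumption stated in the comment above that the work can no longer be requeued; struct example_dev and its fields are hypothetical, not part of this patch.

#include <linux/workqueue.h>
#include <linux/slab.h>

/* Illustrative only: per-device state owned by some driver. */
struct example_dev {
        struct workqueue_struct *wq;
        struct work_struct      reset_work;
        /* ... */
};

static void example_teardown(struct example_dev *dev)
{
        /*
         * By this point the caller has stopped the paths (irq handler,
         * timer, ...) that could queue_work(dev->wq, &dev->reset_work)
         * again.
         */
        flush_work(dev->wq, &dev->reset_work);

        /* reset_work is now neither pending nor running. */
        destroy_workqueue(dev->wq);
        kfree(dev);
}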
+
 static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq,
                                                   int cpu, int freezeable)
 {
@@ -462,12 +541,9 @@ static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq,
        spin_lock_init(&cwq->lock);
        cwq->wq = wq;
        cwq->thread = NULL;
-       cwq->insert_sequence = 0;
-       cwq->remove_sequence = 0;
        cwq->freezeable = freezeable;
        INIT_LIST_HEAD(&cwq->worklist);
        init_waitqueue_head(&cwq->more_work);
-       init_waitqueue_head(&cwq->work_done);
 
        if (is_single_threaded(wq))
                p = kthread_create(worker_thread, cwq, "%s", wq->name);
@@ -593,8 +669,10 @@ EXPORT_SYMBOL(schedule_work);
  * After waiting for a given time this puts a job in the kernel-global
  * workqueue.
  */
-int fastcall schedule_delayed_work(struct delayed_work *dwork, unsigned long delay)
+int fastcall schedule_delayed_work(struct delayed_work *dwork,
+                                       unsigned long delay)
 {
+       timer_stats_timer_set_start_info(&dwork->timer);
        return queue_delayed_work(keventd_wq, dwork, delay);
 }
 EXPORT_SYMBOL(schedule_delayed_work);
@@ -635,13 +713,15 @@ int schedule_on_each_cpu(work_func_t func)
        if (!works)
                return -ENOMEM;
 
-       mutex_lock(&workqueue_mutex);
+       preempt_disable();              /* CPU hotplug */
        for_each_online_cpu(cpu) {
-               INIT_WORK(per_cpu_ptr(works, cpu), func);
-               __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu),
-                               per_cpu_ptr(works, cpu));
+               struct work_struct *work = per_cpu_ptr(works, cpu);
+
+               INIT_WORK(work, func);
+               set_bit(WORK_STRUCT_PENDING, work_data_bits(work));
+               __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work);
        }
-       mutex_unlock(&workqueue_mutex);
+       preempt_enable();
        flush_workqueue(keventd_wq);
        free_percpu(works);
        return 0;
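For context, a hedged sketch of a schedule_on_each_cpu() caller (the per-CPU counter below is made up): the function queues the callback on every online CPU's keventd thread and, through flush_workqueue() above, returns only once all of them have run.

#include <linux/workqueue.h>
#include <linux/percpu.h>

/* Illustrative only: a per-CPU cache some subsystem wants drained. */
static DEFINE_PER_CPU(int, example_cache);

static void example_drain_cpu(struct work_struct *unused)
{
        /* keventd threads are bound to their CPU, so this clears the local copy. */
        get_cpu_var(example_cache) = 0;
        put_cpu_var(example_cache);
}

static int example_drain_all(void)
{
        /* Returns 0 after every online CPU has executed the callback. */
        return schedule_on_each_cpu(example_drain_cpu);
}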
@@ -653,9 +733,14 @@ void flush_scheduled_work(void)
 }
 EXPORT_SYMBOL(flush_scheduled_work);
 
+void flush_work_keventd(struct work_struct *work)
+{
+       flush_work(keventd_wq, work);
+}
+EXPORT_SYMBOL(flush_work_keventd);
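A minimal sketch of the wrapper's intended use (the work item below is hypothetical): work queued on keventd via schedule_work() can be flushed on the teardown path without the caller having to know about keventd_wq.

#include <linux/workqueue.h>

/* Illustrative only: a work item some driver runs off keventd. */
static void example_event_fn(struct work_struct *work);
static DECLARE_WORK(example_event_work, example_event_fn);

static void example_event_fn(struct work_struct *work)
{
        /* ... */
}

static void example_remove(void)
{
        /*
         * The interrupt handler that called schedule_work() is already
         * gone, so the work cannot be requeued; wait out any in-flight
         * invocation and drop a still-pending one.
         */
        flush_work_keventd(&example_event_work);
}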
+
 /**
- * cancel_rearming_delayed_workqueue - reliably kill off a delayed
- *                     work whose handler rearms the delayed work.
+ * cancel_rearming_delayed_workqueue - reliably kill off a delayed work whose handler rearms the delayed work.
  * @wq:   the controlling workqueue structure
  * @dwork: the delayed work struct
  */
@@ -668,8 +753,7 @@ void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq,
 EXPORT_SYMBOL(cancel_rearming_delayed_workqueue);
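For illustration (the polling work below is invented, and creation of example_poll_wq is omitted), the self-rearming pattern this helper targets looks roughly like:

#include <linux/workqueue.h>
#include <linux/jiffies.h>

/* Illustrative only: a poll that rearms itself on a private workqueue. */
static struct workqueue_struct *example_poll_wq;
static struct delayed_work example_poll_work;

static void example_poll_fn(struct work_struct *work)
{
        /* ... poll the hardware ... */

        /* Rearm: run again in roughly one second. */
        queue_delayed_work(example_poll_wq, &example_poll_work, HZ);
}

static void example_start_polling(void)
{
        INIT_DELAYED_WORK(&example_poll_work, example_poll_fn);
        queue_delayed_work(example_poll_wq, &example_poll_work, HZ);
}

static void example_stop_polling(void)
{
        /*
         * A plain cancel could race with the handler requeueing itself;
         * this helper loops until the work is neither pending nor about
         * to rearm.
         */
        cancel_rearming_delayed_workqueue(example_poll_wq, &example_poll_work);
}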
 
 /**
- * cancel_rearming_delayed_work - reliably kill off a delayed keventd
- *                     work whose handler rearms the delayed work.
+ * cancel_rearming_delayed_work - reliably kill off a delayed keventd work whose handler rearms the delayed work.
  * @dwork: the delayed work struct
  */
 void cancel_rearming_delayed_work(struct delayed_work *dwork)