Revert "dm: only run the queue on completion if congested or no requests pending"
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 2caf492890d64b27a0a88f24f4f04d1778448d9a..de703778d39fddd6377dfc33da4819db43566470 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -86,6 +86,9 @@ struct dm_rq_target_io {
        struct kthread_work work;
        int error;
        union map_info info;
+       struct dm_stats_aux stats_aux;
+       unsigned long duration_jiffies;
+       unsigned n_sectors;
 };
 
 /*
@@ -1046,6 +1049,17 @@ static struct dm_rq_target_io *tio_from_request(struct request *rq)
        return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
 }
 
+static void rq_end_stats(struct mapped_device *md, struct request *orig)
+{
+       if (unlikely(dm_stats_used(&md->stats))) {
+               struct dm_rq_target_io *tio = tio_from_request(orig);
+               tio->duration_jiffies = jiffies - tio->duration_jiffies;
+               dm_stats_account_io(&md->stats, orig->cmd_flags, blk_rq_pos(orig),
+                                   tio->n_sectors, true, tio->duration_jiffies,
+                                   &tio->stats_aux);
+       }
+}
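The helper above closes out the accounting that dm_start_request() (further down in this diff) opens, using the three fields added to struct dm_rq_target_io in the first hunk. Note that duration_jiffies is written twice on purpose: it holds the start tick while the request is in flight and is overwritten with the elapsed tick count at completion, so no second field is needed. A minimal userspace sketch of the same trick, with a fake 100 Hz tick standing in for jiffies:

#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Fake 100 Hz tick standing in for the kernel's jiffies counter. */
static unsigned long fake_jiffies(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (unsigned long)ts.tv_sec * 100 + ts.tv_nsec / 10000000;
}

int main(void)
{
	/* One field, two meanings: start tick at dispatch, elapsed
	 * ticks at completion -- how tio->duration_jiffies is reused. */
	unsigned long duration_jiffies = fake_jiffies();

	usleep(50000);			/* the request is "in flight" */
	duration_jiffies = fake_jiffies() - duration_jiffies;
	printf("elapsed: %lu ticks\n", duration_jiffies);
	return 0;
}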
+
 /*
  * Don't touch any member of the md after calling this function because
  * the md may be freed in dm_put() at the end of this function.
@@ -1053,13 +1067,10 @@ static struct dm_rq_target_io *tio_from_request(struct request *rq)
  */
 static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
 {
-       int nr_requests_pending;
-
        atomic_dec(&md->pending[rw]);
 
        /* nudge anyone waiting on suspend queue */
-       nr_requests_pending = md_in_flight(md);
-       if (!nr_requests_pending)
+       if (!md_in_flight(md))
                wake_up(&md->wait);
 
        /*
@@ -1071,8 +1082,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
        if (run_queue) {
                if (md->queue->mq_ops)
                        blk_mq_run_hw_queues(md->queue, true);
-               else if (!nr_requests_pending ||
-                        (nr_requests_pending >= md->queue->nr_congestion_on))
+               else
                        blk_run_queue_async(md->queue);
        }
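This hunk is the revert named in the subject line: the old request_fn path goes back to kicking the queue unconditionally after every completion. The dropped heuristic only re-ran the queue when it had drained completely or was already at the congestion threshold, which could leave requests waiting whenever the in-flight count sat between those two bounds. An illustrative comparison of the two policies (the threshold value here is made up for the demo):

#include <stdbool.h>
#include <stdio.h>

/* Pre-revert policy, reconstructed from the lines removed above. */
static bool run_queue_before_revert(int pending, int congestion_on)
{
	return pending == 0 || pending >= congestion_on;
}

int main(void)
{
	const int congestion_on = 113;	/* hypothetical queue threshold */

	for (int pending = 0; pending <= 120; pending += 30)
		printf("pending=%3d  before: %d  after revert: 1\n",
		       pending,
		       run_queue_before_revert(pending, congestion_on));
	return 0;
}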
 
@@ -1131,6 +1141,7 @@ static void dm_end_request(struct request *clone, int error)
        }
 
        free_rq_clone(clone);
+       rq_end_stats(md, rq);
        if (!rq->q->mq_ops)
                blk_end_request_all(rq, error);
        else
@@ -1166,13 +1177,14 @@ static void old_requeue_request(struct request *rq)
        spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
-static void dm_requeue_unmapped_original_request(struct mapped_device *md,
-                                                struct request *rq)
+static void dm_requeue_original_request(struct mapped_device *md,
+                                       struct request *rq)
 {
        int rw = rq_data_dir(rq);
 
        dm_unprep_request(rq);
 
+       rq_end_stats(md, rq);
        if (!rq->q->mq_ops)
                old_requeue_request(rq);
        else {
@@ -1183,13 +1195,6 @@ static void dm_requeue_unmapped_original_request(struct mapped_device *md,
        rq_completed(md, rw, false);
 }
 
-static void dm_requeue_unmapped_request(struct request *clone)
-{
-       struct dm_rq_target_io *tio = clone->end_io_data;
-
-       dm_requeue_unmapped_original_request(tio->md, tio->orig);
-}
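With the rename, the clone-based wrapper above becomes dead weight: all it did was unpack tio->md and tio->orig from the clone's end_io_data. The call sites in dm_done() and map_request() below now do that unpacking themselves and pass the original request directly, which also makes the requeue path usable when no clone exists.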
-
 static void old_stop_queue(struct request_queue *q)
 {
        unsigned long flags;
@@ -1253,7 +1258,7 @@ static void dm_done(struct request *clone, int error, bool mapped)
                return;
        else if (r == DM_ENDIO_REQUEUE)
                /* The target wants to requeue the I/O */
-               dm_requeue_unmapped_request(clone);
+               dm_requeue_original_request(tio->md, tio->orig);
        else {
                DMWARN("unimplemented target endio return value: %d", r);
                BUG();
@@ -1271,6 +1276,7 @@ static void dm_softirq_done(struct request *rq)
        int rw;
 
        if (!clone) {
+               rq_end_stats(tio->md, rq);
                rw = rq_data_dir(rq);
                if (!rq->q->mq_ops) {
                        blk_end_request_all(rq, tio->error);
@@ -1998,7 +2004,7 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
                break;
        case DM_MAPIO_REQUEUE:
                /* The target wants to requeue the I/O */
-               dm_requeue_unmapped_request(clone);
+               dm_requeue_original_request(md, tio->orig);
                break;
        default:
                if (r > 0) {
@@ -2021,7 +2027,7 @@ static void map_tio_request(struct kthread_work *work)
        struct mapped_device *md = tio->md;
 
        if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
-               dm_requeue_unmapped_original_request(md, rq);
+               dm_requeue_original_request(md, rq);
 }
 
 static void dm_start_request(struct mapped_device *md, struct request *orig)
@@ -2038,6 +2044,14 @@ static void dm_start_request(struct mapped_device *md, struct request *orig)
                md->last_rq_start_time = ktime_get();
        }
 
+       if (unlikely(dm_stats_used(&md->stats))) {
+               struct dm_rq_target_io *tio = tio_from_request(orig);
+               tio->duration_jiffies = jiffies;
+               tio->n_sectors = blk_rq_sectors(orig);
+               dm_stats_account_io(&md->stats, orig->cmd_flags, blk_rq_pos(orig),
+                                   tio->n_sectors, false, 0, &tio->stats_aux);
+       }
+
        /*
         * Hold the md reference here for the in-flight I/O.
         * We can't rely on the reference count by device opener,
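dm_start_request() opens the window that rq_end_stats() closes: same position and sector count, end=false with a zero duration going in, end=true with the elapsed jiffies coming out, and stats_aux carrying per-request state between the two calls. A stubbed userspace sketch of the call pattern (the stub's signature is simplified, not the real dm_stats_account_io()):

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for dm_stats_account_io(); the real kernel
 * function also takes the stats context, cmd_flags and a dm_stats_aux
 * pointer. Only the start/end pairing matters here. */
static void account_io_stub(unsigned long long pos, unsigned sectors,
			    bool end, unsigned long duration_jiffies)
{
	printf("%-5s pos=%llu sectors=%u duration=%lu\n",
	       end ? "end" : "start", pos, sectors, duration_jiffies);
}

int main(void)
{
	unsigned long start = 1000, now = 1007;	/* fake jiffies values */

	account_io_stub(2048, 8, false, 0);		/* dm_start_request() */
	account_io_stub(2048, 8, true, now - start);	/* rq_end_stats()    */
	return 0;
}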
@@ -2168,7 +2182,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
                         * the query about congestion status of request_queue
                         */
                        if (dm_request_based(md))
-                               r = md->queue->backing_dev_info.state &
+                               r = md->queue->backing_dev_info.wb.state &
                                    bdi_bits;
                        else
                                r = dm_table_any_congested(map, bdi_bits);
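The .wb.state hop is an adaptation to the writeback rework that moved per-bdi state into an embedded struct bdi_writeback; the congestion bits tested here now live one level down. Roughly the shape of the structures involved (trimmed to the relevant member, not the full kernel definitions):

/* Trimmed sketch of the post-rework layout; the real structs carry
 * many more members. */
struct bdi_writeback {
	unsigned long state;		/* was backing_dev_info.state */
	/* ... */
};

struct backing_dev_info {
	struct bdi_writeback wb;	/* root writeback info for this bdi */
	/* ... */
};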
@@ -2261,6 +2275,40 @@ static void dm_init_old_md_queue(struct mapped_device *md)
        blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
 }
 
+static void cleanup_mapped_device(struct mapped_device *md)
+{
+       cleanup_srcu_struct(&md->io_barrier);
+
+       if (md->wq)
+               destroy_workqueue(md->wq);
+       if (md->kworker_task)
+               kthread_stop(md->kworker_task);
+       if (md->io_pool)
+               mempool_destroy(md->io_pool);
+       if (md->rq_pool)
+               mempool_destroy(md->rq_pool);
+       if (md->bs)
+               bioset_free(md->bs);
+
+       if (md->disk) {
+               spin_lock(&_minor_lock);
+               md->disk->private_data = NULL;
+               spin_unlock(&_minor_lock);
+               if (blk_get_integrity(md->disk))
+                       blk_integrity_unregister(md->disk);
+               del_gendisk(md->disk);
+               put_disk(md->disk);
+       }
+
+       if (md->queue)
+               blk_cleanup_queue(md->queue);
+
+       if (md->bdev) {
+               bdput(md->bdev);
+               md->bdev = NULL;
+       }
+}
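Every branch in cleanup_mapped_device() guards against a member that was never allocated, which is what lets one routine serve both the alloc_dev() error paths and free_dev() later in this diff. The pattern in miniature (userspace sketch with plain malloc/free in place of the kernel allocators):

#include <stdlib.h>

struct dev {
	void *wq;
	void *disk;
	void *queue;
};

/* Safe to call with any subset of members initialized, so a
 * constructor can bail out at any point and still share the one
 * cleanup routine with the destructor. */
static void cleanup_dev(struct dev *d)
{
	if (d->wq)
		free(d->wq);
	if (d->disk)
		free(d->disk);
	if (d->queue)
		free(d->queue);
	d->wq = d->disk = d->queue = NULL;
}

int main(void)
{
	struct dev d = { .wq = malloc(16) };	/* partially constructed */

	cleanup_dev(&d);		/* no NULL derefs, no leaks */
	return 0;
}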
+
 /*
  * Allocate and initialise a blank device with a given minor.
  */
@@ -2306,13 +2354,13 @@ static struct mapped_device *alloc_dev(int minor)
 
        md->queue = blk_alloc_queue(GFP_KERNEL);
        if (!md->queue)
-               goto bad_queue;
+               goto bad;
 
        dm_init_md_queue(md);
 
        md->disk = alloc_disk(1);
        if (!md->disk)
-               goto bad_disk;
+               goto bad;
 
        atomic_set(&md->pending[0], 0);
        atomic_set(&md->pending[1], 0);
@@ -2333,11 +2381,11 @@ static struct mapped_device *alloc_dev(int minor)
 
        md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0);
        if (!md->wq)
-               goto bad_thread;
+               goto bad;
 
        md->bdev = bdget_disk(md->disk, 0);
        if (!md->bdev)
-               goto bad_bdev;
+               goto bad;
 
        bio_init(&md->flush_bio);
        md->flush_bio.bi_bdev = md->bdev;
@@ -2354,15 +2402,8 @@ static struct mapped_device *alloc_dev(int minor)
 
        return md;
 
-bad_bdev:
-       destroy_workqueue(md->wq);
-bad_thread:
-       del_gendisk(md->disk);
-       put_disk(md->disk);
-bad_disk:
-       blk_cleanup_queue(md->queue);
-bad_queue:
-       cleanup_srcu_struct(&md->io_barrier);
+bad:
+       cleanup_mapped_device(md);
 bad_io_barrier:
        free_minor(minor);
 bad_minor:
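A NULL-tolerant cleanup routine is exactly what makes the single bad: label workable: instead of a goto chain that must unwind in reverse allocation order, every failure jumps to one label and cleanup_mapped_device() skips whatever was never set up. The single-label style in miniature (illustrative userspace code, not the kernel function):

#include <stdlib.h>

struct dev { void *a, *b; };

static void cleanup_dev(struct dev *d)	/* NULL-tolerant, as above */
{
	free(d->a);
	free(d->b);
}

/* Single-label form: each failure path jumps to the same place. */
static struct dev *make_dev(void)
{
	struct dev *d = calloc(1, sizeof(*d));

	if (!d)
		return NULL;
	if (!(d->a = malloc(16)))
		goto bad;
	if (!(d->b = malloc(16)))
		goto bad;
	return d;
bad:
	cleanup_dev(d);
	free(d);
	return NULL;
}

int main(void)
{
	struct dev *d = make_dev();

	if (d) {
		cleanup_dev(d);
		free(d);
	}
	return 0;
}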
@@ -2379,32 +2420,13 @@ static void free_dev(struct mapped_device *md)
        int minor = MINOR(disk_devt(md->disk));
 
        unlock_fs(md);
-       destroy_workqueue(md->wq);
 
-       if (md->kworker_task)
-               kthread_stop(md->kworker_task);
-       if (md->io_pool)
-               mempool_destroy(md->io_pool);
-       if (md->rq_pool)
-               mempool_destroy(md->rq_pool);
-       if (md->bs)
-               bioset_free(md->bs);
+       cleanup_mapped_device(md);
+       if (md->use_blk_mq)
+               blk_mq_free_tag_set(&md->tag_set);
 
-       cleanup_srcu_struct(&md->io_barrier);
        free_table_devices(&md->table_devices);
        dm_stats_cleanup(&md->stats);
-
-       spin_lock(&_minor_lock);
-       md->disk->private_data = NULL;
-       spin_unlock(&_minor_lock);
-       if (blk_get_integrity(md->disk))
-               blk_integrity_unregister(md->disk);
-       del_gendisk(md->disk);
-       put_disk(md->disk);
-       blk_cleanup_queue(md->queue);
-       if (md->use_blk_mq)
-               blk_mq_free_tag_set(&md->tag_set);
-       bdput(md->bdev);
        free_minor(minor);
 
        module_put(THIS_MODULE);
@@ -2765,6 +2787,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
                /* Direct call is fine since .queue_rq allows allocations */
                if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
                        /* Undo dm_start_request() before requeuing */
+                       rq_end_stats(md, rq);
                        rq_completed(md, rq_data_dir(rq), false);
                        return BLK_MQ_RQ_QUEUE_BUSY;
                }
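The rq_end_stats() call here mirrors the request_fn requeue path earlier in the diff: dm_start_request() has already opened a stats window for this request, so it must be closed before the request is handed back to blk-mq with BLK_MQ_RQ_QUEUE_BUSY; otherwise the re-dispatch would open a second, unbalanced window for the same I/O.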