block: add a bi_error field to struct bio
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index a930b72314ac985da702f8b47a8054a75b2e2ba8..7f367fcace035a23e5da8c2ccef6b43384445221 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -86,6 +86,9 @@ struct dm_rq_target_io {
        struct kthread_work work;
        int error;
        union map_info info;
+       struct dm_stats_aux stats_aux;
+       unsigned long duration_jiffies;
+       unsigned n_sectors;
 };
 
 /*
@@ -941,7 +944,8 @@ static void dec_pending(struct dm_io *io, int error)
                } else {
                        /* done with normal IO or empty flush */
                        trace_block_bio_complete(md->queue, bio, io_error);
-                       bio_endio(bio, io_error);
+                       bio->bi_error = io_error;
+                       bio_endio(bio);
                }
        }
 }
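
Illustration (not part of the patch; the mock type and names are mine): the hunk above is the core of the new convention — completion status is stored in the bio itself, and bio_endio() loses its error argument. A minimal user-space model:

#include <stdio.h>

/* Mock of struct bio, reduced to the field this series adds. */
struct bio {
	int bi_error;
};

/* Models the new-style bio_endio(): status rides inside the bio. */
static void bio_endio_model(struct bio *bio)
{
	printf("bio complete, bi_error=%d\n", bio->bi_error);
}

int main(void)
{
	struct bio bio = { .bi_error = 0 };

	/* Old style was bio_endio(&bio, -EIO); new style stores first. */
	bio.bi_error = -5;	/* -EIO */
	bio_endio_model(&bio);
	return 0;
}
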
@@ -954,17 +958,15 @@ static void disable_write_same(struct mapped_device *md)
        limits->max_write_same_sectors = 0;
 }
 
-static void clone_endio(struct bio *bio, int error)
+static void clone_endio(struct bio *bio)
 {
+       int error = bio->bi_error;
        int r = error;
        struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
        struct dm_io *io = tio->io;
        struct mapped_device *md = tio->io->md;
        dm_endio_fn endio = tio->ti->type->end_io;
 
-       if (!bio_flagged(bio, BIO_UPTODATE) && !error)
-               error = -EIO;
-
        if (endio) {
                r = endio(tio->ti, bio, error);
                if (r < 0 || r == DM_ENDIO_REQUEUE)
@@ -993,7 +995,7 @@ static void clone_endio(struct bio *bio, int error)
 /*
  * Partial completion handling for request-based dm
  */
-static void end_clone_bio(struct bio *clone, int error)
+static void end_clone_bio(struct bio *clone)
 {
        struct dm_rq_clone_bio_info *info =
                container_of(clone, struct dm_rq_clone_bio_info, clone);
@@ -1010,13 +1012,13 @@ static void end_clone_bio(struct bio *clone, int error)
                 * the remainder.
                 */
                return;
-       else if (error) {
+       else if (clone->bi_error) {
                /*
                 * Don't notify the upper layer of the error yet.
                 * The error handling decision is made by the target driver,
                 * when the request is completed.
                 */
-               tio->error = error;
+               tio->error = clone->bi_error;
                return;
        }
 
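
Illustration (a toy model, not the patch): end_clone_bio() above defers errors — the first failing clone bio stamps tio->error and returns; the original request's fate is decided once, at request completion, by the target driver. A sketch of that stash-first-error pattern:

#include <stdio.h>

struct tio_model {
	int error;	/* first error seen across all clone bios */
};

static void end_clone_model(struct tio_model *tio, int clone_error)
{
	if (tio->error)
		return;				/* already marked failed */
	if (clone_error)
		tio->error = clone_error;	/* stash, report later */
}

int main(void)
{
	struct tio_model tio = { 0 };
	int clone_errors[] = { 0, -5, 0 };	/* second clone fails */

	for (int i = 0; i < 3; i++)
		end_clone_model(&tio, clone_errors[i]);
	printf("request-level error: %d\n", tio.error);
	return 0;
}
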
@@ -1046,6 +1048,17 @@ static struct dm_rq_target_io *tio_from_request(struct request *rq)
        return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
 }
 
+static void rq_end_stats(struct mapped_device *md, struct request *orig)
+{
+       if (unlikely(dm_stats_used(&md->stats))) {
+               struct dm_rq_target_io *tio = tio_from_request(orig);
+               tio->duration_jiffies = jiffies - tio->duration_jiffies;
+               dm_stats_account_io(&md->stats, orig->cmd_flags, blk_rq_pos(orig),
+                                   tio->n_sectors, true, tio->duration_jiffies,
+                                   &tio->stats_aux);
+       }
+}
+
 /*
  * Don't touch any member of the md after calling this function because
  * the md may be freed in dm_put() at the end of this function.
@@ -1082,13 +1095,11 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
        dm_put(md);
 }
 
-static void free_rq_clone(struct request *clone, bool must_be_mapped)
+static void free_rq_clone(struct request *clone)
 {
        struct dm_rq_target_io *tio = clone->end_io_data;
        struct mapped_device *md = tio->md;
 
-       WARN_ON_ONCE(must_be_mapped && !clone->q);
-
        blk_rq_unprep_clone(clone);
 
        if (md->type == DM_TYPE_MQ_REQUEST_BASED)
@@ -1132,7 +1143,8 @@ static void dm_end_request(struct request *clone, int error)
                        rq->sense_len = clone->sense_len;
        }
 
-       free_rq_clone(clone, true);
+       free_rq_clone(clone);
+       rq_end_stats(md, rq);
        if (!rq->q->mq_ops)
                blk_end_request_all(rq, error);
        else
@@ -1151,7 +1163,7 @@ static void dm_unprep_request(struct request *rq)
        }
 
        if (clone)
-               free_rq_clone(clone, false);
+               free_rq_clone(clone);
 }
 
 /*
@@ -1164,16 +1176,18 @@ static void old_requeue_request(struct request *rq)
 
        spin_lock_irqsave(q->queue_lock, flags);
        blk_requeue_request(q, rq);
+       blk_run_queue_async(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
-static void dm_requeue_unmapped_original_request(struct mapped_device *md,
-                                                struct request *rq)
+static void dm_requeue_original_request(struct mapped_device *md,
+                                       struct request *rq)
 {
        int rw = rq_data_dir(rq);
 
        dm_unprep_request(rq);
 
+       rq_end_stats(md, rq);
        if (!rq->q->mq_ops)
                old_requeue_request(rq);
        else {
@@ -1184,13 +1198,6 @@ static void dm_requeue_unmapped_original_request(struct mapped_device *md,
        rq_completed(md, rw, false);
 }
 
-static void dm_requeue_unmapped_request(struct request *clone)
-{
-       struct dm_rq_target_io *tio = clone->end_io_data;
-
-       dm_requeue_unmapped_original_request(tio->md, tio->orig);
-}
-
 static void old_stop_queue(struct request_queue *q)
 {
        unsigned long flags;
@@ -1254,7 +1261,7 @@ static void dm_done(struct request *clone, int error, bool mapped)
                return;
        else if (r == DM_ENDIO_REQUEUE)
                /* The target wants to requeue the I/O */
-               dm_requeue_unmapped_request(clone);
+               dm_requeue_original_request(tio->md, tio->orig);
        else {
                DMWARN("unimplemented target endio return value: %d", r);
                BUG();
@@ -1272,6 +1279,7 @@ static void dm_softirq_done(struct request *rq)
        int rw;
 
        if (!clone) {
+               rq_end_stats(tio->md, rq);
                rw = rq_data_dir(rq);
                if (!rq->q->mq_ops) {
                        blk_end_request_all(rq, tio->error);
@@ -1724,8 +1732,7 @@ static int dm_merge_bvec(struct request_queue *q,
        struct mapped_device *md = q->queuedata;
        struct dm_table *map = dm_get_live_table_fast(md);
        struct dm_target *ti;
-       sector_t max_sectors;
-       int max_size = 0;
+       sector_t max_sectors, max_size = 0;
 
        if (unlikely(!map))
                goto out;
@@ -1740,8 +1747,16 @@ static int dm_merge_bvec(struct request_queue *q,
        max_sectors = min(max_io_len(bvm->bi_sector, ti),
                          (sector_t) queue_max_sectors(q));
        max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
-       if (unlikely(max_size < 0)) /* this shouldn't _ever_ happen */
-               max_size = 0;
+
+       /*
+        * FIXME: this stop-gap fix _must_ be cleaned up (by passing a sector_t
+        * to the targets' merge function since it holds sectors not bytes).
+        * Just doing this as an interim fix for stable@ because the more
+        * comprehensive cleanup of switching to sector_t will impact every
+        * DM target that implements a ->merge hook.
+        */
+       if (max_size > INT_MAX)
+               max_size = INT_MAX;
 
        /*
         * merge_bvec_fn() returns number of bytes
@@ -1749,7 +1764,7 @@ static int dm_merge_bvec(struct request_queue *q,
         * max is precomputed maximal io size
         */
        if (max_size && ti->type->merge)
-               max_size = ti->type->merge(ti, bvm, biovec, max_size);
+               max_size = ti->type->merge(ti, bvm, biovec, (int) max_size);
        /*
         * If the target doesn't support merge method and some of the devices
         * provided their merge_bvec method (we know this by looking for the
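
Illustration (numbers are hypothetical): the clamp above exists because max_sectors is a sector_t, so (max_sectors << SECTOR_SHIFT) is a byte count that can exceed INT_MAX; with the old int arithmetic it could wrap negative, and the old `< 0` test only papered over that. For example:

#include <stdio.h>
#include <stdint.h>
#include <limits.h>

#define SECTOR_SHIFT 9			/* 512-byte sectors */

int main(void)
{
	uint64_t max_sectors = 16ULL << 30;		/* 8 TiB worth of sectors */
	uint64_t max_size = max_sectors << SECTOR_SHIFT;	/* bytes */

	/* Narrowing to int would wrap; clamping in the wide type keeps
	 * the value a valid byte count for the ->merge() hook. */
	if (max_size > INT_MAX)
		max_size = INT_MAX;
	printf("max_size passed to ->merge(): %d\n", (int)max_size);
	return 0;
}
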
@@ -1971,8 +1986,8 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
                        dm_kill_unmapped_request(rq, r);
                        return r;
                }
-               if (IS_ERR(clone))
-                       return DM_MAPIO_REQUEUE;
+               if (r != DM_MAPIO_REMAPPED)
+                       return r;
                if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
                        /* -ENOMEM */
                        ti->type->release_clone_rq(clone);
@@ -1992,7 +2007,7 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
                break;
        case DM_MAPIO_REQUEUE:
                /* The target wants to requeue the I/O */
-               dm_requeue_unmapped_request(clone);
+               dm_requeue_original_request(md, tio->orig);
                break;
        default:
                if (r > 0) {
@@ -2015,7 +2030,7 @@ static void map_tio_request(struct kthread_work *work)
        struct mapped_device *md = tio->md;
 
        if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
-               dm_requeue_unmapped_original_request(md, rq);
+               dm_requeue_original_request(md, rq);
 }
 
 static void dm_start_request(struct mapped_device *md, struct request *orig)
@@ -2032,6 +2047,14 @@ static void dm_start_request(struct mapped_device *md, struct request *orig)
                md->last_rq_start_time = ktime_get();
        }
 
+       if (unlikely(dm_stats_used(&md->stats))) {
+               struct dm_rq_target_io *tio = tio_from_request(orig);
+               tio->duration_jiffies = jiffies;
+               tio->n_sectors = blk_rq_sectors(orig);
+               dm_stats_account_io(&md->stats, orig->cmd_flags, blk_rq_pos(orig),
+                                   tio->n_sectors, false, 0, &tio->stats_aux);
+       }
+
        /*
         * Hold the md reference here for the in-flight I/O.
         * We can't rely on the reference count by device opener,
@@ -2162,7 +2185,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
                         * the query about congestion status of request_queue
                         */
                        if (dm_request_based(md))
-                               r = md->queue->backing_dev_info.state &
+                               r = md->queue->backing_dev_info.wb.state &
                                    bdi_bits;
                        else
                                r = dm_table_any_congested(map, bdi_bits);
@@ -2255,6 +2278,40 @@ static void dm_init_old_md_queue(struct mapped_device *md)
        blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
 }
 
+static void cleanup_mapped_device(struct mapped_device *md)
+{
+       cleanup_srcu_struct(&md->io_barrier);
+
+       if (md->wq)
+               destroy_workqueue(md->wq);
+       if (md->kworker_task)
+               kthread_stop(md->kworker_task);
+       if (md->io_pool)
+               mempool_destroy(md->io_pool);
+       if (md->rq_pool)
+               mempool_destroy(md->rq_pool);
+       if (md->bs)
+               bioset_free(md->bs);
+
+       if (md->disk) {
+               spin_lock(&_minor_lock);
+               md->disk->private_data = NULL;
+               spin_unlock(&_minor_lock);
+               if (blk_get_integrity(md->disk))
+                       blk_integrity_unregister(md->disk);
+               del_gendisk(md->disk);
+               put_disk(md->disk);
+       }
+
+       if (md->queue)
+               blk_cleanup_queue(md->queue);
+
+       if (md->bdev) {
+               bdput(md->bdev);
+               md->bdev = NULL;
+       }
+}
+
 /*
  * Allocate and initialise a blank device with a given minor.
  */
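
Illustration (condensed sketch; names are mine): cleanup_mapped_device() above is written to be safe from any partially-constructed state — every resource is tested before teardown — which is what lets the error paths below collapse into a single label. The idiom in miniature:

#include <stdlib.h>

struct md_model {
	void *wq, *disk, *queue;
};

/* Safe whatever subset was allocated; free(NULL) is a no-op here,
 * where the kernel helpers need the explicit if() checks instead. */
static void cleanup_md_model(struct md_model *md)
{
	free(md->wq);    md->wq = NULL;
	free(md->disk);  md->disk = NULL;
	free(md->queue); md->queue = NULL;
}

int main(void)
{
	struct md_model md = { 0 };

	md.queue = malloc(32);
	md.disk = malloc(32);	/* suppose the wq allocation then failed */
	cleanup_md_model(&md);	/* still correct: md.wq is NULL */
	return 0;
}
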
@@ -2300,13 +2357,13 @@ static struct mapped_device *alloc_dev(int minor)
 
        md->queue = blk_alloc_queue(GFP_KERNEL);
        if (!md->queue)
-               goto bad_queue;
+               goto bad;
 
        dm_init_md_queue(md);
 
        md->disk = alloc_disk(1);
        if (!md->disk)
-               goto bad_disk;
+               goto bad;
 
        atomic_set(&md->pending[0], 0);
        atomic_set(&md->pending[1], 0);
@@ -2327,11 +2384,11 @@ static struct mapped_device *alloc_dev(int minor)
 
        md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0);
        if (!md->wq)
-               goto bad_thread;
+               goto bad;
 
        md->bdev = bdget_disk(md->disk, 0);
        if (!md->bdev)
-               goto bad_bdev;
+               goto bad;
 
        bio_init(&md->flush_bio);
        md->flush_bio.bi_bdev = md->bdev;
@@ -2348,15 +2405,8 @@ static struct mapped_device *alloc_dev(int minor)
 
        return md;
 
-bad_bdev:
-       destroy_workqueue(md->wq);
-bad_thread:
-       del_gendisk(md->disk);
-       put_disk(md->disk);
-bad_disk:
-       blk_cleanup_queue(md->queue);
-bad_queue:
-       cleanup_srcu_struct(&md->io_barrier);
+bad:
+       cleanup_mapped_device(md);
 bad_io_barrier:
        free_minor(minor);
 bad_minor:
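
Illustration (hypothetical resources): with a state-tolerant cleanup function, alloc_dev()'s error ladder above shrinks to one `goto bad`. The shape of that idiom:

#include <stdlib.h>

struct dev { void *a, *b; };

static struct dev *alloc_dev_model(void)
{
	struct dev *d = calloc(1, sizeof(*d));

	if (!d)
		return NULL;
	if (!(d->a = malloc(16)))
		goto bad;
	if (!(d->b = malloc(16)))
		goto bad;
	return d;
bad:
	/* One label, one cleanup; NULL members are simply skipped. */
	free(d->a);
	free(d->b);
	free(d);
	return NULL;
}

int main(void)
{
	struct dev *d = alloc_dev_model();

	if (d) {
		free(d->a);
		free(d->b);
		free(d);
	}
	return 0;
}
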
@@ -2373,32 +2423,13 @@ static void free_dev(struct mapped_device *md)
        int minor = MINOR(disk_devt(md->disk));
 
        unlock_fs(md);
-       destroy_workqueue(md->wq);
 
-       if (md->kworker_task)
-               kthread_stop(md->kworker_task);
-       if (md->io_pool)
-               mempool_destroy(md->io_pool);
-       if (md->rq_pool)
-               mempool_destroy(md->rq_pool);
-       if (md->bs)
-               bioset_free(md->bs);
+       cleanup_mapped_device(md);
+       if (md->use_blk_mq)
+               blk_mq_free_tag_set(&md->tag_set);
 
-       cleanup_srcu_struct(&md->io_barrier);
        free_table_devices(&md->table_devices);
        dm_stats_cleanup(&md->stats);
-
-       spin_lock(&_minor_lock);
-       md->disk->private_data = NULL;
-       spin_unlock(&_minor_lock);
-       if (blk_get_integrity(md->disk))
-               blk_integrity_unregister(md->disk);
-       del_gendisk(md->disk);
-       put_disk(md->disk);
-       blk_cleanup_queue(md->queue);
-       if (md->use_blk_mq)
-               blk_mq_free_tag_set(&md->tag_set);
-       bdput(md->bdev);
        free_minor(minor);
 
        module_put(THIS_MODULE);
@@ -2753,13 +2784,16 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
        if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) {
                /* clone request is allocated at the end of the pdu */
                tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io);
-               if (!clone_rq(rq, md, tio, GFP_ATOMIC))
-                       return BLK_MQ_RQ_QUEUE_BUSY;
+               (void) clone_rq(rq, md, tio, GFP_ATOMIC);
                queue_kthread_work(&md->kworker, &tio->work);
        } else {
                /* Direct call is fine since .queue_rq allows allocations */
-               if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
-                       dm_requeue_unmapped_original_request(md, rq);
+               if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
+                       /* Undo dm_start_request() before requeuing */
+                       rq_end_stats(md, rq);
+                       rq_completed(md, rq_data_dir(rq), false);
+                       return BLK_MQ_RQ_QUEUE_BUSY;
+               }
        }
 
        return BLK_MQ_RQ_QUEUE_OK;
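
Illustration (a toy model of the contract, not DM's code): the final hunk makes the blk-mq path undo what dm_start_request() did — stats via rq_end_stats(), the in-flight count via rq_completed() — before returning BLK_MQ_RQ_QUEUE_BUSY, so the block layer can re-dispatch the request cleanly later:

#include <stdio.h>

enum { QUEUE_OK, QUEUE_BUSY };
enum { MAPIO_REMAPPED, MAPIO_REQUEUE };

static int inflight;			/* models md->pending */

static void start_request(void) { inflight++; }
static void undo_start(void)    { inflight--; }	/* stats + completion undo */

static int queue_rq_model(int map_result)
{
	start_request();
	if (map_result == MAPIO_REQUEUE) {
		undo_start();	/* leave no trace before asking for a retry */
		return QUEUE_BUSY;
	}
	return QUEUE_OK;	/* request stays in flight until it completes */
}

int main(void)
{
	printf("busy path: rc=%d inflight=%d\n",
	       queue_rq_model(MAPIO_REQUEUE), inflight);
	printf("ok path:   rc=%d inflight=%d\n",
	       queue_rq_model(MAPIO_REMAPPED), inflight);
	return 0;
}
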