struct kthread_work work;
int error;
union map_info info;
+ struct dm_stats_aux stats_aux;
+ unsigned long duration_jiffies;
+ unsigned n_sectors;
};
/*
} else {
/* done with normal IO or empty flush */
trace_block_bio_complete(md->queue, bio, io_error);
- bio_endio(bio, io_error);
+ bio->bi_error = io_error;
+ bio_endio(bio);
}
}
}
limits->max_write_same_sectors = 0;
}
-static void clone_endio(struct bio *bio, int error)
+static void clone_endio(struct bio *bio)
{
+ int error = bio->bi_error;
int r = error;
struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
struct dm_io *io = tio->io;
struct mapped_device *md = tio->io->md;
dm_endio_fn endio = tio->ti->type->end_io;
- if (!bio_flagged(bio, BIO_UPTODATE) && !error)
- error = -EIO;
-
if (endio) {
r = endio(tio->ti, bio, error);
if (r < 0 || r == DM_ENDIO_REQUEUE)
/*
* Partial completion handling for request-based dm
*/
-static void end_clone_bio(struct bio *clone, int error)
+static void end_clone_bio(struct bio *clone)
{
struct dm_rq_clone_bio_info *info =
container_of(clone, struct dm_rq_clone_bio_info, clone);
* the remainder.
*/
return;
- else if (error) {
+ else if (bio->bi_error) {
/*
* Don't notice the error to the upper layer yet.
* The error handling decision is made by the target driver,
* when the request is completed.
*/
- tio->error = error;
+ tio->error = bio->bi_error;
return;
}
return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
}
+/*
+ * Finish dm-stats accounting for the original request @orig on device @md.
+ *
+ * Does nothing unless dm_stats_used() reports true for md->stats.  Otherwise
+ * the value held in the request's tio->duration_jiffies is replaced by the
+ * difference between the current jiffies and that value, and the result is
+ * passed to dm_stats_account_io() together with the request's cmd_flags,
+ * its start position, tio->n_sectors and the per-request stats_aux state.
+ */
+static void rq_end_stats(struct mapped_device *md, struct request *orig)
+{
+	struct dm_rq_target_io *tio;
+
+	/* Common case: statistics are not in use, nothing to account. */
+	if (!unlikely(dm_stats_used(&md->stats)))
+		return;
+
+	tio = tio_from_request(orig);
+
+	/* Turn the stored stamp into an elapsed-jiffies duration. */
+	tio->duration_jiffies = jiffies - tio->duration_jiffies;
+
+	dm_stats_account_io(&md->stats, orig->cmd_flags, blk_rq_pos(orig),
+			    tio->n_sectors, true, tio->duration_jiffies,
+			    &tio->stats_aux);
+}
+
/*
* Don't touch any member of the md after calling this function because
* the md may be freed in dm_put() at the end of this function.
dm_put(md);
}
-static void free_rq_clone(struct request *clone, bool must_be_mapped)
+static void free_rq_clone(struct request *clone)
{
struct dm_rq_target_io *tio = clone->end_io_data;
struct mapped_device *md = tio->md;
- WARN_ON_ONCE(must_be_mapped && !clone->q);
-
blk_rq_unprep_clone(clone);
if (md->type == DM_TYPE_MQ_REQUEST_BASED)
rq->sense_len = clone->sense_len;
}
- free_rq_clone(clone, true);
+ free_rq_clone(clone);
+ rq_end_stats(md, rq);
if (!rq->q->mq_ops)
blk_end_request_all(rq, error);
else
}
if (clone)
- free_rq_clone(clone, false);
+ free_rq_clone(clone);
}
/*
spin_lock_irqsave(q->queue_lock, flags);
blk_requeue_request(q, rq);
+ blk_run_queue_async(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
-static void dm_requeue_unmapped_original_request(struct mapped_device *md,
- struct request *rq)
+static void dm_requeue_original_request(struct mapped_device *md,
+ struct request *rq)
{
int rw = rq_data_dir(rq);
dm_unprep_request(rq);
+ rq_end_stats(md, rq);
if (!rq->q->mq_ops)
old_requeue_request(rq);
else {
rq_completed(md, rw, false);
}
-static void dm_requeue_unmapped_request(struct request *clone)
-{
- struct dm_rq_target_io *tio = clone->end_io_data;
-
- dm_requeue_unmapped_original_request(tio->md, tio->orig);
-}
-
static void old_stop_queue(struct request_queue *q)
{
unsigned long flags;
return;
else if (r == DM_ENDIO_REQUEUE)
/* The target wants to requeue the I/O */
- dm_requeue_unmapped_request(clone);
+ dm_requeue_original_request(tio->md, tio->orig);
else {
DMWARN("unimplemented target endio return value: %d", r);
BUG();
int rw;
if (!clone) {
+ rq_end_stats(tio->md, rq);
rw = rq_data_dir(rq);
if (!rq->q->mq_ops) {
blk_end_request_all(rq, tio->error);
struct mapped_device *md = q->queuedata;
struct dm_table *map = dm_get_live_table_fast(md);
struct dm_target *ti;
- sector_t max_sectors;
- int max_size = 0;
+ sector_t max_sectors, max_size = 0;
if (unlikely(!map))
goto out;
max_sectors = min(max_io_len(bvm->bi_sector, ti),
(sector_t) queue_max_sectors(q));
max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
- if (unlikely(max_size < 0)) /* this shouldn't _ever_ happen */
- max_size = 0;
+
+ /*
+ * FIXME: this stop-gap fix _must_ be cleaned up (by passing a sector_t
+ * to the targets' merge function since it holds sectors not bytes).
+ * Just doing this as an interim fix for stable@ because the more
+ * comprehensive cleanup of switching to sector_t will impact every
+ * DM target that implements a ->merge hook.
+ */
+ if (max_size > INT_MAX)
+ max_size = INT_MAX;
/*
* merge_bvec_fn() returns number of bytes
* max is precomputed maximal io size
*/
if (max_size && ti->type->merge)
- max_size = ti->type->merge(ti, bvm, biovec, max_size);
+ max_size = ti->type->merge(ti, bvm, biovec, (int) max_size);
/*
* If the target doesn't support merge method and some of the devices
* provided their merge_bvec method (we know this by looking for the
dm_kill_unmapped_request(rq, r);
return r;
}
- if (IS_ERR(clone))
- return DM_MAPIO_REQUEUE;
+ if (r != DM_MAPIO_REMAPPED)
+ return r;
if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
/* -ENOMEM */
ti->type->release_clone_rq(clone);
break;
case DM_MAPIO_REQUEUE:
/* The target wants to requeue the I/O */
- dm_requeue_unmapped_request(clone);
+ dm_requeue_original_request(md, tio->orig);
break;
default:
if (r > 0) {
struct mapped_device *md = tio->md;
if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
- dm_requeue_unmapped_original_request(md, rq);
+ dm_requeue_original_request(md, rq);
}
static void dm_start_request(struct mapped_device *md, struct request *orig)
md->last_rq_start_time = ktime_get();
}
+ if (unlikely(dm_stats_used(&md->stats))) {
+ struct dm_rq_target_io *tio = tio_from_request(orig);
+ tio->duration_jiffies = jiffies;
+ tio->n_sectors = blk_rq_sectors(orig);
+ dm_stats_account_io(&md->stats, orig->cmd_flags, blk_rq_pos(orig),
+ tio->n_sectors, false, 0, &tio->stats_aux);
+ }
+
/*
* Hold the md reference here for the in-flight I/O.
* We can't rely on the reference count by device opener,
* the query about congestion status of request_queue
*/
if (dm_request_based(md))
- r = md->queue->backing_dev_info.state &
+ r = md->queue->backing_dev_info.wb.state &
bdi_bits;
else
r = dm_table_any_congested(map, bdi_bits);
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
}
+/*
+ * Release the resources held by a mapped_device.
+ *
+ * Every optional member (wq, kworker_task, io_pool, rq_pool, bs, disk,
+ * queue, bdev) is tested before it is released, so the function can be
+ * called on a partially constructed md from the allocation error path
+ * (the "bad:" label) as well as on a fully set up md during teardown.
+ *
+ * Ordering: the open-coded teardown sequences this helper replaces both
+ * stopped the workqueue and kworker thread before calling
+ * cleanup_srcu_struct() on io_barrier.  That order is kept here so that
+ * nothing which may still be running on those workers can touch
+ * io_barrier after it has been cleaned up.
+ */
+static void cleanup_mapped_device(struct mapped_device *md)
+{
+	/* Stop the asynchronous workers first. */
+	if (md->wq)
+		destroy_workqueue(md->wq);
+	if (md->kworker_task)
+		kthread_stop(md->kworker_task);
+
+	/* Pools and bioset used for in-flight I/O. */
+	if (md->io_pool)
+		mempool_destroy(md->io_pool);
+	if (md->rq_pool)
+		mempool_destroy(md->rq_pool);
+	if (md->bs)
+		bioset_free(md->bs);
+
+	/* Only now is it safe to tear down the SRCU barrier. */
+	cleanup_srcu_struct(&md->io_barrier);
+
+	if (md->disk) {
+		/* Detach md from the gendisk under _minor_lock before deleting it. */
+		spin_lock(&_minor_lock);
+		md->disk->private_data = NULL;
+		spin_unlock(&_minor_lock);
+		if (blk_get_integrity(md->disk))
+			blk_integrity_unregister(md->disk);
+		del_gendisk(md->disk);
+		put_disk(md->disk);
+	}
+
+	if (md->queue)
+		blk_cleanup_queue(md->queue);
+
+	if (md->bdev) {
+		bdput(md->bdev);
+		/* Clear the pointer so a repeated call does not bdput() twice. */
+		md->bdev = NULL;
+	}
+}
+
/*
* Allocate and initialise a blank device with a given minor.
*/
md->queue = blk_alloc_queue(GFP_KERNEL);
if (!md->queue)
- goto bad_queue;
+ goto bad;
dm_init_md_queue(md);
md->disk = alloc_disk(1);
if (!md->disk)
- goto bad_disk;
+ goto bad;
atomic_set(&md->pending[0], 0);
atomic_set(&md->pending[1], 0);
md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0);
if (!md->wq)
- goto bad_thread;
+ goto bad;
md->bdev = bdget_disk(md->disk, 0);
if (!md->bdev)
- goto bad_bdev;
+ goto bad;
bio_init(&md->flush_bio);
md->flush_bio.bi_bdev = md->bdev;
return md;
-bad_bdev:
- destroy_workqueue(md->wq);
-bad_thread:
- del_gendisk(md->disk);
- put_disk(md->disk);
-bad_disk:
- blk_cleanup_queue(md->queue);
-bad_queue:
- cleanup_srcu_struct(&md->io_barrier);
+bad:
+ cleanup_mapped_device(md);
bad_io_barrier:
free_minor(minor);
bad_minor:
int minor = MINOR(disk_devt(md->disk));
unlock_fs(md);
- destroy_workqueue(md->wq);
- if (md->kworker_task)
- kthread_stop(md->kworker_task);
- if (md->io_pool)
- mempool_destroy(md->io_pool);
- if (md->rq_pool)
- mempool_destroy(md->rq_pool);
- if (md->bs)
- bioset_free(md->bs);
+ cleanup_mapped_device(md);
+ if (md->use_blk_mq)
+ blk_mq_free_tag_set(&md->tag_set);
- cleanup_srcu_struct(&md->io_barrier);
free_table_devices(&md->table_devices);
dm_stats_cleanup(&md->stats);
-
- spin_lock(&_minor_lock);
- md->disk->private_data = NULL;
- spin_unlock(&_minor_lock);
- if (blk_get_integrity(md->disk))
- blk_integrity_unregister(md->disk);
- del_gendisk(md->disk);
- put_disk(md->disk);
- blk_cleanup_queue(md->queue);
- if (md->use_blk_mq)
- blk_mq_free_tag_set(&md->tag_set);
- bdput(md->bdev);
free_minor(minor);
module_put(THIS_MODULE);
if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) {
/* clone request is allocated at the end of the pdu */
tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io);
- if (!clone_rq(rq, md, tio, GFP_ATOMIC))
- return BLK_MQ_RQ_QUEUE_BUSY;
+ (void) clone_rq(rq, md, tio, GFP_ATOMIC);
queue_kthread_work(&md->kworker, &tio->work);
} else {
/* Direct call is fine since .queue_rq allows allocations */
- if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
- dm_requeue_unmapped_original_request(md, rq);
+ if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
+ /* Undo dm_start_request() before requeuing */
+ rq_end_stats(md, rq);
+ rq_completed(md, rq_data_dir(rq), false);
+ return BLK_MQ_RQ_QUEUE_BUSY;
+ }
}
return BLK_MQ_RQ_QUEUE_OK;