dm: submit stacked requests in irq enabled context
author     Keith Busch <keith.busch@intel.com>
           Fri, 17 Oct 2014 23:46:36 +0000 (17:46 -0600)
committer  Mike Snitzer <snitzer@redhat.com>
           Mon, 9 Feb 2015 18:06:47 +0000 (13:06 -0500)
Switch request-based DM to enqueuing all prep'ed requests onto a work
item that is processed by another (worker) kernel thread.  This allows
request-based DM to invoke block APIs that assume an interrupt-enabled
context (e.g. blk_get_request) and is a prerequisite for adding blk-mq
support to request-based DM.
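
For reference, a rough sketch of the kthread_worker pattern this change
builds on (not the patch itself, which follows below; my_dev, my_req and
my_work_fn are illustrative names only), using the API spellings this
kernel provides (init_kthread_worker, queue_kthread_work, etc.):

#include <linux/err.h>
#include <linux/kthread.h>

struct my_dev {
	struct kthread_worker kworker;
	struct task_struct *kworker_task;
};

struct my_req {
	struct kthread_work work;
	/* ... per-request state ... */
};

/* Runs in the worker thread: process context with interrupts enabled,
 * so blocking block-layer APIs such as blk_get_request() are usable. */
static void my_work_fn(struct kthread_work *work)
{
	struct my_req *req = container_of(work, struct my_req, work);

	(void)req;	/* ... map and submit the request ... */
}

static int my_dev_start_worker(struct my_dev *dev)
{
	init_kthread_worker(&dev->kworker);
	dev->kworker_task = kthread_run(kthread_worker_fn, &dev->kworker,
					"kmywork");
	return PTR_ERR_OR_ZERO(dev->kworker_task);
}

/* May be called with the queue lock held and interrupts disabled: it
 * only queues the work item; the real work happens in my_work_fn(). */
static void my_dev_submit(struct my_dev *dev, struct my_req *req)
{
	init_kthread_work(&req->work, my_work_fn);
	queue_kthread_work(&dev->kworker, &req->work);
}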

The new kernel thread is only initialized for request-based DM devices.

multipath_map() is now always called in an interrupt-enabled context, so
change the multipath spinlock (m->lock) locking to unconditionally
disable interrupts (spin_lock_irq instead of spin_lock_irqsave).
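
A minimal illustration of that locking difference (generic example, not
DM code):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);

/* Safe in any context: saves, then restores, the caller's interrupt state. */
static void take_lock_any_context(void)
{
	unsigned long flags;

	spin_lock_irqsave(&example_lock, flags);
	/* ... critical section ... */
	spin_unlock_irqrestore(&example_lock, flags);
}

/* Slightly cheaper, but only correct when interrupts are known to be
 * enabled on entry, as is now guaranteed for multipath_map() running
 * from the worker thread: the unlock re-enables interrupts
 * unconditionally. */
static void take_lock_irqs_enabled(void)
{
	spin_lock_irq(&example_lock);
	/* ... critical section ... */
	spin_unlock_irq(&example_lock);
}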

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
drivers/md/dm-mpath.c
drivers/md/dm.c

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 7b6b0f0f831a443b0bd947628bca643b821c8b95..2552b88f8953efc4a1a0eb32184d2364a19d6664 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -384,12 +384,11 @@ static int multipath_map(struct dm_target *ti, struct request *clone,
        struct multipath *m = (struct multipath *) ti->private;
        int r = DM_MAPIO_REQUEUE;
        size_t nr_bytes = blk_rq_bytes(clone);
-       unsigned long flags;
        struct pgpath *pgpath;
        struct block_device *bdev;
        struct dm_mpath_io *mpio;
 
-       spin_lock_irqsave(&m->lock, flags);
+       spin_lock_irq(&m->lock);
 
        /* Do we need to select a new pgpath? */
        if (!m->current_pgpath ||
@@ -411,21 +410,26 @@ static int multipath_map(struct dm_target *ti, struct request *clone,
                /* ENOMEM, requeue */
                goto out_unlock;
 
+       mpio = map_context->ptr;
+       mpio->pgpath = pgpath;
+       mpio->nr_bytes = nr_bytes;
+
        bdev = pgpath->path.dev->bdev;
+
        clone->q = bdev_get_queue(bdev);
        clone->rq_disk = bdev->bd_disk;
        clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
-       mpio = map_context->ptr;
-       mpio->pgpath = pgpath;
-       mpio->nr_bytes = nr_bytes;
+
+       spin_unlock_irq(&m->lock);
+
        if (pgpath->pg->ps.type->start_io)
                pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
                                              &pgpath->path,
                                              nr_bytes);
-       r = DM_MAPIO_REMAPPED;
+       return DM_MAPIO_REMAPPED;
 
 out_unlock:
-       spin_unlock_irqrestore(&m->lock, flags);
+       spin_unlock_irq(&m->lock);
 
        return r;
 }
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 9a857e33790250ea350e83aa508667ab78cced48..f0e34070c11df4cd50020ee833977bd6be7a6532 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -20,6 +20,7 @@
 #include <linux/hdreg.h>
 #include <linux/delay.h>
 #include <linux/wait.h>
+#include <linux/kthread.h>
 
 #include <trace/events/block.h>
 
@@ -79,6 +80,7 @@ struct dm_rq_target_io {
        struct mapped_device *md;
        struct dm_target *ti;
        struct request *orig, *clone;
+       struct kthread_work work;
        int error;
        union map_info info;
 };
@@ -208,6 +210,9 @@ struct mapped_device {
        struct bio flush_bio;
 
        struct dm_stats stats;
+
+       struct kthread_worker kworker;
+       struct task_struct *kworker_task;
 };
 
 /*
@@ -1773,6 +1778,8 @@ static struct request *__clone_rq(struct request *rq, struct mapped_device *md,
        return clone;
 }
 
+static void map_tio_request(struct kthread_work *work);
+
 static struct request *clone_rq(struct request *rq, struct mapped_device *md,
                                gfp_t gfp_mask)
 {
@@ -1789,6 +1796,7 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md,
        tio->orig = rq;
        tio->error = 0;
        memset(&tio->info, 0, sizeof(tio->info));
+       init_kthread_work(&tio->work, map_tio_request);
 
        clone = __clone_rq(rq, md, tio, GFP_ATOMIC);
        if (!clone) {
@@ -1833,7 +1841,6 @@ static int map_request(struct dm_target *ti, struct request *clone,
        int r, requeued = 0;
        struct dm_rq_target_io *tio = clone->end_io_data;
 
-       tio->ti = ti;
        r = ti->type->map_rq(ti, clone, &tio->info);
        switch (r) {
        case DM_MAPIO_SUBMITTED:
@@ -1864,6 +1871,13 @@ static int map_request(struct dm_target *ti, struct request *clone,
        return requeued;
 }
 
+static void map_tio_request(struct kthread_work *work)
+{
+       struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
+
+       map_request(tio->ti, tio->clone, tio->md);
+}
+
 static struct request *dm_start_request(struct mapped_device *md, struct request *orig)
 {
        struct request *clone;
@@ -1895,6 +1909,7 @@ static void dm_request_fn(struct request_queue *q)
        struct dm_table *map = dm_get_live_table(md, &srcu_idx);
        struct dm_target *ti;
        struct request *rq, *clone;
+       struct dm_rq_target_io *tio;
        sector_t pos;
 
        /*
@@ -1930,20 +1945,15 @@ static void dm_request_fn(struct request_queue *q)
 
                clone = dm_start_request(md, rq);
 
-               spin_unlock(q->queue_lock);
-               if (map_request(ti, clone, md))
-                       goto requeued;
-
+               tio = rq->special;
+               /* Establish tio->ti before queuing work (map_tio_request) */
+               tio->ti = ti;
+               queue_kthread_work(&md->kworker, &tio->work);
                BUG_ON(!irqs_disabled());
-               spin_lock(q->queue_lock);
        }
 
        goto out;
 
-requeued:
-       BUG_ON(!irqs_disabled());
-       spin_lock(q->queue_lock);
-
 delay_and_out:
        blk_delay_queue(q, HZ / 10);
 out:
@@ -2129,6 +2139,7 @@ static struct mapped_device *alloc_dev(int minor)
        INIT_WORK(&md->work, dm_wq_work);
        init_waitqueue_head(&md->eventq);
        init_completion(&md->kobj_holder.completion);
+       md->kworker_task = NULL;
 
        md->disk->major = _major;
        md->disk->first_minor = minor;
@@ -2189,6 +2200,9 @@ static void free_dev(struct mapped_device *md)
        unlock_fs(md);
        bdput(md->bdev);
        destroy_workqueue(md->wq);
+
+       if (md->kworker_task)
+               kthread_stop(md->kworker_task);
        if (md->io_pool)
                mempool_destroy(md->io_pool);
        if (md->rq_pool)
@@ -2484,6 +2498,11 @@ static int dm_init_request_based_queue(struct mapped_device *md)
        blk_queue_prep_rq(md->queue, dm_prep_fn);
        blk_queue_lld_busy(md->queue, dm_lld_busy);
 
+       /* Also initialize the request-based DM worker thread */
+       init_kthread_worker(&md->kworker);
+       md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
+                                      "kdmwork-%s", dm_device_name(md));
+
        elv_register_queue(md->queue);
 
        return 1;
@@ -2574,6 +2593,9 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
        set_bit(DMF_FREEING, &md->flags);
        spin_unlock(&_minor_lock);
 
+       if (dm_request_based(md))
+               flush_kthread_worker(&md->kworker);
+
        if (!dm_suspended_md(md)) {
                dm_table_presuspend_targets(map);
                dm_table_postsuspend_targets(map);
@@ -2817,8 +2839,10 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
         * Stop md->queue before flushing md->wq in case request-based
         * dm defers requests to md->wq from md->queue.
         */
-       if (dm_request_based(md))
+       if (dm_request_based(md)) {
                stop_queue(md->queue);
+               flush_kthread_worker(&md->kworker);
+       }
 
        flush_workqueue(md->wq);