author     | Shaohua Li <shli@fusionio.com>  | 2013-05-22 10:31:20 +0200
committer  | Jens Axboe <axboe@kernel.dk>    | 2013-05-23 13:12:49 +0200
commit     | 816d743eb5f8ee1fb1ded3da3f14249790b17c2f (patch)
tree       | 1b06d33830f7ac9556d181403c915bbc553306a1
parent     | 2e3baca580617b2dac1283685454fb9df6820355 (diff)
blk-mq: cpu hot plug/unplug fixes (new-queue-20130523)
Requests belonging to an offline CPU can be in several places:
1. in ctx->rq_list. blk_mq_hctx_notify() handles them.
2. in the plug flush list. Preemption doesn't flush the plug list, so we need
to check whether a request's queue still belongs to an online CPU.
3. in the dispatch list. Here it doesn't matter whether the requests' queue
belongs to an online CPU.
4. at end_io time. We need to check that the IPI target is an online CPU.
To fully support CPU hotplug we would need to re-initialize the CPU-to-hctx
mapping, and some data structures would have to be re-initialized as well. This
is a bit challenging because there might be requests pending or running (we
would need a mechanism to free the queue, for example). So for now all data
structures are initialized statically (at queue init time); if a CPU is
offline, it is mapped to the first hardware queue. Better CPU hotplug handling
is left for the future.
Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
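The core simplification is the static CPU-to-hardware-queue map built once at queue init time. The following standalone sketch (not kernel code; NR_CPUS, the online[] array, make_queue_map() and the round-robin spread are made-up stand-ins for the kernel's cpumasks, struct blk_mq_reg and blk_mq_make_queue_map()) only illustrates the policy described above: every possible CPU gets an entry, and offline CPUs are pointed at hardware queue 0.

#include <stdio.h>

#define NR_CPUS 8

static unsigned int queue_map[NR_CPUS];

/*
 * Build the CPU -> hardware queue map once, covering every possible CPU.
 * Offline CPUs get a valid entry too: they are mapped to hardware queue 0.
 */
static void make_queue_map(const int online[], unsigned int nr_hw_queues)
{
        unsigned int cpu, next = 0;

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                if (!online[cpu]) {
                        queue_map[cpu] = 0;     /* offline -> first hctx */
                        continue;
                }
                queue_map[cpu] = next++ % nr_hw_queues; /* spread online CPUs */
        }
}

int main(void)
{
        int online[NR_CPUS] = { 1, 1, 0, 1, 0, 0, 1, 1 };
        unsigned int cpu;

        make_queue_map(online, 2);
        for (cpu = 0; cpu < NR_CPUS; cpu++)
                printf("cpu %u -> hctx %u\n", cpu, queue_map[cpu]);
        return 0;
}

A CPU that was offline at init time and comes online later therefore starts out on the first hardware queue, which is exactly the limitation the last paragraph of the commit message acknowledges.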
-rw-r--r-- | block/blk-mq-cpumap.c |  1
-rw-r--r-- | block/blk-mq.c        | 53
2 files changed, 42 insertions, 12 deletions
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index c19e2fde6bdb..21b8c0f305ad 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -87,6 +87,7 @@ unsigned int *blk_mq_make_queue_map(struct blk_mq_reg *reg)
 {
         unsigned int *map;

+        /* If cpus are offline, map them to first hctx */
         map = kzalloc_node(sizeof(*map) * num_possible_cpus(), GFP_KERNEL,
                                 reg->numa_node);
         if (!map)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4bc37f1c3c5b..46dbd25cee7f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -254,7 +254,7 @@ void blk_mq_end_io(struct request *rq, int error)
                 return __blk_mq_end_io(rq, error);

         cpu = get_cpu();
-        if (cpu == ctx->cpu)
+        if (cpu == ctx->cpu || !cpu_online(ctx->cpu))
                 __blk_mq_end_io(rq, error);
         else {
                 struct call_single_data *data = &rq->csd;
@@ -606,7 +606,7 @@ void blk_mq_insert_request(struct request_queue *q, struct request *rq,
                            bool run_queue)
 {
         struct blk_mq_hw_ctx *hctx;
-        struct blk_mq_ctx *ctx;
+        struct blk_mq_ctx *ctx, *current_ctx;

         ctx = rq->mq_ctx;
         hctx = q->mq_ops->map_queue(q, ctx->cpu);
@@ -614,9 +614,18 @@ void blk_mq_insert_request(struct request_queue *q, struct request *rq,
         if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
                 blk_insert_flush(rq);
         } else {
+                current_ctx = blk_mq_get_ctx(q);
+
+                if (!cpu_online(ctx->cpu)) {
+                        ctx = current_ctx;
+                        hctx = q->mq_ops->map_queue(q, ctx->cpu);
+                        rq->mq_ctx = ctx;
+                }
                 spin_lock(&ctx->lock);
                 __blk_mq_insert_request(hctx, rq);
                 spin_unlock(&ctx->lock);
+
+                blk_mq_put_ctx(current_ctx);
         }

         if (run_queue)
@@ -632,15 +641,24 @@ void blk_mq_run_request(struct request *rq, bool run_queue, bool async)
 {
         struct request_queue *q = rq->q;
         struct blk_mq_hw_ctx *hctx;
-        struct blk_mq_ctx *ctx;
+        struct blk_mq_ctx *ctx, *current_ctx;
+
+        current_ctx = blk_mq_get_ctx(q);

         ctx = rq->mq_ctx;
+        if (!cpu_online(ctx->cpu)) {
+                ctx = current_ctx;
+                rq->mq_ctx = ctx;
+        }
         hctx = q->mq_ops->map_queue(q, ctx->cpu);

+        /* ctx->cpu might be offline */
         spin_lock(&ctx->lock);
         __blk_mq_insert_request(hctx, rq);
         spin_unlock(&ctx->lock);

+        blk_mq_put_ctx(current_ctx);
+
         if (run_queue)
                 blk_mq_run_hw_queue(hctx, async);
 }
@@ -651,18 +669,32 @@ static void __blk_mq_insert_requests(struct request_queue *q,
                                      bool run_queue, bool from_schedule)
 {
-        struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu);
+        struct blk_mq_hw_ctx *hctx;
+        struct blk_mq_ctx *current_ctx;
+
+        current_ctx = blk_mq_get_ctx(q);
+
+        if (!cpu_online(ctx->cpu))
+                ctx = current_ctx;
+        hctx = q->mq_ops->map_queue(q, ctx->cpu);

+        /*
+         * preemption doesn't flush plug list, so it's possible ctx->cpu is
+         * offline now
+         */
         spin_lock(&ctx->lock);
         while (!list_empty(list)) {
                 struct request *rq;

                 rq = list_first_entry(list, struct request, queuelist);
                 list_del_init(&rq->queuelist);
+                rq->mq_ctx = ctx;
                 __blk_mq_insert_request(hctx, rq);
         }
         spin_unlock(&ctx->lock);

+        blk_mq_put_ctx(current_ctx);
+
         if (run_queue)
                 blk_mq_run_hw_queue(hctx, from_schedule);
 }
@@ -1135,12 +1167,13 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
                 INIT_LIST_HEAD(&__ctx->rq_list);
                 __ctx->queue = q;

-                if (!cpu_online(i))
-                        continue;
-
+                /* If the cpu isn't online, the cpu is mapped to first hctx */
                 hctx = q->mq_ops->map_queue(q, i);
                 hctx->nr_ctx++;

+                if (!cpu_online(i))
+                        continue;
+
                 /*
                  * Set local node, IFF we have more than one hw queue. If
                  * not, we remain on the home node of the device
@@ -1224,11 +1257,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
          * Map software to hardware queues
          */
         queue_for_each_ctx(q, ctx, i) {
-                if (!cpu_online(i)) {
-                        ctx->index_hw = -1;
-                        continue;
-                }
-
+                /* If the cpu isn't online, the cpu is mapped to first hctx */
                 hctx = q->mq_ops->map_queue(q, i);
                 ctx->index_hw = hctx->nr_ctx;
                 hctx->ctxs[hctx->nr_ctx++] = ctx;
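For reference, the two runtime checks added above can be reduced to the following standalone model (not kernel code; cpu_is_online(), current_cpu() and struct sw_ctx are invented stand-ins for cpu_online(), get_cpu() and struct blk_mq_ctx): on completion, run end_io locally when the submitting CPU is this CPU or has gone offline rather than sending an IPI to a dead CPU; on insert, retarget a request to the current CPU's software queue when its original queue belongs to an offline CPU.

#include <stdbool.h>
#include <stdio.h>

struct sw_ctx { int cpu; };

/* Pretend CPU 2 has been hot-unplugged. */
static bool cpu_is_online(int cpu) { return cpu != 2; }
static int current_cpu(void) { return 0; }

/*
 * Completion path: run the completion locally if we are already on the
 * submitting CPU, or if that CPU is offline (an IPI to it would be lost).
 */
static bool complete_locally(const struct sw_ctx *ctx)
{
        return current_cpu() == ctx->cpu || !cpu_is_online(ctx->cpu);
}

/*
 * Insert path: if the request's software queue belongs to an offline CPU,
 * retarget it to the current CPU's software queue before queueing it.
 */
static const struct sw_ctx *pick_insert_ctx(const struct sw_ctx *rq_ctx,
                                            const struct sw_ctx *cur_ctx)
{
        return cpu_is_online(rq_ctx->cpu) ? rq_ctx : cur_ctx;
}

int main(void)
{
        struct sw_ctx dead = { .cpu = 2 };
        struct sw_ctx cur = { .cpu = current_cpu() };

        printf("complete locally: %d\n", complete_locally(&dead));
        printf("insert on cpu:    %d\n", pick_insert_ctx(&dead, &cur)->cpu);
        return 0;
}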