// SPDX-License-Identifier: GPL-2.0
/*
 * Tag allocation using scalable bitmaps. Uses active queue tracking to support
 * fairer distribution of tags between multiple submitters when a shared tag map
 * is used.
 *
 * Copyright (C) 2013-2014 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/module.h>

#include <linux/blk-mq.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"

bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
{
	if (!tags)
		return true;

	return sbitmap_any_bit_clear(&tags->bitmap_tags.sb);
}

/*
 * If a previously inactive queue goes active, bump the active user count.
 * We need to do this before trying to allocate a driver tag, so that even
 * if the first attempt to get a tag fails, the other shared-tag users can
 * still reserve budget for this queue.
 */
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
	    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
		atomic_inc(&hctx->tags->active_queues);

	return true;
}

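/*
 * Illustrative note (not part of the original file): callers normally reach
 * __blk_mq_tag_busy() through the blk_mq_tag_busy() wrapper in blk-mq-tag.h,
 * which is roughly:
 *
 *	if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
 *		return false;
 *	return __blk_mq_tag_busy(hctx);
 *
 * so the active-queue accounting only kicks in for shared tag maps.
 */
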
/*
 * Wake up all waiters potentially sleeping on tags.
 */
void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
{
	sbitmap_queue_wake_all(&tags->bitmap_tags);
	if (include_reserve)
		sbitmap_queue_wake_all(&tags->breserved_tags);
}

/*
 * If a previously busy queue goes inactive, potential waiters could now
 * be allowed to queue. Wake them up and check.
 */
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
	struct blk_mq_tags *tags = hctx->tags;

	if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
		return;

	atomic_dec(&tags->active_queues);

	blk_mq_tag_wakeup_all(tags, false);
}

/*
 * For shared tag users, we track the number of currently active users
 * and attempt to provide a fair share of the tag depth for each of them.
 */
static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
				  struct sbitmap_queue *bt)
{
	unsigned int depth, users;

	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
		return true;
	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
		return true;

	/*
	 * Don't try dividing an ant
	 */
	if (bt->sb.depth == 1)
		return true;

	users = atomic_read(&hctx->tags->active_queues);
	if (!users)
		return true;

	/*
	 * Allow at least some tags
	 */
	depth = max((bt->sb.depth + users - 1) / users, 4U);
	return atomic_read(&hctx->nr_active) < depth;
}

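/*
 * Worked example of the fair-share computation above (illustrative only):
 * with bt->sb.depth == 128 and three active shared users,
 * depth = max((128 + 3 - 1) / 3, 4) = max(43, 4) = 43, so a hardware queue
 * may have at most 43 requests in flight before further allocations are
 * denied and must wait.
 */
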
static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
			    struct sbitmap_queue *bt)
{
	if (!(data->flags & BLK_MQ_REQ_INTERNAL) &&
	    !hctx_may_queue(data->hctx, bt))
		return -1;
	if (data->shallow_depth)
		return __sbitmap_queue_get_shallow(bt, data->shallow_depth);
	else
		return __sbitmap_queue_get(bt);
}

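/*
 * Illustrative note: data->shallow_depth is set by I/O schedulers (e.g. the
 * limit_depth hooks of Kyber and BFQ) to cap how deep the sbitmap allocation
 * may go, throttling selected request classes such as async writes without
 * shrinking the tag map itself. A sketch of such a hook, names hypothetical:
 *
 *	static void my_limit_depth(unsigned int op,
 *				   struct blk_mq_alloc_data *data)
 *	{
 *		if (!op_is_sync(op))
 *			data->shallow_depth = 16;
 *	}
 */
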
unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
{
	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
	struct sbitmap_queue *bt;
	struct sbq_wait_state *ws;
	DEFINE_SBQ_WAIT(wait);
	unsigned int tag_offset;
	int tag;

	if (data->flags & BLK_MQ_REQ_RESERVED) {
		if (unlikely(!tags->nr_reserved_tags)) {
			WARN_ON_ONCE(1);
			return BLK_MQ_TAG_FAIL;
		}
		bt = &tags->breserved_tags;
		tag_offset = 0;
	} else {
		bt = &tags->bitmap_tags;
		tag_offset = tags->nr_reserved_tags;
	}

	tag = __blk_mq_get_tag(data, bt);
	if (tag != -1)
		goto found_tag;

	if (data->flags & BLK_MQ_REQ_NOWAIT)
		return BLK_MQ_TAG_FAIL;

	ws = bt_wait_ptr(bt, data->hctx);
	do {
		struct sbitmap_queue *bt_prev;

		/*
		 * We're out of tags on this hardware queue, kick any
		 * pending IO submits before going to sleep waiting for
		 * some to complete.
		 */
		blk_mq_run_hw_queue(data->hctx, false);

		/*
		 * Retry tag allocation after running the hardware queue,
		 * as running the queue may also have found completions.
		 */
		tag = __blk_mq_get_tag(data, bt);
		if (tag != -1)
			break;

		sbitmap_prepare_to_wait(bt, ws, &wait, TASK_UNINTERRUPTIBLE);

		tag = __blk_mq_get_tag(data, bt);
		if (tag != -1)
			break;

		bt_prev = bt;
		io_schedule();

		sbitmap_finish_wait(bt, ws, &wait);

		data->ctx = blk_mq_get_ctx(data->q);
		data->hctx = blk_mq_map_queue(data->q, data->cmd_flags,
						data->ctx);
		tags = blk_mq_tags_from_data(data);
		if (data->flags & BLK_MQ_REQ_RESERVED)
			bt = &tags->breserved_tags;
		else
			bt = &tags->bitmap_tags;

		/*
		 * If the destination hw queue has changed, issue a fake
		 * wakeup on the previous queue to compensate for the missed
		 * wakeup, so other allocations on the previous queue won't
		 * be starved.
		 */
		if (bt != bt_prev)
			sbitmap_queue_wake_up(bt_prev);

		ws = bt_wait_ptr(bt, data->hctx);
	} while (1);

	sbitmap_finish_wait(bt, ws, &wait);

found_tag:
	return tag + tag_offset;
}

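/*
 * Hedged usage sketch (caller-side, not from this file): allocation through
 * a blk_mq_alloc_data and failure handling look roughly like:
 *
 *	struct blk_mq_alloc_data data = { .q = q, .flags = BLK_MQ_REQ_NOWAIT };
 *	unsigned int tag;
 *
 *	tag = blk_mq_get_tag(&data);
 *	if (tag == BLK_MQ_TAG_FAIL)
 *		return NULL;	// out of tags; with NOWAIT we must not sleep
 */
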
void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
		    struct blk_mq_ctx *ctx, unsigned int tag)
{
	if (!blk_mq_tag_is_reserved(tags, tag)) {
		const int real_tag = tag - tags->nr_reserved_tags;

		BUG_ON(real_tag >= tags->nr_tags);
		sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu);
	} else {
		BUG_ON(tag >= tags->nr_reserved_tags);
		sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu);
	}
}

struct bt_iter_data {
	struct blk_mq_hw_ctx *hctx;
	busy_iter_fn *fn;
	void *data;
	bool reserved;
};

static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
	struct bt_iter_data *iter_data = data;
	struct blk_mq_hw_ctx *hctx = iter_data->hctx;
	struct blk_mq_tags *tags = hctx->tags;
	bool reserved = iter_data->reserved;
	struct request *rq;

	if (!reserved)
		bitnr += tags->nr_reserved_tags;
	rq = tags->rqs[bitnr];

	/*
	 * We can hit rq == NULL here, because the tagging functions
	 * test and set the bit before assigning ->rqs[].
	 */
	if (rq && rq->q == hctx->queue)
		return iter_data->fn(hctx, rq, iter_data->data, reserved);
	return true;
}

/**
 * bt_for_each - iterate over the requests associated with a hardware queue
 * @hctx:	Hardware queue to examine.
 * @bt:		sbitmap to examine. This is either the breserved_tags member
 *		or the bitmap_tags member of struct blk_mq_tags.
 * @fn:		Pointer to the function that will be called for each request
 *		associated with @hctx that has been assigned a driver tag.
 *		@fn will be called as follows: @fn(@hctx, rq, @data, @reserved)
 *		where rq is a pointer to a request. Return true to continue
 *		iterating tags, false to stop.
 * @data:	Will be passed as third argument to @fn.
 * @reserved:	Indicates whether @bt is the breserved_tags member or the
 *		bitmap_tags member of struct blk_mq_tags.
 */
static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
			busy_iter_fn *fn, void *data, bool reserved)
{
	struct bt_iter_data iter_data = {
		.hctx = hctx,
		.fn = fn,
		.data = data,
		.reserved = reserved,
	};

	sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
}

struct bt_tags_iter_data {
	struct blk_mq_tags *tags;
	busy_tag_iter_fn *fn;
	void *data;
	bool reserved;
};

static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
	struct bt_tags_iter_data *iter_data = data;
	struct blk_mq_tags *tags = iter_data->tags;
	bool reserved = iter_data->reserved;
	struct request *rq;

	if (!reserved)
		bitnr += tags->nr_reserved_tags;

	/*
	 * We can hit rq == NULL here, because the tagging functions
	 * test and set the bit before assigning ->rqs[].
	 */
	rq = tags->rqs[bitnr];
	if (rq && blk_mq_request_started(rq))
		return iter_data->fn(rq, iter_data->data, reserved);

	return true;
}

/**
 * bt_tags_for_each - iterate over the requests in a tag map
 * @tags:	Tag map to iterate over.
 * @bt:		sbitmap to examine. This is either the breserved_tags member
 *		or the bitmap_tags member of struct blk_mq_tags.
 * @fn:		Pointer to the function that will be called for each started
 *		request. @fn will be called as follows: @fn(rq, @data,
 *		@reserved) where rq is a pointer to a request. Return true
 *		to continue iterating tags, false to stop.
 * @data:	Will be passed as second argument to @fn.
 * @reserved:	Indicates whether @bt is the breserved_tags member or the
 *		bitmap_tags member of struct blk_mq_tags.
 */
static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
			     busy_tag_iter_fn *fn, void *data, bool reserved)
{
	struct bt_tags_iter_data iter_data = {
		.tags = tags,
		.fn = fn,
		.data = data,
		.reserved = reserved,
	};

	if (tags->rqs)
		sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data);
}

/**
 * blk_mq_all_tag_busy_iter - iterate over all started requests in a tag map
 * @tags:	Tag map to iterate over.
 * @fn:		Pointer to the function that will be called for each started
 *		request. @fn will be called as follows: @fn(rq, @priv,
 *		reserved) where rq is a pointer to a request. 'reserved'
 *		indicates whether or not @rq is a reserved request. Return
 *		true to continue iterating tags, false to stop.
 * @priv:	Will be passed as second argument to @fn.
 */
static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
		busy_tag_iter_fn *fn, void *priv)
{
	if (tags->nr_reserved_tags)
		bt_tags_for_each(tags, &tags->breserved_tags, fn, priv, true);
	bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, false);
}

/**
 * blk_mq_tagset_busy_iter - iterate over all started requests in a tag set
 * @tagset:	Tag set to iterate over.
 * @fn:		Pointer to the function that will be called for each started
 *		request. @fn will be called as follows: @fn(rq, @priv,
 *		reserved) where rq is a pointer to a request. 'reserved'
 *		indicates whether or not @rq is a reserved request. Return
 *		true to continue iterating tags, false to stop.
 * @priv:	Will be passed as second argument to @fn.
 */
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
		busy_tag_iter_fn *fn, void *priv)
{
	int i;

	for (i = 0; i < tagset->nr_hw_queues; i++) {
		if (tagset->tags && tagset->tags[i])
			blk_mq_all_tag_busy_iter(tagset->tags[i], fn, priv);
	}
}
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);

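/*
 * Hedged usage sketch: drivers use blk_mq_tagset_busy_iter() to act on every
 * started request, e.g. cancelling them during a controller reset. The
 * callback below is illustrative, not taken from any driver:
 *
 *	static bool cancel_rq(struct request *rq, void *data, bool reserved)
 *	{
 *		blk_mq_complete_request(rq);
 *		return true;	// keep iterating
 *	}
 *
 *	blk_mq_tagset_busy_iter(&set, cancel_rq, NULL);
 */
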
/**
 * blk_mq_queue_tag_busy_iter - iterate over all requests with a driver tag
 * @q:		Request queue to examine.
 * @fn:		Pointer to the function that will be called for each request
 *		on @q. @fn will be called as follows: @fn(hctx, rq, @priv,
 *		reserved) where rq is a pointer to a request and hctx points
 *		to the hardware queue associated with the request. 'reserved'
 *		indicates whether or not @rq is a reserved request.
 * @priv:	Will be passed as third argument to @fn.
 *
 * Note: if @q->tag_set is shared with other request queues then @fn will be
 * called for all requests on all queues that share that tag set and not only
 * for requests associated with @q.
 */
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
		void *priv)
{
	struct blk_mq_hw_ctx *hctx;
	int i;

	/*
	 * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx
	 * while the queue is frozen. So we can use q_usage_counter to avoid
	 * racing with it. __blk_mq_update_nr_hw_queues() uses
	 * synchronize_rcu() to ensure this function has left the critical
	 * section below.
	 */
	if (!percpu_ref_tryget(&q->q_usage_counter))
		return;

	queue_for_each_hw_ctx(q, hctx, i) {
		struct blk_mq_tags *tags = hctx->tags;

		/*
		 * If no software queues are currently mapped to this
		 * hardware queue, there's nothing to check
		 */
		if (!blk_mq_hw_queue_mapped(hctx))
			continue;

		if (tags->nr_reserved_tags)
			bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
		bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
	}
	blk_queue_exit(q);
}

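/*
 * Hedged usage sketch: a busy_iter_fn callback that counts requests holding
 * a driver tag on a queue (names hypothetical):
 *
 *	static bool count_inflight(struct blk_mq_hw_ctx *hctx,
 *				   struct request *rq, void *priv,
 *				   bool reserved)
 *	{
 *		unsigned int *count = priv;
 *
 *		(*count)++;
 *		return true;	// continue iterating
 *	}
 *
 *	unsigned int inflight = 0;
 *	blk_mq_queue_tag_busy_iter(q, count_inflight, &inflight);
 */
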
static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
		    bool round_robin, int node)
{
	return sbitmap_queue_init_node(bt, depth, -1, round_robin, GFP_KERNEL,
				       node);
}

static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
						   int node, int alloc_policy)
{
	unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
	bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;

	if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node))
		goto free_tags;
	if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, round_robin,
		     node))
		goto free_bitmap_tags;

	return tags;
free_bitmap_tags:
	sbitmap_queue_free(&tags->bitmap_tags);
free_tags:
	kfree(tags);
	return NULL;
}

struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
				     unsigned int reserved_tags,
				     int node, int alloc_policy)
{
	struct blk_mq_tags *tags;

	if (total_tags > BLK_MQ_TAG_MAX) {
		pr_err("blk-mq: tag depth too large\n");
		return NULL;
	}

	tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node);
	if (!tags)
		return NULL;

	tags->nr_tags = total_tags;
	tags->nr_reserved_tags = reserved_tags;

	return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
}

void blk_mq_free_tags(struct blk_mq_tags *tags)
{
	sbitmap_queue_free(&tags->bitmap_tags);
	sbitmap_queue_free(&tags->breserved_tags);
	kfree(tags);
}

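/*
 * Hedged usage sketch: a tag map with 256 total tags, 8 of them reserved,
 * allocated with FIFO tag ordering:
 *
 *	struct blk_mq_tags *tags;
 *
 *	tags = blk_mq_init_tags(256, 8, NUMA_NO_NODE, BLK_TAG_ALLOC_FIFO);
 *	if (!tags)
 *		return -ENOMEM;
 *	...
 *	blk_mq_free_tags(tags);
 */
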
int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
			    struct blk_mq_tags **tagsptr, unsigned int tdepth,
			    bool can_grow)
{
	struct blk_mq_tags *tags = *tagsptr;

	if (tdepth <= tags->nr_reserved_tags)
		return -EINVAL;

	/*
	 * If we are allowed to grow beyond the original size, allocate
	 * a new set of tags before freeing the old one.
	 */
	if (tdepth > tags->nr_tags) {
		struct blk_mq_tag_set *set = hctx->queue->tag_set;
		struct blk_mq_tags *new;
		bool ret;

		if (!can_grow)
			return -EINVAL;

		/*
		 * We need some sort of upper limit; set it high enough that
		 * no valid use cases should require more.
		 */
		if (tdepth > 16 * BLKDEV_MAX_RQ)
			return -EINVAL;

		new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth,
				tags->nr_reserved_tags);
		if (!new)
			return -ENOMEM;
		ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth);
		if (ret) {
			blk_mq_free_rq_map(new);
			return -ENOMEM;
		}

		blk_mq_free_rqs(set, *tagsptr, hctx->queue_num);
		blk_mq_free_rq_map(*tagsptr);
		*tagsptr = new;
	} else {
		/*
		 * We don't need to (and can't) update reserved tags here;
		 * they remain static and should never need resizing.
		 */
		sbitmap_queue_resize(&tags->bitmap_tags,
				tdepth - tags->nr_reserved_tags);
	}

	return 0;
}

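/*
 * Note (illustrative): this function is typically reached via
 * blk_mq_update_nr_requests() when userspace writes
 * /sys/block/<dev>/queue/nr_requests; can_grow is set only for scheduler
 * tags, so the driver tag map itself never grows past its original depth.
 */
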
/**
 * blk_mq_unique_tag() - return a tag that is unique queue-wide
 * @rq: request for which to compute a unique tag
 *
 * The tag field in struct request is unique per hardware queue but not over
 * all hardware queues. Hence this function, which returns a tag with the
 * hardware context index in the upper bits and the per hardware queue tag in
 * the lower bits.
 *
 * Note: When called for a request that is queued on a non-multiqueue request
 * queue, the hardware context index is set to zero.
 */
u32 blk_mq_unique_tag(struct request *rq)
{
	return (rq->mq_hctx->queue_num << BLK_MQ_UNIQUE_TAG_BITS) |
		(rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
}
EXPORT_SYMBOL(blk_mq_unique_tag);
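
/*
 * Hedged usage sketch: the helpers in include/linux/blk-mq.h split a unique
 * tag back into its parts, e.g. when a SCSI LLD looks up a command by tag:
 *
 *	u32 unique = blk_mq_unique_tag(rq);
 *	u16 hwq = blk_mq_unique_tag_to_hwq(unique);	// upper bits
 *	u16 tag = blk_mq_unique_tag_to_tag(unique);	// lower bits
 */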