/* SPDX-License-Identifier: GPL-2.0 */
#ifndef BLK_MQ_H
#define BLK_MQ_H

#include <linux/blkdev.h>
#include <linux/sbitmap.h>
#include <linux/srcu.h>

struct blk_mq_tags;
struct blk_flush_queue;

/**
 * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware block device
 */
struct blk_mq_hw_ctx {
	struct {
		spinlock_t		lock;
		struct list_head	dispatch;
		unsigned long		state;		/* BLK_MQ_S_* flags */
	} ____cacheline_aligned_in_smp;

	struct delayed_work	run_work;
	cpumask_var_t		cpumask;
	int			next_cpu;
	int			next_cpu_batch;

	unsigned long		flags;		/* BLK_MQ_F_* flags */

	void			*sched_data;
	struct request_queue	*queue;
	struct blk_flush_queue	*fq;

	void			*driver_data;

	struct sbitmap		ctx_map;

	struct blk_mq_ctx	*dispatch_from;
	unsigned int		dispatch_busy;

	unsigned short		type;
	unsigned short		nr_ctx;
	struct blk_mq_ctx	**ctxs;

	spinlock_t		dispatch_wait_lock;
	wait_queue_entry_t	dispatch_wait;
	atomic_t		wait_index;

	struct blk_mq_tags	*tags;
	struct blk_mq_tags	*sched_tags;

	unsigned long		queued;
	unsigned long		run;
#define BLK_MQ_MAX_DISPATCH_ORDER	7
	unsigned long		dispatched[BLK_MQ_MAX_DISPATCH_ORDER];

	unsigned int		numa_node;
	unsigned int		queue_num;

	atomic_t		nr_active;

	struct hlist_node	cpuhp_dead;
	struct kobject		kobj;

	unsigned long		poll_considered;
	unsigned long		poll_invoked;
	unsigned long		poll_success;

#ifdef CONFIG_BLK_DEBUG_FS
	struct dentry		*debugfs_dir;
	struct dentry		*sched_debugfs_dir;
#endif

	struct list_head	hctx_list;

	/* Must be the last member - see also blk_mq_hw_ctx_size(). */
	struct srcu_struct	srcu[0];
};

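/*
 * Illustrative sketch (not part of this header): a driver typically attaches
 * its own per-hw-queue state through the init_hctx/exit_hctx callbacks
 * defined further below, using the driver_data pointer and numa_node members
 * above. The my_* names are hypothetical.
 *
 *	struct my_hctx_data {
 *		int hw_queue_id;
 *	};
 *
 *	static int my_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 *				unsigned int hctx_idx)
 *	{
 *		struct my_hctx_data *d;
 *
 *		d = kzalloc_node(sizeof(*d), GFP_KERNEL, hctx->numa_node);
 *		if (!d)
 *			return -ENOMEM;
 *		d->hw_queue_id = hctx_idx;
 *		hctx->driver_data = d;
 *		return 0;
 *	}
 *
 *	static void my_exit_hctx(struct blk_mq_hw_ctx *hctx,
 *				 unsigned int hctx_idx)
 *	{
 *		kfree(hctx->driver_data);
 *	}
 */
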
/**
 * struct blk_mq_queue_map - ctx -> hctx mapping
 * @mq_map:       CPU ID to hardware queue index map. This is an array
 *	with nr_cpu_ids elements. Each element has a value in the range
 *	[@queue_offset, @queue_offset + @nr_queues).
 * @nr_queues:    Number of hardware queues to map CPU IDs onto.
 * @queue_offset: First hardware queue to map onto. Used by the PCIe NVMe
 *	driver to map each hardware queue type (enum hctx_type) onto a distinct
 *	set of hardware queues.
 */
struct blk_mq_queue_map {
	unsigned int *mq_map;
	unsigned int nr_queues;
	unsigned int queue_offset;
};

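/*
 * Illustrative sketch (not part of this header): a driver without special
 * topology requirements can spread CPUs across its hardware queues in its
 * map_queues callback; my_map_queues is a hypothetical name.
 *
 *	static int my_map_queues(struct blk_mq_tag_set *set)
 *	{
 *		struct blk_mq_queue_map *qmap = &set->map[HCTX_TYPE_DEFAULT];
 *		unsigned int cpu;
 *
 *		for_each_possible_cpu(cpu)
 *			qmap->mq_map[cpu] = qmap->queue_offset +
 *					    cpu % qmap->nr_queues;
 *		return 0;
 *	}
 *
 * Drivers that don't need custom logic can instead call
 * blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]), declared later in this
 * header, from their callback.
 */
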
enum hctx_type {
	HCTX_TYPE_DEFAULT,	/* all I/O not otherwise accounted for */
	HCTX_TYPE_READ,		/* just for READ I/O */
	HCTX_TYPE_POLL,		/* polled I/O of any kind */

	HCTX_MAX_TYPES,
};

/**
 * struct blk_mq_tag_set - tag set that can be shared between request queues
 * @map:	   One or more ctx -> hctx mappings. One map exists for each
 *	hardware queue type (enum hctx_type) that the driver wishes
 *	to support. There are no restrictions on maps being of the
 *	same size, and it's perfectly legal to share maps between
 *	types.
 * @nr_maps:	   Number of elements in the @map array. A number in the range
 *	[1, HCTX_MAX_TYPES].
 * @ops:	   Pointers to functions that implement block driver behavior.
 * @nr_hw_queues:  Number of hardware queues supported by the block driver that
 *	owns this data structure.
 * @queue_depth:   Number of tags per hardware queue, reserved tags included.
 * @reserved_tags: Number of tags to set aside for BLK_MQ_REQ_RESERVED tag
 *	allocations.
 * @cmd_size:	   Number of additional bytes to allocate per request. The block
 *	driver owns these additional bytes.
 * @numa_node:	   NUMA node the storage adapter has been connected to.
 * @timeout:	   Request processing timeout in jiffies.
 * @flags:	   Zero or more BLK_MQ_F_* flags.
 * @driver_data:   Pointer to data owned by the block driver that created this
 *	tag set.
 * @tags:	   Tag sets. One tag set per hardware queue. Has @nr_hw_queues
 *	elements.
 * @tag_list_lock: Serializes tag_list accesses.
 * @tag_list:	   List of the request queues that use this tag set. See also
 *	request_queue.tag_set_list.
 */
struct blk_mq_tag_set {
	struct blk_mq_queue_map	map[HCTX_MAX_TYPES];
	unsigned int		nr_maps;
	const struct blk_mq_ops	*ops;
	unsigned int		nr_hw_queues;
	unsigned int		queue_depth;
	unsigned int		reserved_tags;
	unsigned int		cmd_size;
	int			numa_node;
	unsigned int		timeout;
	unsigned int		flags;
	void			*driver_data;

	struct blk_mq_tags	**tags;

	struct mutex		tag_list_lock;
	struct list_head	tag_list;
};

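/*
 * Illustrative sketch (not part of this header): typical tag set setup for a
 * single-map driver. The my_* names, struct my_cmd, and the queue depth are
 * hypothetical; blk_mq_alloc_tag_set() and blk_mq_init_queue() are declared
 * later in this header.
 *
 *	static struct blk_mq_tag_set my_tag_set;
 *
 *	static int my_probe(struct my_device *dev)
 *	{
 *		struct request_queue *q;
 *		int ret;
 *
 *		memset(&my_tag_set, 0, sizeof(my_tag_set));
 *		my_tag_set.ops		= &my_mq_ops;
 *		my_tag_set.nr_hw_queues	= 1;
 *		my_tag_set.nr_maps	= 1;
 *		my_tag_set.queue_depth	= 128;
 *		my_tag_set.numa_node	= NUMA_NO_NODE;
 *		my_tag_set.cmd_size	= sizeof(struct my_cmd);
 *		my_tag_set.flags	= BLK_MQ_F_SHOULD_MERGE;
 *
 *		ret = blk_mq_alloc_tag_set(&my_tag_set);
 *		if (ret)
 *			return ret;
 *
 *		q = blk_mq_init_queue(&my_tag_set);
 *		if (IS_ERR(q)) {
 *			blk_mq_free_tag_set(&my_tag_set);
 *			return PTR_ERR(q);
 *		}
 *		...
 *	}
 *
 * Simple single-queue drivers can instead use blk_mq_init_sq_queue(), which
 * bundles tag set allocation and queue creation.
 */
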
struct blk_mq_queue_data {
	struct request *rq;
	bool last;
};

typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
		const struct blk_mq_queue_data *);
typedef void (commit_rqs_fn)(struct blk_mq_hw_ctx *);
typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *);
typedef void (put_budget_fn)(struct blk_mq_hw_ctx *);
typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
typedef int (init_request_fn)(struct blk_mq_tag_set *set, struct request *,
		unsigned int, unsigned int);
typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *,
		unsigned int);

typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
		bool);
typedef bool (busy_tag_iter_fn)(struct request *, void *, bool);
typedef int (poll_fn)(struct blk_mq_hw_ctx *);
typedef int (map_queues_fn)(struct blk_mq_tag_set *set);
typedef bool (busy_fn)(struct request_queue *);
typedef void (complete_fn)(struct request *);
typedef void (cleanup_rq_fn)(struct request *);

struct blk_mq_ops {
	/*
	 * Queue request
	 */
	queue_rq_fn		*queue_rq;

	/*
	 * If a driver uses bd->last to judge when to submit requests to
	 * hardware, it must define this function. In case of errors that
	 * make us stop issuing further requests, this hook serves the
	 * purpose of kicking the hardware (which the last request otherwise
	 * would have done).
	 */
	commit_rqs_fn		*commit_rqs;

	/*
	 * Reserve budget before queueing a request. Once .queue_rq has run,
	 * it is the driver's responsibility to release the reserved budget.
	 * Callers must also handle .get_budget failure to avoid I/O
	 * deadlock.
	 */
	get_budget_fn		*get_budget;
	put_budget_fn		*put_budget;

	/*
	 * Called on request timeout
	 */
	timeout_fn		*timeout;

	/*
	 * Called to poll for completion of a specific tag.
	 */
	poll_fn			*poll;

	complete_fn		*complete;

	/*
	 * Called when the block layer side of a hardware queue has been
	 * set up, allowing the driver to allocate/init matching structures.
	 * Ditto for exit/teardown.
	 */
	init_hctx_fn		*init_hctx;
	exit_hctx_fn		*exit_hctx;

	/*
	 * Called for every command allocated by the block layer to allow
	 * the driver to set up driver specific data.
	 *
	 * A tag greater than or equal to queue_depth is used for setting up
	 * a flush request.
	 *
	 * Ditto for exit/teardown.
	 */
	init_request_fn		*init_request;
	exit_request_fn		*exit_request;

	/* Called from inside blk_get_request() */
	void (*initialize_rq_fn)(struct request *rq);

	/*
	 * Called before freeing a request which isn't completed yet,
	 * usually to free driver-private data.
	 */
	cleanup_rq_fn		*cleanup_rq;

	/*
	 * If set, returns whether or not this queue currently is busy
	 */
	busy_fn			*busy;

	map_queues_fn		*map_queues;

#ifdef CONFIG_BLK_DEBUG_FS
	/*
	 * Used by the debugfs implementation to show driver-specific
	 * information about a request.
	 */
	void (*show_rq)(struct seq_file *m, struct request *rq);
#endif
};

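/*
 * Illustrative sketch (not part of this header): a minimal ops table for a
 * driver that only implements the mandatory submission path plus completion
 * and per-request init. The my_* names are hypothetical.
 *
 *	static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
 *					const struct blk_mq_queue_data *bd)
 *	{
 *		struct request *rq = bd->rq;
 *
 *		blk_mq_start_request(rq);
 *		if (my_submit_to_hw(rq))
 *			return BLK_STS_RESOURCE;	// resubmitted later
 *		return BLK_STS_OK;
 *	}
 *
 *	static const struct blk_mq_ops my_mq_ops = {
 *		.queue_rq	= my_queue_rq,
 *		.complete	= my_complete_rq,
 *		.init_request	= my_init_request,
 *	};
 */
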
enum {
	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
	BLK_MQ_F_TAG_SHARED	= 1 << 1,
	BLK_MQ_F_BLOCKING	= 1 << 5,
	BLK_MQ_F_NO_SCHED	= 1 << 6,
	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
	BLK_MQ_F_ALLOC_POLICY_BITS = 1,

	BLK_MQ_S_STOPPED	= 0,
	BLK_MQ_S_TAG_ACTIVE	= 1,
	BLK_MQ_S_SCHED_RESTART	= 2,

	BLK_MQ_MAX_DEPTH	= 10240,

	BLK_MQ_CPU_WORK_BATCH	= 8,
};
#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
	((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
		((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1))
#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \
	((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)

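/*
 * Illustrative note: these macros pack a tag allocation policy into the
 * BLK_MQ_F_* flag word and extract it again, e.g.:
 *
 *	set->flags = BLK_MQ_F_SHOULD_MERGE |
 *		     BLK_ALLOC_POLICY_TO_MQ_FLAG(BLK_TAG_ALLOC_RR);
 *	// BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags) == BLK_TAG_ALLOC_RR
 */
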
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
						  struct request_queue *q,
						  bool elevator_init);
struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
						const struct blk_mq_ops *ops,
						unsigned int queue_depth,
						unsigned int set_flags);
void blk_mq_unregister_dev(struct device *, struct request_queue *);

int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set);
void blk_mq_free_tag_set(struct blk_mq_tag_set *set);

void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);

void blk_mq_free_request(struct request *rq);
bool blk_mq_can_queue(struct blk_mq_hw_ctx *);

bool blk_mq_queue_inflight(struct request_queue *q);

enum {
	/* return when out of requests */
	BLK_MQ_REQ_NOWAIT	= (__force blk_mq_req_flags_t)(1 << 0),
	/* allocate from reserved pool */
	BLK_MQ_REQ_RESERVED	= (__force blk_mq_req_flags_t)(1 << 1),
	/* allocate internal/sched tag */
	BLK_MQ_REQ_INTERNAL	= (__force blk_mq_req_flags_t)(1 << 2),
	/* set RQF_PREEMPT */
	BLK_MQ_REQ_PREEMPT	= (__force blk_mq_req_flags_t)(1 << 3),
};

struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
		blk_mq_req_flags_t flags);
struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
		unsigned int op, blk_mq_req_flags_t flags,
		unsigned int hctx_idx);
struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);

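/*
 * Illustrative sketch (not part of this header): allocating a reserved
 * request without sleeping, e.g. for an error-handling command:
 *
 *	struct request *rq;
 *
 *	rq = blk_mq_alloc_request(q, REQ_OP_DRV_OUT,
 *				  BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
 *	if (IS_ERR(rq))
 *		return PTR_ERR(rq);
 *	...
 *	blk_mq_free_request(rq);
 */
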
enum {
	BLK_MQ_UNIQUE_TAG_BITS = 16,
	BLK_MQ_UNIQUE_TAG_MASK = (1 << BLK_MQ_UNIQUE_TAG_BITS) - 1,
};

u32 blk_mq_unique_tag(struct request *rq);

static inline u16 blk_mq_unique_tag_to_hwq(u32 unique_tag)
{
	return unique_tag >> BLK_MQ_UNIQUE_TAG_BITS;
}

static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
{
	return unique_tag & BLK_MQ_UNIQUE_TAG_MASK;
}

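/*
 * Illustrative note: as the helpers above show, blk_mq_unique_tag() packs
 * the hardware queue index into the upper 16 bits and the per-queue tag
 * into the lower 16 bits:
 *
 *	u32 utag = blk_mq_unique_tag(rq);
 *	u16 hwq  = blk_mq_unique_tag_to_hwq(utag);
 *	u16 tag  = blk_mq_unique_tag_to_tag(utag);
 */
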
int blk_mq_request_started(struct request *rq);
int blk_mq_request_completed(struct request *rq);
void blk_mq_start_request(struct request *rq);
void blk_mq_end_request(struct request *rq, blk_status_t error);
void __blk_mq_end_request(struct request *rq, blk_status_t error);

void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
void blk_mq_kick_requeue_list(struct request_queue *q);
void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
bool blk_mq_complete_request(struct request *rq);
bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
			   struct bio *bio, unsigned int nr_segs);
bool blk_mq_queue_stopped(struct request_queue *q);
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_stop_hw_queues(struct request_queue *q);
void blk_mq_start_hw_queues(struct request_queue *q);
void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
void blk_mq_quiesce_queue(struct request_queue *q);
void blk_mq_unquiesce_queue(struct request_queue *q);
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_run_hw_queues(struct request_queue *q, bool async);
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
		busy_tag_iter_fn *fn, void *priv);
void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset);
void blk_mq_freeze_queue(struct request_queue *q);
void blk_mq_unfreeze_queue(struct request_queue *q);
void blk_freeze_queue_start(struct request_queue *q);
void blk_mq_freeze_queue_wait(struct request_queue *q);
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
				     unsigned long timeout);

int blk_mq_map_queues(struct blk_mq_queue_map *qmap);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);

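/*
 * Illustrative sketch (not part of this header): blk_mq_freeze_queue() and
 * blk_mq_unfreeze_queue() bracket configuration changes that must not race
 * with I/O; my_reconfigure_hw is a hypothetical driver function.
 *
 *	blk_mq_freeze_queue(q);
 *	my_reconfigure_hw(dev);
 *	blk_mq_unfreeze_queue(q);
 *
 * blk_mq_update_nr_hw_queues() performs such freezing internally for every
 * request queue sharing the tag set.
 */
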
void blk_mq_quiesce_queue_nowait(struct request_queue *q);

unsigned int blk_mq_rq_cpu(struct request *rq);

/*
 * Driver command data is immediately after the request. So subtract request
 * size to get back to the original request, add request size to get the PDU.
 */
static inline struct request *blk_mq_rq_from_pdu(void *pdu)
{
	return pdu - sizeof(struct request);
}
static inline void *blk_mq_rq_to_pdu(struct request *rq)
{
	return rq + 1;
}

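/*
 * Illustrative sketch (not part of this header): a driver that sets
 * tag_set->cmd_size = sizeof(struct my_cmd) can reach its per-request PDU
 * with blk_mq_rq_to_pdu(); struct my_cmd is a hypothetical type.
 *
 *	struct my_cmd {
 *		u32 opcode;
 *	};
 *
 *	static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
 *					const struct blk_mq_queue_data *bd)
 *	{
 *		struct my_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
 *		...
 *	}
 *
 * The completion path can recover the request from a PDU with
 * blk_mq_rq_from_pdu(cmd).
 */
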
#define queue_for_each_hw_ctx(q, hctx, i)				\
	for ((i) = 0; (i) < (q)->nr_hw_queues &&			\
	     ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++)

#define hctx_for_each_ctx(hctx, ctx, i)					\
	for ((i) = 0; (i) < (hctx)->nr_ctx &&				\
	     ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++)

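/*
 * Illustrative sketch (not part of this header): iterating over every
 * hardware queue of a request queue:
 *
 *	struct blk_mq_hw_ctx *hctx;
 *	int i;
 *
 *	queue_for_each_hw_ctx(q, hctx, i)
 *		blk_mq_run_hw_queue(hctx, true);
 */
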
static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx,
		struct request *rq)
{
	if (rq->tag != -1)
		return rq->tag | (hctx->queue_num << BLK_QC_T_SHIFT);

	return rq->internal_tag | (hctx->queue_num << BLK_QC_T_SHIFT) |
			BLK_QC_T_INTERNAL;
}

static inline void blk_mq_cleanup_rq(struct request *rq)
{
	if (rq->q->mq_ops->cleanup_rq)
		rq->q->mq_ops->cleanup_rq(rq);
}

#endif