/* SPDX-License-Identifier: GPL-2.0 */
#ifndef BLK_MQ_H
#define BLK_MQ_H

#include <linux/blkdev.h>
#include <linux/sbitmap.h>
#include <linux/srcu.h>
#include <linux/lockdep.h>
#include <linux/scatterlist.h>
#include <linux/prefetch.h>

struct blk_flush_queue;

#define BLKDEV_MIN_RQ		4
#define BLKDEV_DEFAULT_RQ	128

typedef void (rq_end_io_fn)(struct request *, blk_status_t);

typedef __u32 __bitwise req_flags_t;
/* the drive may already have started this one */
#define RQF_STARTED		((__force req_flags_t)(1 << 1))
/* may not be passed by the I/O scheduler */
#define RQF_SOFTBARRIER		((__force req_flags_t)(1 << 3))
/* request for flush sequence */
#define RQF_FLUSH_SEQ		((__force req_flags_t)(1 << 4))
/* merge of different types, fail separately */
#define RQF_MIXED_MERGE		((__force req_flags_t)(1 << 5))
/* track inflight for MQ */
#define RQF_MQ_INFLIGHT		((__force req_flags_t)(1 << 6))
/* don't call prep for this one */
#define RQF_DONTPREP		((__force req_flags_t)(1 << 7))
/* vaguely specified driver internal error. Ignored by the block layer */
#define RQF_FAILED		((__force req_flags_t)(1 << 10))
/* don't warn about errors */
#define RQF_QUIET		((__force req_flags_t)(1 << 11))
/* elevator private data attached */
#define RQF_ELVPRIV		((__force req_flags_t)(1 << 12))
/* account into disk and partition IO statistics */
#define RQF_IO_STAT		((__force req_flags_t)(1 << 13))
/* runtime pm request */
#define RQF_PM			((__force req_flags_t)(1 << 15))
/* on IO scheduler merge hash */
#define RQF_HASHED		((__force req_flags_t)(1 << 16))
/* track IO completion time */
#define RQF_STATS		((__force req_flags_t)(1 << 17))
/* Look at ->special_vec for the actual data payload instead of the
   bio chain. */
#define RQF_SPECIAL_PAYLOAD	((__force req_flags_t)(1 << 18))
/* The per-zone write lock is held for this request */
#define RQF_ZONE_WRITE_LOCKED	((__force req_flags_t)(1 << 19))
/* already slept for hybrid poll */
#define RQF_MQ_POLL_SLEPT	((__force req_flags_t)(1 << 20))
/* ->timeout has been called, don't expire again */
#define RQF_TIMED_OUT		((__force req_flags_t)(1 << 21))
/* queue has elevator attached */
#define RQF_ELV			((__force req_flags_t)(1 << 22))

/* flags that prevent us from merging requests: */
#define RQF_NOMERGE_FLAGS \
	(RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
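
/*
 * Illustrative sketch (not part of this header): a driver that keeps its
 * prepared command across requeues typically sets RQF_DONTPREP after a
 * successful prepare and clears it when it tears the command down, e.g.:
 *
 *	if (!(rq->rq_flags & RQF_DONTPREP)) {
 *		ret = mydrv_prep_rq(rq);	// hypothetical driver helper
 *		if (ret)
 *			return ret;
 *		rq->rq_flags |= RQF_DONTPREP;
 *	}
 */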
/*
 * Try to put the fields that are referenced together in the same cacheline.
 *
 * If you modify this structure, make sure to update blk_rq_init() and
 * especially blk_mq_rq_ctx_init() to take care of the added fields.
 */
struct request {
	struct request_queue *q;
	struct blk_mq_ctx *mq_ctx;
	struct blk_mq_hw_ctx *mq_hctx;

	unsigned int cmd_flags;		/* op and common flags */
	/* the following two fields are internal, NEVER access directly */
	unsigned int __data_len;	/* total data len */
	sector_t __sector;		/* sector cursor */
	struct list_head queuelist;
	struct request *rq_next;

	struct gendisk *rq_disk;
	struct block_device *part;
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
	/* Time that the first bio started allocating this request. */
	u64 alloc_time_ns;
#endif
	/* Time that this request was allocated for this IO. */
	u64 start_time_ns;
	/* Time that I/O was submitted to the device. */
	u64 io_start_time_ns;

#ifdef CONFIG_BLK_WBT
	unsigned short wbt_flags;
#endif
	/*
	 * rq sectors used for blk stats. It has the same value as
	 * blk_rq_sectors(rq), except that it is never zeroed by completion.
	 */
	unsigned short stats_sectors;
	/*
	 * Number of scatter-gather DMA addr+len pairs after
	 * physical address coalescing is performed.
	 */
	unsigned short nr_phys_segments;

#ifdef CONFIG_BLK_DEV_INTEGRITY
	unsigned short nr_integrity_segments;
#endif

#ifdef CONFIG_BLK_INLINE_ENCRYPTION
	struct bio_crypt_ctx *crypt_ctx;
	struct blk_crypto_keyslot *crypt_keyslot;
#endif

	unsigned short write_hint;
	unsigned short ioprio;
	enum mq_rq_state state;

	unsigned long deadline;
	/*
	 * The hash is used inside the scheduler, and killed once the
	 * request reaches the dispatch list. The ipi_list is only used
	 * to queue the request for softirq completion, which is long
	 * after the request has been unhashed (and even removed from
	 * the dispatch list).
	 */
	union {
		struct hlist_node hash;	/* merge hash */
		struct llist_node ipi_list;
	};
	/*
	 * The rb_node is only used inside the io scheduler; requests
	 * are pruned when moved to the dispatch queue. So let the
	 * completion_data share space with the rb_node.
	 */
	union {
		struct rb_node rb_node;	/* sort/lookup */
		struct bio_vec special_vec;
		void *completion_data;
		int error_count; /* for legacy drivers, don't use */
	};
	/*
	 * Three pointers are available for the IO schedulers; if they need
	 * more, they have to allocate it dynamically.  Flush requests are
	 * never put on the IO scheduler, so let the flush fields share
	 * space with the elevator data.
	 */
	struct list_head list;
	rq_end_io_fn *saved_end_io;
	struct __call_single_data csd;

	/* completion callback */
	rq_end_io_fn *end_io;
};
#define req_op(req) \
	((req)->cmd_flags & REQ_OP_MASK)

static inline bool blk_rq_is_passthrough(struct request *rq)
{
	return blk_op_is_passthrough(req_op(rq));
}

static inline unsigned short req_get_ioprio(struct request *req)
{
	return req->ioprio;
}

#define rq_data_dir(rq)		(op_is_write(req_op(rq)) ? WRITE : READ)

#define rq_dma_dir(rq) \
	(op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE)
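
/*
 * Example (illustrative, not part of the API): rq_data_dir() and rq_dma_dir()
 * are the usual way for a driver to pick the transfer direction, e.g. when
 * DMA-mapping a scatterlist built from a request:
 *
 *	int count = dma_map_sg(dev, sgl, nents, rq_dma_dir(rq));
 *	if (!count)
 *		return BLK_STS_RESOURCE;
 *
 * 'dev', 'sgl' and 'nents' stand in for driver-owned state.
 */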
enum blk_eh_timer_return {
	BLK_EH_DONE,		/* driver has completed the command */
	BLK_EH_RESET_TIMER,	/* reset timer and try again */
};
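
/*
 * Sketch of a ->timeout handler (illustrative only; mydrv_*() is a
 * hypothetical driver helper): a driver that can recover by waiting longer
 * returns BLK_EH_RESET_TIMER, otherwise it completes the request itself and
 * returns BLK_EH_DONE:
 *
 *	static enum blk_eh_timer_return mydrv_timeout(struct request *rq,
 *						      bool reserved)
 *	{
 *		if (mydrv_still_in_flight(rq))
 *			return BLK_EH_RESET_TIMER;
 *		blk_mq_complete_request(rq);
 *		return BLK_EH_DONE;
 *	}
 */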
#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */
#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */
/**
 * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware
 * block device
 */
struct blk_mq_hw_ctx {
	struct {
		/** @lock: Protects the dispatch list. */
		spinlock_t lock;
		/**
		 * @dispatch: Used for requests that are ready to be
		 * dispatched to the hardware but for some reason (e.g. lack of
		 * resources) could not be sent to the hardware. As soon as the
		 * driver can send new requests, requests on this list will
		 * be sent first for a fairer dispatch.
		 */
		struct list_head dispatch;
		/**
		 * @state: BLK_MQ_S_* flags. Defines the state of the hw
		 * queue (active, scheduled to restart, stopped).
		 */
		unsigned long state;
	} ____cacheline_aligned_in_smp;
	/**
	 * @run_work: Used for scheduling a hardware queue run at a later time.
	 */
	struct delayed_work run_work;
	/** @cpumask: Map of available CPUs where this hctx can run. */
	cpumask_var_t cpumask;
	/**
	 * @next_cpu: Used by blk_mq_hctx_next_cpu() for round-robin CPU
	 * selection from @cpumask.
	 */
	int next_cpu;
	/**
	 * @next_cpu_batch: Counter of how many works are left in the batch
	 * before changing to the next CPU.
	 */
	int next_cpu_batch;

	/** @flags: BLK_MQ_F_* flags. Defines the behaviour of the queue. */
	unsigned long flags;
	/**
	 * @sched_data: Pointer owned by the IO scheduler attached to a request
	 * queue. It's up to the IO scheduler how to use this pointer.
	 */
	void *sched_data;
	/** @queue: Pointer to the request queue that owns this hardware context. */
	struct request_queue *queue;
	/** @fq: Queue of requests that need to perform a flush operation. */
	struct blk_flush_queue *fq;

	/**
	 * @driver_data: Pointer to data owned by the block driver that created
	 * this hctx.
	 */
	void *driver_data;

	/**
	 * @ctx_map: Bitmap for each software queue. If bit is on, there is a
	 * pending request in that software queue.
	 */
	struct sbitmap ctx_map;

	/** @dispatch_from: Software queue to be used when no scheduler was selected. */
	struct blk_mq_ctx *dispatch_from;
	/**
	 * @dispatch_busy: Number used by blk_mq_update_dispatch_busy() to
	 * decide if the hw_queue is busy using the Exponential Weighted Moving
	 * Average algorithm.
	 */
	unsigned int dispatch_busy;
	/** @type: HCTX_TYPE_* flags. Type of hardware queue. */
	enum hctx_type type;
	/** @nr_ctx: Number of software queues. */
	unsigned short nr_ctx;
	/** @ctxs: Array of software queues. */
	struct blk_mq_ctx **ctxs;

	/** @dispatch_wait_lock: Lock for dispatch_wait queue. */
	spinlock_t dispatch_wait_lock;
	/**
	 * @dispatch_wait: Waitqueue to put requests when there is no tag
	 * available at the moment, to wait for another try in the future.
	 */
	wait_queue_entry_t dispatch_wait;
	/**
	 * @wait_index: Index of next available dispatch_wait queue to insert
	 * requests.
	 */
	atomic_t wait_index;

	/**
	 * @tags: Tags owned by the block driver. A tag in this set is only
	 * assigned when a request is dispatched from a hardware queue.
	 */
	struct blk_mq_tags *tags;
	/**
	 * @sched_tags: Tags owned by the I/O scheduler. If there is an I/O
	 * scheduler associated with a request queue, a tag is assigned when
	 * that request is allocated. Else, this member is not used.
	 */
	struct blk_mq_tags *sched_tags;
	/** @queued: Number of queued requests. */
	unsigned long queued;
	/** @run: Number of dispatched requests. */
	unsigned long run;

	/** @numa_node: NUMA node the storage adapter has been connected to. */
	unsigned int numa_node;
	/** @queue_num: Index of this hardware queue. */
	unsigned int queue_num;

	/**
	 * @nr_active: Number of active requests. Only used when a tag set is
	 * shared across request queues.
	 */
	atomic_t nr_active;

	/** @cpuhp_online: List to store requests when a CPU is going offline. */
	struct hlist_node cpuhp_online;
	/** @cpuhp_dead: List to store requests when a CPU dies. */
	struct hlist_node cpuhp_dead;
	/** @kobj: Kernel object for sysfs. */
	struct kobject kobj;
#ifdef CONFIG_BLK_DEBUG_FS
	/**
	 * @debugfs_dir: debugfs directory for this hardware queue. Named
	 * as cpu<cpu_number>.
	 */
	struct dentry *debugfs_dir;
	/** @sched_debugfs_dir: debugfs directory for the scheduler. */
	struct dentry *sched_debugfs_dir;
#endif

	/**
	 * @hctx_list: if this hctx is not in use, this is an entry in
	 * q->unused_hctx_list.
	 */
	struct list_head hctx_list;

	/**
	 * @srcu: Sleepable RCU. Use as lock when type of the hardware queue is
	 * blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also
	 * blk_mq_hw_ctx_size().
	 */
	struct srcu_struct srcu[];
};
/**
 * struct blk_mq_queue_map - Map software queues to hardware queues
 * @mq_map:       CPU ID to hardware queue index map. This is an array
 *	with nr_cpu_ids elements. Each element has a value in the range
 *	[@queue_offset, @queue_offset + @nr_queues).
 * @nr_queues:    Number of hardware queues to map CPU IDs onto.
 * @queue_offset: First hardware queue to map onto. Used by the PCIe NVMe
 *	driver to map each hardware queue type (enum hctx_type) onto a distinct
 *	set of hardware queues.
 */
struct blk_mq_queue_map {
	unsigned int *mq_map;
	unsigned int nr_queues;
	unsigned int queue_offset;
};
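
/*
 * Worked example (illustrative): with nr_cpu_ids == 4, a driver exposing two
 * default queues plus two poll queues could end up with:
 *
 *	map[HCTX_TYPE_DEFAULT]: queue_offset = 0, nr_queues = 2,
 *				mq_map[] = { 0, 0, 1, 1 }
 *	map[HCTX_TYPE_POLL]:    queue_offset = 2, nr_queues = 2,
 *				mq_map[] = { 2, 2, 3, 3 }
 *
 * i.e. each mq_map[] entry already includes @queue_offset, as described above.
 */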
/**
 * enum hctx_type - Type of hardware queue
 * @HCTX_TYPE_DEFAULT:	All I/O not otherwise accounted for.
 * @HCTX_TYPE_READ:	Just for READ I/O.
 * @HCTX_TYPE_POLL:	Polled I/O of any kind.
 * @HCTX_MAX_TYPES:	Number of types of hctx.
 */
enum hctx_type {
	HCTX_TYPE_DEFAULT,
	HCTX_TYPE_READ,
	HCTX_TYPE_POLL,
	HCTX_MAX_TYPES,
};
/**
 * struct blk_mq_tag_set - tag set that can be shared between request queues
 * @map:	   One or more ctx -> hctx mappings. One map exists for each
 *		   hardware queue type (enum hctx_type) that the driver wishes
 *		   to support. There are no restrictions on maps being of the
 *		   same size, and it's perfectly legal to share maps between
 *		   types.
 * @nr_maps:	   Number of elements in the @map array. A number in the range
 *		   [1, HCTX_MAX_TYPES].
 * @ops:	   Pointers to functions that implement block driver behavior.
 * @nr_hw_queues:  Number of hardware queues supported by the block driver that
 *		   owns this data structure.
 * @queue_depth:   Number of tags per hardware queue, reserved tags included.
 * @reserved_tags: Number of tags to set aside for BLK_MQ_REQ_RESERVED tag
 *		   allocations.
 * @cmd_size:	   Number of additional bytes to allocate per request. The block
 *		   driver owns these additional bytes.
 * @numa_node:	   NUMA node the storage adapter has been connected to.
 * @timeout:	   Request processing timeout in jiffies.
 * @flags:	   Zero or more BLK_MQ_F_* flags.
 * @driver_data:   Pointer to data owned by the block driver that created this
 *		   tag set.
 * @tags:	   Tag sets. One tag set per hardware queue. Has @nr_hw_queues
 *		   elements.
 * @shared_tags:   Shared set of tags. Has @nr_hw_queues elements. If set,
 *		   shared by all @tags.
 * @tag_list_lock: Serializes tag_list accesses.
 * @tag_list:	   List of the request queues that use this tag set. See also
 *		   request_queue.tag_set_list.
 */
struct blk_mq_tag_set {
	struct blk_mq_queue_map map[HCTX_MAX_TYPES];
	unsigned int nr_maps;
	const struct blk_mq_ops *ops;
	unsigned int nr_hw_queues;
	unsigned int queue_depth;
	unsigned int reserved_tags;
	unsigned int cmd_size;
	int numa_node;
	unsigned int timeout;
	unsigned int flags;
	void *driver_data;

	struct blk_mq_tags **tags;

	struct blk_mq_tags *shared_tags;

	struct mutex tag_list_lock;
	struct list_head tag_list;
};
/**
 * struct blk_mq_queue_data - Data about a request inserted in a queue
 * @rq:   Request pointer.
 * @last: If it is the last request in the queue.
 */
struct blk_mq_queue_data {
	struct request *rq;
	bool last;
};
typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
		bool);
typedef bool (busy_tag_iter_fn)(struct request *, void *, bool);
/**
 * struct blk_mq_ops - Callback functions that implement block driver
 * behaviour.
 */
struct blk_mq_ops {
	/**
	 * @queue_rq: Queue a new request from block IO.
	 */
	blk_status_t (*queue_rq)(struct blk_mq_hw_ctx *,
				 const struct blk_mq_queue_data *);
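
	/*
	 * Sketch of a minimal ->queue_rq implementation (illustrative only;
	 * mydrv_submit() is a hypothetical driver helper):
	 *
	 *	static blk_status_t mydrv_queue_rq(struct blk_mq_hw_ctx *hctx,
	 *				const struct blk_mq_queue_data *bd)
	 *	{
	 *		struct request *rq = bd->rq;
	 *
	 *		blk_mq_start_request(rq);
	 *		if (!mydrv_submit(hctx->driver_data, rq))
	 *			return BLK_STS_RESOURCE;
	 *		return BLK_STS_OK;
	 *	}
	 */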
	/**
	 * @commit_rqs: If a driver uses bd->last to judge when to submit
	 * requests to hardware, it must define this function. In case of errors
	 * that make us stop issuing further requests, this hook serves the
	 * purpose of kicking the hardware (which the last request otherwise
	 * wouldn't have done).
	 */
	void (*commit_rqs)(struct blk_mq_hw_ctx *);
	/**
	 * @get_budget: Reserve a budget before queueing a request. Once
	 * .queue_rq is run, it is the driver's responsibility to release the
	 * reserved budget. The failure case of .get_budget also has to be
	 * handled to avoid I/O deadlock.
	 */
	int (*get_budget)(struct request_queue *);

	/**
	 * @put_budget: Release the reserved budget.
	 */
	void (*put_budget)(struct request_queue *, int);

	/** @set_rq_budget_token: store rq's budget token */
	void (*set_rq_budget_token)(struct request *, int);
	/** @get_rq_budget_token: retrieve rq's budget token */
	int (*get_rq_budget_token)(struct request *);
	/** @timeout: Called on request timeout. */
	enum blk_eh_timer_return (*timeout)(struct request *, bool);

	/** @poll: Called to poll for completion of a specific tag. */
	int (*poll)(struct blk_mq_hw_ctx *, struct io_comp_batch *);

	/** @complete: Mark the request as complete. */
	void (*complete)(struct request *);
	/**
	 * @init_hctx: Called when the block layer side of a hardware queue has
	 * been set up, allowing the driver to allocate/init matching
	 * structures.
	 */
	int (*init_hctx)(struct blk_mq_hw_ctx *, void *, unsigned int);
	/** @exit_hctx: Ditto for exit/teardown. */
	void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);

	/**
	 * @init_request: Called for every command allocated by the block layer
	 * to allow the driver to set up driver specific data.
	 *
	 * Tags greater than or equal to queue_depth are used for setting up
	 * the flush request.
	 */
	int (*init_request)(struct blk_mq_tag_set *set, struct request *,
			    unsigned int, unsigned int);
	/** @exit_request: Ditto for exit/teardown. */
	void (*exit_request)(struct blk_mq_tag_set *set, struct request *,
			     unsigned int);
	/** @initialize_rq_fn: Called from inside blk_get_request(). */
	void (*initialize_rq_fn)(struct request *rq);
	/**
	 * @cleanup_rq: Called before freeing a request which isn't completed
	 * yet, and usually for freeing the driver private data.
	 */
	void (*cleanup_rq)(struct request *);

	/** @busy: If set, returns whether or not this queue currently is busy. */
	bool (*busy)(struct request_queue *);
	/**
	 * @map_queues: This allows drivers to specify their own queue mapping
	 * by overriding the setup-time function that builds the mq_map.
	 */
	int (*map_queues)(struct blk_mq_tag_set *set);

#ifdef CONFIG_BLK_DEBUG_FS
	/**
	 * @show_rq: Used by the debugfs implementation to show driver-specific
	 * information about a request.
	 */
	void (*show_rq)(struct seq_file *m, struct request *rq);
#endif
};
enum {
	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
	BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1,
	/*
	 * Set when this device requires underlying blk-mq device for
	 * completing IO:
	 */
	BLK_MQ_F_STACKING	= 1 << 2,
	BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3,
	BLK_MQ_F_BLOCKING	= 1 << 5,
	/* Do not allow an I/O scheduler to be configured. */
	BLK_MQ_F_NO_SCHED	= 1 << 6,
	/*
	 * Select 'none' during queue registration in case of a single hwq
	 * or shared hwqs instead of 'mq-deadline'.
	 */
	BLK_MQ_F_NO_SCHED_BY_DEFAULT	= 1 << 7,
	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
	BLK_MQ_F_ALLOC_POLICY_BITS = 1,

	BLK_MQ_S_STOPPED	= 0,
	BLK_MQ_S_TAG_ACTIVE	= 1,
	BLK_MQ_S_SCHED_RESTART	= 2,

	/* hw queue is inactive after all its CPUs become offline */
	BLK_MQ_S_INACTIVE	= 3,

	BLK_MQ_MAX_DEPTH	= 10240,

	BLK_MQ_CPU_WORK_BATCH	= 8,
};
#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
	((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
		((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1))
#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \
	((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)

#define BLK_MQ_NO_HCTX_IDX	(-1U)
struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
		struct lock_class_key *lkclass);
#define blk_mq_alloc_disk(set, queuedata)			\
({								\
	static struct lock_class_key __key;			\
								\
	__blk_mq_alloc_disk(set, queuedata, &__key);		\
})
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
		struct request_queue *q);
void blk_mq_unregister_dev(struct device *, struct request_queue *);

int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set);
int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set,
		const struct blk_mq_ops *ops, unsigned int queue_depth,
		unsigned int set_flags);
void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
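
/*
 * Typical setup sequence (illustrative sketch; 'mydrv_mq_ops',
 * 'struct mydrv_cmd' and the error label are hypothetical):
 *
 *	set->ops = &mydrv_mq_ops;
 *	set->nr_hw_queues = 1;
 *	set->queue_depth = 64;
 *	set->numa_node = NUMA_NO_NODE;
 *	set->cmd_size = sizeof(struct mydrv_cmd);
 *	set->flags = BLK_MQ_F_SHOULD_MERGE;
 *	ret = blk_mq_alloc_tag_set(set);
 *	if (ret)
 *		goto out;
 *	disk = blk_mq_alloc_disk(set, mydrv);
 *	if (IS_ERR(disk)) {
 *		blk_mq_free_tag_set(set);
 *		goto out;
 *	}
 *
 * On teardown the driver deletes the gendisk and then calls
 * blk_mq_free_tag_set() once no request queue uses the set any more.
 */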
void blk_mq_free_request(struct request *rq);

bool blk_mq_queue_inflight(struct request_queue *q);
enum {
	/* return when out of requests */
	BLK_MQ_REQ_NOWAIT	= (__force blk_mq_req_flags_t)(1 << 0),
	/* allocate from reserved pool */
	BLK_MQ_REQ_RESERVED	= (__force blk_mq_req_flags_t)(1 << 1),
	/* set RQF_PM */
	BLK_MQ_REQ_PM		= (__force blk_mq_req_flags_t)(1 << 2),
};
struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
		blk_mq_req_flags_t flags);
struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
		unsigned int op, blk_mq_req_flags_t flags,
		unsigned int hctx_idx);
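
/*
 * Example (illustrative): allocating and synchronously executing a driver
 * private (passthrough) request, using blk_execute_rq() and
 * blk_mq_free_request() declared elsewhere in this header:
 *
 *	struct request *rq = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
 *	blk_status_t status;
 *
 *	if (IS_ERR(rq))
 *		return PTR_ERR(rq);
 *	// fill in the driver PDU via blk_mq_rq_to_pdu(rq) ...
 *	status = blk_execute_rq(NULL, rq, false);
 *	blk_mq_free_request(rq);
 */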
/* Tag address space map. */
struct blk_mq_tags {
	unsigned int nr_tags;
	unsigned int nr_reserved_tags;

	atomic_t active_queues;

	struct sbitmap_queue bitmap_tags;
	struct sbitmap_queue breserved_tags;

	struct request **rqs;
	struct request **static_rqs;
	struct list_head page_list;

	/*
	 * used to clear request reference in rqs[] before freeing one
	 * request pool
	 */
	spinlock_t lock;
};
static inline struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags,
					       unsigned int tag)
{
	if (tag < tags->nr_tags) {
		prefetch(tags->rqs[tag]);
		return tags->rqs[tag];
	}
	return NULL;
}
enum {
	BLK_MQ_UNIQUE_TAG_BITS = 16,
	BLK_MQ_UNIQUE_TAG_MASK = (1 << BLK_MQ_UNIQUE_TAG_BITS) - 1,
};

u32 blk_mq_unique_tag(struct request *rq);

static inline u16 blk_mq_unique_tag_to_hwq(u32 unique_tag)
{
	return unique_tag >> BLK_MQ_UNIQUE_TAG_BITS;
}

static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
{
	return unique_tag & BLK_MQ_UNIQUE_TAG_MASK;
}
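
/*
 * Example (illustrative): a driver whose hardware reports a single 32-bit
 * completion cookie can use the unique tag helpers to recover the hardware
 * queue index and the per-queue tag:
 *
 *	u32 unique = blk_mq_unique_tag(rq);
 *	u16 hwq = blk_mq_unique_tag_to_hwq(unique);
 *	u16 tag = blk_mq_unique_tag_to_tag(unique);
 */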
/**
 * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
 * @rq: target request.
 */
static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
{
	return READ_ONCE(rq->state);
}

static inline int blk_mq_request_started(struct request *rq)
{
	return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
}

static inline int blk_mq_request_completed(struct request *rq)
{
	return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE;
}
/*
 * Set the state to complete when completing a request from inside ->queue_rq.
 * This is used by drivers that want to ensure special complete actions that
 * need access to the request are called on failure, e.g. by nvme for
 * multipathing.
 */
static inline void blk_mq_set_request_complete(struct request *rq)
{
	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
}
void blk_mq_start_request(struct request *rq);
void blk_mq_end_request(struct request *rq, blk_status_t error);
void __blk_mq_end_request(struct request *rq, blk_status_t error);
void blk_mq_end_request_batch(struct io_comp_batch *ib);
/*
 * Only need start/end time stamping if we have iostat or
 * blk stats enabled, or are using an IO scheduler.
 */
static inline bool blk_mq_need_time_stamp(struct request *rq)
{
	return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV));
}
/*
 * Batched completions only work when there is no I/O error and no special
 * ->end_io handler.
 */
static inline bool blk_mq_add_to_batch(struct request *req,
				       struct io_comp_batch *iob, int ioerror,
				       void (*complete)(struct io_comp_batch *))
{
	if (!iob || (req->rq_flags & RQF_ELV) || req->end_io || ioerror)
		return false;
	if (!iob->complete)
		iob->complete = complete;
	else if (iob->complete != complete)
		return false;
	iob->need_ts |= blk_mq_need_time_stamp(req);
	rq_list_add(&iob->req_list, req);
	return true;
}
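
/*
 * Example (illustrative): in a ->poll handler the batch is supplied by the
 * caller; the driver tries to add each completed request to it and falls back
 * to an individual completion when that is not possible. The driver's batch
 * callback then ends everything in one go:
 *
 *	if (!blk_mq_add_to_batch(rq, iob, rq_error, mydrv_complete_batch))
 *		blk_mq_complete_request(rq);
 *
 *	static void mydrv_complete_batch(struct io_comp_batch *iob)
 *	{
 *		// per-request driver teardown would go here
 *		blk_mq_end_request_batch(iob);
 *	}
 *
 * 'iob', 'rq_error' and mydrv_complete_batch() are hypothetical driver names.
 */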
void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
void blk_mq_kick_requeue_list(struct request_queue *q);
void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
void blk_mq_complete_request(struct request *rq);
bool blk_mq_complete_request_remote(struct request *rq);
bool blk_mq_queue_stopped(struct request_queue *q);
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_stop_hw_queues(struct request_queue *q);
void blk_mq_start_hw_queues(struct request_queue *q);
void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
void blk_mq_quiesce_queue(struct request_queue *q);
void blk_mq_unquiesce_queue(struct request_queue *q);
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_run_hw_queues(struct request_queue *q, bool async);
void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs);
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
		busy_tag_iter_fn *fn, void *priv);
void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset);
void blk_mq_freeze_queue(struct request_queue *q);
void blk_mq_unfreeze_queue(struct request_queue *q);
void blk_freeze_queue_start(struct request_queue *q);
void blk_mq_freeze_queue_wait(struct request_queue *q);
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
				     unsigned long timeout);
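
/*
 * Usage note with a small sketch (illustrative): blk_mq_quiesce_queue() only
 * guarantees that no new ->queue_rq calls are in flight afterwards, while
 * blk_mq_freeze_queue() additionally waits until all allocated requests have
 * completed. A driver changing a setting that ->queue_rq depends on could do:
 *
 *	blk_mq_quiesce_queue(q);
 *	mydrv_update_params(dev);	// hypothetical driver work
 *	blk_mq_unquiesce_queue(q);
 */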
int blk_mq_map_queues(struct blk_mq_queue_map *qmap);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);

void blk_mq_quiesce_queue_nowait(struct request_queue *q);

unsigned int blk_mq_rq_cpu(struct request *rq);
bool __blk_should_fake_timeout(struct request_queue *q);
static inline bool blk_should_fake_timeout(struct request_queue *q)
{
	if (IS_ENABLED(CONFIG_FAIL_IO_TIMEOUT) &&
	    test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags))
		return __blk_should_fake_timeout(q);
	return false;
}
/**
 * blk_mq_rq_from_pdu - cast a PDU to a request
 * @pdu: the PDU (Protocol Data Unit) to be cast
 *
 * Return: request
 *
 * Driver command data is immediately after the request. So subtract the
 * request size to get back to the original request.
 */
static inline struct request *blk_mq_rq_from_pdu(void *pdu)
{
	return pdu - sizeof(struct request);
}
/**
 * blk_mq_rq_to_pdu - cast a request to a PDU
 * @rq: the request to be cast
 *
 * Return: pointer to the PDU
 *
 * Driver command data is immediately after the request. So add the request
 * size to get the PDU.
 */
static inline void *blk_mq_rq_to_pdu(struct request *rq)
{
	return rq + 1;
}
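
/*
 * Example (illustrative): a driver reserves its per-command data by setting
 * cmd_size in its tag set and then gets at it from a request, e.g. in
 * ->queue_rq:
 *
 *	set->cmd_size = sizeof(struct mydrv_cmd);	// at tag set init
 *	...
 *	struct mydrv_cmd *cmd = blk_mq_rq_to_pdu(rq);
 *
 * struct mydrv_cmd is a hypothetical driver type.
 */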
#define queue_for_each_hw_ctx(q, hctx, i)				\
	for ((i) = 0; (i) < (q)->nr_hw_queues &&			\
	     ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++)

#define hctx_for_each_ctx(hctx, ctx, i)					\
	for ((i) = 0; (i) < (hctx)->nr_ctx &&				\
	     ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++)
static inline void blk_mq_cleanup_rq(struct request *rq)
{
	if (rq->q->mq_ops->cleanup_rq)
		rq->q->mq_ops->cleanup_rq(rq);
}
static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio,
		unsigned int nr_segs)
{
	rq->nr_phys_segments = nr_segs;
	rq->__data_len = bio->bi_iter.bi_size;
	rq->bio = rq->biotail = bio;
	rq->ioprio = bio_prio(bio);

	if (bio->bi_bdev)
		rq->rq_disk = bio->bi_bdev->bd_disk;
}
void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx,
		struct lock_class_key *key);

static inline bool rq_is_sync(struct request *rq)
{
	return op_is_sync(rq->cmd_flags);
}
void blk_rq_init(struct request_queue *q, struct request *rq);
void blk_put_request(struct request *rq);
struct request *blk_get_request(struct request_queue *q, unsigned int op,
		blk_mq_req_flags_t flags);
int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
		struct bio_set *bs, gfp_t gfp_mask,
		int (*bio_ctr)(struct bio *, struct bio *, void *), void *data);
void blk_rq_unprep_clone(struct request *rq);
blk_status_t blk_insert_cloned_request(struct request_queue *q,
		struct request *rq);

struct rq_map_data {
	struct page **pages;
	int page_order;
	int nr_entries;
	unsigned long offset;
	int null_mapped;
	int from_user;
};
int blk_rq_map_user(struct request_queue *, struct request *,
		struct rq_map_data *, void __user *, unsigned long, gfp_t);
int blk_rq_map_user_iov(struct request_queue *, struct request *,
		struct rq_map_data *, const struct iov_iter *, gfp_t);
int blk_rq_unmap_user(struct bio *);
int blk_rq_map_kern(struct request_queue *, struct request *, void *,
		unsigned int, gfp_t);
int blk_rq_append_bio(struct request *rq, struct bio *bio);
void blk_execute_rq_nowait(struct gendisk *, struct request *, int,
		rq_end_io_fn *);
blk_status_t blk_execute_rq(struct gendisk *bd_disk, struct request *rq,
		int at_head);
struct req_iterator {
	struct bvec_iter iter;
	struct bio *bio;
};

#define __rq_for_each_bio(_bio, rq)	\
	if ((rq->bio))			\
		for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)

#define rq_for_each_segment(bvl, _rq, _iter)			\
	__rq_for_each_bio(_iter.bio, _rq)			\
		bio_for_each_segment(bvl, _iter.bio, _iter.iter)

#define rq_for_each_bvec(bvl, _rq, _iter)			\
	__rq_for_each_bio(_iter.bio, _rq)			\
		bio_for_each_bvec(bvl, _iter.bio, _iter.iter)

#define rq_iter_last(bvec, _iter)				\
		(_iter.bio->bi_next == NULL &&			\
		 bio_iter_last(bvec, _iter.iter))
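
/*
 * Example (illustrative): walking every segment of a request, e.g. to hand
 * each data chunk to a hypothetical driver helper:
 *
 *	struct req_iterator iter;
 *	struct bio_vec bvec;
 *
 *	rq_for_each_segment(bvec, rq, iter)
 *		mydrv_xfer_bvec(dev, &bvec);	// hypothetical helper
 */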
/*
 * blk_rq_pos()			: the current sector
 * blk_rq_bytes()		: bytes left in the entire request
 * blk_rq_cur_bytes()		: bytes left in the current segment
 * blk_rq_err_bytes()		: bytes left till the next error boundary
 * blk_rq_sectors()		: sectors left in the entire request
 * blk_rq_cur_sectors()		: sectors left in the current segment
 * blk_rq_stats_sectors()	: sectors of the entire request used for stats
 */
static inline sector_t blk_rq_pos(const struct request *rq)
{
	return rq->__sector;
}

static inline unsigned int blk_rq_bytes(const struct request *rq)
{
	return rq->__data_len;
}

static inline int blk_rq_cur_bytes(const struct request *rq)
{
	if (!rq->bio)
		return 0;
	if (!bio_has_data(rq->bio))	/* dataless requests such as discard */
		return rq->bio->bi_iter.bi_size;
	return bio_iovec(rq->bio).bv_len;
}

unsigned int blk_rq_err_bytes(const struct request *rq);

static inline unsigned int blk_rq_sectors(const struct request *rq)
{
	return blk_rq_bytes(rq) >> SECTOR_SHIFT;
}

static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
{
	return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
}

static inline unsigned int blk_rq_stats_sectors(const struct request *rq)
{
	return rq->stats_sectors;
}
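
/*
 * Example (illustrative): a driver building a hardware command from a request
 * typically only needs the starting LBA and the transfer length:
 *
 *	cmd->lba    = blk_rq_pos(rq);		// 512-byte sector units
 *	cmd->nsect  = blk_rq_sectors(rq);
 *	cmd->nbytes = blk_rq_bytes(rq);
 *
 * 'cmd' is a hypothetical driver structure.
 */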
/*
 * Some commands like WRITE SAME have a payload or data transfer size which
 * is different from the size of the request. Any driver that supports such
 * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to
 * calculate the data transfer size.
 */
static inline unsigned int blk_rq_payload_bytes(struct request *rq)
{
	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
		return rq->special_vec.bv_len;
	return blk_rq_bytes(rq);
}
/*
 * Return the first full biovec in the request. The caller needs to check that
 * there are any bvecs before calling this helper.
 */
static inline struct bio_vec req_bvec(struct request *rq)
{
	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
		return rq->special_vec;
	return mp_bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter);
}
static inline unsigned int blk_rq_count_bios(struct request *rq)
{
	unsigned int nr_bios = 0;
	struct bio *bio;

	__rq_for_each_bio(bio, rq)
		nr_bios++;
	return nr_bios;
}

void blk_steal_bios(struct bio_list *list, struct request *rq);
/*
 * Request completion related functions.
 *
 * blk_update_request() completes the given number of bytes and updates
 * the request without completing it.
 */
bool blk_update_request(struct request *rq, blk_status_t error,
			unsigned int nr_bytes);
void blk_abort_request(struct request *);
/*
 * Number of physical segments as sent to the device.
 *
 * Normally this is the number of discontiguous data segments sent by the
 * submitter. But for data-less commands like discard we might have no
 * actual data segments submitted, but the driver might have to add its
 * own special payload. In that case we still return 1 here so that this
 * special payload will be mapped.
 */
static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
{
	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
		return 1;
	return rq->nr_phys_segments;
}
/*
 * Number of discard segments (or ranges) the driver needs to fill in.
 * Each discard bio merged into a request is counted as one segment.
 */
static inline unsigned short blk_rq_nr_discard_segments(struct request *rq)
{
	return max_t(unsigned short, rq->nr_phys_segments, 1);
}
int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
		struct scatterlist *sglist, struct scatterlist **last_sg);
static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq,
		struct scatterlist *sglist)
{
	struct scatterlist *last_sg = NULL;

	return __blk_rq_map_sg(q, rq, sglist, &last_sg);
}
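
/*
 * Example (illustrative): mapping a request for DMA via a preallocated
 * scatterlist (sg table setup and error handling elided):
 *
 *	int nents = blk_rq_map_sg(q, rq, cmd->sgl);
 *	if (nents)
 *		nents = dma_map_sg(dev, cmd->sgl, nents, rq_dma_dir(rq));
 *
 * 'cmd->sgl' and 'dev' stand in for driver-owned state.
 */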
void blk_dump_rq_flags(struct request *, char *);
#ifdef CONFIG_BLK_DEV_ZONED
static inline unsigned int blk_rq_zone_no(struct request *rq)
{
	return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
}

static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
{
	return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
}

bool blk_req_needs_zone_write_lock(struct request *rq);
bool blk_req_zone_write_trylock(struct request *rq);
void __blk_req_zone_write_lock(struct request *rq);
void __blk_req_zone_write_unlock(struct request *rq);

static inline void blk_req_zone_write_lock(struct request *rq)
{
	if (blk_req_needs_zone_write_lock(rq))
		__blk_req_zone_write_lock(rq);
}

static inline void blk_req_zone_write_unlock(struct request *rq)
{
	if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
		__blk_req_zone_write_unlock(rq);
}

static inline bool blk_req_zone_is_write_locked(struct request *rq)
{
	return rq->q->seq_zones_wlock &&
		test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock);
}

static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
{
	if (!blk_req_needs_zone_write_lock(rq))
		return true;
	return !blk_req_zone_is_write_locked(rq);
}
#else /* CONFIG_BLK_DEV_ZONED */
static inline bool blk_req_needs_zone_write_lock(struct request *rq)
{
	return false;
}

static inline void blk_req_zone_write_lock(struct request *rq)
{
}

static inline void blk_req_zone_write_unlock(struct request *rq)
{
}

static inline bool blk_req_zone_is_write_locked(struct request *rq)
{
	return false;
}

static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
{
	return true;
}
#endif /* CONFIG_BLK_DEV_ZONED */
#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
#endif
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
void rq_flush_dcache_pages(struct request *rq);
#else
static inline void rq_flush_dcache_pages(struct request *rq)
{
}
#endif /* ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE */
#endif /* BLK_MQ_H */