/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_BLKDEV_H
#define _LINUX_BLKDEV_H

#include <linux/sched.h>
#include <linux/genhd.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/minmax.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/bio.h>
#include <linux/gfp.h>
#include <linux/rcupdate.h>
#include <linux/percpu-refcount.h>
#include <linux/blkzoned.h>
#include <linux/sbitmap.h>
struct elevator_queue;
struct blk_flush_queue;
struct blk_queue_stats;
struct blk_stat_callback;
struct blk_crypto_profile;

/* Must be consistent with blk_mq_poll_stats_bkt() */
#define BLK_MQ_POLL_STATS_BKTS 16

/* Doing classic polling */
#define BLK_MQ_POLL_CLASSIC -1
/*
 * Maximum number of blkcg policies allowed to be registered concurrently.
 * Defined here to simplify include dependency.
 */
#define BLKCG_MAX_POLS		6
static inline int blk_validate_block_size(unsigned int bsize)
{
	if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
		return -EINVAL;

	return 0;
}
static inline bool blk_op_is_passthrough(unsigned int op)
{
	op &= REQ_OP_MASK;
	return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
}
/*
 * Zoned block device models (zoned limit).
 *
 * Note: This needs to be ordered from the least to the most severe
 * restrictions for the inheritance in blk_stack_limits() to work.
 */
enum blk_zoned_model {
	BLK_ZONED_NONE = 0,	/* Regular block device */
	BLK_ZONED_HA,		/* Host-aware zoned block device */
	BLK_ZONED_HM,		/* Host-managed zoned block device */
};
/*
 * BLK_BOUNCE_NONE:	never bounce (default)
 * BLK_BOUNCE_HIGH:	bounce all highmem pages
 */
enum blk_bounce {
	BLK_BOUNCE_NONE,
	BLK_BOUNCE_HIGH,
};

struct queue_limits {
	enum blk_bounce		bounce;
	unsigned long		seg_boundary_mask;
	unsigned long		virt_boundary_mask;

	unsigned int		max_hw_sectors;
	unsigned int		max_dev_sectors;
	unsigned int		chunk_sectors;
	unsigned int		max_sectors;
	unsigned int		max_segment_size;
	unsigned int		physical_block_size;
	unsigned int		logical_block_size;
	unsigned int		alignment_offset;
	unsigned int		io_min;
	unsigned int		io_opt;
	unsigned int		max_discard_sectors;
	unsigned int		max_hw_discard_sectors;
	unsigned int		max_write_same_sectors;
	unsigned int		max_write_zeroes_sectors;
	unsigned int		max_zone_append_sectors;
	unsigned int		discard_granularity;
	unsigned int		discard_alignment;
	unsigned int		zone_write_granularity;

	unsigned short		max_segments;
	unsigned short		max_integrity_segments;
	unsigned short		max_discard_segments;

	unsigned char		misaligned;
	unsigned char		discard_misaligned;
	unsigned char		raid_partial_stripes_expensive;
	enum blk_zoned_model	zoned;
};
typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,
			       void *data);

void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model);
#ifdef CONFIG_BLK_DEV_ZONED

#define BLK_ALL_ZONES  ((unsigned int)-1)
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
			unsigned int nr_zones, report_zones_cb cb, void *data);
unsigned int blkdev_nr_zones(struct gendisk *disk);
extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
			    sector_t sectors, sector_t nr_sectors,
			    gfp_t gfp_mask);
int blk_revalidate_disk_zones(struct gendisk *disk,
			      void (*update_driver_data)(struct gendisk *disk));

extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
				     unsigned int cmd, unsigned long arg);
extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
				  unsigned int cmd, unsigned long arg);
#else /* CONFIG_BLK_DEV_ZONED */

static inline unsigned int blkdev_nr_zones(struct gendisk *disk)
{
	return 0;
}

static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
					    fmode_t mode, unsigned int cmd,
					    unsigned long arg)
{
	return -ENOTTY;
}

static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
					 fmode_t mode, unsigned int cmd,
					 unsigned long arg)
{
	return -ENOTTY;
}

#endif /* CONFIG_BLK_DEV_ZONED */
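
/*
 * Usage sketch (illustrative, not part of the header): to walk every zone of
 * a zoned block device, pass BLK_ALL_ZONES together with a report_zones_cb.
 * The callback and counter names below are made up for the example.
 *
 *	static int count_seq_zones_cb(struct blk_zone *zone, unsigned int idx,
 *				      void *data)
 *	{
 *		unsigned int *nr_seq = data;
 *
 *		if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL)
 *			(*nr_seq)++;
 *		return 0;
 *	}
 *
 *	unsigned int nr_seq = 0;
 *	int ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES,
 *				      count_seq_zones_cb, &nr_seq);
 *
 * A negative return value is an error; otherwise it is the number of zones
 * that were reported to the callback.
 */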
/*
 * Independent access ranges: struct blk_independent_access_range describes
 * a range of contiguous sectors that can be accessed using device command
 * execution resources that are independent from the resources used for
 * other access ranges. This is typically found with single-LUN multi-actuator
 * HDDs where each access range is served by a different set of heads.
 * The set of independent ranges supported by the device is defined using
 * struct blk_independent_access_ranges. The independent ranges must not
 * overlap and must include all sectors within the disk capacity (no sector
 * holes allowed).
 * For a device with multiple ranges, requests targeting sectors in different
 * ranges can be executed in parallel. A request can straddle an access range
 * boundary.
 */
struct blk_independent_access_range {
	struct kobject		kobj;
	struct request_queue	*queue;
	sector_t		sector;
	sector_t		nr_sectors;
};

struct blk_independent_access_ranges {
	struct kobject				kobj;
	bool					sysfs_registered;
	unsigned int				nr_ia_ranges;
	struct blk_independent_access_range	ia_range[];
};
struct request_queue {
	struct request		*last_merge;
	struct elevator_queue	*elevator;

	struct percpu_ref	q_usage_counter;

	struct blk_queue_stats	*stats;
	struct rq_qos		*rq_qos;

	const struct blk_mq_ops	*mq_ops;

	/* sw queues */
	struct blk_mq_ctx __percpu	*queue_ctx;

	unsigned int		queue_depth;

	/* hw dispatch queues */
	struct blk_mq_hw_ctx	**queue_hw_ctx;
	unsigned int		nr_hw_queues;

	/*
	 * The queue owner gets to use this for whatever they like.
	 * ll_rw_blk doesn't touch it.
	 */
	void			*queuedata;

	/*
	 * various queue flags, see QUEUE_* below
	 */
	unsigned long		queue_flags;

	/*
	 * Number of contexts that have called blk_set_pm_only(). If this
	 * counter is above zero then only RQF_PM requests are processed.
	 */
	atomic_t		pm_only;

	/*
	 * ida allocated id for this queue.  Used to index queues from
	 * ioctx.
	 */
	int			id;

	spinlock_t		queue_lock;

	struct gendisk		*disk;

	struct kobject		*mq_kobj;

#ifdef CONFIG_BLK_DEV_INTEGRITY
	struct blk_integrity	integrity;
#endif	/* CONFIG_BLK_DEV_INTEGRITY */

	enum rpm_status		rpm_status;

	/*
	 * queue settings
	 */
	unsigned long		nr_requests;	/* Max # of requests */

	unsigned int		dma_pad_mask;
	unsigned int		dma_alignment;

#ifdef CONFIG_BLK_INLINE_ENCRYPTION
	struct blk_crypto_profile *crypto_profile;
#endif

	unsigned int		rq_timeout;

	struct blk_stat_callback	*poll_cb;
	struct blk_rq_stat	poll_stat[BLK_MQ_POLL_STATS_BKTS];

	struct timer_list	timeout;
	struct work_struct	timeout_work;

	atomic_t		nr_active_requests_shared_tags;

	struct blk_mq_tags	*sched_shared_tags;

	struct list_head	icq_list;
#ifdef CONFIG_BLK_CGROUP
	DECLARE_BITMAP		(blkcg_pols, BLKCG_MAX_POLS);
	struct blkcg_gq		*root_blkg;
	struct list_head	blkg_list;
#endif

	struct queue_limits	limits;

	unsigned int		required_elevator_features;

#ifdef CONFIG_BLK_DEV_ZONED
	/*
	 * Zoned block device information for request dispatch control.
	 * nr_zones is the total number of zones of the device. This is always
	 * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones
	 * bits which indicates if a zone is conventional (bit set) or
	 * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones
	 * bits which indicates if a zone is write locked, that is, if a write
	 * request targeting the zone was dispatched. All three fields are
	 * initialized by the low level device driver (e.g. scsi/sd.c).
	 * Stacking drivers (device mappers) may or may not initialize
	 * these fields.
	 *
	 * Reads of this information must be protected with blk_queue_enter() /
	 * blk_queue_exit(). Modifying this information is only allowed while
	 * no requests are being processed. See also blk_mq_freeze_queue() and
	 * blk_mq_unfreeze_queue().
	 */
	unsigned int		nr_zones;
	unsigned long		*conv_zones_bitmap;
	unsigned long		*seq_zones_wlock;
	unsigned int		max_open_zones;
	unsigned int		max_active_zones;
#endif /* CONFIG_BLK_DEV_ZONED */

	struct mutex		debugfs_mutex;
#ifdef CONFIG_BLK_DEV_IO_TRACE
	struct blk_trace __rcu	*blk_trace;
#endif
	/*
	 * for flush operations
	 */
	struct blk_flush_queue	*fq;

	struct list_head	requeue_list;
	spinlock_t		requeue_lock;
	struct delayed_work	requeue_work;

	struct mutex		sysfs_lock;
	struct mutex		sysfs_dir_lock;

	/*
	 * for reusing dead hctx instance in case of updating
	 * nr_hw_queues
	 */
	struct list_head	unused_hctx_list;
	spinlock_t		unused_hctx_lock;

#ifdef CONFIG_BLK_DEV_THROTTLING
	/* Throttle data */
	struct throtl_data	*td;
#endif
	struct rcu_head		rcu_head;
	wait_queue_head_t	mq_freeze_wq;
	/*
	 * Protect concurrent access to q_usage_counter by
	 * percpu_ref_kill() and percpu_ref_reinit().
	 */
	struct mutex		mq_freeze_lock;

	struct blk_mq_tag_set	*tag_set;
	struct list_head	tag_set_list;
	struct bio_set		bio_split;

	struct dentry		*debugfs_dir;

#ifdef CONFIG_BLK_DEBUG_FS
	struct dentry		*sched_debugfs_dir;
	struct dentry		*rqos_debugfs_dir;
#endif

	bool			mq_sysfs_init_done;

#define BLK_MAX_WRITE_HINTS	5
	u64			write_hints[BLK_MAX_WRITE_HINTS];

	/*
	 * Independent sector access ranges. This is always NULL for
	 * devices that do not have multiple independent access ranges.
	 */
	struct blk_independent_access_ranges *ia_ranges;
};
/* Keep blk_queue_flag_name[] in sync with the definitions below */
#define QUEUE_FLAG_STOPPED	0	/* queue is stopped */
#define QUEUE_FLAG_DYING	1	/* queue being torn down */
#define QUEUE_FLAG_NOMERGES	3	/* disable merge attempts */
#define QUEUE_FLAG_SAME_COMP	4	/* complete on same CPU-group */
#define QUEUE_FLAG_FAIL_IO	5	/* fake timeout */
#define QUEUE_FLAG_NONROT	6	/* non-rotational device (SSD) */
#define QUEUE_FLAG_VIRT		QUEUE_FLAG_NONROT /* paravirt device */
#define QUEUE_FLAG_IO_STAT	7	/* do disk/partitions IO accounting */
#define QUEUE_FLAG_DISCARD	8	/* supports DISCARD */
#define QUEUE_FLAG_NOXMERGES	9	/* No extended merges */
#define QUEUE_FLAG_ADD_RANDOM	10	/* Contributes to random pool */
#define QUEUE_FLAG_SECERASE	11	/* supports secure erase */
#define QUEUE_FLAG_SAME_FORCE	12	/* force complete on same CPU */
#define QUEUE_FLAG_DEAD		13	/* queue tear-down finished */
#define QUEUE_FLAG_INIT_DONE	14	/* queue is initialized */
#define QUEUE_FLAG_STABLE_WRITES 15	/* don't modify blks until WB is done */
#define QUEUE_FLAG_POLL		16	/* IO polling enabled if set */
#define QUEUE_FLAG_WC		17	/* Write back caching */
#define QUEUE_FLAG_FUA		18	/* device supports FUA writes */
#define QUEUE_FLAG_DAX		19	/* device supports DAX */
#define QUEUE_FLAG_STATS	20	/* track IO start and completion times */
#define QUEUE_FLAG_POLL_STATS	21	/* collecting stats for hybrid polling */
#define QUEUE_FLAG_REGISTERED	22	/* queue has been registered to a disk */
#define QUEUE_FLAG_QUIESCED	24	/* queue has been quiesced */
#define QUEUE_FLAG_PCI_P2PDMA	25	/* device supports PCI p2p requests */
#define QUEUE_FLAG_ZONE_RESETALL 26	/* supports Zone Reset All */
#define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
#define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
#define QUEUE_FLAG_NOWAIT	29	/* device supports NOWAIT */

#define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
				 (1 << QUEUE_FLAG_SAME_COMP) |		\
				 (1 << QUEUE_FLAG_NOWAIT))
void blk_queue_flag_set(unsigned int flag, struct request_queue *q);
void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
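
/*
 * Usage sketch (illustrative only): drivers typically use these helpers at
 * probe time to describe the device, e.g. for a non-rotational device that
 * should not feed the entropy pool:
 *
 *	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
 *	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
 *
 * where "q" stands for the driver's request queue.
 */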
#define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
#define blk_queue_dying(q)	test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
#define blk_queue_dead(q)	test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
#define blk_queue_init_done(q)	test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
#define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
#define blk_queue_noxmerges(q)	\
	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
#define blk_queue_nonrot(q)	test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
#define blk_queue_stable_writes(q) \
	test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags)
#define blk_queue_io_stat(q)	test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
#define blk_queue_add_random(q)	test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
#define blk_queue_discard(q)	test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
#define blk_queue_zone_resetall(q)	\
	test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags)
#define blk_queue_secure_erase(q) \
	(test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
#define blk_queue_dax(q)	test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
#define blk_queue_pci_p2pdma(q)	\
	test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags)
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
#define blk_queue_rq_alloc_time(q)	\
	test_bit(QUEUE_FLAG_RQ_ALLOC_TIME, &(q)->queue_flags)
#else
#define blk_queue_rq_alloc_time(q)	false
#endif

#define blk_noretry_request(rq) \
	((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
			     REQ_FAILFAST_DRIVER))
#define blk_queue_quiesced(q)	test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
#define blk_queue_pm_only(q)	atomic_read(&(q)->pm_only)
#define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
#define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
#define blk_queue_nowait(q)	test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
extern void blk_set_pm_only(struct request_queue *q);
extern void blk_clear_pm_only(struct request_queue *q);

#define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)

#define dma_map_bvec(dev, bv, dir, attrs) \
	dma_map_page_attrs(dev, (bv)->bv_page, (bv)->bv_offset, (bv)->bv_len, \
	(dir), (attrs))
static inline bool queue_is_mq(struct request_queue *q)
{
	return q->mq_ops;
}

#ifdef CONFIG_PM
static inline enum rpm_status queue_rpm_status(struct request_queue *q)
{
	return q->rpm_status;
}
#else
static inline enum rpm_status queue_rpm_status(struct request_queue *q)
{
	return RPM_ACTIVE;
}
#endif
static inline enum blk_zoned_model
blk_queue_zoned_model(struct request_queue *q)
{
	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
		return q->limits.zoned;
	return BLK_ZONED_NONE;
}

static inline bool blk_queue_is_zoned(struct request_queue *q)
{
	switch (blk_queue_zoned_model(q)) {
	case BLK_ZONED_HA:
	case BLK_ZONED_HM:
		return true;
	default:
		return false;
	}
}

static inline sector_t blk_queue_zone_sectors(struct request_queue *q)
{
	return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
}
#ifdef CONFIG_BLK_DEV_ZONED
static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
{
	return blk_queue_is_zoned(q) ? q->nr_zones : 0;
}

static inline unsigned int blk_queue_zone_no(struct request_queue *q,
					     sector_t sector)
{
	if (!blk_queue_is_zoned(q))
		return 0;
	return sector >> ilog2(q->limits.chunk_sectors);
}

static inline bool blk_queue_zone_is_seq(struct request_queue *q,
					 sector_t sector)
{
	if (!blk_queue_is_zoned(q))
		return false;
	if (!q->conv_zones_bitmap)
		return true;
	return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
}

static inline void blk_queue_max_open_zones(struct request_queue *q,
		unsigned int max_open_zones)
{
	q->max_open_zones = max_open_zones;
}

static inline unsigned int queue_max_open_zones(const struct request_queue *q)
{
	return q->max_open_zones;
}

static inline void blk_queue_max_active_zones(struct request_queue *q,
		unsigned int max_active_zones)
{
	q->max_active_zones = max_active_zones;
}

static inline unsigned int queue_max_active_zones(const struct request_queue *q)
{
	return q->max_active_zones;
}
#else /* CONFIG_BLK_DEV_ZONED */
static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
{
	return 0;
}

static inline bool blk_queue_zone_is_seq(struct request_queue *q,
					 sector_t sector)
{
	return false;
}

static inline unsigned int blk_queue_zone_no(struct request_queue *q,
					     sector_t sector)
{
	return 0;
}

static inline unsigned int queue_max_open_zones(const struct request_queue *q)
{
	return 0;
}

static inline unsigned int queue_max_active_zones(const struct request_queue *q)
{
	return 0;
}
#endif /* CONFIG_BLK_DEV_ZONED */
static inline unsigned int blk_queue_depth(struct request_queue *q)
{
	if (q->queue_depth)
		return q->queue_depth;

	return q->nr_requests;
}
/*
 * default timeout for SG_IO if none specified
 */
#define BLK_DEFAULT_SG_TIMEOUT	(60 * HZ)
#define BLK_MIN_SG_TIMEOUT	(7 * HZ)

/* This should not be used directly - use rq_for_each_segment */
#define for_each_bio(_bio)		\
	for (; _bio; _bio = _bio->bi_next)
extern int blk_register_queue(struct gendisk *disk);
extern void blk_unregister_queue(struct gendisk *disk);
void submit_bio_noacct(struct bio *bio);

extern int blk_lld_busy(struct request_queue *q);
extern void blk_queue_split(struct bio **);
extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags);
extern void blk_queue_exit(struct request_queue *q);
extern void blk_sync_queue(struct request_queue *q);

/* Helper to convert REQ_OP_XXX to its string format XXX */
extern const char *blk_op_str(unsigned int op);

int blk_status_to_errno(blk_status_t status);
blk_status_t errno_to_blk_status(int errno);
/* only poll the hardware once, don't continue until a completion was found */
#define BLK_POLL_ONESHOT		(1 << 0)
/* do not sleep to wait for the expected completion time */
#define BLK_POLL_NOSLEEP		(1 << 1)
int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags);
int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob,
		    unsigned int flags);
static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
{
	return bdev->bd_queue;	/* this is never NULL */
}
#ifdef CONFIG_BLK_DEV_ZONED

/* Helper to convert BLK_ZONE_COND_XXX to its string format XXX */
const char *blk_zone_cond_str(enum blk_zone_cond zone_cond);

static inline unsigned int bio_zone_no(struct bio *bio)
{
	return blk_queue_zone_no(bdev_get_queue(bio->bi_bdev),
				 bio->bi_iter.bi_sector);
}

static inline unsigned int bio_zone_is_seq(struct bio *bio)
{
	return blk_queue_zone_is_seq(bdev_get_queue(bio->bi_bdev),
				     bio->bi_iter.bi_sector);
}
#endif /* CONFIG_BLK_DEV_ZONED */
static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
						     int op)
{
	if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
		return min(q->limits.max_discard_sectors,
			   UINT_MAX >> SECTOR_SHIFT);

	if (unlikely(op == REQ_OP_WRITE_SAME))
		return q->limits.max_write_same_sectors;

	if (unlikely(op == REQ_OP_WRITE_ZEROES))
		return q->limits.max_write_zeroes_sectors;

	return q->limits.max_sectors;
}
/*
 * Return maximum size of a request at given offset. Only valid for
 * file system requests.
 */
static inline unsigned int blk_max_size_offset(struct request_queue *q,
					       sector_t offset,
					       unsigned int chunk_sectors)
{
	if (!chunk_sectors) {
		if (q->limits.chunk_sectors)
			chunk_sectors = q->limits.chunk_sectors;
		else
			return q->limits.max_sectors;
	}

	if (likely(is_power_of_2(chunk_sectors)))
		chunk_sectors -= offset & (chunk_sectors - 1);
	else
		chunk_sectors -= sector_div(offset, chunk_sectors);

	return min(q->limits.max_sectors, chunk_sectors);
}
/*
 * Access functions for manipulating queue properties
 */
extern void blk_cleanup_queue(struct request_queue *);
void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce limit);
extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
extern void blk_queue_max_segments(struct request_queue *, unsigned short);
extern void blk_queue_max_discard_segments(struct request_queue *,
		unsigned short);
extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
extern void blk_queue_max_discard_sectors(struct request_queue *q,
		unsigned int max_discard_sectors);
extern void blk_queue_max_write_same_sectors(struct request_queue *q,
		unsigned int max_write_same_sectors);
extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
		unsigned int max_write_zeroes_sectors);
extern void blk_queue_logical_block_size(struct request_queue *, unsigned int);
extern void blk_queue_max_zone_append_sectors(struct request_queue *q,
		unsigned int max_zone_append_sectors);
extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
void blk_queue_zone_write_granularity(struct request_queue *q,
				      unsigned int size);
extern void blk_queue_alignment_offset(struct request_queue *q,
				       unsigned int alignment);
void disk_update_readahead(struct gendisk *disk);
extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
extern void blk_set_default_limits(struct queue_limits *lim);
extern void blk_set_stacking_limits(struct queue_limits *lim);
extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
			    sector_t start);
extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
			      sector_t offset);
extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
extern void blk_queue_virt_boundary(struct request_queue *, unsigned long);
extern void blk_queue_dma_alignment(struct request_queue *, int);
extern void blk_queue_update_dma_alignment(struct request_queue *, int);
extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
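
/*
 * Usage sketch (illustrative values, not taken from a real driver): a driver
 * normally configures its limits once, after allocating the queue and before
 * the disk is added, based on what the hardware reports:
 *
 *	blk_queue_logical_block_size(q, 4096);
 *	blk_queue_physical_block_size(q, 4096);
 *	blk_queue_max_hw_sectors(q, 2048);
 *	blk_queue_max_segments(q, 128);
 *	blk_queue_write_cache(q, true, true);
 *
 * Here 2048 sectors caps a single request at 1 MiB, and the last call
 * advertises a volatile write cache with FUA support.
 */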
struct blk_independent_access_ranges *
disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges);
void disk_set_independent_access_ranges(struct gendisk *disk,
				struct blk_independent_access_ranges *iars);
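
/*
 * Usage sketch (hypothetical two-actuator disk, illustrative only): a driver
 * that discovers two actuators, each serving half of the capacity, could
 * publish them as:
 *
 *	struct blk_independent_access_ranges *iars;
 *
 *	iars = disk_alloc_independent_access_ranges(disk, 2);
 *	if (iars) {
 *		iars->ia_range[0].sector = 0;
 *		iars->ia_range[0].nr_sectors = capacity / 2;
 *		iars->ia_range[1].sector = capacity / 2;
 *		iars->ia_range[1].nr_sectors = capacity - capacity / 2;
 *		disk_set_independent_access_ranges(disk, iars);
 *	}
 *
 * "capacity" stands for the disk capacity in 512-byte sectors.
 */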
/*
 * Elevator features for blk_queue_required_elevator_features:
 */

/* Supports zoned block devices sequential write constraint */
#define ELEVATOR_F_ZBD_SEQ_WRITE	(1U << 0)
/* Supports scheduling on multiple hardware queues */
#define ELEVATOR_F_MQ_AWARE		(1U << 1)

extern void blk_queue_required_elevator_features(struct request_queue *q,
						 unsigned int features);
extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
					      struct device *dev);
bool __must_check blk_get_queue(struct request_queue *);
extern void blk_put_queue(struct request_queue *);
extern void blk_set_queue_dying(struct request_queue *);
/*
 * blk_plug permits building a queue of related requests by holding the I/O
 * fragments for a short period. This allows merging of sequential requests
 * into a single larger request. As the requests are moved from a per-task
 * list to the device's request_queue in a batch, this results in improved
 * scalability as contention on the request_queue lock is reduced.
 *
 * It is ok not to disable preemption when adding the request to the plug list
 * or when attempting a merge. For details, please see schedule() where
 * blk_flush_plug() is called.
 */
struct blk_plug {
	struct request *mq_list; /* blk-mq requests */

	/* if ios_left is > 1, we can batch tag/rq allocations */
	struct request *cached_rq;
	unsigned short nr_ios;

	unsigned short rq_count;

	bool multiple_queues;

	struct list_head cb_list; /* md requires an unplug callback */
};
typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool);

struct blk_plug_cb {
	struct list_head list;
	blk_plug_cb_fn callback;
	void *data;
};
extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug,
					     void *data, int size);
extern void blk_start_plug(struct blk_plug *);
extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short);
extern void blk_finish_plug(struct blk_plug *);

void blk_flush_plug(struct blk_plug *plug, bool from_schedule);
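
/*
 * Usage sketch (illustrative only): a submitter issuing a batch of bios can
 * plug around the submission loop so requests are collected per-task and
 * handed to the driver in one go:
 *
 *	struct blk_plug plug;
 *
 *	blk_start_plug(&plug);
 *	while (has_more_bios())
 *		submit_bio(next_bio());
 *	blk_finish_plug(&plug);
 *
 * "has_more_bios" and "next_bio" are placeholders for the caller's own
 * iteration over the I/O it wants to issue.
 */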
static inline bool blk_needs_flush_plug(struct task_struct *tsk)
{
	struct blk_plug *plug = tsk->plug;

	return plug &&
		 (plug->mq_list || !list_empty(&plug->cb_list));
}
int blkdev_issue_flush(struct block_device *bdev);
long nr_blockdev_pages(void);
#else /* CONFIG_BLOCK */
static inline void blk_start_plug_nr_ios(struct blk_plug *plug,
					 unsigned short nr_ios)
{
}

static inline void blk_start_plug(struct blk_plug *plug)
{
}

static inline void blk_finish_plug(struct blk_plug *plug)
{
}

static inline void blk_flush_plug(struct blk_plug *plug, bool async)
{
}

static inline bool blk_needs_flush_plug(struct task_struct *tsk)
{
	return false;
}

static inline int blkdev_issue_flush(struct block_device *bdev)
{
	return 0;
}

static inline long nr_blockdev_pages(void)
{
	return 0;
}
#endif /* CONFIG_BLOCK */
extern void blk_io_schedule(void);

extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, struct page *page);

#define BLKDEV_DISCARD_SECURE	(1 << 0)	/* issue a secure erase */

extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, int flags,
		struct bio **biop);

#define BLKDEV_ZERO_NOUNMAP	(1 << 0)  /* do not free blocks */
#define BLKDEV_ZERO_NOFALLBACK	(1 << 1)  /* don't write explicit zeroes */

extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
		unsigned flags);
extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned flags);
static inline int sb_issue_discard(struct super_block *sb, sector_t block,
		sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
{
	return blkdev_issue_discard(sb->s_bdev,
				    block << (sb->s_blocksize_bits -
					      SECTOR_SHIFT),
				    nr_blocks << (sb->s_blocksize_bits -
						  SECTOR_SHIFT),
				    gfp_mask, flags);
}
static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
		sector_t nr_blocks, gfp_t gfp_mask)
{
	return blkdev_issue_zeroout(sb->s_bdev,
				    block << (sb->s_blocksize_bits -
					      SECTOR_SHIFT),
				    nr_blocks << (sb->s_blocksize_bits -
						  SECTOR_SHIFT),
				    gfp_mask, 0);
}
static inline bool bdev_is_partition(struct block_device *bdev)
{
	return bdev->bd_partno;
}
enum blk_default_limits {
	BLK_MAX_SEGMENTS	= 128,
	BLK_SAFE_MAX_SECTORS	= 255,
	BLK_DEF_MAX_SECTORS	= 2560,
	BLK_MAX_SEGMENT_SIZE	= 65536,
	BLK_SEG_BOUNDARY_MASK	= 0xFFFFFFFFUL,
};
static inline unsigned long queue_segment_boundary(const struct request_queue *q)
{
	return q->limits.seg_boundary_mask;
}

static inline unsigned long queue_virt_boundary(const struct request_queue *q)
{
	return q->limits.virt_boundary_mask;
}

static inline unsigned int queue_max_sectors(const struct request_queue *q)
{
	return q->limits.max_sectors;
}

static inline unsigned int queue_max_bytes(struct request_queue *q)
{
	return min_t(unsigned int, queue_max_sectors(q), INT_MAX >> 9) << 9;
}

static inline unsigned int queue_max_hw_sectors(const struct request_queue *q)
{
	return q->limits.max_hw_sectors;
}

static inline unsigned short queue_max_segments(const struct request_queue *q)
{
	return q->limits.max_segments;
}

static inline unsigned short queue_max_discard_segments(const struct request_queue *q)
{
	return q->limits.max_discard_segments;
}

static inline unsigned int queue_max_segment_size(const struct request_queue *q)
{
	return q->limits.max_segment_size;
}

static inline unsigned int queue_max_zone_append_sectors(const struct request_queue *q)
{
	const struct queue_limits *l = &q->limits;

	return min(l->max_zone_append_sectors, l->max_sectors);
}
static inline unsigned queue_logical_block_size(const struct request_queue *q)
{
	int retval = 512;

	if (q && q->limits.logical_block_size)
		retval = q->limits.logical_block_size;

	return retval;
}

static inline unsigned int bdev_logical_block_size(struct block_device *bdev)
{
	return queue_logical_block_size(bdev_get_queue(bdev));
}
static inline unsigned int queue_physical_block_size(const struct request_queue *q)
{
	return q->limits.physical_block_size;
}

static inline unsigned int bdev_physical_block_size(struct block_device *bdev)
{
	return queue_physical_block_size(bdev_get_queue(bdev));
}

static inline unsigned int queue_io_min(const struct request_queue *q)
{
	return q->limits.io_min;
}

static inline int bdev_io_min(struct block_device *bdev)
{
	return queue_io_min(bdev_get_queue(bdev));
}

static inline unsigned int queue_io_opt(const struct request_queue *q)
{
	return q->limits.io_opt;
}

static inline int bdev_io_opt(struct block_device *bdev)
{
	return queue_io_opt(bdev_get_queue(bdev));
}

static inline unsigned int
queue_zone_write_granularity(const struct request_queue *q)
{
	return q->limits.zone_write_granularity;
}

static inline unsigned int
bdev_zone_write_granularity(struct block_device *bdev)
{
	return queue_zone_write_granularity(bdev_get_queue(bdev));
}
static inline int queue_alignment_offset(const struct request_queue *q)
{
	if (q->limits.misaligned)
		return -1;

	return q->limits.alignment_offset;
}
static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector)
{
	unsigned int granularity = max(lim->physical_block_size, lim->io_min);
	unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT)
		<< SECTOR_SHIFT;

	return (granularity + lim->alignment_offset - alignment) % granularity;
}
static inline int bdev_alignment_offset(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (q->limits.misaligned)
		return -1;
	if (bdev_is_partition(bdev))
		return queue_limit_alignment_offset(&q->limits,
				bdev->bd_start_sect);
	return q->limits.alignment_offset;
}
static inline int queue_discard_alignment(const struct request_queue *q)
{
	if (q->limits.discard_misaligned)
		return -1;

	return q->limits.discard_alignment;
}
static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
{
	unsigned int alignment, granularity, offset;

	if (!lim->max_discard_sectors)
		return 0;

	/* Why are these in bytes, not sectors? */
	alignment = lim->discard_alignment >> SECTOR_SHIFT;
	granularity = lim->discard_granularity >> SECTOR_SHIFT;
	if (!granularity)
		return 0;

	/* Offset of the partition start in 'granularity' sectors */
	offset = sector_div(sector, granularity);

	/* And why do we do this modulus *again* in blkdev_issue_discard()? */
	offset = (granularity + alignment - offset) % granularity;

	/* Turn it back into bytes, gaah */
	return offset << SECTOR_SHIFT;
}
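
/*
 * Worked example (illustrative numbers): with a discard_granularity of 1 MiB
 * (2048 sectors), a discard_alignment of 0 and a partition starting at
 * sector 34, the partition start sits 34 sectors into a granule, so the
 * helper returns (2048 + 0 - 34) % 2048 = 2014 sectors, converted back to
 * bytes. Discards offset by that amount line up with device granules again.
 */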
static inline int bdev_discard_alignment(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (bdev_is_partition(bdev))
		return queue_limit_discard_alignment(&q->limits,
				bdev->bd_start_sect);
	return q->limits.discard_alignment;
}
static inline unsigned int bdev_write_same(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (q)
		return q->limits.max_write_same_sectors;

	return 0;
}

static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (q)
		return q->limits.max_write_zeroes_sectors;

	return 0;
}
static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (q)
		return blk_queue_zoned_model(q);

	return BLK_ZONED_NONE;
}

static inline bool bdev_is_zoned(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (q)
		return blk_queue_is_zoned(q);

	return false;
}

static inline sector_t bdev_zone_sectors(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (q)
		return blk_queue_zone_sectors(q);
	return 0;
}

static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (q)
		return queue_max_open_zones(q);
	return 0;
}

static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (q)
		return queue_max_active_zones(q);
	return 0;
}
static inline int queue_dma_alignment(const struct request_queue *q)
{
	return q ? q->dma_alignment : 511;
}

static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
				 unsigned int len)
{
	unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask;
	return !(addr & alignment) && !(len & alignment);
}
/* assumes size > 256 */
static inline unsigned int blksize_bits(unsigned int size)
{
	unsigned int bits = 8;
	do {
		bits++;
		size >>= 1;
	} while (size > 256);
	return bits;
}

static inline unsigned int block_size(struct block_device *bdev)
{
	return 1 << bdev->bd_inode->i_blkbits;
}
int kblockd_schedule_work(struct work_struct *work);
int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);

#define MODULE_ALIAS_BLOCKDEV(major,minor) \
	MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
#define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
	MODULE_ALIAS("block-major-" __stringify(major) "-*")
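
/*
 * Usage sketch (illustrative major number): block drivers declare these
 * aliases so userspace can load them by device number, e.g. a driver that
 * owns all minors of major 240 would add:
 *
 *	MODULE_ALIAS_BLOCKDEV_MAJOR(240);
 */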
#ifdef CONFIG_BLK_INLINE_ENCRYPTION

bool blk_crypto_register(struct blk_crypto_profile *profile,
			 struct request_queue *q);

void blk_crypto_unregister(struct request_queue *q);

#else /* CONFIG_BLK_INLINE_ENCRYPTION */

static inline bool blk_crypto_register(struct blk_crypto_profile *profile,
				       struct request_queue *q)
{
	return true;
}

static inline void blk_crypto_unregister(struct request_queue *q) { }

#endif /* CONFIG_BLK_INLINE_ENCRYPTION */
enum blk_unique_id {
	/* these match the Designator Types specified in SPC */
	BLK_UID_T10	= 1,
	BLK_UID_EUI64	= 2,
	BLK_UID_NAA	= 3,
};

#define NFL4_UFLG_MASK			0x0000003F
struct block_device_operations {
	void (*submit_bio)(struct bio *bio);
	int (*open) (struct block_device *, fmode_t);
	void (*release) (struct gendisk *, fmode_t);
	int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int);
	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
	unsigned int (*check_events) (struct gendisk *disk,
				      unsigned int clearing);
	void (*unlock_native_capacity) (struct gendisk *);
	int (*getgeo)(struct block_device *, struct hd_geometry *);
	int (*set_read_only)(struct block_device *bdev, bool ro);
	/* this callback is with swap_lock and sometimes page table lock held */
	void (*swap_slot_free_notify) (struct block_device *, unsigned long);
	int (*report_zones)(struct gendisk *, sector_t sector,
			unsigned int nr_zones, report_zones_cb cb, void *data);
	char *(*devnode)(struct gendisk *disk, umode_t *mode);
	/* returns the length of the identifier or a negative errno: */
	int (*get_unique_id)(struct gendisk *disk, u8 id[16],
			enum blk_unique_id id_type);
	struct module *owner;
	const struct pr_ops *pr_ops;

	/*
	 * Special callback for probing GPT entry at a given sector.
	 * Needed by Android devices, used by GPT scanner and MMC blk
	 * driver.
	 */
	int (*alternative_gpt_sector)(struct gendisk *disk, sector_t *sector);
};
#ifdef CONFIG_COMPAT
extern int blkdev_compat_ptr_ioctl(struct block_device *, fmode_t,
				   unsigned int, unsigned long);
#else
#define blkdev_compat_ptr_ioctl NULL
#endif
extern int bdev_read_page(struct block_device *, sector_t, struct page *);
extern int bdev_write_page(struct block_device *, sector_t, struct page *,
			   struct writeback_control *);
static inline void blk_wake_io_task(struct task_struct *waiter)
{
	/*
	 * If we're polling, the task itself is doing the completions. For
	 * that case, we don't need to signal a wakeup, it's enough to just
	 * mark us as RUNNING.
	 */
	if (waiter == current)
		__set_current_state(TASK_RUNNING);
	else
		wake_up_process(waiter);
}
unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
				 unsigned int op);
void disk_end_io_acct(struct gendisk *disk, unsigned int op,
		      unsigned long start_time);

unsigned long bio_start_io_acct(struct bio *bio);
void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
		struct block_device *orig_bdev);
/**
 * bio_end_io_acct - end I/O accounting for bio based drivers
 * @bio: bio to end account for
 * @start_time: start time returned by bio_start_io_acct()
 */
static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time)
{
	return bio_end_io_acct_remapped(bio, start_time, bio->bi_bdev);
}
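
/*
 * Usage sketch (illustrative only): a bio-based driver accounts each bio by
 * bracketing its processing with the two helpers:
 *
 *	unsigned long start = bio_start_io_acct(bio);
 *	... process or remap the bio ...
 *	bio_end_io_acct(bio, start);
 *
 * A driver that remaps the bio to another device before completing it uses
 * bio_end_io_acct_remapped() with the original block device instead.
 */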
int bdev_read_only(struct block_device *bdev);
int set_blocksize(struct block_device *bdev, int size);

const char *bdevname(struct block_device *bdev, char *buffer);
int lookup_bdev(const char *pathname, dev_t *dev);

void blkdev_show(struct seq_file *seqf, off_t offset);

#define BDEVNAME_SIZE	32	/* Largest string for a blockdev identifier */
#define BDEVT_SIZE	10	/* Largest string for MAJ:MIN for blkdev */
#ifdef CONFIG_BLOCK
#define BLKDEV_MAJOR_MAX	512
#else
#define BLKDEV_MAJOR_MAX	0
#endif
struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
		void *holder);
struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder);
int bd_prepare_to_claim(struct block_device *bdev, void *holder);
void bd_abort_claiming(struct block_device *bdev, void *holder);
void blkdev_put(struct block_device *bdev, fmode_t mode);

/* just for blk-cgroup, don't use elsewhere */
struct block_device *blkdev_get_no_open(dev_t dev);
void blkdev_put_no_open(struct block_device *bdev);

struct block_device *bdev_alloc(struct gendisk *disk, u8 partno);
void bdev_add(struct block_device *bdev, dev_t dev);
struct block_device *I_BDEV(struct inode *inode);
int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart,
		loff_t lend);
#ifdef CONFIG_BLOCK
void invalidate_bdev(struct block_device *bdev);
int sync_blockdev(struct block_device *bdev);
int sync_blockdev_nowait(struct block_device *bdev);
void sync_bdevs(bool wait);
#else
static inline void invalidate_bdev(struct block_device *bdev)
{
}
static inline int sync_blockdev(struct block_device *bdev)
{
	return 0;
}
static inline int sync_blockdev_nowait(struct block_device *bdev)
{
	return 0;
}
static inline void sync_bdevs(bool wait)
{
}
#endif
int fsync_bdev(struct block_device *bdev);

int freeze_bdev(struct block_device *bdev);
int thaw_bdev(struct block_device *bdev);
struct io_comp_batch {
	struct request *req_list;
	void (*complete)(struct io_comp_batch *);
};

#define DEFINE_IO_COMP_BATCH(name)	struct io_comp_batch name = { }
#define rq_list_add(listptr, rq)	do {		\
	(rq)->rq_next = *(listptr);			\
	*(listptr) = rq;				\
} while (0)

#define rq_list_pop(listptr)				\
({							\
	struct request *__req = NULL;			\
	if ((listptr) && *(listptr)) {			\
		__req = *(listptr);			\
		*(listptr) = __req->rq_next;		\
	}						\
	__req;						\
})

#define rq_list_peek(listptr)				\
({							\
	struct request *__req = NULL;			\
	if ((listptr) && *(listptr))			\
		__req = *(listptr);			\
	__req;						\
})

#define rq_list_for_each(listptr, pos)			\
	for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos))

#define rq_list_next(rq)	(rq)->rq_next
#define rq_list_empty(list)	((list) == (struct request *) NULL)
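
/*
 * Usage sketch (illustrative only): completion batching hands a driver a
 * singly linked list of requests that can be walked and drained with these
 * macros, e.g.:
 *
 *	struct request *req;
 *
 *	rq_list_for_each(&iob->req_list, req)
 *		prefetch(req);
 *
 *	while ((req = rq_list_pop(&iob->req_list)))
 *		blk_mq_end_request(req, BLK_STS_OK);
 *
 * blk_mq_end_request() and BLK_STS_OK are the usual blk-mq completion
 * primitives; "iob" is a struct io_comp_batch.
 */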
#endif /* _LINUX_BLKDEV_H */