Linux 6.16-rc6
[linux-block.git] / block / blk-merge.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to segment and merge handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/part_stat.h>
#include <linux/blk-cgroup.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
#include "blk-throttle.h"

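/*
 * Return the first/last multi-page bvec covered by the bio's current
 * iterator.  bio_get_last_bvec() trims bv_len when the bio ends in the
 * middle of an io vector.
 */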
static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
{
	*bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
}

static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
{
	struct bvec_iter iter = bio->bi_iter;
	int idx;

	bio_get_first_bvec(bio, bv);
	if (bv->bv_len == bio->bi_iter.bi_size)
		return;		/* this bio only has a single bvec */

	bio_advance_iter(bio, &iter, iter.bi_size);

	if (!iter.bi_bvec_done)
		idx = iter.bi_idx - 1;
	else	/* in the middle of bvec */
		idx = iter.bi_idx;

	*bv = bio->bi_io_vec[idx];

	/*
	 * iter.bi_bvec_done records actual length of the last bvec
	 * if this bio ends in the middle of one io vector
	 */
	if (iter.bi_bvec_done)
		bv->bv_len = iter.bi_bvec_done;
}

static inline bool bio_will_gap(struct request_queue *q,
		struct request *prev_rq, struct bio *prev, struct bio *next)
{
	struct bio_vec pb, nb;

	if (!bio_has_data(prev) || !queue_virt_boundary(q))
		return false;

	/*
	 * Don't merge if the 1st bio starts with non-zero offset, otherwise it
	 * is quite difficult to respect the sg gap limit.  We work hard to
	 * merge a huge number of small single bios in case of mkfs.
	 */
	if (prev_rq)
		bio_get_first_bvec(prev_rq->bio, &pb);
	else
		bio_get_first_bvec(prev, &pb);
	if (pb.bv_offset & queue_virt_boundary(q))
		return true;

	/*
	 * We don't need to worry about the situation that the merged segment
	 * ends in unaligned virt boundary:
	 *
	 * - if 'pb' ends aligned, the merged segment ends aligned
	 * - if 'pb' ends unaligned, the next bio must include
	 *   one single bvec of 'nb', otherwise the 'nb' can't
	 *   merge with 'pb'
	 */
	bio_get_last_bvec(prev, &pb);
	bio_get_first_bvec(next, &nb);
	if (biovec_phys_mergeable(q, &pb, &nb))
		return false;
	return __bvec_gap_to_prev(&q->limits, &pb, nb.bv_offset);
}

static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
{
	return bio_will_gap(req->q, req, req->biotail, bio);
}

static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
{
	return bio_will_gap(req->q, NULL, bio, req->bio);
}

/*
 * The max size one bio can handle is UINT_MAX because bvec_iter.bi_size
 * is defined as 'unsigned int', and it also has to be aligned to the
 * logical block size, which is the minimum unit accepted by the hardware.
 */
static unsigned int bio_allowed_max_sectors(const struct queue_limits *lim)
{
	return round_down(UINT_MAX, lim->logical_block_size) >> SECTOR_SHIFT;
}

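/*
 * Split @bio at @split_sectors and submit the remainder of the original bio.
 * Returns the front split, marked REQ_NOMERGE and chained to the parent, the
 * unmodified bio if no split is needed, or NULL after ending the bio with an
 * error status when @split_sectors is negative or the split allocation fails.
 */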
static struct bio *bio_submit_split(struct bio *bio, int split_sectors)
{
	if (unlikely(split_sectors < 0))
		goto error;

	if (split_sectors) {
		struct bio *split;

		split = bio_split(bio, split_sectors, GFP_NOIO,
				&bio->bi_bdev->bd_disk->bio_split);
		if (IS_ERR(split)) {
			split_sectors = PTR_ERR(split);
			goto error;
		}
		split->bi_opf |= REQ_NOMERGE;
		blkcg_bio_issue_init(split);
		bio_chain(split, bio);
		trace_block_split(split, bio->bi_iter.bi_sector);
		WARN_ON_ONCE(bio_zone_write_plugging(bio));
		submit_bio_noacct(bio);
		return split;
	}

	return bio;
error:
	bio->bi_status = errno_to_blk_status(split_sectors);
	bio_endio(bio);
	return NULL;
}

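/*
 * Split a discard bio so that each fragment fits max_discard_sectors and,
 * where possible, ends on a discard granularity boundary.
 */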
struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
		unsigned *nsegs)
{
	unsigned int max_discard_sectors, granularity;
	sector_t tmp;
	unsigned split_sectors;

	*nsegs = 1;

	granularity = max(lim->discard_granularity >> 9, 1U);

	max_discard_sectors =
		min(lim->max_discard_sectors, bio_allowed_max_sectors(lim));
	max_discard_sectors -= max_discard_sectors % granularity;
	if (unlikely(!max_discard_sectors))
		return bio;

	if (bio_sectors(bio) <= max_discard_sectors)
		return bio;

	split_sectors = max_discard_sectors;

	/*
	 * If the next starting sector would be misaligned, stop the discard at
	 * the previous aligned sector.
	 */
	tmp = bio->bi_iter.bi_sector + split_sectors -
		((lim->discard_alignment >> 9) % granularity);
	tmp = sector_div(tmp, granularity);

	if (split_sectors > tmp)
		split_sectors -= tmp;

	return bio_submit_split(bio, split_sectors);
}

static inline unsigned int blk_boundary_sectors(const struct queue_limits *lim,
						bool is_atomic)
{
	/*
	 * chunk_sectors must be a multiple of atomic_write_boundary_sectors if
	 * both are non-zero.
	 */
	if (is_atomic && lim->atomic_write_boundary_sectors)
		return lim->atomic_write_boundary_sectors;

	return lim->chunk_sectors;
}

/*
 * Return the maximum number of sectors from the start of a bio that may be
 * submitted as a single request to a block device. If enough sectors remain,
 * align the end to the physical block size. Otherwise align the end to the
 * logical block size. This approach minimizes the number of non-aligned
 * requests that are submitted to a block device if the start of a bio is not
 * aligned to a physical block boundary.
 */
static inline unsigned get_max_io_size(struct bio *bio,
				       const struct queue_limits *lim)
{
	unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
	unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
	bool is_atomic = bio->bi_opf & REQ_ATOMIC;
	unsigned boundary_sectors = blk_boundary_sectors(lim, is_atomic);
	unsigned max_sectors, start, end;

	/*
	 * We ignore lim->max_sectors for atomic writes because it may be less
	 * than the actual bio size, which we cannot tolerate.
	 */
	if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
		max_sectors = lim->max_write_zeroes_sectors;
	else if (is_atomic)
		max_sectors = lim->atomic_write_max_sectors;
	else
		max_sectors = lim->max_sectors;

	if (boundary_sectors) {
		max_sectors = min(max_sectors,
			blk_boundary_sectors_left(bio->bi_iter.bi_sector,
						  boundary_sectors));
	}

	start = bio->bi_iter.bi_sector & (pbs - 1);
	end = (start + max_sectors) & ~(pbs - 1);
	if (end > start)
		return end - start;
	return max_sectors & ~(lbs - 1);
}

/**
 * bvec_split_segs - verify whether or not a bvec should be split in the middle
 * @lim:       [in] queue limits to split based on
 * @bv:        [in] bvec to examine
 * @nsegs:     [in,out] Number of segments in the bio being built. Incremented
 *             by the number of segments from @bv that may be appended to that
 *             bio without exceeding @max_segs
 * @bytes:     [in,out] Number of bytes in the bio being built. Incremented
 *             by the number of bytes from @bv that may be appended to that
 *             bio without exceeding @max_bytes
 * @max_segs:  [in] upper bound for *@nsegs
 * @max_bytes: [in] upper bound for *@bytes
 *
 * When splitting a bio, it can happen that a bvec is encountered that is too
 * big to fit in a single segment and hence that it has to be split in the
 * middle. This function verifies whether or not that should happen. The value
 * %true is returned if and only if appending the entire @bv to a bio with
 * *@nsegs segments and *@bytes bytes would make that bio unacceptable for
 * the block driver.
 */
static bool bvec_split_segs(const struct queue_limits *lim,
		const struct bio_vec *bv, unsigned *nsegs, unsigned *bytes,
		unsigned max_segs, unsigned max_bytes)
{
	unsigned max_len = max_bytes - *bytes;
	unsigned len = min(bv->bv_len, max_len);
	unsigned total_len = 0;
	unsigned seg_size = 0;

	while (len && *nsegs < max_segs) {
		seg_size = get_max_segment_size(lim, bvec_phys(bv) + total_len, len);

		(*nsegs)++;
		total_len += seg_size;
		len -= seg_size;

		if ((bv->bv_offset + total_len) & lim->virt_boundary_mask)
			break;
	}

	*bytes += total_len;

	/* tell the caller to split the bvec if it is too big to fit */
	return len > 0 || bv->bv_len > max_len;
}

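/*
 * Writes to devices that set a zone write granularity are split on that
 * granularity; everything else is split on the logical block size.
 */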
static unsigned int bio_split_alignment(struct bio *bio,
		const struct queue_limits *lim)
{
	if (op_is_write(bio_op(bio)) && lim->zone_write_granularity)
		return lim->zone_write_granularity;
	return lim->logical_block_size;
}

/**
 * bio_split_rw_at - check if and where to split a read/write bio
 * @bio:  [in] bio to be split
 * @lim:  [in] queue limits to split based on
 * @segs: [out] number of segments in the bio with the first half of the sectors
 * @max_bytes: [in] maximum number of bytes per bio
 *
 * Find out if @bio needs to be split to fit the queue limits in @lim and a
 * maximum size of @max_bytes.  Returns a negative error number if @bio can't
 * be split, 0 if the bio doesn't have to be split, or a positive sector offset
 * if @bio needs to be split.
 */
int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
		unsigned *segs, unsigned max_bytes)
{
	struct bio_vec bv, bvprv, *bvprvp = NULL;
	struct bvec_iter iter;
	unsigned nsegs = 0, bytes = 0;

	bio_for_each_bvec(bv, bio, iter) {
		/*
		 * If the queue doesn't support SG gaps and adding this
		 * offset would create a gap, disallow it.
		 */
		if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
			goto split;

		if (nsegs < lim->max_segments &&
		    bytes + bv.bv_len <= max_bytes &&
		    bv.bv_offset + bv.bv_len <= lim->min_segment_size) {
			nsegs++;
			bytes += bv.bv_len;
		} else {
			if (bvec_split_segs(lim, &bv, &nsegs, &bytes,
					lim->max_segments, max_bytes))
				goto split;
		}

		bvprv = bv;
		bvprvp = &bvprv;
	}

	*segs = nsegs;
	return 0;
split:
	if (bio->bi_opf & REQ_ATOMIC)
		return -EINVAL;

	/*
	 * We can't sanely support splitting for a REQ_NOWAIT bio.  Return
	 * -EAGAIN so that the caller ends the bio with EAGAIN if splitting
	 * is required.
	 */
	if (bio->bi_opf & REQ_NOWAIT)
		return -EAGAIN;

	*segs = nsegs;

	/*
	 * Individual bvecs might not be logical block aligned. Round down the
	 * split size so that each bio is properly block size aligned, even if
	 * we do not use the full hardware limits.
	 */
	bytes = ALIGN_DOWN(bytes, bio_split_alignment(bio, lim));

	/*
	 * Bio splitting may cause subtle trouble such as hangs when doing sync
	 * iopoll in the direct IO path.  Since the performance gain of iopoll
	 * for big IO can be trivial, disable iopoll when a split is needed.
	 */
	bio_clear_polled(bio);
	return bytes >> SECTOR_SHIFT;
}
EXPORT_SYMBOL_GPL(bio_split_rw_at);

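/*
 * Split a read/write bio against the queue limits and submit the remainder.
 */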
struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
		unsigned *nr_segs)
{
	return bio_submit_split(bio,
		bio_split_rw_at(bio, lim, nr_segs,
			get_max_io_size(bio, lim) << SECTOR_SHIFT));
}

/*
 * REQ_OP_ZONE_APPEND bios must never be split by the block layer.
 *
 * But we want the nr_segs calculation provided by bio_split_rw_at, and having
 * a good sanity check that the submitter built the bio correctly is nice to
 * have as well.
 */
struct bio *bio_split_zone_append(struct bio *bio,
		const struct queue_limits *lim, unsigned *nr_segs)
{
	int split_sectors;

	split_sectors = bio_split_rw_at(bio, lim, nr_segs,
			lim->max_zone_append_sectors << SECTOR_SHIFT);
	if (WARN_ON_ONCE(split_sectors > 0))
		split_sectors = -EINVAL;
	return bio_submit_split(bio, split_sectors);
}

struct bio *bio_split_write_zeroes(struct bio *bio,
		const struct queue_limits *lim, unsigned *nsegs)
{
	unsigned int max_sectors = get_max_io_size(bio, lim);

	*nsegs = 0;

	/*
	 * An unset limit should normally not happen, as bio submission is
	 * keyed off having a non-zero limit.  But SCSI can clear the limit in
	 * the I/O completion handler, and we can race and see this.  Splitting
	 * to a zero limit obviously doesn't make sense, so band-aid it here.
	 */
	if (!max_sectors)
		return bio;
	if (bio_sectors(bio) <= max_sectors)
		return bio;
	return bio_submit_split(bio, max_sectors);
}

/**
 * bio_split_to_limits - split a bio to fit the queue limits
 * @bio:  bio to be split
 *
 * Check if @bio needs splitting based on the queue limits of @bio->bi_bdev,
 * and if so split off a bio fitting the limits from the beginning of @bio and
 * return it.  @bio is shortened to the remainder and re-submitted.
 *
 * The split bio is allocated from the per-disk bio_split bioset, which is
 * provided by the block layer.
 */
struct bio *bio_split_to_limits(struct bio *bio)
{
	unsigned int nr_segs;

	return __bio_split_to_limits(bio, bdev_limits(bio->bi_bdev), &nr_segs);
}
EXPORT_SYMBOL(bio_split_to_limits);

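/*
 * Recalculate the number of physical segments for a request by walking all
 * bvecs in its bios against the queue limits.  Discards are counted per bio
 * when the queue supports multi-segment discards, and write zeroes carry no
 * data segments at all.
 */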
unsigned int blk_recalc_rq_segments(struct request *rq)
{
	unsigned int nr_phys_segs = 0;
	unsigned int bytes = 0;
	struct req_iterator iter;
	struct bio_vec bv;

	if (!rq->bio)
		return 0;

	switch (bio_op(rq->bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_SECURE_ERASE:
		if (queue_max_discard_segments(rq->q) > 1) {
			struct bio *bio = rq->bio;

			for_each_bio(bio)
				nr_phys_segs++;
			return nr_phys_segs;
		}
		return 1;
	case REQ_OP_WRITE_ZEROES:
		return 0;
	default:
		break;
	}

	rq_for_each_bvec(bv, rq, iter)
		bvec_split_segs(&rq->q->limits, &bv, &nr_phys_segs, &bytes,
				UINT_MAX, UINT_MAX);
	return nr_phys_segs;
}

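/*
 * Return the maximum number of sectors @rq may contain when it starts at
 * @offset, honoring the chunk/atomic write boundary for everything except
 * passthrough, discard and secure erase requests.
 */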
static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
						  sector_t offset)
{
	struct request_queue *q = rq->q;
	struct queue_limits *lim = &q->limits;
	unsigned int max_sectors, boundary_sectors;
	bool is_atomic = rq->cmd_flags & REQ_ATOMIC;

	if (blk_rq_is_passthrough(rq))
		return q->limits.max_hw_sectors;

	boundary_sectors = blk_boundary_sectors(lim, is_atomic);
	max_sectors = blk_queue_get_max_sectors(rq);

	if (!boundary_sectors ||
	    req_op(rq) == REQ_OP_DISCARD ||
	    req_op(rq) == REQ_OP_SECURE_ERASE)
		return max_sectors;
	return min(max_sectors,
		   blk_boundary_sectors_left(offset, boundary_sectors));
}

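/*
 * Check whether @nr_phys_segs new segments from @bio can be added to @req
 * without exceeding the segment limits, and update the segment counts if so.
 * Returns 1 on success and 0 (marking the request unmergeable) otherwise.
 */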
static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
		unsigned int nr_phys_segs)
{
	if (!blk_cgroup_mergeable(req, bio))
		goto no_merge;

	if (blk_integrity_merge_bio(req->q, req, bio) == false)
		goto no_merge;

	/* discard request merge won't add new segment */
	if (req_op(req) == REQ_OP_DISCARD)
		return 1;

	if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
		goto no_merge;

	/*
	 * This will form the start of a new hw segment.  Bump both
	 * counters.
	 */
	req->nr_phys_segments += nr_phys_segs;
	if (bio_integrity(bio))
		req->nr_integrity_segments += blk_rq_count_integrity_sg(req->q,
									bio);
	return 1;

no_merge:
	req_set_nomerge(req->q, req);
	return 0;
}

int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
{
	if (req_gap_back_merge(req, bio))
		return 0;
	if (blk_integrity_rq(req) &&
	    integrity_req_gap_back_merge(req, bio))
		return 0;
	if (!bio_crypt_ctx_back_mergeable(req, bio))
		return 0;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
		req_set_nomerge(req->q, req);
		return 0;
	}

	return ll_new_hw_segment(req, bio, nr_segs);
}

static int ll_front_merge_fn(struct request *req, struct bio *bio,
		unsigned int nr_segs)
{
	if (req_gap_front_merge(req, bio))
		return 0;
	if (blk_integrity_rq(req) &&
	    integrity_req_gap_front_merge(req, bio))
		return 0;
	if (!bio_crypt_ctx_front_mergeable(req, bio))
		return 0;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
		req_set_nomerge(req->q, req);
		return 0;
	}

	return ll_new_hw_segment(req, bio, nr_segs);
}

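/*
 * Try to merge two discard requests: succeed only if the combined request
 * stays within the discard segment and sector limits of the queue.
 */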
static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
		struct request *next)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);

	if (segments >= queue_max_discard_segments(q))
		goto no_merge;
	if (blk_rq_sectors(req) + bio_sectors(next->bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
		goto no_merge;

	req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next);
	return true;
no_merge:
	req_set_nomerge(q, req);
	return false;
}

static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
				struct request *next)
{
	int total_phys_segments;

	if (req_gap_back_merge(req, next->bio))
		return 0;

	/*
	 * Will it become too large?
	 */
	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
		return 0;

	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
	if (total_phys_segments > blk_rq_get_max_segments(req))
		return 0;

	if (!blk_cgroup_mergeable(req, next->bio))
		return 0;

	if (blk_integrity_merge_rq(q, req, next) == false)
		return 0;

	if (!bio_crypt_ctx_merge_rq(req, next))
		return 0;

	/* Merge is OK... */
	req->nr_phys_segments = total_phys_segments;
	req->nr_integrity_segments += next->nr_integrity_segments;
	return 1;
}

/**
 * blk_rq_set_mixed_merge - mark a request as mixed merge
 * @rq: request to mark as mixed merge
 *
 * Description:
 *     @rq is about to be mixed merged.  Make sure the attributes
 *     which can be mixed are set in each bio and mark @rq as mixed
 *     merged.
 */
static void blk_rq_set_mixed_merge(struct request *rq)
{
	blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK;
	struct bio *bio;

	if (rq->rq_flags & RQF_MIXED_MERGE)
		return;

	/*
	 * @rq will no longer represent mixable attributes for all the
	 * contained bios.  It will just track those of the first one.
	 * Distribute the attributes to each bio.
	 */
	for (bio = rq->bio; bio; bio = bio->bi_next) {
		WARN_ON_ONCE((bio->bi_opf & REQ_FAILFAST_MASK) &&
			     (bio->bi_opf & REQ_FAILFAST_MASK) != ff);
		bio->bi_opf |= ff;
	}
	rq->rq_flags |= RQF_MIXED_MERGE;
}

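/*
 * Readahead bios carry no failfast flags of their own, but they may be
 * completed as failfast once merged into a mixed-merge request, so treat
 * them as fully failfast when comparing merge attributes.
 */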
static inline blk_opf_t bio_failfast(const struct bio *bio)
{
	if (bio->bi_opf & REQ_RAHEAD)
		return REQ_FAILFAST_MASK;

	return bio->bi_opf & REQ_FAILFAST_MASK;
}

/*
 * After we are marked as MIXED_MERGE, any new RA bio has to be updated
 * as failfast, and request's failfast has to be updated in case of
 * front merge.
 */
static inline void blk_update_mixed_merge(struct request *req,
		struct bio *bio, bool front_merge)
{
	if (req->rq_flags & RQF_MIXED_MERGE) {
		if (bio->bi_opf & REQ_RAHEAD)
			bio->bi_opf |= REQ_FAILFAST_MASK;

		if (front_merge) {
			req->cmd_flags &= ~REQ_FAILFAST_MASK;
			req->cmd_flags |= bio->bi_opf & REQ_FAILFAST_MASK;
		}
	}
}

static void blk_account_io_merge_request(struct request *req)
{
	if (req->rq_flags & RQF_IO_STAT) {
		part_stat_lock();
		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
		part_stat_local_dec(req->part,
				    in_flight[op_is_write(req_op(req))]);
		part_stat_unlock();
	}
}

static enum elv_merge blk_try_req_merge(struct request *req,
					struct request *next)
{
	if (blk_discard_mergable(req))
		return ELEVATOR_DISCARD_MERGE;
	else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
		return ELEVATOR_BACK_MERGE;

	return ELEVATOR_NO_MERGE;
}

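/*
 * Atomic writes may only be merged with other atomic writes, and non-atomic
 * requests only with non-atomic ones, so that the atomicity guarantee of the
 * resulting request stays well defined.
 */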
static bool blk_atomic_write_mergeable_rq_bio(struct request *rq,
					      struct bio *bio)
{
	return (rq->cmd_flags & REQ_ATOMIC) == (bio->bi_opf & REQ_ATOMIC);
}

static bool blk_atomic_write_mergeable_rqs(struct request *rq,
					   struct request *next)
{
	return (rq->cmd_flags & REQ_ATOMIC) == (next->cmd_flags & REQ_ATOMIC);
}

/*
 * For non-mq, this has to be called with the request spinlock acquired.
 * For mq with scheduling, the appropriate queue wide lock should be held.
 */
static struct request *attempt_merge(struct request_queue *q,
				     struct request *req, struct request *next)
{
	if (!rq_mergeable(req) || !rq_mergeable(next))
		return NULL;

	if (req_op(req) != req_op(next))
		return NULL;

	if (req->bio->bi_write_hint != next->bio->bi_write_hint)
		return NULL;
	if (req->bio->bi_write_stream != next->bio->bi_write_stream)
		return NULL;
	if (req->bio->bi_ioprio != next->bio->bi_ioprio)
		return NULL;
	if (!blk_atomic_write_mergeable_rqs(req, next))
		return NULL;

	/*
	 * If we are allowed to merge, then append bio list
	 * from next to rq and release next. merge_requests_fn
	 * will have updated segment counts, update sector
	 * counts here. Handle DISCARDs separately, as they
	 * have separate settings.
	 */

	switch (blk_try_req_merge(req, next)) {
	case ELEVATOR_DISCARD_MERGE:
		if (!req_attempt_discard_merge(q, req, next))
			return NULL;
		break;
	case ELEVATOR_BACK_MERGE:
		if (!ll_merge_requests_fn(q, req, next))
			return NULL;
		break;
	default:
		return NULL;
	}

	/*
	 * If failfast settings disagree or any of the two is already
	 * a mixed merge, mark both as mixed before proceeding.  This
	 * makes sure that all involved bios have mixable attributes
	 * set properly.
	 */
	if (((req->rq_flags | next->rq_flags) & RQF_MIXED_MERGE) ||
	    (req->cmd_flags & REQ_FAILFAST_MASK) !=
	    (next->cmd_flags & REQ_FAILFAST_MASK)) {
		blk_rq_set_mixed_merge(req);
		blk_rq_set_mixed_merge(next);
	}

	/*
	 * At this point we have either done a back merge or front merge. We
	 * need the smaller start_time_ns of the merged requests to be the
	 * current request for accounting purposes.
	 */
	if (next->start_time_ns < req->start_time_ns)
		req->start_time_ns = next->start_time_ns;

	req->biotail->bi_next = next->bio;
	req->biotail = next->biotail;

	req->__data_len += blk_rq_bytes(next);

	if (!blk_discard_mergable(req))
		elv_merge_requests(q, req, next);

	blk_crypto_rq_put_keyslot(next);

	/*
	 * 'next' is going away, so update stats accordingly
	 */
	blk_account_io_merge_request(next);

	trace_block_rq_merge(next);

	/*
	 * ownership of bio passed from next to req, return 'next' for
	 * the caller to free
	 */
	next->bio = NULL;
	return next;
}

static struct request *attempt_back_merge(struct request_queue *q,
		struct request *rq)
{
	struct request *next = elv_latter_request(q, rq);

	if (next)
		return attempt_merge(q, rq, next);

	return NULL;
}

static struct request *attempt_front_merge(struct request_queue *q,
		struct request *rq)
{
	struct request *prev = elv_former_request(q, rq);

	if (prev)
		return attempt_merge(q, prev, rq);

	return NULL;
}

/*
 * Try to merge 'next' into 'rq'. Return true if the merge happened, false
 * otherwise. The caller is responsible for freeing 'next' if the merge
 * happened.
 */
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
			   struct request *next)
{
	return attempt_merge(q, rq, next);
}

bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
	if (!rq_mergeable(rq) || !bio_mergeable(bio))
		return false;

	if (req_op(rq) != bio_op(bio))
		return false;

	if (!blk_cgroup_mergeable(rq, bio))
		return false;
	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
		return false;
	if (!bio_crypt_rq_ctx_compatible(rq, bio))
		return false;
	if (rq->bio->bi_write_hint != bio->bi_write_hint)
		return false;
	if (rq->bio->bi_write_stream != bio->bi_write_stream)
		return false;
	if (rq->bio->bi_ioprio != bio->bi_ioprio)
		return false;
	if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
		return false;

	return true;
}

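/*
 * Determine how @bio could be merged into @rq, based on discard
 * mergeability and the sector positions of the two.
 */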
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
{
	if (blk_discard_mergable(rq))
		return ELEVATOR_DISCARD_MERGE;
	else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
		return ELEVATOR_BACK_MERGE;
	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
		return ELEVATOR_FRONT_MERGE;
	return ELEVATOR_NO_MERGE;
}

static void blk_account_io_merge_bio(struct request *req)
{
	if (req->rq_flags & RQF_IO_STAT) {
		part_stat_lock();
		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
		part_stat_unlock();
	}
}

enum bio_merge_status bio_attempt_back_merge(struct request *req,
		struct bio *bio, unsigned int nr_segs)
{
	const blk_opf_t ff = bio_failfast(bio);

	if (!ll_back_merge_fn(req, bio, nr_segs))
		return BIO_MERGE_FAILED;

	trace_block_bio_backmerge(bio);
	rq_qos_merge(req->q, req, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	blk_update_mixed_merge(req, bio, false);

	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
		blk_zone_write_plug_bio_merged(bio);

	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_iter.bi_size;

	bio_crypt_free_ctx(bio);

	blk_account_io_merge_bio(req);
	return BIO_MERGE_OK;
}

static enum bio_merge_status bio_attempt_front_merge(struct request *req,
		struct bio *bio, unsigned int nr_segs)
{
	const blk_opf_t ff = bio_failfast(bio);

	/*
	 * A front merge for writes to sequential zones of a zoned block device
	 * can happen only if the user submitted writes out of order.  Do not
	 * merge such writes and let them fail.
	 */
	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
		return BIO_MERGE_FAILED;

	if (!ll_front_merge_fn(req, bio, nr_segs))
		return BIO_MERGE_FAILED;

	trace_block_bio_frontmerge(bio);
	rq_qos_merge(req->q, req, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	blk_update_mixed_merge(req, bio, true);

	bio->bi_next = req->bio;
	req->bio = bio;

	req->__sector = bio->bi_iter.bi_sector;
	req->__data_len += bio->bi_iter.bi_size;

	bio_crypt_do_front_merge(req, bio);

	blk_account_io_merge_bio(req);
	return BIO_MERGE_OK;
}

static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
		struct request *req, struct bio *bio)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);

	if (segments >= queue_max_discard_segments(q))
		goto no_merge;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
		goto no_merge;

	rq_qos_merge(q, req, bio);

	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_iter.bi_size;
	req->nr_phys_segments = segments + 1;

	blk_account_io_merge_bio(req);
	return BIO_MERGE_OK;
no_merge:
	req_set_nomerge(q, req);
	return BIO_MERGE_FAILED;
}

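/*
 * Try to merge @bio into @rq.  Returns BIO_MERGE_OK on success,
 * BIO_MERGE_NONE if the two are not mergeable at all, and BIO_MERGE_FAILED
 * if a back, front or discard merge was attempted but could not be done.
 * With @sched_allow_merge set, the I/O scheduler is asked for permission
 * before back and front merges.
 */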
static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
						   struct request *rq,
						   struct bio *bio,
						   unsigned int nr_segs,
						   bool sched_allow_merge)
{
	if (!blk_rq_merge_ok(rq, bio))
		return BIO_MERGE_NONE;

	switch (blk_try_merge(rq, bio)) {
	case ELEVATOR_BACK_MERGE:
		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
			return bio_attempt_back_merge(rq, bio, nr_segs);
		break;
	case ELEVATOR_FRONT_MERGE:
		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
			return bio_attempt_front_merge(rq, bio, nr_segs);
		break;
	case ELEVATOR_DISCARD_MERGE:
		return bio_attempt_discard_merge(q, rq, bio);
	default:
		return BIO_MERGE_NONE;
	}

	return BIO_MERGE_FAILED;
}

/**
 * blk_attempt_plug_merge - try to merge with %current's plugged list
 * @q: request_queue new bio is being queued at
 * @bio: new bio being queued
 * @nr_segs: number of segments in @bio
 *
 * Determine whether @bio being queued on @q can be merged with the previous
 * request on %current's plugged list.  Returns %true if merge was successful,
 * otherwise %false.
 *
 * Plugging coalesces IOs from the same issuer for the same purpose without
 * going through @q->queue_lock.  As such it's more of an issuing mechanism
 * than scheduling, and the request, while it may have elvpriv data, is not
 * added on the elevator at this point.  In addition, we don't have
 * reliable access to the elevator outside queue lock.  Only check basic
 * merging parameters without querying the elevator.
 *
 * Caller must ensure !blk_queue_nomerges(q) beforehand.
 */
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs)
{
	struct blk_plug *plug = current->plug;
	struct request *rq;

	if (!plug || rq_list_empty(&plug->mq_list))
		return false;

	rq = plug->mq_list.tail;
	if (rq->q == q)
		return blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
		       BIO_MERGE_OK;
	else if (!plug->multiple_queues)
		return false;

	rq_list_for_each(&plug->mq_list, rq) {
		if (rq->q != q)
			continue;
		if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
		    BIO_MERGE_OK)
			return true;
		break;
	}
	return false;
}

/*
 * Iterate list of requests and see if we can merge this bio with any
 * of them.
 */
bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
			struct bio *bio, unsigned int nr_segs)
{
	struct request *rq;
	int checked = 8;

	list_for_each_entry_reverse(rq, list, queuelist) {
		if (!checked--)
			break;

		switch (blk_attempt_bio_merge(q, rq, bio, nr_segs, true)) {
		case BIO_MERGE_NONE:
			continue;
		case BIO_MERGE_OK:
			return true;
		case BIO_MERGE_FAILED:
			return false;
		}
	}

	return false;
}
EXPORT_SYMBOL_GPL(blk_bio_list_merge);

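/*
 * Ask the elevator for a merge candidate for @bio and try to merge the bio
 * into it.  After a successful back or front merge, also try to merge the
 * request with its neighbour; if that works, *merged_request is set so the
 * caller can free the request that was absorbed.
 */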
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs, struct request **merged_request)
{
	struct request *rq;

	switch (elv_merge(q, &rq, bio)) {
	case ELEVATOR_BACK_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
			return false;
		*merged_request = attempt_back_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
		return true;
	case ELEVATOR_FRONT_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
			return false;
		*merged_request = attempt_front_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
		return true;
	case ELEVATOR_DISCARD_MERGE:
		return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK;
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);