block: setup bi_phys_segments after splitting
[linux-2.6-block.git] / block / blk-merge.c
1 /*
2  * Functions related to segment and merge handling
3  */
4 #include <linux/kernel.h>
5 #include <linux/module.h>
6 #include <linux/bio.h>
7 #include <linux/blkdev.h>
8 #include <linux/scatterlist.h>
9
10 #include "blk.h"
11
12 static struct bio *blk_bio_discard_split(struct request_queue *q,
13                                          struct bio *bio,
14                                          struct bio_set *bs,
15                                          unsigned *nsegs)
16 {
17         unsigned int max_discard_sectors, granularity;
18         int alignment;
19         sector_t tmp;
20         unsigned split_sectors;
21
22         *nsegs = 1;
23
24         /* Zero-sector (unknown) and one-sector granularities are the same.  */
25         granularity = max(q->limits.discard_granularity >> 9, 1U);
26
27         max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
28         max_discard_sectors -= max_discard_sectors % granularity;
29
30         if (unlikely(!max_discard_sectors)) {
31                 /* XXX: warn */
32                 return NULL;
33         }
34
35         if (bio_sectors(bio) <= max_discard_sectors)
36                 return NULL;
37
38         split_sectors = max_discard_sectors;
39
40         /*
41          * If the next starting sector would be misaligned, stop the discard at
42          * the previous aligned sector.
43          */
44         alignment = (q->limits.discard_alignment >> 9) % granularity;
45
46         tmp = bio->bi_iter.bi_sector + split_sectors - alignment;
47         tmp = sector_div(tmp, granularity);
48
49         if (split_sectors > tmp)
50                 split_sectors -= tmp;
51
52         return bio_split(bio, split_sectors, GFP_NOIO, bs);
53 }
54
55 static struct bio *blk_bio_write_same_split(struct request_queue *q,
56                                             struct bio *bio,
57                                             struct bio_set *bs,
58                                             unsigned *nsegs)
59 {
60         *nsegs = 1;
61
62         if (!q->limits.max_write_same_sectors)
63                 return NULL;
64
65         if (bio_sectors(bio) <= q->limits.max_write_same_sectors)
66                 return NULL;
67
68         return bio_split(bio, q->limits.max_write_same_sectors, GFP_NOIO, bs);
69 }
70
71 static struct bio *blk_bio_segment_split(struct request_queue *q,
72                                          struct bio *bio,
73                                          struct bio_set *bs,
74                                          unsigned *segs)
75 {
76         struct bio_vec bv, bvprv, *bvprvp = NULL;
77         struct bvec_iter iter;
78         unsigned seg_size = 0, nsegs = 0, sectors = 0;
79
80         bio_for_each_segment(bv, bio, iter) {
81                 if (sectors + (bv.bv_len >> 9) > queue_max_sectors(q))
82                         goto split;
83
84                 /*
85                  * If the queue doesn't support SG gaps and adding this
86                  * offset would create a gap, disallow it.
87                  */
88                 if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
89                         goto split;
90
91                 if (bvprvp && blk_queue_cluster(q)) {
92                         if (seg_size + bv.bv_len > queue_max_segment_size(q))
93                                 goto new_segment;
94                         if (!BIOVEC_PHYS_MERGEABLE(bvprvp, &bv))
95                                 goto new_segment;
96                         if (!BIOVEC_SEG_BOUNDARY(q, bvprvp, &bv))
97                                 goto new_segment;
98
99                         seg_size += bv.bv_len;
100                         bvprv = bv;
101                         bvprvp = &bv;
102                         sectors += bv.bv_len >> 9;
103                         continue;
104                 }
105 new_segment:
106                 if (nsegs == queue_max_segments(q))
107                         goto split;
108
109                 nsegs++;
110                 bvprv = bv;
111                 bvprvp = &bv;
112                 seg_size = bv.bv_len;
113                 sectors += bv.bv_len >> 9;
114         }
115
116         *segs = nsegs;
117         return NULL;
118 split:
119         *segs = nsegs;
120         return bio_split(bio, sectors, GFP_NOIO, bs);
121 }
122
123 void blk_queue_split(struct request_queue *q, struct bio **bio,
124                      struct bio_set *bs)
125 {
126         struct bio *split, *res;
127         unsigned nsegs;
128
129         if ((*bio)->bi_rw & REQ_DISCARD)
130                 split = blk_bio_discard_split(q, *bio, bs, &nsegs);
131         else if ((*bio)->bi_rw & REQ_WRITE_SAME)
132                 split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
133         else
134                 split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
135
136         /* physical segments can be figured out during splitting */
137         res = split ? split : *bio;
138         res->bi_phys_segments = nsegs;
139         bio_set_flag(res, BIO_SEG_VALID);
140
141         if (split) {
142                 bio_chain(split, *bio);
143                 generic_make_request(*bio);
144                 *bio = split;
145         }
146 }
147 EXPORT_SYMBOL(blk_queue_split);
148
149 static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
150                                              struct bio *bio,
151                                              bool no_sg_merge)
152 {
153         struct bio_vec bv, bvprv = { NULL };
154         int cluster, prev = 0;
155         unsigned int seg_size, nr_phys_segs;
156         struct bio *fbio, *bbio;
157         struct bvec_iter iter;
158
159         if (!bio)
160                 return 0;
161
162         /*
163          * This should probably be returning 0, but blk_add_request_payload()
164          * (Christoph!!!!)
165          */
166         if (bio->bi_rw & REQ_DISCARD)
167                 return 1;
168
169         if (bio->bi_rw & REQ_WRITE_SAME)
170                 return 1;
171
172         fbio = bio;
173         cluster = blk_queue_cluster(q);
174         seg_size = 0;
175         nr_phys_segs = 0;
176         for_each_bio(bio) {
177                 bio_for_each_segment(bv, bio, iter) {
178                         /*
179                          * If SG merging is disabled, each bio vector is
180                          * a segment
181                          */
182                         if (no_sg_merge)
183                                 goto new_segment;
184
185                         if (prev && cluster) {
186                                 if (seg_size + bv.bv_len
187                                     > queue_max_segment_size(q))
188                                         goto new_segment;
189                                 if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv))
190                                         goto new_segment;
191                                 if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv))
192                                         goto new_segment;
193
194                                 seg_size += bv.bv_len;
195                                 bvprv = bv;
196                                 continue;
197                         }
198 new_segment:
199                         if (nr_phys_segs == 1 && seg_size >
200                             fbio->bi_seg_front_size)
201                                 fbio->bi_seg_front_size = seg_size;
202
203                         nr_phys_segs++;
204                         bvprv = bv;
205                         prev = 1;
206                         seg_size = bv.bv_len;
207                 }
208                 bbio = bio;
209         }
210
211         if (nr_phys_segs == 1 && seg_size > fbio->bi_seg_front_size)
212                 fbio->bi_seg_front_size = seg_size;
213         if (seg_size > bbio->bi_seg_back_size)
214                 bbio->bi_seg_back_size = seg_size;
215
216         return nr_phys_segs;
217 }
218
219 void blk_recalc_rq_segments(struct request *rq)
220 {
221         bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE,
222                         &rq->q->queue_flags);
223
224         rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio,
225                         no_sg_merge);
226 }
227
228 void blk_recount_segments(struct request_queue *q, struct bio *bio)
229 {
230         unsigned short seg_cnt;
231
232         /* estimate segment number by bi_vcnt for non-cloned bio */
233         if (bio_flagged(bio, BIO_CLONED))
234                 seg_cnt = bio_segments(bio);
235         else
236                 seg_cnt = bio->bi_vcnt;
237
238         if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) &&
239                         (seg_cnt < queue_max_segments(q)))
240                 bio->bi_phys_segments = seg_cnt;
241         else {
242                 struct bio *nxt = bio->bi_next;
243
244                 bio->bi_next = NULL;
245                 bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false);
246                 bio->bi_next = nxt;
247         }
248
249         bio_set_flag(bio, BIO_SEG_VALID);
250 }
251 EXPORT_SYMBOL(blk_recount_segments);
252
253 static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
254                                    struct bio *nxt)
255 {
256         struct bio_vec end_bv = { NULL }, nxt_bv;
257         struct bvec_iter iter;
258
259         if (!blk_queue_cluster(q))
260                 return 0;
261
262         if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
263             queue_max_segment_size(q))
264                 return 0;
265
266         if (!bio_has_data(bio))
267                 return 1;
268
269         bio_for_each_segment(end_bv, bio, iter)
270                 if (end_bv.bv_len == iter.bi_size)
271                         break;
272
273         nxt_bv = bio_iovec(nxt);
274
275         if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv))
276                 return 0;
277
278         /*
279          * bio and nxt are contiguous in memory; check if the queue allows
280          * these two to be merged into one
281          */
282         if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv))
283                 return 1;
284
285         return 0;
286 }
287
288 static inline void
289 __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec,
290                      struct scatterlist *sglist, struct bio_vec *bvprv,
291                      struct scatterlist **sg, int *nsegs, int *cluster)
292 {
293
294         int nbytes = bvec->bv_len;
295
296         if (*sg && *cluster) {
297                 if ((*sg)->length + nbytes > queue_max_segment_size(q))
298                         goto new_segment;
299
300                 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
301                         goto new_segment;
302                 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
303                         goto new_segment;
304
305                 (*sg)->length += nbytes;
306         } else {
307 new_segment:
308                 if (!*sg)
309                         *sg = sglist;
310                 else {
311                         /*
312                          * If the driver previously mapped a shorter
313                          * list, we could see a termination bit
314                          * prematurely unless it fully inits the sg
315                          * table on each mapping. We KNOW that there
316                          * must be more entries here or the driver
317                          * would be buggy, so force clear the
318                          * termination bit to avoid doing a full
319                          * sg_init_table() in drivers for each command.
320                          */
321                         sg_unmark_end(*sg);
322                         *sg = sg_next(*sg);
323                 }
324
325                 sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset);
326                 (*nsegs)++;
327         }
328         *bvprv = *bvec;
329 }
330
331 static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
332                              struct scatterlist *sglist,
333                              struct scatterlist **sg)
334 {
335         struct bio_vec bvec, bvprv = { NULL };
336         struct bvec_iter iter;
337         int nsegs, cluster;
338
339         nsegs = 0;
340         cluster = blk_queue_cluster(q);
341
342         if (bio->bi_rw & REQ_DISCARD) {
343                 /*
344                  * This is a hack - drivers should be neither modifying the
345                  * biovec, nor relying on bi_vcnt - but because of
346                  * blk_add_request_payload(), a discard bio may or may not have
347                  * a payload we need to set up here (thank you Christoph) and
348                  * bi_vcnt is really the only way of telling if we need to.
349                  */
350
351                 if (bio->bi_vcnt)
352                         goto single_segment;
353
354                 return 0;
355         }
356
357         if (bio->bi_rw & REQ_WRITE_SAME) {
358 single_segment:
359                 *sg = sglist;
360                 bvec = bio_iovec(bio);
361                 sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
362                 return 1;
363         }
364
365         for_each_bio(bio)
366                 bio_for_each_segment(bvec, bio, iter)
367                         __blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg,
368                                              &nsegs, &cluster);
369
370         return nsegs;
371 }
372
373 /*
374  * map a request to scatterlist, return number of sg entries setup. Caller
375  * must make sure sg can hold rq->nr_phys_segments entries
376  */
377 int blk_rq_map_sg(struct request_queue *q, struct request *rq,
378                   struct scatterlist *sglist)
379 {
380         struct scatterlist *sg = NULL;
381         int nsegs = 0;
382
383         if (rq->bio)
384                 nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);
385
386         if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
387             (blk_rq_bytes(rq) & q->dma_pad_mask)) {
388                 unsigned int pad_len =
389                         (q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
390
391                 sg->length += pad_len;
392                 rq->extra_len += pad_len;
393         }
394
395         if (q->dma_drain_size && q->dma_drain_needed(rq)) {
396                 if (rq->cmd_flags & REQ_WRITE)
397                         memset(q->dma_drain_buffer, 0, q->dma_drain_size);
398
399                 sg_unmark_end(sg);
400                 sg = sg_next(sg);
401                 sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
402                             q->dma_drain_size,
403                             ((unsigned long)q->dma_drain_buffer) &
404                             (PAGE_SIZE - 1));
405                 nsegs++;
406                 rq->extra_len += q->dma_drain_size;
407         }
408
409         if (sg)
410                 sg_mark_end(sg);
411
412         return nsegs;
413 }
414 EXPORT_SYMBOL(blk_rq_map_sg);
415
416 static inline int ll_new_hw_segment(struct request_queue *q,
417                                     struct request *req,
418                                     struct bio *bio)
419 {
420         int nr_phys_segs = bio_phys_segments(q, bio);
421
422         if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q))
423                 goto no_merge;
424
425         if (blk_integrity_merge_bio(q, req, bio) == false)
426                 goto no_merge;
427
428         /*
429          * This will form the start of a new hw segment.  Bump both
430          * counters.
431          */
432         req->nr_phys_segments += nr_phys_segs;
433         return 1;
434
435 no_merge:
436         req->cmd_flags |= REQ_NOMERGE;
437         if (req == q->last_merge)
438                 q->last_merge = NULL;
439         return 0;
440 }
441
442 int ll_back_merge_fn(struct request_queue *q, struct request *req,
443                      struct bio *bio)
444 {
445         if (req_gap_back_merge(req, bio))
446                 return 0;
447         if (blk_integrity_rq(req) &&
448             integrity_req_gap_back_merge(req, bio))
449                 return 0;
450         if (blk_rq_sectors(req) + bio_sectors(bio) >
451             blk_rq_get_max_sectors(req)) {
452                 req->cmd_flags |= REQ_NOMERGE;
453                 if (req == q->last_merge)
454                         q->last_merge = NULL;
455                 return 0;
456         }
457         if (!bio_flagged(req->biotail, BIO_SEG_VALID))
458                 blk_recount_segments(q, req->biotail);
459         if (!bio_flagged(bio, BIO_SEG_VALID))
460                 blk_recount_segments(q, bio);
461
462         return ll_new_hw_segment(q, req, bio);
463 }
464
465 int ll_front_merge_fn(struct request_queue *q, struct request *req,
466                       struct bio *bio)
467 {
468
469         if (req_gap_front_merge(req, bio))
470                 return 0;
471         if (blk_integrity_rq(req) &&
472             integrity_req_gap_front_merge(req, bio))
473                 return 0;
474         if (blk_rq_sectors(req) + bio_sectors(bio) >
475             blk_rq_get_max_sectors(req)) {
476                 req->cmd_flags |= REQ_NOMERGE;
477                 if (req == q->last_merge)
478                         q->last_merge = NULL;
479                 return 0;
480         }
481         if (!bio_flagged(bio, BIO_SEG_VALID))
482                 blk_recount_segments(q, bio);
483         if (!bio_flagged(req->bio, BIO_SEG_VALID))
484                 blk_recount_segments(q, req->bio);
485
486         return ll_new_hw_segment(q, req, bio);
487 }
488
489 /*
490  * blk-mq uses req->special to carry normal driver per-request payload, it
491  * does not indicate a prepared command that we cannot merge with.
492  */
493 static bool req_no_special_merge(struct request *req)
494 {
495         struct request_queue *q = req->q;
496
497         return !q->mq_ops && req->special;
498 }
499
500 static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
501                                 struct request *next)
502 {
503         int total_phys_segments;
504         unsigned int seg_size =
505                 req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;
506
507         /*
508          * First check if the either of the requests are re-queued
509          * requests.  Can't merge them if they are.
510          */
511         if (req_no_special_merge(req) || req_no_special_merge(next))
512                 return 0;
513
514         if (req_gap_back_merge(req, next->bio))
515                 return 0;
516
517         /*
518          * Will it become too large?
519          */
520         if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
521             blk_rq_get_max_sectors(req))
522                 return 0;
523
524         total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
525         if (blk_phys_contig_segment(q, req->biotail, next->bio)) {
526                 if (req->nr_phys_segments == 1)
527                         req->bio->bi_seg_front_size = seg_size;
528                 if (next->nr_phys_segments == 1)
529                         next->biotail->bi_seg_back_size = seg_size;
530                 total_phys_segments--;
531         }
532
533         if (total_phys_segments > queue_max_segments(q))
534                 return 0;
535
536         if (blk_integrity_merge_rq(q, req, next) == false)
537                 return 0;
538
539         /* Merge is OK... */
540         req->nr_phys_segments = total_phys_segments;
541         return 1;
542 }
543
544 /**
545  * blk_rq_set_mixed_merge - mark a request as mixed merge
546  * @rq: request to mark as mixed merge
547  *
548  * Description:
549  *     @rq is about to be mixed merged.  Make sure the attributes
550  *     which can be mixed are set in each bio and mark @rq as mixed
551  *     merged.
552  */
553 void blk_rq_set_mixed_merge(struct request *rq)
554 {
555         unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
556         struct bio *bio;
557
558         if (rq->cmd_flags & REQ_MIXED_MERGE)
559                 return;
560
561         /*
562          * @rq will no longer represent mixable attributes for all the
563          * contained bios.  It will just track those of the first one.
564          * Distributes the attributs to each bio.
565          */
566         for (bio = rq->bio; bio; bio = bio->bi_next) {
567                 WARN_ON_ONCE((bio->bi_rw & REQ_FAILFAST_MASK) &&
568                              (bio->bi_rw & REQ_FAILFAST_MASK) != ff);
569                 bio->bi_rw |= ff;
570         }
571         rq->cmd_flags |= REQ_MIXED_MERGE;
572 }
573
574 static void blk_account_io_merge(struct request *req)
575 {
576         if (blk_do_io_stat(req)) {
577                 struct hd_struct *part;
578                 int cpu;
579
580                 cpu = part_stat_lock();
581                 part = req->part;
582
583                 part_round_stats(cpu, part);
584                 part_dec_in_flight(part, rq_data_dir(req));
585
586                 hd_struct_put(part);
587                 part_stat_unlock();
588         }
589 }
590
591 /*
592  * Has to be called with the request spinlock acquired
593  */
594 static int attempt_merge(struct request_queue *q, struct request *req,
595                           struct request *next)
596 {
597         if (!rq_mergeable(req) || !rq_mergeable(next))
598                 return 0;
599
600         if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags))
601                 return 0;
602
603         /*
604          * not contiguous
605          */
606         if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
607                 return 0;
608
609         if (rq_data_dir(req) != rq_data_dir(next)
610             || req->rq_disk != next->rq_disk
611             || req_no_special_merge(next))
612                 return 0;
613
614         if (req->cmd_flags & REQ_WRITE_SAME &&
615             !blk_write_same_mergeable(req->bio, next->bio))
616                 return 0;
617
618         /*
619          * If we are allowed to merge, then append bio list
620          * from next to rq and release next. merge_requests_fn
621          * will have updated segment counts, update sector
622          * counts here.
623          */
624         if (!ll_merge_requests_fn(q, req, next))
625                 return 0;
626
627         /*
628          * If failfast settings disagree or any of the two is already
629          * a mixed merge, mark both as mixed before proceeding.  This
630          * makes sure that all involved bios have mixable attributes
631          * set properly.
632          */
633         if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE ||
634             (req->cmd_flags & REQ_FAILFAST_MASK) !=
635             (next->cmd_flags & REQ_FAILFAST_MASK)) {
636                 blk_rq_set_mixed_merge(req);
637                 blk_rq_set_mixed_merge(next);
638         }
639
640         /*
641          * At this point we have either done a back merge
642          * or front merge. We need the smaller start_time of
643          * the merged requests to be the current request
644          * for accounting purposes.
645          */
646         if (time_after(req->start_time, next->start_time))
647                 req->start_time = next->start_time;
648
649         req->biotail->bi_next = next->bio;
650         req->biotail = next->biotail;
651
652         req->__data_len += blk_rq_bytes(next);
653
654         elv_merge_requests(q, req, next);
655
656         /*
657          * 'next' is going away, so update stats accordingly
658          */
659         blk_account_io_merge(next);
660
661         req->ioprio = ioprio_best(req->ioprio, next->ioprio);
662         if (blk_rq_cpu_valid(next))
663                 req->cpu = next->cpu;
664
665         /* owner-ship of bio passed from next to req */
666         next->bio = NULL;
667         __blk_put_request(q, next);
668         return 1;
669 }
670
/*
 * Try to merge @rq with the request the elevator reports as following it.
 * Returns the result of attempt_merge(), or 0 when there is no such
 * request.
 */
int attempt_back_merge(struct request_queue *q, struct request *rq)
{
	struct request *latter = elv_latter_request(q, rq);

	return latter ? attempt_merge(q, rq, latter) : 0;
}
680
/*
 * Try to merge @rq into the request the elevator reports as preceding it.
 * Returns the result of attempt_merge(), or 0 when there is no such
 * request.
 */
int attempt_front_merge(struct request_queue *q, struct request *rq)
{
	struct request *former = elv_former_request(q, rq);

	return former ? attempt_merge(q, former, rq) : 0;
}
690
/*
 * Non-static entry point to attempt_merge(): try to merge @next onto @rq
 * and return 1 on success, 0 otherwise.
 */
int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
			  struct request *next)
{
	int merged = attempt_merge(q, rq, next);

	return merged;
}
696
697 bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
698 {
699         if (!rq_mergeable(rq) || !bio_mergeable(bio))
700                 return false;
701
702         if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw))
703                 return false;
704
705         /* different data direction or already started, don't merge */
706         if (bio_data_dir(bio) != rq_data_dir(rq))
707                 return false;
708
709         /* must be same device and not a special request */
710         if (rq->rq_disk != bio->bi_bdev->bd_disk || req_no_special_merge(rq))
711                 return false;
712
713         /* only merge integrity protected bio into ditto rq */
714         if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
715                 return false;
716
717         /* must be using the same buffer */
718         if (rq->cmd_flags & REQ_WRITE_SAME &&
719             !blk_write_same_mergeable(rq->bio, bio))
720                 return false;
721
722         return true;
723 }
724
725 int blk_try_merge(struct request *rq, struct bio *bio)
726 {
727         if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
728                 return ELEVATOR_BACK_MERGE;
729         else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
730                 return ELEVATOR_FRONT_MERGE;
731         return ELEVATOR_NO_MERGE;
732 }