block: add a struct io_comp_batch argument to fops->iopoll()
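The batch container this series threads through the polling path, and the reshaped hooks, look roughly as follows; this is a hedged sketch of what the series adds in include/linux/blkdev.h and include/linux/fs.h, not part of the blk-mq.c hunks below:

struct request;

/*
 * Completed requests are chained through rq->rq_next onto req_list and
 * handed to the driver-supplied ->complete() callback in one batch.
 * Field layout is approximate.
 */
struct io_comp_batch {
	struct request *req_list;
	bool need_ts;			/* any request in the batch wants a timestamp */
	void (*complete)(struct io_comp_batch *);
};

#define DEFINE_IO_COMP_BATCH(name)	struct io_comp_batch name = { }

/*
 * The polling hooks grow a batch argument, roughly:
 *   int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *, unsigned int flags);
 *   int (*poll)(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob);
 */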
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b58878221f1770021cea6e3c13a648a97effeb02..79c25b64e8b090f0f153404f12ed591923a6d9d1 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -306,16 +306,22 @@ void blk_mq_wake_waiters(struct request_queue *q)
  */
 static inline bool blk_mq_need_time_stamp(struct request *rq)
 {
-       return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS)) || rq->q->elevator;
+       return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV));
 }
 
 static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
                unsigned int tag, u64 alloc_time_ns)
 {
+       struct blk_mq_ctx *ctx = data->ctx;
+       struct blk_mq_hw_ctx *hctx = data->hctx;
+       struct request_queue *q = data->q;
+       struct elevator_queue *e = q->elevator;
        struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
        struct request *rq = tags->static_rqs[tag];
+       unsigned int rq_flags = 0;
 
-       if (data->q->elevator) {
+       if (e) {
+               rq_flags = RQF_ELV;
                rq->tag = BLK_MQ_NO_TAG;
                rq->internal_tag = tag;
        } else {
@@ -323,51 +329,51 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
                rq->internal_tag = BLK_MQ_NO_TAG;
        }
 
+       if (data->flags & BLK_MQ_REQ_PM)
+               rq_flags |= RQF_PM;
+       if (blk_queue_io_stat(q))
+               rq_flags |= RQF_IO_STAT;
+       rq->rq_flags = rq_flags;
+
+       if (blk_mq_need_time_stamp(rq))
+               rq->start_time_ns = ktime_get_ns();
+       else
+               rq->start_time_ns = 0;
        /* csd/requeue_work/fifo_time is initialized before use */
-       rq->q = data->q;
-       rq->mq_ctx = data->ctx;
-       rq->mq_hctx = data->hctx;
-       rq->rq_flags = 0;
+       rq->q = q;
+       rq->mq_ctx = ctx;
+       rq->mq_hctx = hctx;
        rq->cmd_flags = data->cmd_flags;
-       if (data->flags & BLK_MQ_REQ_PM)
-               rq->rq_flags |= RQF_PM;
-       if (blk_queue_io_stat(data->q))
-               rq->rq_flags |= RQF_IO_STAT;
-       INIT_LIST_HEAD(&rq->queuelist);
-       INIT_HLIST_NODE(&rq->hash);
-       RB_CLEAR_NODE(&rq->rb_node);
        rq->rq_disk = NULL;
        rq->part = NULL;
 #ifdef CONFIG_BLK_RQ_ALLOC_TIME
        rq->alloc_time_ns = alloc_time_ns;
 #endif
-       if (blk_mq_need_time_stamp(rq))
-               rq->start_time_ns = ktime_get_ns();
-       else
-               rq->start_time_ns = 0;
        rq->io_start_time_ns = 0;
        rq->stats_sectors = 0;
        rq->nr_phys_segments = 0;
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
        rq->nr_integrity_segments = 0;
 #endif
-       blk_crypto_rq_set_defaults(rq);
-       /* tag was already set */
-       WRITE_ONCE(rq->deadline, 0);
-
        rq->timeout = 0;
-
        rq->end_io = NULL;
        rq->end_io_data = NULL;
 
-       data->ctx->rq_dispatched[op_is_sync(data->cmd_flags)]++;
+       blk_crypto_rq_set_defaults(rq);
+       INIT_LIST_HEAD(&rq->queuelist);
+       /* tag was already set */
+       WRITE_ONCE(rq->deadline, 0);
        refcount_set(&rq->ref, 1);
 
-       if (!op_is_flush(data->cmd_flags)) {
+       if (rq->rq_flags & RQF_ELV) {
                struct elevator_queue *e = data->q->elevator;
 
                rq->elv.icq = NULL;
-               if (e && e->type->ops.prepare_request) {
+               INIT_HLIST_NODE(&rq->hash);
+               RB_CLEAR_NODE(&rq->rb_node);
+
+               if (!op_is_flush(data->cmd_flags) &&
+                   e->type->ops.prepare_request) {
                        if (e->type->icq_cache)
                                blk_mq_sched_assign_ioc(rq);
 
@@ -376,7 +382,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
                }
        }
 
-       data->hctx->queued++;
        return rq;
 }
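With the elevator decision latched into rq->rq_flags as RQF_ELV at allocation time, the hot paths below can test a flag on the request instead of chasing rq->q->elevator; purely as an illustration of the pattern the later hunks switch to:

	/* before: pointer chase through the queue on every check */
	if (rq->q->elevator)
		blk_mq_sched_insert_request(rq, false, true, true);

	/* after: the decision was recorded in rq->rq_flags at init time */
	if (rq->rq_flags & RQF_ELV)
		blk_mq_sched_insert_request(rq, false, true, true);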
 
@@ -399,17 +404,11 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data,
                tag = tag_offset + i;
                tags &= ~(1UL << i);
                rq = blk_mq_rq_ctx_init(data, tag, alloc_time_ns);
-               rq->rq_next = *data->cached_rq;
-               *data->cached_rq = rq;
+               rq_list_add(data->cached_rq, rq);
        }
        data->nr_tags -= nr;
 
-       if (!data->cached_rq)
-               return NULL;
-
-       rq = *data->cached_rq;
-       *data->cached_rq = rq->rq_next;
-       return rq;
+       return rq_list_pop(data->cached_rq);
 }
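The open-coded rq_next manipulation gives way to the rq_list helpers added alongside this series; their approximate shape (thin macros over rq->rq_next, see the block headers in this tree) is:

#define rq_list_add(listptr, rq)	do {		\
	(rq)->rq_next = *(listptr);			\
	*(listptr) = rq;				\
} while (0)

#define rq_list_pop(listptr)				\
({							\
	struct request *__req = NULL;			\
	if ((listptr) && *(listptr)) {			\
		__req = *(listptr);			\
		*(listptr) = __req->rq_next;		\
	}						\
	__req;						\
})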
 
 static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
@@ -588,12 +587,12 @@ static void __blk_mq_free_request(struct request *rq)
 void blk_mq_free_request(struct request *rq)
 {
        struct request_queue *q = rq->q;
-       struct elevator_queue *e = q->elevator;
-       struct blk_mq_ctx *ctx = rq->mq_ctx;
        struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
 
-       if (rq->rq_flags & RQF_ELVPRIV) {
-               if (e && e->type->ops.finish_request)
+       if (rq->rq_flags & (RQF_ELVPRIV | RQF_ELV)) {
+               struct elevator_queue *e = q->elevator;
+
+               if (e->type->ops.finish_request)
                        e->type->ops.finish_request(rq);
                if (rq->elv.icq) {
                        put_io_context(rq->elv.icq->ioc);
@@ -601,7 +600,6 @@ void blk_mq_free_request(struct request *rq)
                }
        }
 
-       ctx->rq_completed[rq_is_sync(rq)]++;
        if (rq->rq_flags & RQF_MQ_INFLIGHT)
                __blk_mq_dec_active_requests(hctx);
 
@@ -618,16 +616,158 @@ EXPORT_SYMBOL_GPL(blk_mq_free_request);
 
 void blk_mq_free_plug_rqs(struct blk_plug *plug)
 {
-       while (plug->cached_rq) {
-               struct request *rq;
+       struct request *rq;
 
-               rq = plug->cached_rq;
-               plug->cached_rq = rq->rq_next;
+       while ((rq = rq_list_pop(&plug->cached_rq)) != NULL) {
                percpu_ref_get(&rq->q->q_usage_counter);
                blk_mq_free_request(rq);
        }
 }
 
+static void req_bio_endio(struct request *rq, struct bio *bio,
+                         unsigned int nbytes, blk_status_t error)
+{
+       if (error)
+               bio->bi_status = error;
+
+       if (unlikely(rq->rq_flags & RQF_QUIET))
+               bio_set_flag(bio, BIO_QUIET);
+
+       bio_advance(bio, nbytes);
+
+       if (req_op(rq) == REQ_OP_ZONE_APPEND && error == BLK_STS_OK) {
+               /*
+                * Partial zone append completions cannot be supported as the
+                * BIO fragments may end up not being written sequentially.
+                */
+               if (bio->bi_iter.bi_size)
+                       bio->bi_status = BLK_STS_IOERR;
+               else
+                       bio->bi_iter.bi_sector = rq->__sector;
+       }
+
+       /* don't actually finish bio if it's part of flush sequence */
+       if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
+               bio_endio(bio);
+}
+
+static void blk_account_io_completion(struct request *req, unsigned int bytes)
+{
+       if (req->part && blk_do_io_stat(req)) {
+               const int sgrp = op_stat_group(req_op(req));
+
+               part_stat_lock();
+               part_stat_add(req->part, sectors[sgrp], bytes >> 9);
+               part_stat_unlock();
+       }
+}
+
+/**
+ * blk_update_request - Complete multiple bytes without completing the request
+ * @req:      the request being processed
+ * @error:    block status code
+ * @nr_bytes: number of bytes to complete for @req
+ *
+ * Description:
+ *     Ends I/O on a number of bytes attached to @req, but doesn't complete
+ *     the request structure even if @req doesn't have leftover.
+ *     If @req has leftover, sets it up for the next range of segments.
+ *
+ *     Passing the result of blk_rq_bytes() as @nr_bytes guarantees
+ *     %false return from this function.
+ *
+ * Note:
+ *     The RQF_SPECIAL_PAYLOAD flag is ignored on purpose in this function
+ *      except in the consistency check at the end of this function.
+ *
+ * Return:
+ *     %false - this request doesn't have any more data
+ *     %true  - this request has more data
+ **/
+bool blk_update_request(struct request *req, blk_status_t error,
+               unsigned int nr_bytes)
+{
+       int total_bytes;
+
+       trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes);
+
+       if (!req->bio)
+               return false;
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+       if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
+           error == BLK_STS_OK)
+               req->q->integrity.profile->complete_fn(req, nr_bytes);
+#endif
+
+       if (unlikely(error && !blk_rq_is_passthrough(req) &&
+                    !(req->rq_flags & RQF_QUIET)))
+               blk_print_req_error(req, error);
+
+       blk_account_io_completion(req, nr_bytes);
+
+       total_bytes = 0;
+       while (req->bio) {
+               struct bio *bio = req->bio;
+               unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
+
+               if (bio_bytes == bio->bi_iter.bi_size)
+                       req->bio = bio->bi_next;
+
+               /* Completion has already been traced */
+               bio_clear_flag(bio, BIO_TRACE_COMPLETION);
+               req_bio_endio(req, bio, bio_bytes, error);
+
+               total_bytes += bio_bytes;
+               nr_bytes -= bio_bytes;
+
+               if (!nr_bytes)
+                       break;
+       }
+
+       /*
+        * completely done
+        */
+       if (!req->bio) {
+               /*
+                * Reset counters so that the request stacking driver
+                * can find how many bytes remain in the request
+                * later.
+                */
+               req->__data_len = 0;
+               return false;
+       }
+
+       req->__data_len -= total_bytes;
+
+       /* update sector only for requests with clear definition of sector */
+       if (!blk_rq_is_passthrough(req))
+               req->__sector += total_bytes >> 9;
+
+       /* mixed attributes always follow the first bio */
+       if (req->rq_flags & RQF_MIXED_MERGE) {
+               req->cmd_flags &= ~REQ_FAILFAST_MASK;
+               req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK;
+       }
+
+       if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) {
+               /*
+                * If total number of sectors is less than the first segment
+                * size, something has gone terribly wrong.
+                */
+               if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
+                       blk_dump_rq_flags(req, "request botched");
+                       req->__data_len = blk_rq_cur_bytes(req);
+               }
+
+               /* recalculate the number of segments */
+               req->nr_phys_segments = blk_recalc_rq_segments(req);
+       }
+
+       return true;
+}
+EXPORT_SYMBOL_GPL(blk_update_request);
+
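With partial completion handled by blk_update_request() above, the usual full-completion helper that drivers call is a thin wrapper over it and __blk_mq_end_request(); roughly:

void blk_mq_end_request(struct request *rq, blk_status_t error)
{
	if (blk_update_request(rq, error, blk_rq_bytes(rq)))
		BUG();
	__blk_mq_end_request(rq, error);
}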
 inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
 {
        if (blk_mq_need_time_stamp(rq)) {
@@ -1152,14 +1292,6 @@ struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
        return data.rq;
 }
 
-static inline unsigned int queued_to_index(unsigned int queued)
-{
-       if (!queued)
-               return 0;
-
-       return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
-}
-
 static bool __blk_mq_get_driver_tag(struct request *rq)
 {
        struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags;
@@ -1483,8 +1615,6 @@ out:
        if (!list_empty(&zone_list))
                list_splice_tail_init(&zone_list, list);
 
-       hctx->dispatched[queued_to_index(queued)]++;
-
        /* If we didn't flush the entire list, we could have told the driver
         * there was more coming, but that turned out to be a lie.
         */
@@ -2110,7 +2240,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
                goto insert;
        }
 
-       if (q->elevator && !bypass_insert)
+       if ((rq->rq_flags & RQF_ELV) && !bypass_insert)
                goto insert;
 
        budget_token = blk_mq_get_dispatch_budget(q);
@@ -2280,8 +2410,7 @@ void blk_mq_submit_bio(struct bio *bio)
 
        plug = blk_mq_plug(q, bio);
        if (plug && plug->cached_rq) {
-               rq = plug->cached_rq;
-               plug->cached_rq = rq->rq_next;
+               rq = rq_list_pop(&plug->cached_rq);
                INIT_LIST_HEAD(&rq->queuelist);
        } else {
                struct blk_mq_alloc_data data = {
@@ -2348,7 +2477,7 @@ void blk_mq_submit_bio(struct bio *bio)
                }
 
                blk_add_rq_to_plug(plug, rq);
-       } else if (q->elevator) {
+       } else if (rq->rq_flags & RQF_ELV) {
                /* Insert the request at the IO scheduler queue */
                blk_mq_sched_insert_request(rq, false, true, true);
        } else if (plug && !blk_queue_nomerges(q)) {
@@ -4045,20 +4174,15 @@ static bool blk_mq_poll_hybrid(struct request_queue *q, blk_qc_t qc)
 }
 
 static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
-               unsigned int flags)
+                              struct io_comp_batch *iob, unsigned int flags)
 {
        struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie);
        long state = get_current_state();
        int ret;
 
-       hctx->poll_considered++;
-
        do {
-               hctx->poll_invoked++;
-
-               ret = q->mq_ops->poll(hctx);
+               ret = q->mq_ops->poll(hctx, iob);
                if (ret > 0) {
-                       hctx->poll_success++;
                        __set_current_state(TASK_RUNNING);
                        return ret;
                }
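On the driver side, ->poll() implementations now receive the batch and can defer completions into it via blk_mq_add_to_batch() from the same series; in the sketch below the example_* names are hypothetical, only the calling pattern matters:

/* Hedged sketch of a driver poll loop feeding the completion batch. */
static int example_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
	struct example_queue *eq = hctx->driver_data;		/* hypothetical per-hctx data */
	int found = 0;

	while (example_cqe_pending(eq)) {			/* hypothetical CQ check */
		struct request *req = example_cqe_to_rq(eq);	/* hypothetical lookup */

		found++;
		/* Batch the request if possible, otherwise complete it directly. */
		if (!blk_mq_add_to_batch(req, iob, 0, example_complete_batch))
			blk_mq_complete_request(req);
	}
	return found;
}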
@@ -4077,14 +4201,15 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
        return 0;
 }
 
-int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags)
+int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
+               unsigned int flags)
 {
        if (!(flags & BLK_POLL_NOSLEEP) &&
            q->poll_nsec != BLK_MQ_POLL_CLASSIC) {
                if (blk_mq_poll_hybrid(q, cookie))
                        return 1;
        }
-       return blk_mq_poll_classic(q, cookie, flags);
+       return blk_mq_poll_classic(q, cookie, iob, flags);
 }
 
 unsigned int blk_mq_rq_cpu(struct request *rq)
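At the top of the stack, a poller that wants batched completions passes a stack-allocated io_comp_batch down and runs the driver's completion callback once polling returns; a minimal sketch of that calling convention, assuming the DEFINE_IO_COMP_BATCH() helper from this series (the function name is hypothetical):

/* Hedged sketch: drain the batch that was handed to blk_mq_poll(). */
static int example_poll_and_flush(struct request_queue *q, blk_qc_t cookie,
				  unsigned int flags)
{
	DEFINE_IO_COMP_BATCH(iob);
	int ret;

	ret = blk_mq_poll(q, cookie, &iob, flags);
	if (iob.req_list && iob.complete)
		iob.complete(&iob);	/* complete everything the driver batched */
	return ret;
}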