Merge tag 'x86_cache_for_6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 2831f78f86a033dbd0131a5da659741728d6a2c6..f6dad0886a2fa1bacac778c093717ba21e24b33e 100644
 
 #include <trace/events/block.h>
 
-#include <linux/blk-mq.h>
 #include <linux/t10-pi.h>
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-debugfs.h"
-#include "blk-mq-tag.h"
 #include "blk-pm.h"
 #include "blk-stat.h"
 #include "blk-mq-sched.h"
 
 static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
 
-static void blk_mq_poll_stats_start(struct request_queue *q);
-static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
-
-static int blk_mq_poll_stats_bkt(const struct request *rq)
-{
-       int ddir, sectors, bucket;
-
-       ddir = rq_data_dir(rq);
-       sectors = blk_rq_stats_sectors(rq);
-
-       bucket = ddir + 2 * ilog2(sectors);
-
-       if (bucket < 0)
-               return -1;
-       else if (bucket >= BLK_MQ_POLL_STATS_BKTS)
-               return ddir + BLK_MQ_POLL_STATS_BKTS - 2;
-
-       return bucket;
-}
-
-#define BLK_QC_T_SHIFT         16
-#define BLK_QC_T_INTERNAL      (1U << 31)
+static void blk_mq_insert_request(struct request *rq, blk_insert_t flags);
+static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
+               struct list_head *list);
 
 static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q,
                blk_qc_t qc)
 {
-       return xa_load(&q->hctx_table,
-                       (qc & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT);
-}
-
-static inline struct request *blk_qc_to_rq(struct blk_mq_hw_ctx *hctx,
-               blk_qc_t qc)
-{
-       unsigned int tag = qc & ((1U << BLK_QC_T_SHIFT) - 1);
-
-       if (qc & BLK_QC_T_INTERNAL)
-               return blk_mq_tag_to_rq(hctx->sched_tags, tag);
-       return blk_mq_tag_to_rq(hctx->tags, tag);
+       return xa_load(&q->hctx_table, qc);
 }
 
 static inline blk_qc_t blk_rq_to_qc(struct request *rq)
 {
-       return (rq->mq_hctx->queue_num << BLK_QC_T_SHIFT) |
-               (rq->tag != -1 ?
-                rq->tag : (rq->internal_tag | BLK_QC_T_INTERNAL));
+       return rq->mq_hctx->queue_num;
 }
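
With this change the polling cookie no longer encodes a tag: it is just the hardware queue index, so only the hctx (not the request) can be recovered from it. For illustration only (not part of the patch), a minimal round-trip using just the two helpers above:

static void example_cookie_roundtrip(struct request *rq)
{
        blk_qc_t cookie = blk_rq_to_qc(rq);     /* just the hctx index */
        struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(rq->q, cookie);

        /* The same hardware context comes back; no request lookup is possible. */
        WARN_ON_ONCE(hctx != rq->mq_hctx);
}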
 
 /*
@@ -840,6 +806,12 @@ static void blk_complete_request(struct request *req)
                req->q->integrity.profile->complete_fn(req, total_bytes);
 #endif
 
+       /*
+        * Upper layers may call blk_crypto_evict_key() anytime after the last
+        * bio_endio().  Therefore, the keyslot must be released before that.
+        */
+       blk_crypto_rq_put_keyslot(req);
+
        blk_account_io_completion(req, total_bytes);
 
        do {
@@ -905,6 +877,13 @@ bool blk_update_request(struct request *req, blk_status_t error,
                req->q->integrity.profile->complete_fn(req, nr_bytes);
 #endif
 
+       /*
+        * Upper layers may call blk_crypto_evict_key() anytime after the last
+        * bio_endio().  Therefore, the keyslot must be released before that.
+        */
+       if (blk_crypto_rq_has_keyslot(req) && nr_bytes >= blk_rq_bytes(req))
+               __blk_crypto_rq_put_keyslot(req);
+
        if (unlikely(error && !blk_rq_is_passthrough(req) &&
                     !(req->rq_flags & RQF_QUIET)) &&
                     !test_bit(GD_DEAD, &req->q->disk->state)) {
@@ -976,17 +955,6 @@ bool blk_update_request(struct request *req, blk_status_t error,
 }
 EXPORT_SYMBOL_GPL(blk_update_request);
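
A hedged sketch of the upper-layer pattern that the two keyslot comments above guard against; the helper name, the completion object, and the blk_crypto_evict_key(bdev, key) call form are assumptions for illustration, not taken from this patch:

/* Hypothetical key owner: tear down a key once all of its bios have completed. */
static void example_evict_after_io(struct block_device *bdev,
                                   const struct blk_crypto_key *key,
                                   struct completion *all_bios_done)
{
        /* Completed from the final bio's ->bi_end_io (hypothetical). */
        wait_for_completion(all_bios_done);

        /*
         * Legal only because blk_update_request()/blk_complete_request()
         * released the request's keyslot before that last bio_endio().
         */
        blk_crypto_evict_key(bdev, key);
}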
 
-static void __blk_account_io_done(struct request *req, u64 now)
-{
-       const int sgrp = op_stat_group(req_op(req));
-
-       part_stat_lock();
-       update_io_ticks(req->part, jiffies, true);
-       part_stat_inc(req->part, ios[sgrp]);
-       part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns);
-       part_stat_unlock();
-}
-
 static inline void blk_account_io_done(struct request *req, u64 now)
 {
        /*
@@ -995,40 +963,41 @@ static inline void blk_account_io_done(struct request *req, u64 now)
         * containing request is enough.
         */
        if (blk_do_io_stat(req) && req->part &&
-           !(req->rq_flags & RQF_FLUSH_SEQ))
-               __blk_account_io_done(req, now);
-}
-
-static void __blk_account_io_start(struct request *rq)
-{
-       /*
-        * All non-passthrough requests are created from a bio with one
-        * exception: when a flush command that is part of a flush sequence
-        * generated by the state machine in blk-flush.c is cloned onto the
-        * lower device by dm-multipath we can get here without a bio.
-        */
-       if (rq->bio)
-               rq->part = rq->bio->bi_bdev;
-       else
-               rq->part = rq->q->disk->part0;
+           !(req->rq_flags & RQF_FLUSH_SEQ)) {
+               const int sgrp = op_stat_group(req_op(req));
 
-       part_stat_lock();
-       update_io_ticks(rq->part, jiffies, false);
-       part_stat_unlock();
+               part_stat_lock();
+               update_io_ticks(req->part, jiffies, true);
+               part_stat_inc(req->part, ios[sgrp]);
+               part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns);
+               part_stat_unlock();
+       }
 }
 
 static inline void blk_account_io_start(struct request *req)
 {
-       if (blk_do_io_stat(req))
-               __blk_account_io_start(req);
+       if (blk_do_io_stat(req)) {
+               /*
+                * All non-passthrough requests are created from a bio with one
+                * exception: when a flush command that is part of a flush sequence
+                * generated by the state machine in blk-flush.c is cloned onto the
+                * lower device by dm-multipath we can get here without a bio.
+                */
+               if (req->bio)
+                       req->part = req->bio->bi_bdev;
+               else
+                       req->part = req->q->disk->part0;
+
+               part_stat_lock();
+               update_io_ticks(req->part, jiffies, false);
+               part_stat_unlock();
+       }
 }
 
 static inline void __blk_mq_end_request_acct(struct request *rq, u64 now)
 {
-       if (rq->rq_flags & RQF_STATS) {
-               blk_mq_poll_stats_start(rq->q);
+       if (rq->rq_flags & RQF_STATS)
                blk_stat_add(rq, now);
-       }
 
        blk_mq_sched_completed_request(rq, now);
        blk_account_io_done(rq, now);
@@ -1322,6 +1291,8 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
  */
 void blk_execute_rq_nowait(struct request *rq, bool at_head)
 {
+       struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
+
        WARN_ON(irqs_disabled());
        WARN_ON(!blk_rq_is_passthrough(rq));
 
@@ -1332,10 +1303,13 @@ void blk_execute_rq_nowait(struct request *rq, bool at_head)
         * device, directly accessing the plug instead of using blk_mq_plug()
         * should not have any consequences.
         */
-       if (current->plug)
+       if (current->plug && !at_head) {
                blk_add_rq_to_plug(current->plug, rq);
-       else
-               blk_mq_sched_insert_request(rq, at_head, true, false);
+               return;
+       }
+
+       blk_mq_insert_request(rq, at_head ? BLK_MQ_INSERT_AT_HEAD : 0);
+       blk_mq_run_hw_queue(hctx, false);
 }
 EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
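
A hypothetical driver-side use of the interface reworked above, for illustration only; the opcode, the completion object, and the helper names are assumptions. Note that after this change an at_head request is never parked on the caller's plug:

static enum rq_end_io_ret example_pt_end_io(struct request *rq,
                                            blk_status_t error)
{
        complete(rq->end_io_data);      /* hypothetical completion object */
        return RQ_END_IO_FREE;          /* let the core free the request */
}

static int example_submit_pt(struct request_queue *q, struct completion *done)
{
        struct request *rq;

        rq = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        rq->end_io = example_pt_end_io;
        rq->end_io_data = done;
        blk_execute_rq_nowait(rq, false);       /* may go through the plug */
        return 0;
}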
 
@@ -1383,6 +1357,7 @@ static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
  */
 blk_status_t blk_execute_rq(struct request *rq, bool at_head)
 {
+       struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
        struct blk_rq_wait wait = {
                .done = COMPLETION_INITIALIZER_ONSTACK(wait.done),
        };
@@ -1394,7 +1369,8 @@ blk_status_t blk_execute_rq(struct request *rq, bool at_head)
        rq->end_io = blk_end_sync_rq;
 
        blk_account_io_start(rq);
-       blk_mq_sched_insert_request(rq, at_head, true, false);
+       blk_mq_insert_request(rq, at_head ? BLK_MQ_INSERT_AT_HEAD : 0);
+       blk_mq_run_hw_queue(hctx, false);
 
        if (blk_rq_is_poll(rq)) {
                blk_rq_poll_completion(rq, &wait.done);
@@ -1434,12 +1410,17 @@ static void __blk_mq_requeue_request(struct request *rq)
 
 void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
 {
+       struct request_queue *q = rq->q;
+
        __blk_mq_requeue_request(rq);
 
        /* this request will be re-inserted to io scheduler queue */
        blk_mq_sched_requeue_request(rq);
 
-       blk_mq_add_to_requeue_list(rq, true, kick_requeue_list);
+       blk_mq_add_to_requeue_list(rq, BLK_MQ_INSERT_AT_HEAD);
+
+       if (kick_requeue_list)
+               blk_mq_kick_requeue_list(q);
 }
 EXPORT_SYMBOL(blk_mq_requeue_request);
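
A hypothetical driver completion path using the (unchanged) driver-facing API above, for illustration only; the retry condition and the 100 ms delay are assumptions:

static void example_complete_rq(struct request *rq, bool device_busy)
{
        if (device_busy) {
                /* Put the request back without kicking the list ... */
                blk_mq_requeue_request(rq, false);
                /* ... and retry after a short, arbitrary delay. */
                blk_mq_delay_kick_requeue_list(rq->q, 100);
                return;
        }
        blk_mq_end_request(rq, BLK_STS_OK);
}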
 
@@ -1455,33 +1436,33 @@ static void blk_mq_requeue_work(struct work_struct *work)
        spin_unlock_irq(&q->requeue_lock);
 
        list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
-               if (!(rq->rq_flags & (RQF_SOFTBARRIER | RQF_DONTPREP)))
-                       continue;
-
-               rq->rq_flags &= ~RQF_SOFTBARRIER;
-               list_del_init(&rq->queuelist);
                /*
-                * If RQF_DONTPREP, rq has contained some driver specific
-                * data, so insert it to hctx dispatch list to avoid any
-                * merge.
+                * If RQF_DONTPREP is set, the request has been started by the
+                * driver already and might have driver-specific data allocated
+                * already.  Insert it into the hctx dispatch list to avoid
+                * block layer merges for the request.
                 */
-               if (rq->rq_flags & RQF_DONTPREP)
-                       blk_mq_request_bypass_insert(rq, false, false);
-               else
-                       blk_mq_sched_insert_request(rq, true, false, false);
+               if (rq->rq_flags & RQF_DONTPREP) {
+                       rq->rq_flags &= ~RQF_SOFTBARRIER;
+                       list_del_init(&rq->queuelist);
+                       blk_mq_request_bypass_insert(rq, 0);
+               } else if (rq->rq_flags & RQF_SOFTBARRIER) {
+                       rq->rq_flags &= ~RQF_SOFTBARRIER;
+                       list_del_init(&rq->queuelist);
+                       blk_mq_insert_request(rq, BLK_MQ_INSERT_AT_HEAD);
+               }
        }
 
        while (!list_empty(&rq_list)) {
                rq = list_entry(rq_list.next, struct request, queuelist);
                list_del_init(&rq->queuelist);
-               blk_mq_sched_insert_request(rq, false, false, false);
+               blk_mq_insert_request(rq, 0);
        }
 
        blk_mq_run_hw_queues(q, false);
 }
 
-void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
-                               bool kick_requeue_list)
+void blk_mq_add_to_requeue_list(struct request *rq, blk_insert_t insert_flags)
 {
        struct request_queue *q = rq->q;
        unsigned long flags;
@@ -1493,16 +1474,13 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
        BUG_ON(rq->rq_flags & RQF_SOFTBARRIER);
 
        spin_lock_irqsave(&q->requeue_lock, flags);
-       if (at_head) {
+       if (insert_flags & BLK_MQ_INSERT_AT_HEAD) {
                rq->rq_flags |= RQF_SOFTBARRIER;
                list_add(&rq->queuelist, &q->requeue_list);
        } else {
                list_add_tail(&rq->queuelist, &q->requeue_list);
        }
        spin_unlock_irqrestore(&q->requeue_lock, flags);
-
-       if (kick_requeue_list)
-               blk_mq_kick_requeue_list(q);
 }
 
 void blk_mq_kick_requeue_list(struct request_queue *q)
@@ -2158,24 +2136,6 @@ out:
        return true;
 }
 
-/**
- * __blk_mq_run_hw_queue - Run a hardware queue.
- * @hctx: Pointer to the hardware queue to run.
- *
- * Send pending requests to the hardware.
- */
-static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
-{
-       /*
-        * We can't run the queue inline with ints disabled. Ensure that
-        * we catch bad users of this early.
-        */
-       WARN_ON_ONCE(in_interrupt());
-
-       blk_mq_run_dispatch_ops(hctx->queue,
-                       blk_mq_sched_dispatch_requests(hctx));
-}
-
 static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx)
 {
        int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask);
@@ -2232,42 +2192,19 @@ select_cpu:
 }
 
 /**
- * __blk_mq_delay_run_hw_queue - Run (or schedule to run) a hardware queue.
+ * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously.
  * @hctx: Pointer to the hardware queue to run.
- * @async: If we want to run the queue asynchronously.
  * @msecs: Milliseconds of delay to wait before running the queue.
  *
- * If !@async, try to run the queue now. Else, run the queue asynchronously and
- * with a delay of @msecs.
+ * Run a hardware queue asynchronously with a delay of @msecs.
  */
-static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
-                                       unsigned long msecs)
+void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
 {
        if (unlikely(blk_mq_hctx_stopped(hctx)))
                return;
-
-       if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
-               if (cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
-                       __blk_mq_run_hw_queue(hctx);
-                       return;
-               }
-       }
-
        kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
                                    msecs_to_jiffies(msecs));
 }
-
-/**
- * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously.
- * @hctx: Pointer to the hardware queue to run.
- * @msecs: Milliseconds of delay to wait before running the queue.
- *
- * Run a hardware queue asynchronously with a delay of @msecs.
- */
-void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
-{
-       __blk_mq_delay_run_hw_queue(hctx, true, msecs);
-}
 EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
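
A hypothetical ->queue_rq() fragment showing the usual consumer of blk_mq_delay_run_hw_queue(): back off when the device is temporarily out of resources and let the core retry. The device helpers and the 3 ms delay are assumptions for illustration:

static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
                                     const struct blk_mq_queue_data *bd)
{
        struct example_dev *dev = hctx->queue->queuedata;       /* hypothetical */

        if (!example_dev_has_slot(dev)) {                       /* hypothetical */
                blk_mq_delay_run_hw_queue(hctx, 3);
                return BLK_STS_DEV_RESOURCE;
        }

        blk_mq_start_request(bd->rq);
        example_dev_submit(dev, bd->rq);                        /* hypothetical */
        return BLK_STS_OK;
}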
 
 /**
@@ -2283,6 +2220,11 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 {
        bool need_run;
 
+       /*
+        * We can't run the queue inline with interrupts disabled.
+        */
+       WARN_ON_ONCE(!async && in_interrupt());
+
        /*
         * When queue is quiesced, we may be switching io scheduler, or
         * updating nr_hw_queues, or other things, and we can't run queue
@@ -2295,8 +2237,17 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
                need_run = !blk_queue_quiesced(hctx->queue) &&
                blk_mq_hctx_has_pending(hctx));
 
-       if (need_run)
-               __blk_mq_delay_run_hw_queue(hctx, async, 0);
+       if (!need_run)
+               return;
+
+       if (async || (hctx->flags & BLK_MQ_F_BLOCKING) ||
+           !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
+               blk_mq_delay_run_hw_queue(hctx, 0);
+               return;
+       }
+
+       blk_mq_run_dispatch_ops(hctx->queue,
+                               blk_mq_sched_dispatch_requests(hctx));
 }
 EXPORT_SYMBOL(blk_mq_run_hw_queue);
 
@@ -2461,79 +2412,51 @@ EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
 
 static void blk_mq_run_work_fn(struct work_struct *work)
 {
-       struct blk_mq_hw_ctx *hctx;
-
-       hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
-
-       /*
-        * If we are stopped, don't run the queue.
-        */
-       if (blk_mq_hctx_stopped(hctx))
-               return;
-
-       __blk_mq_run_hw_queue(hctx);
-}
-
-static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
-                                           struct request *rq,
-                                           bool at_head)
-{
-       struct blk_mq_ctx *ctx = rq->mq_ctx;
-       enum hctx_type type = hctx->type;
-
-       lockdep_assert_held(&ctx->lock);
-
-       trace_block_rq_insert(rq);
-
-       if (at_head)
-               list_add(&rq->queuelist, &ctx->rq_lists[type]);
-       else
-               list_add_tail(&rq->queuelist, &ctx->rq_lists[type]);
-}
-
-void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
-                            bool at_head)
-{
-       struct blk_mq_ctx *ctx = rq->mq_ctx;
-
-       lockdep_assert_held(&ctx->lock);
+       struct blk_mq_hw_ctx *hctx =
+               container_of(work, struct blk_mq_hw_ctx, run_work.work);
 
-       __blk_mq_insert_req_list(hctx, rq, at_head);
-       blk_mq_hctx_mark_pending(hctx, ctx);
+       blk_mq_run_dispatch_ops(hctx->queue,
+                               blk_mq_sched_dispatch_requests(hctx));
 }
 
 /**
  * blk_mq_request_bypass_insert - Insert a request at dispatch list.
  * @rq: Pointer to request to be inserted.
- * @at_head: true if the request should be inserted at the head of the list.
- * @run_queue: If we should run the hardware queue after inserting the request.
+ * @flags: BLK_MQ_INSERT_*
  *
  * Should only be used carefully, when the caller knows we want to
  * bypass a potential IO scheduler on the target device.
  */
-void blk_mq_request_bypass_insert(struct request *rq, bool at_head,
-                                 bool run_queue)
+void blk_mq_request_bypass_insert(struct request *rq, blk_insert_t flags)
 {
        struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
 
        spin_lock(&hctx->lock);
-       if (at_head)
+       if (flags & BLK_MQ_INSERT_AT_HEAD)
                list_add(&rq->queuelist, &hctx->dispatch);
        else
                list_add_tail(&rq->queuelist, &hctx->dispatch);
        spin_unlock(&hctx->lock);
-
-       if (run_queue)
-               blk_mq_run_hw_queue(hctx, false);
 }
 
-void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
-                           struct list_head *list)
-
+static void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx,
+               struct blk_mq_ctx *ctx, struct list_head *list,
+               bool run_queue_async)
 {
        struct request *rq;
        enum hctx_type type = hctx->type;
 
+       /*
+        * Try to issue requests directly if the hw queue isn't busy, saving
+        * an extra enqueue & dequeue on the sw queue.
+        */
+       if (!hctx->dispatch_busy && !run_queue_async) {
+               blk_mq_run_dispatch_ops(hctx->queue,
+                       blk_mq_try_issue_list_directly(hctx, list));
+               if (list_empty(list))
+                       goto out;
+       }
+
        /*
         * preemption doesn't flush plug list, so it's possible ctx->cpu is
         * offline now
@@ -2547,6 +2470,70 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
        list_splice_tail_init(list, &ctx->rq_lists[type]);
        blk_mq_hctx_mark_pending(hctx, ctx);
        spin_unlock(&ctx->lock);
+out:
+       blk_mq_run_hw_queue(hctx, run_queue_async);
+}
+
+static void blk_mq_insert_request(struct request *rq, blk_insert_t flags)
+{
+       struct request_queue *q = rq->q;
+       struct blk_mq_ctx *ctx = rq->mq_ctx;
+       struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
+
+       if (blk_rq_is_passthrough(rq)) {
+               /*
+                * Passthrough requests have to be added to hctx->dispatch
+                * directly.  The device may be in a state where it cannot
+                * handle FS requests and always returns BLK_STS_RESOURCE for
+                * them, which gets them added to hctx->dispatch.
+                *
+                * If such a passthrough request is needed to unblock the
+                * queues and is added to the scheduler queue instead, there is
+                * no chance to dispatch it, since requests in hctx->dispatch
+                * are prioritized.
+                */
+               blk_mq_request_bypass_insert(rq, flags);
+       } else if (rq->rq_flags & RQF_FLUSH_SEQ) {
+               /*
+                * First, normal IO requests are inserted into the scheduler
+                * queue or the sw queue, while the flush request is added
+                * directly to the dispatch queue (hctx->dispatch).  Since
+                * there is at most one in-flight flush request per hw queue,
+                * it does not matter whether it goes to the tail or the front
+                * of the dispatch queue.
+                *
+                * Second, with NCQ the flush is a non-NCQ command, and
+                * queueing it fails while any normal IO request (NCQ command)
+                * is in flight.  Adding the flush rq to the front of
+                * hctx->dispatch tends to add a little latency to each flush
+                * (because of S_SCHED_RESTART) compared with adding it to the
+                * tail, which increases the chance of flush merging, so fewer
+                * flush requests are issued to the controller.  About 10% of
+                * the time is observed to be saved in blktests block/004 on a
+                * disk attached to an AHCI/NCQ controller when the flush rq is
+                * added to the front of hctx->dispatch.
+                *
+                * Simply queue the flush rq at the front of hctx->dispatch so
+                * that flush-intensive workloads benefit on NCQ hardware.
+                */
+               blk_mq_request_bypass_insert(rq, BLK_MQ_INSERT_AT_HEAD);
+       } else if (q->elevator) {
+               LIST_HEAD(list);
+
+               WARN_ON_ONCE(rq->tag != BLK_MQ_NO_TAG);
+
+               list_add(&rq->queuelist, &list);
+               q->elevator->type->ops.insert_requests(hctx, &list, flags);
+       } else {
+               trace_block_rq_insert(rq);
+
+               spin_lock(&ctx->lock);
+               if (flags & BLK_MQ_INSERT_AT_HEAD)
+                       list_add(&rq->queuelist, &ctx->rq_lists[hctx->type]);
+               else
+                       list_add_tail(&rq->queuelist,
+                                     &ctx->rq_lists[hctx->type]);
+               blk_mq_hctx_mark_pending(hctx, ctx);
+               spin_unlock(&ctx->lock);
+       }
 }
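
For illustration, a minimal, hypothetical elevator ->insert_requests() hook of the shape called in the q->elevator branch above; the toy_data structure, its lock, and its pending list are assumptions:

static void toy_insert_requests(struct blk_mq_hw_ctx *hctx,
                                struct list_head *list, blk_insert_t flags)
{
        struct toy_data *td = hctx->queue->elevator->elevator_data;
        struct request *rq, *next;

        spin_lock(&td->lock);
        list_for_each_entry_safe(rq, next, list, queuelist) {
                list_del_init(&rq->queuelist);
                if (flags & BLK_MQ_INSERT_AT_HEAD)
                        list_add(&rq->queuelist, &td->pending);
                else
                        list_add_tail(&rq->queuelist, &td->pending);
        }
        spin_unlock(&td->lock);
}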
 
 static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
@@ -2600,49 +2587,19 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
        return ret;
 }
 
-static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
-                                               struct request *rq,
-                                               bool bypass_insert, bool last)
+static bool blk_mq_get_budget_and_tag(struct request *rq)
 {
-       struct request_queue *q = rq->q;
-       bool run_queue = true;
        int budget_token;
 
-       /*
-        * RCU or SRCU read lock is needed before checking quiesced flag.
-        *
-        * When queue is stopped or quiesced, ignore 'bypass_insert' from
-        * blk_mq_request_issue_directly(), and return BLK_STS_OK to caller,
-        * and avoid driver to try to dispatch again.
-        */
-       if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
-               run_queue = false;
-               bypass_insert = false;
-               goto insert;
-       }
-
-       if ((rq->rq_flags & RQF_ELV) && !bypass_insert)
-               goto insert;
-
-       budget_token = blk_mq_get_dispatch_budget(q);
+       budget_token = blk_mq_get_dispatch_budget(rq->q);
        if (budget_token < 0)
-               goto insert;
-
+               return false;
        blk_mq_set_rq_budget_token(rq, budget_token);
-
        if (!blk_mq_get_driver_tag(rq)) {
-               blk_mq_put_dispatch_budget(q, budget_token);
-               goto insert;
+               blk_mq_put_dispatch_budget(rq->q, budget_token);
+               return false;
        }
-
-       return __blk_mq_issue_directly(hctx, rq, last);
-insert:
-       if (bypass_insert)
-               return BLK_STS_RESOURCE;
-
-       blk_mq_sched_insert_request(rq, false, run_queue, false);
-
-       return BLK_STS_OK;
+       return true;
 }
 
 /**
@@ -2658,18 +2615,46 @@ insert:
 static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
                struct request *rq)
 {
-       blk_status_t ret =
-               __blk_mq_try_issue_directly(hctx, rq, false, true);
+       blk_status_t ret;
+
+       if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) {
+               blk_mq_insert_request(rq, 0);
+               return;
+       }
+
+       if ((rq->rq_flags & RQF_ELV) || !blk_mq_get_budget_and_tag(rq)) {
+               blk_mq_insert_request(rq, 0);
+               blk_mq_run_hw_queue(hctx, false);
+               return;
+       }
 
-       if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
-               blk_mq_request_bypass_insert(rq, false, true);
-       else if (ret != BLK_STS_OK)
+       ret = __blk_mq_issue_directly(hctx, rq, true);
+       switch (ret) {
+       case BLK_STS_OK:
+               break;
+       case BLK_STS_RESOURCE:
+       case BLK_STS_DEV_RESOURCE:
+               blk_mq_request_bypass_insert(rq, 0);
+               blk_mq_run_hw_queue(hctx, false);
+               break;
+       default:
                blk_mq_end_request(rq, ret);
+               break;
+       }
 }
 
 static blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
 {
-       return __blk_mq_try_issue_directly(rq->mq_hctx, rq, true, last);
+       struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
+
+       if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) {
+               blk_mq_insert_request(rq, 0);
+               return BLK_STS_OK;
+       }
+
+       if (!blk_mq_get_budget_and_tag(rq))
+               return BLK_STS_RESOURCE;
+       return __blk_mq_issue_directly(hctx, rq, last);
 }
 
 static void blk_mq_plug_issue_direct(struct blk_plug *plug)
@@ -2697,7 +2682,8 @@ static void blk_mq_plug_issue_direct(struct blk_plug *plug)
                        break;
                case BLK_STS_RESOURCE:
                case BLK_STS_DEV_RESOURCE:
-                       blk_mq_request_bypass_insert(rq, false, true);
+                       blk_mq_request_bypass_insert(rq, 0);
+                       blk_mq_run_hw_queue(hctx, false);
                        goto out;
                default:
                        blk_mq_end_request(rq, ret);
@@ -2743,7 +2729,16 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
 
        plug->mq_list = requeue_list;
        trace_block_unplug(this_hctx->queue, depth, !from_sched);
-       blk_mq_sched_insert_requests(this_hctx, this_ctx, &list, from_sched);
+
+       percpu_ref_get(&this_hctx->queue->q_usage_counter);
+       if (this_hctx->queue->elevator) {
+               this_hctx->queue->elevator->type->ops.insert_requests(this_hctx,
+                               &list, 0);
+               blk_mq_run_hw_queue(this_hctx, from_sched);
+       } else {
+               blk_mq_insert_requests(this_hctx, this_ctx, &list, from_sched);
+       }
+       percpu_ref_put(&this_hctx->queue->q_usage_counter);
 }
 
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
@@ -2789,7 +2784,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
        } while (!rq_list_empty(plug->mq_list));
 }
 
-void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
+static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
                struct list_head *list)
 {
        int queued = 0;
@@ -2807,8 +2802,9 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
                        break;
                case BLK_STS_RESOURCE:
                case BLK_STS_DEV_RESOURCE:
-                       blk_mq_request_bypass_insert(rq, false,
-                                                    list_empty(list));
+                       blk_mq_request_bypass_insert(rq, 0);
+                       if (list_empty(list))
+                               blk_mq_run_hw_queue(hctx, false);
                        goto out;
                default:
                        blk_mq_end_request(rq, ret);
@@ -2934,6 +2930,7 @@ void blk_mq_submit_bio(struct bio *bio)
        struct request_queue *q = bdev_get_queue(bio->bi_bdev);
        struct blk_plug *plug = blk_mq_plug(bio);
        const int is_sync = op_is_sync(bio->bi_opf);
+       struct blk_mq_hw_ctx *hctx;
        struct request *rq;
        unsigned int nr_segs = 1;
        blk_status_t ret;
@@ -2965,7 +2962,7 @@ void blk_mq_submit_bio(struct bio *bio)
 
        blk_mq_bio_to_request(rq, bio, nr_segs);
 
-       ret = blk_crypto_init_request(rq);
+       ret = blk_crypto_rq_get_keyslot(rq);
        if (ret != BLK_STS_OK) {
                bio->bi_status = ret;
                bio_endio(bio);
@@ -2978,15 +2975,19 @@ void blk_mq_submit_bio(struct bio *bio)
                return;
        }
 
-       if (plug)
+       if (plug) {
                blk_add_rq_to_plug(plug, rq);
-       else if ((rq->rq_flags & RQF_ELV) ||
-                (rq->mq_hctx->dispatch_busy &&
-                 (q->nr_hw_queues == 1 || !is_sync)))
-               blk_mq_sched_insert_request(rq, false, true, true);
-       else
-               blk_mq_run_dispatch_ops(rq->q,
-                               blk_mq_try_issue_directly(rq->mq_hctx, rq));
+               return;
+       }
+
+       hctx = rq->mq_hctx;
+       if ((rq->rq_flags & RQF_ELV) ||
+           (hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) {
+               blk_mq_insert_request(rq, 0);
+               blk_mq_run_hw_queue(hctx, true);
+       } else {
+               blk_mq_run_dispatch_ops(q, blk_mq_try_issue_directly(hctx, rq));
+       }
 }
 
 #ifdef CONFIG_BLK_MQ_STACKING
@@ -3034,8 +3035,9 @@ blk_status_t blk_insert_cloned_request(struct request *rq)
        if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq)))
                return BLK_STS_IOERR;
 
-       if (blk_crypto_insert_cloned_request(rq))
-               return BLK_STS_IOERR;
+       ret = blk_crypto_rq_get_keyslot(rq);
+       if (ret != BLK_STS_OK)
+               return ret;
 
        blk_account_io_start(rq);
 
@@ -4206,14 +4208,8 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
        /* mark the queue as mq asap */
        q->mq_ops = set->ops;
 
-       q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn,
-                                            blk_mq_poll_stats_bkt,
-                                            BLK_MQ_POLL_STATS_BKTS, q);
-       if (!q->poll_cb)
-               goto err_exit;
-
        if (blk_mq_alloc_ctxs(q))
-               goto err_poll;
+               goto err_exit;
 
        /* init q->mq_kobj and sw queues' kobjects */
        blk_mq_sysfs_init(q);
@@ -4241,11 +4237,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 
        q->nr_requests = set->queue_depth;
 
-       /*
-        * Default to classic polling
-        */
-       q->poll_nsec = BLK_MQ_POLL_CLASSIC;
-
        blk_mq_init_cpu_queues(q, set->nr_hw_queues);
        blk_mq_add_queue_tag_set(set, q);
        blk_mq_map_swqueue(q);
@@ -4253,9 +4244,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 
 err_hctxs:
        blk_mq_release(q);
-err_poll:
-       blk_stat_free_callback(q->poll_cb);
-       q->poll_cb = NULL;
 err_exit:
        q->mq_ops = NULL;
        return -ENOMEM;
@@ -4752,138 +4740,8 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
 }
 EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
 
-/* Enable polling stats and return whether they were already enabled. */
-static bool blk_poll_stats_enable(struct request_queue *q)
-{
-       if (q->poll_stat)
-               return true;
-
-       return blk_stats_alloc_enable(q);
-}
-
-static void blk_mq_poll_stats_start(struct request_queue *q)
-{
-       /*
-        * We don't arm the callback if polling stats are not enabled or the
-        * callback is already active.
-        */
-       if (!q->poll_stat || blk_stat_is_active(q->poll_cb))
-               return;
-
-       blk_stat_activate_msecs(q->poll_cb, 100);
-}
-
-static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb)
-{
-       struct request_queue *q = cb->data;
-       int bucket;
-
-       for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS; bucket++) {
-               if (cb->stat[bucket].nr_samples)
-                       q->poll_stat[bucket] = cb->stat[bucket];
-       }
-}
-
-static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
-                                      struct request *rq)
-{
-       unsigned long ret = 0;
-       int bucket;
-
-       /*
-        * If stats collection isn't on, don't sleep but turn it on for
-        * future users
-        */
-       if (!blk_poll_stats_enable(q))
-               return 0;
-
-       /*
-        * As an optimistic guess, use half of the mean service time
-        * for this type of request. We can (and should) make this smarter.
-        * For instance, if the completion latencies are tight, we can
-        * get closer than just half the mean. This is especially
-        * important on devices where the completion latencies are longer
-        * than ~10 usec. We do use the stats for the relevant IO size
-        * if available which does lead to better estimates.
-        */
-       bucket = blk_mq_poll_stats_bkt(rq);
-       if (bucket < 0)
-               return ret;
-
-       if (q->poll_stat[bucket].nr_samples)
-               ret = (q->poll_stat[bucket].mean + 1) / 2;
-
-       return ret;
-}
-
-static bool blk_mq_poll_hybrid(struct request_queue *q, blk_qc_t qc)
-{
-       struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, qc);
-       struct request *rq = blk_qc_to_rq(hctx, qc);
-       struct hrtimer_sleeper hs;
-       enum hrtimer_mode mode;
-       unsigned int nsecs;
-       ktime_t kt;
-
-       /*
-        * If a request has completed on queue that uses an I/O scheduler, we
-        * won't get back a request from blk_qc_to_rq.
-        */
-       if (!rq || (rq->rq_flags & RQF_MQ_POLL_SLEPT))
-               return false;
-
-       /*
-        * If we get here, hybrid polling is enabled. Hence poll_nsec can be:
-        *
-        *  0:  use half of prev avg
-        * >0:  use this specific value
-        */
-       if (q->poll_nsec > 0)
-               nsecs = q->poll_nsec;
-       else
-               nsecs = blk_mq_poll_nsecs(q, rq);
-
-       if (!nsecs)
-               return false;
-
-       rq->rq_flags |= RQF_MQ_POLL_SLEPT;
-
-       /*
-        * This will be replaced with the stats tracking code, using
-        * 'avg_completion_time / 2' as the pre-sleep target.
-        */
-       kt = nsecs;
-
-       mode = HRTIMER_MODE_REL;
-       hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode);
-       hrtimer_set_expires(&hs.timer, kt);
-
-       do {
-               if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
-                       break;
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               hrtimer_sleeper_start_expires(&hs, mode);
-               if (hs.task)
-                       io_schedule();
-               hrtimer_cancel(&hs.timer);
-               mode = HRTIMER_MODE_ABS;
-       } while (hs.task && !signal_pending(current));
-
-       __set_current_state(TASK_RUNNING);
-       destroy_hrtimer_on_stack(&hs.timer);
-
-       /*
-        * If we sleep, have the caller restart the poll loop to reset the
-        * state.  Like for the other success return cases, the caller is
-        * responsible for checking if the IO completed.  If the IO isn't
-        * complete, we'll get called again and will go straight to the busy
-        * poll loop.
-        */
-       return true;
-}
-
-static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
-                              struct io_comp_batch *iob, unsigned int flags)
+int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
+               unsigned int flags)
 {
        struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie);
        long state = get_current_state();
@@ -4910,17 +4768,6 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
        return 0;
 }
 
-int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
-               unsigned int flags)
-{
-       if (!(flags & BLK_POLL_NOSLEEP) &&
-           q->poll_nsec != BLK_MQ_POLL_CLASSIC) {
-               if (blk_mq_poll_hybrid(q, cookie))
-                       return 1;
-       }
-       return blk_mq_poll_classic(q, cookie, iob, flags);
-}
-
 unsigned int blk_mq_rq_cpu(struct request *rq)
 {
        return rq->mq_ctx->cpu;