block: also check RQF_STATS in blk_mq_need_time_stamp()
diff --git a/block/blk-core.c b/block/blk-core.c
index d0cc6e14d2f0748e91001d770cbebfa091066889..875e8d105067a2248b7829e487bd66c5f4c9281c 100644
@@ -36,6 +36,7 @@
 #include <linux/blk-cgroup.h>
 #include <linux/debugfs.h>
 #include <linux/bpf.h>
+#include <linux/psi.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
@@ -129,6 +130,7 @@ static const char *const blk_op_name[] = {
        REQ_OP_NAME(DISCARD),
        REQ_OP_NAME(SECURE_ERASE),
        REQ_OP_NAME(ZONE_RESET),
+       REQ_OP_NAME(ZONE_RESET_ALL),
        REQ_OP_NAME(WRITE_SAME),
        REQ_OP_NAME(WRITE_ZEROES),
        REQ_OP_NAME(SCSI_IN),
@@ -344,7 +346,8 @@ void blk_cleanup_queue(struct request_queue *q)
 
        /*
         * Drain all requests queued before DYING marking. Set DEAD flag to
-        * prevent that q->request_fn() gets invoked after draining finished.
+        * prevent blk_mq_run_hw_queues() from accessing the hardware
+        * queues after draining has finished.
         */
        blk_freeze_queue(q);
 
@@ -479,7 +482,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
        if (!q)
                return NULL;
 
-       INIT_LIST_HEAD(&q->queue_head);
        q->last_merge = NULL;
 
        q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
@@ -518,6 +520,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
        mutex_init(&q->blk_trace_mutex);
 #endif
        mutex_init(&q->sysfs_lock);
+       mutex_init(&q->sysfs_dir_lock);
        spin_lock_init(&q->queue_lock);
 
        init_waitqueue_head(&q->mq_freeze_wq);
@@ -601,6 +604,7 @@ bool bio_attempt_back_merge(struct request *req, struct bio *bio,
                return false;
 
        trace_block_bio_backmerge(req->q, req, bio);
+       rq_qos_merge(req->q, req, bio);
 
        if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
                blk_rq_set_mixed_merge(req);
@@ -622,6 +626,7 @@ bool bio_attempt_front_merge(struct request *req, struct bio *bio,
                return false;
 
        trace_block_bio_frontmerge(req->q, req, bio);
+       rq_qos_merge(req->q, req, bio);
 
        if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
                blk_rq_set_mixed_merge(req);
@@ -647,6 +652,8 @@ bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
            blk_rq_get_max_sectors(req, blk_rq_pos(req)))
                goto no_merge;
 
+       rq_qos_merge(q, req, bio);
+
        req->biotail->bi_next = bio;
        req->biotail = bio;
        req->__data_len += bio->bi_iter.bi_size;
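
The three rq_qos_merge() calls added above let the queue's QoS policies (wbt,
iolatency, iocost and friends) account bios that get merged into an existing
request, which previously bypassed them. A minimal sketch of the dispatch
pattern, assuming the rq_qos chain and ops layout from block/blk-rq-qos.h;
this is an illustration, not the verbatim upstream helper:

	static inline void rq_qos_merge(struct request_queue *q,
					struct request *rq, struct bio *bio)
	{
		struct rq_qos *rqos;

		/*
		 * Walk the per-queue chain of QoS policies and let
		 * each one see the bio just merged into @rq.
		 */
		for (rqos = q->rq_qos; rqos; rqos = rqos->next)
			if (rqos->ops->merge)
				rqos->ops->merge(rqos, rq, bio);
	}
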
@@ -931,6 +938,10 @@ generic_make_request_checks(struct bio *bio)
                if (!blk_queue_is_zoned(q))
                        goto not_supported;
                break;
+       case REQ_OP_ZONE_RESET_ALL:
+               if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q))
+                       goto not_supported;
+               break;
        case REQ_OP_WRITE_ZEROES:
                if (!q->limits.max_write_zeroes_sectors)
                        goto not_supported;
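
REQ_OP_ZONE_RESET_ALL is only let through when the queue is zoned and the
driver has advertised single-command all-zone reset support. As a sketch of
the opt-in side, assuming blk_queue_zone_resetall() is a plain queue_flags
test (per blkdev.h in this series), a zoned driver would set the flag during
queue setup:

	/* Assumed helper: test the all-zone-reset capability flag. */
	#define blk_queue_zone_resetall(q) \
		test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags)

	/*
	 * Driver-side opt-in, e.g. in the probe path, for hardware
	 * that can reset every zone with one command.
	 */
	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
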
@@ -1128,6 +1139,10 @@ EXPORT_SYMBOL_GPL(direct_make_request);
  */
 blk_qc_t submit_bio(struct bio *bio)
 {
+       bool workingset_read = false;
+       unsigned long pflags;
+       blk_qc_t ret;
+
        if (blkcg_punt_bio_submit(bio))
                return BLK_QC_T_NONE;
 
@@ -1146,6 +1161,8 @@ blk_qc_t submit_bio(struct bio *bio)
                if (op_is_write(bio_op(bio))) {
                        count_vm_events(PGPGOUT, count);
                } else {
+                       if (bio_flagged(bio, BIO_WORKINGSET))
+                               workingset_read = true;
                        task_io_account_read(bio->bi_iter.bi_size);
                        count_vm_events(PGPGIN, count);
                }
@@ -1160,7 +1177,21 @@ blk_qc_t submit_bio(struct bio *bio)
                }
        }
 
-       return generic_make_request(bio);
+       /*
+        * If we're reading data that is part of the userspace
+        * workingset, count submission time as memory stall. When the
+        * device is congested or the submitting cgroup is IO-throttled,
+        * submission can be a significant part of overall IO time.
+        */
+       if (workingset_read)
+               psi_memstall_enter(&pflags);
+
+       ret = generic_make_request(bio);
+
+       if (workingset_read)
+               psi_memstall_leave(&pflags);
+
+       return ret;
 }
 EXPORT_SYMBOL(submit_bio);
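
The psi_memstall_enter()/psi_memstall_leave() bracket added to submit_bio()
is the generic pattern for attributing a wait to memory pressure: enter marks
the task as stalled, leave clears it, and pflags carries task state between
the two. A minimal usage sketch against <linux/psi.h>; do_refault_read() is a
hypothetical stand-in for the stalling work:

	unsigned long pflags;

	/* Count the span below as a memory stall for PSI. */
	psi_memstall_enter(&pflags);
	do_refault_read();	/* hypothetical: read workingset pages */
	psi_memstall_leave(&pflags);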