blk-mq: don't use the requeue list to queue flush commands
author Christoph Hellwig <hch@lst.de>
Fri, 19 May 2023 04:40:50 +0000 (06:40 +0200)
committer Jens Axboe <axboe@kernel.dk>
Sat, 20 May 2023 01:52:42 +0000 (19:52 -0600)
Currently both requeues of commands that were already sent to the driver
and flush commands submitted from the flush state machine share the same
requeue_list in struct request_queue, even though requeues need head
insertions and flushes do not.  Switch to using two separate lists
instead.
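
As a rough illustration, here is a minimal user-space C sketch of the
idea (not kernel code: fake_request, fake_queue and the printf-based
dispatch below are made-up stand-ins for struct request, struct
request_queue and the real insert helpers).  Requeues and flush
requests are parked on two separate lists protected by the same lock,
and only the requeue list is drained with head insertion:

#include <pthread.h>
#include <stdio.h>

struct fake_request {
	char name[16];
	struct fake_request *next;
};

struct fake_queue {
	pthread_mutex_t requeue_lock;      /* models q->requeue_lock */
	struct fake_request *requeue_list; /* models q->requeue_list */
	struct fake_request *flush_list;   /* models q->flush_list   */
};

/* Append rq to the tail of a singly linked list. */
static void fake_list_add_tail(struct fake_request **list,
			       struct fake_request *rq)
{
	while (*list)
		list = &(*list)->next;
	rq->next = NULL;
	*list = rq;
}

/* Models blk_mq_requeue_request(): park rq on the requeue list. */
static void fake_requeue_request(struct fake_queue *q,
				 struct fake_request *rq)
{
	pthread_mutex_lock(&q->requeue_lock);
	fake_list_add_tail(&q->requeue_list, rq);
	pthread_mutex_unlock(&q->requeue_lock);
}

/* Models the flush state machine: park rq on the separate flush list. */
static void fake_queue_flush(struct fake_queue *q, struct fake_request *rq)
{
	pthread_mutex_lock(&q->requeue_lock);
	fake_list_add_tail(&q->flush_list, rq);
	pthread_mutex_unlock(&q->requeue_lock);
}

/*
 * Models blk_mq_requeue_work(): splice both lists under the lock, then
 * dispatch.  Requeues go back in at the head of the dispatch order,
 * flush requests at the tail.  (The RQF_DONTPREP bypass path is
 * omitted here for brevity.)
 */
static void fake_requeue_work(struct fake_queue *q)
{
	struct fake_request *rq_list, *flush_list, *rq;

	pthread_mutex_lock(&q->requeue_lock);
	rq_list = q->requeue_list;
	flush_list = q->flush_list;
	q->requeue_list = NULL;
	q->flush_list = NULL;
	pthread_mutex_unlock(&q->requeue_lock);

	for (rq = rq_list; rq; rq = rq->next)
		printf("insert %s at head\n", rq->name);
	for (rq = flush_list; rq; rq = rq->next)
		printf("insert %s at tail\n", rq->name);
}

int main(void)
{
	struct fake_queue q = { .requeue_lock = PTHREAD_MUTEX_INITIALIZER };
	struct fake_request a = { "requeue-A" };
	struct fake_request b = { "flush-B" };
	struct fake_request c = { "requeue-C" };

	fake_requeue_request(&q, &a);
	fake_queue_flush(&q, &b);
	fake_requeue_request(&q, &c);
	fake_requeue_work(&q);
	return 0;
}

Since head insertion is now implied by which list a request sits on,
the RQF_SOFTBARRIER flag that used to encode it can be dropped, as the
diff below does.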

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Link: https://lore.kernel.org/r/20230519044050.107790-8-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-flush.c
block/blk-mq-debugfs.c
block/blk-mq.c
block/blk-mq.h
include/linux/blk-mq.h
include/linux/blkdev.h

index f407a59503173df69c3f16c2c4c38ed4ef578e0b..dba392cf22bec6cbae65e007d9890565aceb971a 100644 (file)
@@ -188,7 +188,9 @@ static void blk_flush_complete_seq(struct request *rq,
 
        case REQ_FSEQ_DATA:
                list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
-               blk_mq_add_to_requeue_list(rq, 0);
+               spin_lock(&q->requeue_lock);
+               list_add_tail(&rq->queuelist, &q->flush_list);
+               spin_unlock(&q->requeue_lock);
                blk_mq_kick_requeue_list(q);
                break;
 
@@ -346,7 +348,10 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
        smp_wmb();
        req_ref_set(flush_rq, 1);
 
-       blk_mq_add_to_requeue_list(flush_rq, 0);
+       spin_lock(&q->requeue_lock);
+       list_add_tail(&flush_rq->queuelist, &q->flush_list);
+       spin_unlock(&q->requeue_lock);
+
        blk_mq_kick_requeue_list(q);
 }
 
index 22e39b9a77ecf2a9d4e0051fb08e67f0af914b25..68165a50951b68e4cdd16d08c1923b34e17301c7 100644 (file)
@@ -244,7 +244,6 @@ static const char *const cmd_flag_name[] = {
 #define RQF_NAME(name) [ilog2((__force u32)RQF_##name)] = #name
 static const char *const rqf_name[] = {
        RQF_NAME(STARTED),
-       RQF_NAME(SOFTBARRIER),
        RQF_NAME(FLUSH_SEQ),
        RQF_NAME(MIXED_MERGE),
        RQF_NAME(MQ_INFLIGHT),
index aac67bc3d3680cb0f641efc26cd360c5841f2233..551e7760f45e20f167a075522f6ad830594198fe 100644 (file)
@@ -1416,13 +1416,16 @@ static void __blk_mq_requeue_request(struct request *rq)
 void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
 {
        struct request_queue *q = rq->q;
+       unsigned long flags;
 
        __blk_mq_requeue_request(rq);
 
        /* this request will be re-inserted to io scheduler queue */
        blk_mq_sched_requeue_request(rq);
 
-       blk_mq_add_to_requeue_list(rq, BLK_MQ_INSERT_AT_HEAD);
+       spin_lock_irqsave(&q->requeue_lock, flags);
+       list_add_tail(&rq->queuelist, &q->requeue_list);
+       spin_unlock_irqrestore(&q->requeue_lock, flags);
 
        if (kick_requeue_list)
                blk_mq_kick_requeue_list(q);
@@ -1434,13 +1437,16 @@ static void blk_mq_requeue_work(struct work_struct *work)
        struct request_queue *q =
                container_of(work, struct request_queue, requeue_work.work);
        LIST_HEAD(rq_list);
-       struct request *rq, *next;
+       LIST_HEAD(flush_list);
+       struct request *rq;
 
        spin_lock_irq(&q->requeue_lock);
        list_splice_init(&q->requeue_list, &rq_list);
+       list_splice_init(&q->flush_list, &flush_list);
        spin_unlock_irq(&q->requeue_lock);
 
-       list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
+       while (!list_empty(&rq_list)) {
+               rq = list_entry(rq_list.next, struct request, queuelist);
                /*
                 * If RQF_DONTPREP is set, the request has been started by the
                 * driver already and might have driver-specific data allocated
@@ -1448,18 +1454,16 @@ static void blk_mq_requeue_work(struct work_struct *work)
                 * block layer merges for the request.
                 */
                if (rq->rq_flags & RQF_DONTPREP) {
-                       rq->rq_flags &= ~RQF_SOFTBARRIER;
                        list_del_init(&rq->queuelist);
                        blk_mq_request_bypass_insert(rq, 0);
-               } else if (rq->rq_flags & RQF_SOFTBARRIER) {
-                       rq->rq_flags &= ~RQF_SOFTBARRIER;
+               } else {
                        list_del_init(&rq->queuelist);
                        blk_mq_insert_request(rq, BLK_MQ_INSERT_AT_HEAD);
                }
        }
 
-       while (!list_empty(&rq_list)) {
-               rq = list_entry(rq_list.next, struct request, queuelist);
+       while (!list_empty(&flush_list)) {
+               rq = list_entry(flush_list.next, struct request, queuelist);
                list_del_init(&rq->queuelist);
                blk_mq_insert_request(rq, 0);
        }
@@ -1467,27 +1471,6 @@ static void blk_mq_requeue_work(struct work_struct *work)
        blk_mq_run_hw_queues(q, false);
 }
 
-void blk_mq_add_to_requeue_list(struct request *rq, blk_insert_t insert_flags)
-{
-       struct request_queue *q = rq->q;
-       unsigned long flags;
-
-       /*
-        * We abuse this flag that is otherwise used by the I/O scheduler to
-        * request head insertion from the workqueue.
-        */
-       BUG_ON(rq->rq_flags & RQF_SOFTBARRIER);
-
-       spin_lock_irqsave(&q->requeue_lock, flags);
-       if (insert_flags & BLK_MQ_INSERT_AT_HEAD) {
-               rq->rq_flags |= RQF_SOFTBARRIER;
-               list_add(&rq->queuelist, &q->requeue_list);
-       } else {
-               list_add_tail(&rq->queuelist, &q->requeue_list);
-       }
-       spin_unlock_irqrestore(&q->requeue_lock, flags);
-}
-
 void blk_mq_kick_requeue_list(struct request_queue *q)
 {
        kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, 0);
@@ -4239,6 +4222,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
        blk_mq_update_poll_flag(q);
 
        INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
+       INIT_LIST_HEAD(&q->flush_list);
        INIT_LIST_HEAD(&q->requeue_list);
        spin_lock_init(&q->requeue_lock);
 
index ec7d2fb0b3c8ef36b68dfeb66f51da42cc68c9da..8c642e9f32f10275021da1d576e084ab7e2caf66 100644 (file)
@@ -47,7 +47,6 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 void blk_mq_wake_waiters(struct request_queue *q);
 bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *,
                             unsigned int);
-void blk_mq_add_to_requeue_list(struct request *rq, blk_insert_t insert_flags);
 void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
 struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
                                        struct blk_mq_ctx *start);
index 935201c89743717bc5463c1d4a43fbca7c0326aa..d778cb6b211233d534b1b753b259339e73380dac 100644 (file)
@@ -28,8 +28,6 @@ typedef __u32 __bitwise req_flags_t;
 
 /* drive already may have started this one */
 #define RQF_STARTED            ((__force req_flags_t)(1 << 1))
-/* may not be passed by ioscheduler */
-#define RQF_SOFTBARRIER                ((__force req_flags_t)(1 << 3))
 /* request for flush sequence */
 #define RQF_FLUSH_SEQ          ((__force req_flags_t)(1 << 4))
 /* merge of different types, fail separately */
@@ -65,7 +63,7 @@ typedef __u32 __bitwise req_flags_t;
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
-       (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
+       (RQF_STARTED | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
 
 enum mq_rq_state {
        MQ_RQ_IDLE              = 0,
index 3952c52d6cd1b0242deca4c702ffd7215e8e5017..fe99948688dfda1131b62d84c07fcf733cdb2f2b 100644 (file)
@@ -487,6 +487,7 @@ struct request_queue {
         * for flush operations
         */
        struct blk_flush_queue  *fq;
+       struct list_head        flush_list;
 
        struct list_head        requeue_list;
        spinlock_t              requeue_lock;