blk-mq: move the srcu_struct used for quiescing to the tagset
author		Christoph Hellwig <hch@lst.de>
		Tue, 1 Nov 2022 15:00:47 +0000 (16:00 +0100)
committer	Jens Axboe <axboe@kernel.dk>
		Wed, 2 Nov 2022 14:35:34 +0000 (08:35 -0600)
All I/O submissions have fairly similar latencies, and a tagset-wide
quiesce is a fairly common operation.  Move the srcu_struct used for
quiescing from the request_queue to the blk_mq_tag_set, so that a
single SRCU grace period can cover every queue sharing a tag set
instead of requiring one synchronization per queue.
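
With the srcu_struct in the tag_set, quiescing a whole set becomes one
grace period. A minimal sketch of what this enables (a proper
tagset-wide quiesce helper is a natural follow-up; the function name
here is illustrative only):

	/* sketch: quiesce all queues of a tag set with a single grace period */
	static void example_quiesce_tagset(struct blk_mq_tag_set *set)
	{
		struct request_queue *q;

		mutex_lock(&set->tag_list_lock);
		list_for_each_entry(q, &set->tag_list, tag_set_list)
			blk_mq_quiesce_queue_nowait(q);
		/* one synchronization instead of one per queue */
		if (set->flags & BLK_MQ_F_BLOCKING)
			synchronize_srcu(set->srcu);
		else
			synchronize_rcu();
		mutex_unlock(&set->tag_list_lock);
	}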

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Chao Leng <lengchao@huawei.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20221101150050.3510-12-hch@lst.de
[axboe: fix whitespace]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-core.c
block/blk-mq.c
block/blk-mq.h
block/blk-sysfs.c
block/blk.h
block/genhd.c
include/linux/blk-mq.h
include/linux/blkdev.h

diff --git a/block/blk-core.c b/block/blk-core.c
index 5d50dd16e2a594e2e71f1d7a45456baadb5abb14..e9e2bf15cd909fd349bf200c83b6c6cc4fd52551 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -65,7 +65,6 @@ DEFINE_IDA(blk_queue_ida);
  * For queue allocation
  */
 struct kmem_cache *blk_requestq_cachep;
-struct kmem_cache *blk_requestq_srcu_cachep;
 
 /*
  * Controlling structure to kblockd
@@ -373,26 +372,20 @@ static void blk_timeout_work(struct work_struct *work)
 {
 }
 
-struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
+struct request_queue *blk_alloc_queue(int node_id)
 {
        struct request_queue *q;
 
-       q = kmem_cache_alloc_node(blk_get_queue_kmem_cache(alloc_srcu),
-                       GFP_KERNEL | __GFP_ZERO, node_id);
+       q = kmem_cache_alloc_node(blk_requestq_cachep, GFP_KERNEL | __GFP_ZERO,
+                                 node_id);
        if (!q)
                return NULL;
 
-       if (alloc_srcu) {
-               blk_queue_flag_set(QUEUE_FLAG_HAS_SRCU, q);
-               if (init_srcu_struct(q->srcu) != 0)
-                       goto fail_q;
-       }
-
        q->last_merge = NULL;
 
        q->id = ida_alloc(&blk_queue_ida, GFP_KERNEL);
        if (q->id < 0)
-               goto fail_srcu;
+               goto fail_q;
 
        q->stats = blk_alloc_queue_stats();
        if (!q->stats)
@@ -435,11 +428,8 @@ fail_stats:
        blk_free_queue_stats(q->stats);
 fail_id:
        ida_free(&blk_queue_ida, q->id);
-fail_srcu:
-       if (alloc_srcu)
-               cleanup_srcu_struct(q->srcu);
 fail_q:
-       kmem_cache_free(blk_get_queue_kmem_cache(alloc_srcu), q);
+       kmem_cache_free(blk_requestq_cachep, q);
        return NULL;
 }
 
@@ -1172,9 +1162,6 @@ int __init blk_dev_init(void)
                        sizeof_field(struct request, cmd_flags));
        BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
                        sizeof_field(struct bio, bi_opf));
-       BUILD_BUG_ON(ALIGN(offsetof(struct request_queue, srcu),
-                          __alignof__(struct request_queue)) !=
-                    sizeof(struct request_queue));
 
        /* used for unplugging and affects IO latency/throughput - HIGHPRI */
        kblockd_workqueue = alloc_workqueue("kblockd",
@@ -1185,10 +1172,6 @@ int __init blk_dev_init(void)
        blk_requestq_cachep = kmem_cache_create("request_queue",
                        sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
 
-       blk_requestq_srcu_cachep = kmem_cache_create("request_queue_srcu",
-                       sizeof(struct request_queue) +
-                       sizeof(struct srcu_struct), 0, SLAB_PANIC, NULL);
-
        blk_debugfs_root = debugfs_create_dir("block", NULL);
 
        return 0;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a03abadfe4c6fb196b9c8bc8fff6373368620391..bee728dac9cda39bba01000920a625a8ded70e7f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -261,8 +261,8 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
  */
 void blk_mq_wait_quiesce_done(struct request_queue *q)
 {
-       if (blk_queue_has_srcu(q))
-               synchronize_srcu(q->srcu);
+       if (q->tag_set->flags & BLK_MQ_F_BLOCKING)
+               synchronize_srcu(q->tag_set->srcu);
        else
                synchronize_rcu();
 }
@@ -4003,7 +4003,7 @@ static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
        struct request_queue *q;
        int ret;
 
-       q = blk_alloc_queue(set->numa_node, set->flags & BLK_MQ_F_BLOCKING);
+       q = blk_alloc_queue(set->numa_node);
        if (!q)
                return ERR_PTR(-ENOMEM);
        q->queuedata = queuedata;
@@ -4168,9 +4168,6 @@ static void blk_mq_update_poll_flag(struct request_queue *q)
 int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
                struct request_queue *q)
 {
-       WARN_ON_ONCE(blk_queue_has_srcu(q) !=
-                       !!(set->flags & BLK_MQ_F_BLOCKING));
-
        /* mark the queue as mq asap */
        q->mq_ops = set->ops;
 
@@ -4429,8 +4426,18 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
        if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids)
                set->nr_hw_queues = nr_cpu_ids;
 
-       if (blk_mq_alloc_tag_set_tags(set, set->nr_hw_queues) < 0)
-               return -ENOMEM;
+       if (set->flags & BLK_MQ_F_BLOCKING) {
+               set->srcu = kmalloc(sizeof(*set->srcu), GFP_KERNEL);
+               if (!set->srcu)
+                       return -ENOMEM;
+               ret = init_srcu_struct(set->srcu);
+               if (ret)
+                       goto out_free_srcu;
+       }
+
+       ret = blk_mq_alloc_tag_set_tags(set, set->nr_hw_queues);
+       if (ret)
+               goto out_cleanup_srcu;
 
        ret = -ENOMEM;
        for (i = 0; i < set->nr_maps; i++) {
@@ -4460,6 +4467,12 @@ out_free_mq_map:
        }
        kfree(set->tags);
        set->tags = NULL;
+out_cleanup_srcu:
+       if (set->flags & BLK_MQ_F_BLOCKING)
+               cleanup_srcu_struct(set->srcu);
+out_free_srcu:
+       if (set->flags & BLK_MQ_F_BLOCKING)
+               kfree(set->srcu);
        return ret;
 }
 EXPORT_SYMBOL(blk_mq_alloc_tag_set);
@@ -4499,6 +4512,10 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 
        kfree(set->tags);
        set->tags = NULL;
+       if (set->flags & BLK_MQ_F_BLOCKING) {
+               cleanup_srcu_struct(set->srcu);
+               kfree(set->srcu);
+       }
 }
 EXPORT_SYMBOL(blk_mq_free_tag_set);
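
From the driver side nothing changes except ownership: a blocking
driver still just sets BLK_MQ_F_BLOCKING, and the SRCU now lives and
dies with the tag set rather than with each queue. A sketch with
hypothetical driver names (my_driver, my_mq_ops):

	/* hypothetical setup for a driver whose ->queue_rq() may sleep */
	struct blk_mq_tag_set *set = &my_driver->tag_set;

	set->ops = &my_mq_ops;
	set->flags = BLK_MQ_F_BLOCKING;	/* blk_mq_alloc_tag_set() sets up set->srcu */
	ret = blk_mq_alloc_tag_set(set);	/* kmalloc + init_srcu_struct, as above */
	...
	blk_mq_free_tag_set(set);	/* cleanup_srcu_struct + kfree, as above */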
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 0b2870839cdd6c827fd35ce51a9c9cf232a72c38..ef59fee62780d301d4756000e660464078a6eaa2 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -377,17 +377,17 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 /* run the code block in @dispatch_ops with rcu/srcu read lock held */
 #define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops)        \
 do {                                                           \
-       if (!blk_queue_has_srcu(q)) {                           \
-               rcu_read_lock();                                \
-               (dispatch_ops);                                 \
-               rcu_read_unlock();                              \
-       } else {                                                \
+       if ((q)->tag_set->flags & BLK_MQ_F_BLOCKING) {          \
                int srcu_idx;                                   \
                                                                \
                might_sleep_if(check_sleep);                    \
-               srcu_idx = srcu_read_lock((q)->srcu);           \
+               srcu_idx = srcu_read_lock((q)->tag_set->srcu);  \
                (dispatch_ops);                                 \
-               srcu_read_unlock((q)->srcu, srcu_idx);          \
+               srcu_read_unlock((q)->tag_set->srcu, srcu_idx); \
+       } else {                                                \
+               rcu_read_lock();                                \
+               (dispatch_ops);                                 \
+               rcu_read_unlock();                              \
        }                                                       \
 } while (0)
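
The reader side above pairs with blk_mq_wait_quiesce_done(): dispatch
runs inside this macro, so a writer synchronizing on the tagset-wide
SRCU (or on plain RCU) is guaranteed to wait out every in-flight
dispatch section. An illustrative caller:

	/* illustrative only: run a dispatch body under the matching read lock */
	__blk_mq_run_dispatch_ops(hctx->queue, true,
			blk_mq_sched_dispatch_requests(hctx));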
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 7b98c7074771d4ffb776b11c418c616f1f1a9ba6..02e94c4beff174852f69f771ffa5dae193bbba0a 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -742,10 +742,8 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
 
 static void blk_free_queue_rcu(struct rcu_head *rcu_head)
 {
-       struct request_queue *q = container_of(rcu_head, struct request_queue,
-                                              rcu_head);
-
-       kmem_cache_free(blk_get_queue_kmem_cache(blk_queue_has_srcu(q)), q);
+       kmem_cache_free(blk_requestq_cachep,
+                       container_of(rcu_head, struct request_queue, rcu_head));
 }
 
 /**
@@ -782,9 +780,6 @@ static void blk_release_queue(struct kobject *kobj)
        if (queue_is_mq(q))
                blk_mq_release(q);
 
-       if (blk_queue_has_srcu(q))
-               cleanup_srcu_struct(q->srcu);
-
        ida_free(&blk_queue_ida, q->id);
        call_rcu(&q->rcu_head, blk_free_queue_rcu);
 }
diff --git a/block/blk.h b/block/blk.h
index f1398fb96cec9b437739da96830429daf2cc20ec..e85703ae81dd1b6b2628c36055b59f8df23aedda 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -27,7 +27,6 @@ struct blk_flush_queue {
 };
 
 extern struct kmem_cache *blk_requestq_cachep;
-extern struct kmem_cache *blk_requestq_srcu_cachep;
 extern struct kobj_type blk_queue_ktype;
 extern struct ida blk_queue_ida;
 
@@ -429,13 +428,7 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
                struct page *page, unsigned int len, unsigned int offset,
                unsigned int max_sectors, bool *same_page);
 
-static inline struct kmem_cache *blk_get_queue_kmem_cache(bool srcu)
-{
-       if (srcu)
-               return blk_requestq_srcu_cachep;
-       return blk_requestq_cachep;
-}
-struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu);
+struct request_queue *blk_alloc_queue(int node_id);
 
 int disk_scan_partitions(struct gendisk *disk, fmode_t mode);
 
diff --git a/block/genhd.c b/block/genhd.c
index e7bd036024fabe77bc24cf57a627455be751487e..09cde914e0548dd99b1d04370aaaa231e07ba7e0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1414,7 +1414,7 @@ struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass)
        struct request_queue *q;
        struct gendisk *disk;
 
-       q = blk_alloc_queue(node, false);
+       q = blk_alloc_queue(node);
        if (!q)
                return NULL;
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 569053ed959d595fa462bdcfd9eb8f4461bfabc0..f059edebb11d82fa1102760bcc9ee97088175533 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -7,6 +7,7 @@
 #include <linux/lockdep.h>
 #include <linux/scatterlist.h>
 #include <linux/prefetch.h>
+#include <linux/srcu.h>
 
 struct blk_mq_tags;
 struct blk_flush_queue;
@@ -500,6 +501,8 @@ enum hctx_type {
  * @tag_list_lock: Serializes tag_list accesses.
  * @tag_list:     List of the request queues that use this tag set. See also
  *                request_queue.tag_set_list.
+ * @srcu:         Use as lock when type of the request queue is blocking
+ *                (BLK_MQ_F_BLOCKING).
  */
 struct blk_mq_tag_set {
        struct blk_mq_queue_map map[HCTX_MAX_TYPES];
@@ -520,6 +523,7 @@ struct blk_mq_tag_set {
 
        struct mutex            tag_list_lock;
        struct list_head        tag_list;
+       struct srcu_struct      *srcu;
 };
 
 /**
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 32137d85c9ad5ce0fe0beb32e8b114d442afb73a..6a6fa167fc828d04efb202aeb4018f28853421bd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -22,7 +22,6 @@
 #include <linux/blkzoned.h>
 #include <linux/sched.h>
 #include <linux/sbitmap.h>
-#include <linux/srcu.h>
 #include <linux/uuid.h>
 #include <linux/xarray.h>
 
@@ -543,18 +542,11 @@ struct request_queue {
        struct mutex            debugfs_mutex;
 
        bool                    mq_sysfs_init_done;
-
-       /**
-        * @srcu: Sleepable RCU. Use as lock when type of the request queue
-        * is blocking (BLK_MQ_F_BLOCKING). Must be the last member
-        */
-       struct srcu_struct      srcu[];
 };
 
 /* Keep blk_queue_flag_name[] in sync with the definitions below */
 #define QUEUE_FLAG_STOPPED     0       /* queue is stopped */
 #define QUEUE_FLAG_DYING       1       /* queue being torn down */
-#define QUEUE_FLAG_HAS_SRCU    2       /* SRCU is allocated */
 #define QUEUE_FLAG_NOMERGES     3      /* disable merge attempts */
 #define QUEUE_FLAG_SAME_COMP   4       /* complete on same CPU-group */
 #define QUEUE_FLAG_FAIL_IO     5       /* fake timeout */
@@ -590,7 +582,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 
 #define blk_queue_stopped(q)   test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_dying(q)     test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
-#define blk_queue_has_srcu(q)  test_bit(QUEUE_FLAG_HAS_SRCU, &(q)->queue_flags)
 #define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
 #define blk_queue_nomerges(q)  test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_noxmerges(q) \