block: move elv_register[unregister]_queue out of elevator_lock
authorMing Lei <ming.lei@redhat.com>
Mon, 5 May 2025 14:17:59 +0000 (22:17 +0800)
committerJens Axboe <axboe@kernel.dk>
Tue, 6 May 2025 13:43:43 +0000 (07:43 -0600)
Move elv_register[unregister]_queue out of ->elevator_lock & queue freezing,
so we can kill many lockdep warnings.

elv_register[unregister]_queue() is serialized, and just dealing with sysfs/
debugfs things, no need to be done with queue frozen:

- when it is called from adding disk, elevator switch isn't possible
  because ->queue_kobj isn't added yet

- when it is called from deleting disk, disable_elv_switch() is
  responsible for preventing new elevator switch and draining old
  elevator switch.

- when it is called from blk_mq_update_nr_hw_queues(), adding/removing
  disk and elevator switch can't be allowed or in-progress

With this change, elevator's ->exit() is called before calling
elv_unregister_queue, then user may call into ->show()/store() of elevator's
sysfs attributes, and we have covered this issue by adding `ELEVATOR_FLAG_DYNG`.

For blk-mq debugfs, hctx->sched_tags is always checked with ->elevator_lock by
debugfs code, meantime hctx->sched_tags is updated with ->elevator_lock, so
there isn't such issue.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20250505141805.2751237-22-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-mq.c
block/elevator.c

index 2a57b793992bf6a51c843865dbec646a4e55834f..263bbe34d3f0456db3074c801d538f7b15f6025a 100644 (file)
@@ -5041,11 +5041,10 @@ reregister:
                blk_mq_debugfs_register_hctxs(q);
        }
 
+       /* elv_update_nr_hw_queues() unfreeze queue for us */
        list_for_each_entry(q, &set->tag_list, tag_set_list)
                elv_update_nr_hw_queues(q);
 
-       list_for_each_entry(q, &set->tag_list, tag_set_list)
-               blk_mq_unfreeze_queue_nomemrestore(q);
        memalloc_noio_restore(memflags);
 
        /* Free the excess tags when nr_hw_queues shrink. */
index f7e333abefe34daa39e2b11cab6d07998ffe7d95..8578b969e1733aade9096304569147a3137d8e8b 100644 (file)
 struct elv_change_ctx {
        const char *name;
        bool no_uevent;
+
+       /* for unregistering old elevator */
+       struct elevator_queue *old;
+       /* for registering new elevator */
+       struct elevator_queue *new;
 };
 
 static DEFINE_SPINLOCK(elv_list_lock);
@@ -158,14 +163,14 @@ static void elevator_exit(struct request_queue *q)
 {
        struct elevator_queue *e = q->elevator;
 
+       lockdep_assert_held(&q->elevator_lock);
+
        ioc_clear_queue(q);
        blk_mq_sched_free_rqs(q);
 
        mutex_lock(&e->sysfs_lock);
        blk_mq_exit_sched(q, e);
        mutex_unlock(&e->sysfs_lock);
-
-       kobject_put(&e->kobj);
 }
 
 static inline void __elv_rqhash_del(struct request *rq)
@@ -466,8 +471,6 @@ static int elv_register_queue(struct request_queue *q,
 {
        int error;
 
-       lockdep_assert_held(&q->elevator_lock);
-
        error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched");
        if (!error) {
                const struct elv_fs_entry *attr = e->type->elevator_attrs;
@@ -494,8 +497,6 @@ static int elv_register_queue(struct request_queue *q,
 static void elv_unregister_queue(struct request_queue *q,
                                 struct elevator_queue *e)
 {
-       lockdep_assert_held(&q->elevator_lock);
-
        if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) {
                kobject_uevent(&e->kobj, KOBJ_REMOVE);
                kobject_del(&e->kobj);
@@ -586,7 +587,7 @@ static int elevator_switch(struct request_queue *q, struct elv_change_ctx *ctx)
        blk_mq_quiesce_queue(q);
 
        if (q->elevator) {
-               elv_unregister_queue(q, q->elevator);
+               ctx->old = q->elevator;
                elevator_exit(q);
        }
 
@@ -594,11 +595,7 @@ static int elevator_switch(struct request_queue *q, struct elv_change_ctx *ctx)
                ret = blk_mq_init_sched(q, new_e);
                if (ret)
                        goto out_unfreeze;
-               ret = elv_register_queue(q, q->elevator, !ctx->no_uevent);
-               if (ret) {
-                       elevator_exit(q);
-                       goto out_unfreeze;
-               }
+               ctx->new = q->elevator;
        } else {
                blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
                q->elevator = NULL;
@@ -619,6 +616,38 @@ out_unfreeze:
        return ret;
 }
 
+static void elv_exit_and_release(struct request_queue *q)
+{
+       struct elevator_queue *e;
+       unsigned memflags;
+
+       memflags = blk_mq_freeze_queue(q);
+       mutex_lock(&q->elevator_lock);
+       e = q->elevator;
+       elevator_exit(q);
+       mutex_unlock(&q->elevator_lock);
+       blk_mq_unfreeze_queue(q, memflags);
+       if (e)
+               kobject_put(&e->kobj);
+}
+
+static int elevator_change_done(struct request_queue *q,
+                               struct elv_change_ctx *ctx)
+{
+       int ret = 0;
+
+       if (ctx->old) {
+               elv_unregister_queue(q, ctx->old);
+               kobject_put(&ctx->old->kobj);
+       }
+       if (ctx->new) {
+               ret = elv_register_queue(q, ctx->new, !ctx->no_uevent);
+               if (ret)
+                       elv_exit_and_release(q);
+       }
+       return ret;
+}
+
 /*
  * Switch this queue to the given IO scheduler.
  */
@@ -645,6 +674,9 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
                ret = elevator_switch(q, ctx);
        mutex_unlock(&q->elevator_lock);
        blk_mq_unfreeze_queue(q, memflags);
+       if (!ret)
+               ret = elevator_change_done(q, ctx);
+
        return ret;
 }
 
@@ -654,18 +686,22 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
  */
 void elv_update_nr_hw_queues(struct request_queue *q)
 {
+       struct elv_change_ctx ctx = {};
+       int ret = -ENODEV;
+
        WARN_ON_ONCE(q->mq_freeze_depth == 0);
 
        mutex_lock(&q->elevator_lock);
        if (q->elevator && !blk_queue_dying(q) && !blk_queue_registered(q)) {
-               struct elv_change_ctx ctx = {
-                       .name = q->elevator->type->elevator_name,
-               };
+               ctx.name = q->elevator->type->elevator_name;
 
                /* force to reattach elevator after nr_hw_queue is updated */
-               elevator_switch(q, &ctx);
+               ret = elevator_switch(q, &ctx);
        }
        mutex_unlock(&q->elevator_lock);
+       blk_mq_unfreeze_queue_nomemrestore(q);
+       if (!ret)
+               WARN_ON_ONCE(elevator_change_done(q, &ctx));
 }
 
 /*