From d1ba22ab2becc8bf84d466791b970905abe99b23 Mon Sep 17 00:00:00 2001 From: Zizhi Wo Date: Tue, 6 May 2025 10:09:34 +0800 Subject: [PATCH] blk-throttle: Prevents the bps restricted io from entering the bps queue again [BUG] There has an issue of io delayed dispatch caused by io splitting. Consider the following scenario: 1) If we set a BPS limit of 1MB/s and restrict the maximum IO size per dispatch to 4KB, submitting -two- 1MB IO requests results in completion times of 1s and 2s, which is expected. 2) However, if we additionally set an IOPS limit of 1,000,000/s with the same BPS limit of 1MB/s, submitting -two- 1MB IO requests again results in both completing in 2s, even though the IOPS constraint is being met. [CAUSE] This issue arises because BPS and IOPS currently share the same queue in the blkthrotl mechanism: 1) This issue does not occur when only BPS is limited because the split IOs return false in blk_should_throtl() and do not go through to throtl again. 2) For split IOs, even if they have been tagged with BIO_BPS_THROTTLED, they still get queued alternately in the same list due to continuous splitting and reordering. As a result, the two IO requests are both completed at the 2-second mark, causing an unintended delay. 3) It is not difficult to imagine that in this scenario, if N 1MB IOs are issued at once, all IOs will eventually complete together in N seconds. [FIX] With the queue separation introduced in the previous patches, we now have separate BPS and IOPS queues. For IOs that have already passed the BPS limitation, they do not need to re-enter the BPS queue and can directly placed to the IOPS queue. Since we have split the queues, when the IOPS queue is previously empty and a new bio is added to the first qnode->bios_iops list in the service_queue, we also need to update the disptime. This patch introduces "THROTL_TG_IOPS_WAS_EMPTY" flag to mark it. Signed-off-by: Zizhi Wo Reviewed-by: Yu Kuai Signed-off-by: Zizhi Wo Link: https://lore.kernel.org/r/20250506020935.655574-8-wozizhi@huaweicloud.com Signed-off-by: Jens Axboe --- block/blk-throttle.c | 53 +++++++++++++++++++++++++++++++++++++------- block/blk-throttle.h | 11 ++++++--- 2 files changed, 53 insertions(+), 11 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 3129bb83d819..bf4faac83662 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -163,7 +163,12 @@ static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn, { bool rw = bio_data_dir(bio); - if (bio_flagged(bio, BIO_TG_BPS_THROTTLED)) { + /* + * Split bios have already been throttled by bps, so they are + * directly queued into the iops path. + */ + if (bio_flagged(bio, BIO_TG_BPS_THROTTLED) || + bio_flagged(bio, BIO_BPS_THROTTLED)) { bio_list_add(&qn->bios_iops, bio); sq->nr_queued_iops[rw]++; } else { @@ -946,6 +951,15 @@ static void throtl_add_bio_tg(struct bio *bio, struct throtl_qnode *qn, throtl_qnode_add_bio(bio, qn, sq); + /* + * Since we have split the queues, when the iops queue is + * previously empty and a new @bio is added into the first @qn, + * we also need to update the @tg->disptime. + */ + if (bio_flagged(bio, BIO_BPS_THROTTLED) && + bio == throtl_peek_queued(&sq->queued[rw])) + tg->flags |= THROTL_TG_IOPS_WAS_EMPTY; + throtl_enqueue_tg(tg); } @@ -973,6 +987,7 @@ static void tg_update_disptime(struct throtl_grp *tg) /* see throtl_add_bio_tg() */ tg->flags &= ~THROTL_TG_WAS_EMPTY; + tg->flags &= ~THROTL_TG_IOPS_WAS_EMPTY; } static void start_parent_slice_with_credit(struct throtl_grp *child_tg, @@ -1160,7 +1175,8 @@ again: if (parent_sq) { /* @parent_sq is another throl_grp, propagate dispatch */ - if (tg->flags & THROTL_TG_WAS_EMPTY) { + if (tg->flags & THROTL_TG_WAS_EMPTY || + tg->flags & THROTL_TG_IOPS_WAS_EMPTY) { tg_update_disptime(tg); if (!throtl_schedule_next_dispatch(parent_sq, false)) { /* window is already open, repeat dispatching */ @@ -1705,9 +1721,28 @@ void blk_throtl_cancel_bios(struct gendisk *disk) static bool tg_within_limit(struct throtl_grp *tg, struct bio *bio, bool rw) { - /* throtl is FIFO - if bios are already queued, should queue */ - if (sq_queued(&tg->service_queue, rw)) + struct throtl_service_queue *sq = &tg->service_queue; + + /* + * For a split bio, we need to specifically distinguish whether the + * iops queue is empty. + */ + if (bio_flagged(bio, BIO_BPS_THROTTLED)) + return sq->nr_queued_iops[rw] == 0 && + tg_dispatch_iops_time(tg, bio) == 0; + + /* + * Throtl is FIFO - if bios are already queued, should queue. + * If the bps queue is empty and @bio is within the bps limit, charge + * bps here for direct placement into the iops queue. + */ + if (sq_queued(&tg->service_queue, rw)) { + if (sq->nr_queued_bps[rw] == 0 && + tg_dispatch_bps_time(tg, bio) == 0) + throtl_charge_bps_bio(tg, bio); + return false; + } return tg_dispatch_time(tg, bio) == 0; } @@ -1788,11 +1823,13 @@ bool __blk_throtl_bio(struct bio *bio) /* * Update @tg's dispatch time and force schedule dispatch if @tg - * was empty before @bio. The forced scheduling isn't likely to - * cause undue delay as @bio is likely to be dispatched directly if - * its @tg's disptime is not in the future. + * was empty before @bio, or the iops queue is empty and @bio will + * add to. The forced scheduling isn't likely to cause undue + * delay as @bio is likely to be dispatched directly if its @tg's + * disptime is not in the future. */ - if (tg->flags & THROTL_TG_WAS_EMPTY) { + if (tg->flags & THROTL_TG_WAS_EMPTY || + tg->flags & THROTL_TG_IOPS_WAS_EMPTY) { tg_update_disptime(tg); throtl_schedule_next_dispatch(tg->service_queue.parent_sq, true); } diff --git a/block/blk-throttle.h b/block/blk-throttle.h index ab892c0bd70f..3b27755bfbff 100644 --- a/block/blk-throttle.h +++ b/block/blk-throttle.h @@ -56,9 +56,14 @@ struct throtl_service_queue { }; enum tg_state_flags { - THROTL_TG_PENDING = 1 << 0, /* on parent's pending tree */ - THROTL_TG_WAS_EMPTY = 1 << 1, /* bio_lists[] became non-empty */ - THROTL_TG_CANCELING = 1 << 2, /* starts to cancel bio */ + THROTL_TG_PENDING = 1 << 0, /* on parent's pending tree */ + THROTL_TG_WAS_EMPTY = 1 << 1, /* bio_lists[] became non-empty */ + /* + * The sq's iops queue is empty, and a bio is about to be enqueued + * to the first qnode's bios_iops list. + */ + THROTL_TG_IOPS_WAS_EMPTY = 1 << 2, + THROTL_TG_CANCELING = 1 << 3, /* starts to cancel bio */ }; struct throtl_grp { -- 2.25.1