From: Boris Burkov Date: Mon, 3 Mar 2025 23:01:05 +0000 (-0800) Subject: btrfs: make btrfs_discard_workfn() block_group ref explicit X-Git-Tag: io_uring-6.15-20250403~74^2~23 X-Git-Url: https://git.kernel.dk/?a=commitdiff_plain;h=895c6721d310c036dcfebb5ab845822229fa35eb;p=linux-block.git btrfs: make btrfs_discard_workfn() block_group ref explicit Currently, the async discard machinery owns a ref to the block_group when the block_group is queued on a discard list. However, to handle races with discard cancellation and the discard workfn, we have a specific logic to detect that the block_group is *currently* running in the workfn, to protect the workfn's usage amidst cancellation. As far as I can tell, this doesn't have any overt bugs (though finish_discard_pass() and remove_from_discard_list() racing can have a surprising outcome for the caller of remove_from_discard_list() in that it is again added at the end). But it is needlessly complicated to rely on locking and the nullity of discard_ctl->block_group. Simplify this significantly by just taking a refcount while we are in the workfn and unconditionally drop it in both the remove and workfn paths, regardless of if they race. Reviewed-by: Filipe Manana Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c index e815d165cccc..d6eef4bd9e9d 100644 --- a/fs/btrfs/discard.c +++ b/fs/btrfs/discard.c @@ -167,13 +167,7 @@ static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl, block_group->discard_eligible_time = 0; queued = !list_empty(&block_group->discard_list); list_del_init(&block_group->discard_list); - /* - * If the block group is currently running in the discard workfn, we - * don't want to deref it, since it's still being used by the workfn. - * The workfn will notice this case and deref the block group when it is - * finished. - */ - if (queued && !running) + if (queued) btrfs_put_block_group(block_group); spin_unlock(&discard_ctl->lock); @@ -260,9 +254,10 @@ again: block_group->discard_cursor = block_group->start; block_group->discard_state = BTRFS_DISCARD_EXTENTS; } - discard_ctl->block_group = block_group; } if (block_group) { + btrfs_get_block_group(block_group); + discard_ctl->block_group = block_group; *discard_state = block_group->discard_state; *discard_index = block_group->discard_index; } @@ -493,9 +488,20 @@ static void btrfs_discard_workfn(struct work_struct *work) block_group = peek_discard_list(discard_ctl, &discard_state, &discard_index, now); - if (!block_group || !btrfs_run_discard_work(discard_ctl)) + if (!block_group) return; + if (!btrfs_run_discard_work(discard_ctl)) { + spin_lock(&discard_ctl->lock); + btrfs_put_block_group(block_group); + discard_ctl->block_group = NULL; + spin_unlock(&discard_ctl->lock); + return; + } if (now < block_group->discard_eligible_time) { + spin_lock(&discard_ctl->lock); + btrfs_put_block_group(block_group); + discard_ctl->block_group = NULL; + spin_unlock(&discard_ctl->lock); btrfs_discard_schedule_work(discard_ctl, false); return; } @@ -547,15 +553,7 @@ static void btrfs_discard_workfn(struct work_struct *work) spin_lock(&discard_ctl->lock); discard_ctl->prev_discard = trimmed; discard_ctl->prev_discard_time = now; - /* - * If the block group was removed from the discard list while it was - * running in this workfn, then we didn't deref it, since this function - * still owned that reference. But we set the discard_ctl->block_group - * back to NULL, so we can use that condition to know that now we need - * to deref the block_group. - */ - if (discard_ctl->block_group == NULL) - btrfs_put_block_group(block_group); + btrfs_put_block_group(block_group); discard_ctl->block_group = NULL; __btrfs_discard_schedule_work(discard_ctl, now, false); spin_unlock(&discard_ctl->lock);