btrfs: make btrfs_discard_workfn() block_group ref explicit
Author:     Boris Burkov <boris@bur.io>
AuthorDate: Mon, 3 Mar 2025 23:01:05 +0000 (15:01 -0800)
Commit:     David Sterba <dsterba@suse.com>
CommitDate: Tue, 18 Mar 2025 19:35:51 +0000 (20:35 +0100)
Currently, the async discard machinery owns a ref to the block_group
when the block_group is queued on a discard list. However, to handle
races with discard cancellation and the discard workfn, we have a
specific logic to detect that the block_group is *currently* running in
the workfn, to protect the workfn's usage amidst cancellation.

As far as I can tell, this doesn't have any overt bugs (though a race
between finish_discard_pass() and remove_from_discard_list() can have a
surprising outcome for the caller of remove_from_discard_list(): the
block group may end up added back to the discard list after removal).

But it is needlessly complicated to rely on locking and the nullity of
discard_ctl->block_group. Simplify this significantly by just taking a
refcount while we are in the workfn and unconditionally drop it in both
the remove and workfn paths, regardless of if they race.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Boris Burkov <boris@bur.io>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/discard.c

index e815d165cccc22a9c2531df50015721e17fe94ea..d6eef4bd9e9d45f27e2ba85e045f7edee29fd7fd 100644 (file)
@@ -167,13 +167,7 @@ static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
        block_group->discard_eligible_time = 0;
        queued = !list_empty(&block_group->discard_list);
        list_del_init(&block_group->discard_list);
-       /*
-        * If the block group is currently running in the discard workfn, we
-        * don't want to deref it, since it's still being used by the workfn.
-        * The workfn will notice this case and deref the block group when it is
-        * finished.
-        */
-       if (queued && !running)
+       if (queued)
                btrfs_put_block_group(block_group);
 
        spin_unlock(&discard_ctl->lock);
@@ -260,9 +254,10 @@ again:
                        block_group->discard_cursor = block_group->start;
                        block_group->discard_state = BTRFS_DISCARD_EXTENTS;
                }
-               discard_ctl->block_group = block_group;
        }
        if (block_group) {
+               btrfs_get_block_group(block_group);
+               discard_ctl->block_group = block_group;
                *discard_state = block_group->discard_state;
                *discard_index = block_group->discard_index;
        }
@@ -493,9 +488,20 @@ static void btrfs_discard_workfn(struct work_struct *work)
 
        block_group = peek_discard_list(discard_ctl, &discard_state,
                                        &discard_index, now);
-       if (!block_group || !btrfs_run_discard_work(discard_ctl))
+       if (!block_group)
                return;
+       if (!btrfs_run_discard_work(discard_ctl)) {
+               spin_lock(&discard_ctl->lock);
+               btrfs_put_block_group(block_group);
+               discard_ctl->block_group = NULL;
+               spin_unlock(&discard_ctl->lock);
+               return;
+       }
        if (now < block_group->discard_eligible_time) {
+               spin_lock(&discard_ctl->lock);
+               btrfs_put_block_group(block_group);
+               discard_ctl->block_group = NULL;
+               spin_unlock(&discard_ctl->lock);
                btrfs_discard_schedule_work(discard_ctl, false);
                return;
        }
@@ -547,15 +553,7 @@ static void btrfs_discard_workfn(struct work_struct *work)
        spin_lock(&discard_ctl->lock);
        discard_ctl->prev_discard = trimmed;
        discard_ctl->prev_discard_time = now;
-       /*
-        * If the block group was removed from the discard list while it was
-        * running in this workfn, then we didn't deref it, since this function
-        * still owned that reference. But we set the discard_ctl->block_group
-        * back to NULL, so we can use that condition to know that now we need
-        * to deref the block_group.
-        */
-       if (discard_ctl->block_group == NULL)
-               btrfs_put_block_group(block_group);
+       btrfs_put_block_group(block_group);
        discard_ctl->block_group = NULL;
        __btrfs_discard_schedule_work(discard_ctl, now, false);
        spin_unlock(&discard_ctl->lock);