bcachefs: Kill for_each_btree_key()
author     Kent Overstreet <kent.overstreet@linux.dev>
           Fri, 8 Dec 2023 04:28:26 +0000 (23:28 -0500)
committer  Kent Overstreet <kent.overstreet@linux.dev>
           Mon, 1 Jan 2024 16:47:40 +0000 (11:47 -0500)

for_each_btree_key() handles transaction restarts, like
for_each_btree_key2(), but only calls bch2_trans_begin() after a
transaction restart - for_each_btree_key2() wraps every loop iteration
in a transaction.

The for_each_btree_key() behaviour is problematic when it leads to
holding the SRCU lock that prevents key cache reclaim for an unbounded
amount of time - and there's no real need to keep the old macro around.
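
For illustration, a minimal sketch of the conversion pattern applied at
the call sites below (not taken from the patch itself; interesting() and
process_key() are hypothetical placeholders). The new macro takes the
loop body as a statement expression that must evaluate to an error code,
which is why the converted sites end in "0;" and assign the result to
ret, and why each iteration runs in its own transaction:

  /* Old pattern (removed): only a restart re-runs bch2_trans_begin(),
   * so the SRCU read lock can be held across the entire walk. */
  for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
                     BTREE_ITER_PREFETCH, k, ret) {
          if (!interesting(k))            /* hypothetical helper */
                  continue;
          process_key(c, k);              /* hypothetical helper */
  }
  bch2_trans_iter_exit(trans, &iter);

  /* New pattern: the body is a statement expression yielding an error
   * code (0 on success); every iteration is wrapped in a transaction. */
  ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN,
                            BTREE_ITER_PREFETCH, k, ({
          if (!interesting(k))
                  continue;
          process_key(c, k);
          0;
  }));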

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_background.c
fs/bcachefs/btree_gc.c
fs/bcachefs/btree_iter.h
fs/bcachefs/ec.c
fs/bcachefs/fsck.c
fs/bcachefs/inode.c
fs/bcachefs/move.c
fs/bcachefs/recovery.c
fs/bcachefs/snapshot.c

diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index ad4ad795b3bcedd91e9355fe1df8ab8f9bff7767..7ebf5516266ea5abb713a98d65a6f78f0d3f8b99 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -544,8 +544,8 @@ int bch2_bucket_gens_init(struct bch_fs *c)
        u8 gen;
        int ret;
 
-       for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
-                          BTREE_ITER_PREFETCH, k, ret) {
+       ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN,
+                                 BTREE_ITER_PREFETCH, k, ({
                /*
                 * Not a fsck error because this is checked/repaired by
                 * bch2_check_alloc_key() which runs later:
@@ -572,8 +572,8 @@ int bch2_bucket_gens_init(struct bch_fs *c)
                }
 
                g.v.gens[offset] = gen;
-       }
-       bch2_trans_iter_exit(trans, &iter);
+               0;
+       }));
 
        if (have_bucket_gens_key && !ret)
                ret = commit_do(trans, NULL, NULL,
@@ -582,8 +582,7 @@ int bch2_bucket_gens_init(struct bch_fs *c)
 
        bch2_trans_put(trans);
 
-       if (ret)
-               bch_err_fn(c, ret);
+       bch_err_fn(c, ret);
        return ret;
 }
 
@@ -601,8 +600,8 @@ int bch2_alloc_read(struct bch_fs *c)
                const struct bch_bucket_gens *g;
                u64 b;
 
-               for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN,
-                                  BTREE_ITER_PREFETCH, k, ret) {
+               ret = for_each_btree_key2(trans, iter, BTREE_ID_bucket_gens, POS_MIN,
+                                         BTREE_ITER_PREFETCH, k, ({
                        u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset;
                        u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;
 
@@ -624,13 +623,13 @@ int bch2_alloc_read(struct bch_fs *c)
                             b < min_t(u64, ca->mi.nbuckets, end);
                             b++)
                                *bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK];
-               }
-               bch2_trans_iter_exit(trans, &iter);
+                       0;
+               }));
        } else {
                struct bch_alloc_v4 a;
 
-               for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
-                                  BTREE_ITER_PREFETCH, k, ret) {
+               ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN,
+                                         BTREE_ITER_PREFETCH, k, ({
                        /*
                         * Not a fsck error because this is checked/repaired by
                         * bch2_check_alloc_key() which runs later:
@@ -641,16 +640,14 @@ int bch2_alloc_read(struct bch_fs *c)
                        ca = bch_dev_bkey_exists(c, k.k->p.inode);
 
                        *bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen;
-               }
-               bch2_trans_iter_exit(trans, &iter);
+                       0;
+               }));
        }
 
        bch2_trans_put(trans);
        up_read(&c->gc_lock);
 
-       if (ret)
-               bch_err_fn(c, ret);
-
+       bch_err_fn(c, ret);
        return ret;
 }
 
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 5a7b72a461dfde8cf05202f79124a746b15d809a..6a44427d2ab20b669ae531225e4c19c196ba1728 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -1665,7 +1665,6 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only)
 static int bch2_gc_reflink_start(struct bch_fs *c,
                                 bool metadata_only)
 {
-       struct btree_trans *trans;
        struct btree_iter iter;
        struct bkey_s_c k;
        struct reflink_gc *r;
@@ -1674,30 +1673,30 @@ static int bch2_gc_reflink_start(struct bch_fs *c,
        if (metadata_only)
                return 0;
 
-       trans = bch2_trans_get(c);
        c->reflink_gc_nr = 0;
 
-       for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN,
-                          BTREE_ITER_PREFETCH, k, ret) {
-               const __le64 *refcount = bkey_refcount_c(k);
+       ret = bch2_trans_run(c,
+               for_each_btree_key2(trans, iter, BTREE_ID_reflink, POS_MIN,
+                                 BTREE_ITER_PREFETCH, k, ({
+                       const __le64 *refcount = bkey_refcount_c(k);
 
-               if (!refcount)
-                       continue;
+                       if (!refcount)
+                               continue;
 
-               r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
-                                      GFP_KERNEL);
-               if (!r) {
-                       ret = -BCH_ERR_ENOMEM_gc_reflink_start;
-                       break;
-               }
+                       r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
+                                              GFP_KERNEL);
+                       if (!r) {
+                               ret = -BCH_ERR_ENOMEM_gc_reflink_start;
+                               break;
+                       }
 
-               r->offset       = k.k->p.offset;
-               r->size         = k.k->size;
-               r->refcount     = 0;
-       }
-       bch2_trans_iter_exit(trans, &iter);
+                       r->offset       = k.k->p.offset;
+                       r->size         = k.k->size;
+                       r->refcount     = 0;
+                       0;
+               })));
 
-       bch2_trans_put(trans);
+       bch_err_fn(c, ret);
        return ret;
 }
 
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index 83a4ff0b91e17a5bf7820d78fae74faf58fb906f..10d059cfd36dec2f6e966688ae7e7b86cf2bba37 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -777,7 +777,7 @@ transaction_restart:                                                        \
                            (_do) ?: bch2_trans_commit(_trans, (_disk_res),\
                                        (_journal_seq), (_commit_flags)))
 
-#define for_each_btree_key(_trans, _iter, _btree_id,                   \
+#define for_each_btree_key_old(_trans, _iter, _btree_id,                       \
                           _start, _flags, _k, _ret)                    \
        for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id),      \
                                  (_start), (_flags));                  \
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index bc8b556f19a90fbbd768f360b87b11e4bf7cf86b..8e5237661cce975d6181a30a0c8818b0b3502efc 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -1833,7 +1833,6 @@ void bch2_fs_ec_flush(struct bch_fs *c)
 
 int bch2_stripes_read(struct bch_fs *c)
 {
-       struct btree_trans *trans = bch2_trans_get(c);
        struct btree_iter iter;
        struct bkey_s_c k;
        const struct bch_stripe *s;
@@ -1841,36 +1840,33 @@ int bch2_stripes_read(struct bch_fs *c)
        unsigned i;
        int ret;
 
-       for_each_btree_key(trans, iter, BTREE_ID_stripes, POS_MIN,
-                          BTREE_ITER_PREFETCH, k, ret) {
-               if (k.k->type != KEY_TYPE_stripe)
-                       continue;
-
-               ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
-               if (ret)
-                       break;
-
-               s = bkey_s_c_to_stripe(k).v;
+       ret = bch2_trans_run(c,
+               for_each_btree_key2(trans, iter, BTREE_ID_stripes, POS_MIN,
+                                   BTREE_ITER_PREFETCH, k, ({
+                       if (k.k->type != KEY_TYPE_stripe)
+                               continue;
 
-               m = genradix_ptr(&c->stripes, k.k->p.offset);
-               m->sectors      = le16_to_cpu(s->sectors);
-               m->algorithm    = s->algorithm;
-               m->nr_blocks    = s->nr_blocks;
-               m->nr_redundant = s->nr_redundant;
-               m->blocks_nonempty = 0;
+                       ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
+                       if (ret)
+                               break;
 
-               for (i = 0; i < s->nr_blocks; i++)
-                       m->blocks_nonempty += !!stripe_blockcount_get(s, i);
+                       s = bkey_s_c_to_stripe(k).v;
 
-               bch2_stripes_heap_insert(c, m, k.k->p.offset);
-       }
-       bch2_trans_iter_exit(trans, &iter);
+                       m = genradix_ptr(&c->stripes, k.k->p.offset);
+                       m->sectors      = le16_to_cpu(s->sectors);
+                       m->algorithm    = s->algorithm;
+                       m->nr_blocks    = s->nr_blocks;
+                       m->nr_redundant = s->nr_redundant;
+                       m->blocks_nonempty = 0;
 
-       bch2_trans_put(trans);
+                       for (i = 0; i < s->nr_blocks; i++)
+                               m->blocks_nonempty += !!stripe_blockcount_get(s, i);
 
-       if (ret)
-               bch_err_fn(c, ret);
+                       bch2_stripes_heap_insert(c, m, k.k->p.offset);
+                       0;
+               })));
 
+       bch_err_fn(c, ret);
        return ret;
 }
 
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index bc6b56628fdfe58005a394b3ae25465a93cf033d..39f529eb4a4d8b24e12308c960898dfe795f670e 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -589,14 +589,13 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
        struct bch_fs *c = trans->c;
        struct btree_iter iter;
        struct bkey_s_c k;
-       u32 restart_count = trans->restart_count;
        int ret;
 
        w->recalculate_sums = false;
        w->inodes.nr = 0;
 
-       for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum),
-                          BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+       for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
+                                    BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
                if (k.k->p.offset != inum)
                        break;
 
@@ -609,8 +608,7 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
                return ret;
 
        w->first_this_inode = true;
-
-       return trans_was_restarted(trans, restart_count);
+       return 0;
 }
 
 static struct inode_walker_entry *
@@ -2146,19 +2144,14 @@ int bch2_check_directory_structure(struct bch_fs *c)
        pathbuf path = { 0, };
        int ret;
 
-       for_each_btree_key(trans, iter, BTREE_ID_inodes, POS_MIN,
-                          BTREE_ITER_INTENT|
-                          BTREE_ITER_PREFETCH|
-                          BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+       for_each_btree_key_old(trans, iter, BTREE_ID_inodes, POS_MIN,
+                              BTREE_ITER_INTENT|
+                              BTREE_ITER_PREFETCH|
+                              BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
                if (!bkey_is_inode(k.k))
                        continue;
 
-               ret = bch2_inode_unpack(k, &u);
-               if (ret) {
-                       /* Should have been caught earlier in fsck: */
-                       bch_err(c, "error unpacking inode %llu: %i", k.k->p.offset, ret);
-                       break;
-               }
+               BUG_ON(bch2_inode_unpack(k, &u));
 
                if (u.bi_flags & BCH_INODE_unlinked)
                        continue;
@@ -2170,6 +2163,7 @@ int bch2_check_directory_structure(struct bch_fs *c)
        bch2_trans_iter_exit(trans, &iter);
        bch2_trans_put(trans);
        darray_exit(&path);
+
        bch_err_fn(c, ret);
        return ret;
 }
@@ -2255,47 +2249,42 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
                                       struct nlink_table *t,
                                       u64 start, u64 *end)
 {
-       struct btree_trans *trans = bch2_trans_get(c);
        struct btree_iter iter;
        struct bkey_s_c k;
        struct bch_inode_unpacked u;
-       int ret = 0;
 
-       for_each_btree_key(trans, iter, BTREE_ID_inodes,
-                          POS(0, start),
-                          BTREE_ITER_INTENT|
-                          BTREE_ITER_PREFETCH|
-                          BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-               if (!bkey_is_inode(k.k))
-                       continue;
-
-               /* Should never fail, checked by bch2_inode_invalid: */
-               BUG_ON(bch2_inode_unpack(k, &u));
-
-               /*
-                * Backpointer and directory structure checks are sufficient for
-                * directories, since they can't have hardlinks:
-                */
-               if (S_ISDIR(u.bi_mode))
-                       continue;
+       int ret = bch2_trans_run(c,
+               for_each_btree_key2(trans, iter, BTREE_ID_inodes,
+                                         POS(0, start),
+                                         BTREE_ITER_INTENT|
+                                         BTREE_ITER_PREFETCH|
+                                         BTREE_ITER_ALL_SNAPSHOTS, k, ({
+                       if (!bkey_is_inode(k.k))
+                               continue;
 
-               if (!u.bi_nlink)
-                       continue;
+                       /* Should never fail, checked by bch2_inode_invalid: */
+                       BUG_ON(bch2_inode_unpack(k, &u));
 
-               ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot);
-               if (ret) {
-                       *end = k.k->p.offset;
-                       ret = 0;
-                       break;
-               }
+                       /*
+                        * Backpointer and directory structure checks are sufficient for
+                        * directories, since they can't have hardlinks:
+                        */
+                       if (S_ISDIR(u.bi_mode))
+                               continue;
 
-       }
-       bch2_trans_iter_exit(trans, &iter);
-       bch2_trans_put(trans);
+                       if (!u.bi_nlink)
+                               continue;
 
-       if (ret)
-               bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
+                       ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot);
+                       if (ret) {
+                               *end = k.k->p.offset;
+                               ret = 0;
+                               break;
+                       }
+                       0;
+               })));
 
+       bch_err_fn(c, ret);
        return ret;
 }
 
@@ -2303,42 +2292,39 @@ noinline_for_stack
 static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links,
                                     u64 range_start, u64 range_end)
 {
-       struct btree_trans *trans = bch2_trans_get(c);
        struct snapshots_seen s;
        struct btree_iter iter;
        struct bkey_s_c k;
        struct bkey_s_c_dirent d;
-       int ret;
 
        snapshots_seen_init(&s);
 
-       for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN,
-                          BTREE_ITER_INTENT|
-                          BTREE_ITER_PREFETCH|
-                          BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-               ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p);
-               if (ret)
-                       break;
-
-               switch (k.k->type) {
-               case KEY_TYPE_dirent:
-                       d = bkey_s_c_to_dirent(k);
+       int ret = bch2_trans_run(c,
+               for_each_btree_key2(trans, iter, BTREE_ID_dirents, POS_MIN,
+                                   BTREE_ITER_INTENT|
+                                   BTREE_ITER_PREFETCH|
+                                   BTREE_ITER_ALL_SNAPSHOTS, k, ({
+                       ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p);
+                       if (ret)
+                               break;
 
-                       if (d.v->d_type != DT_DIR &&
-                           d.v->d_type != DT_SUBVOL)
-                               inc_link(c, &s, links, range_start, range_end,
-                                        le64_to_cpu(d.v->d_inum),
-                                        bch2_snapshot_equiv(c, d.k->p.snapshot));
-                       break;
-               }
-       }
-       bch2_trans_iter_exit(trans, &iter);
+                       switch (k.k->type) {
+                       case KEY_TYPE_dirent:
+                               d = bkey_s_c_to_dirent(k);
 
-       if (ret)
-               bch_err(c, "error in fsck: btree error %i while walking dirents", ret);
+                               if (d.v->d_type != DT_DIR &&
+                                   d.v->d_type != DT_SUBVOL)
+                                       inc_link(c, &s, links, range_start, range_end,
+                                                le64_to_cpu(d.v->d_inum),
+                                                bch2_snapshot_equiv(c, d.k->p.snapshot));
+                               break;
+                       }
+                       0;
+               })));
 
-       bch2_trans_put(trans);
        snapshots_seen_exit(&s);
+
+       bch_err_fn(c, ret);
        return ret;
 }
 
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 0d2bdc7575a812f9d80aa72e3fcca4de164da9ed..7ee9ac5e447960b2f66fd1e9a9a59650b1944de5 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -1168,29 +1168,29 @@ again:
         * but we can't retry because the btree write buffer won't have been
         * flushed and we'd spin:
         */
-       for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
-                          BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-               ret = commit_do(trans, NULL, NULL,
-                               BCH_TRANS_COMMIT_no_enospc|
-                               BCH_TRANS_COMMIT_lazy_rw,
-                       may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass));
-               if (ret < 0)
-                       break;
-
-               if (ret) {
-                       if (!test_bit(BCH_FS_rw, &c->flags)) {
-                               bch2_trans_unlock(trans);
-                               bch2_fs_lazy_rw(c);
-                       }
-
+       ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
+                                       BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+                                       NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+               ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass);
+               if (ret > 0) {
                        bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot);
 
                        ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot);
-                       if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-                               break;
+                       /*
+                        * We don't want to loop here: a transaction restart
+                        * error here means we handled a transaction restart and
+                        * we're actually done, but if we loop we'll retry the
+                        * same key because the write buffer hasn't been flushed
+                        * yet
+                        */
+                       if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+                               ret = 0;
+                               continue;
+                       }
                }
-       }
-       bch2_trans_iter_exit(trans, &iter);
+
+               ret;
+       }));
 
        if (!ret && need_another_pass) {
                ret = bch2_btree_write_buffer_flush_sync(trans);
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 847ab0eba2da345fe37d3fbaa44f7849f7da944e..b2985e730650f8077cc7163386fcc3f798983844 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -377,8 +377,8 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
 
                io_opts->d.nr = 0;
 
-               for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode),
-                                  BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+               ret = for_each_btree_key2(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode),
+                                         BTREE_ITER_ALL_SNAPSHOTS, k, ({
                        if (k.k->p.offset != extent_k.k->p.inode)
                                break;
 
@@ -391,11 +391,8 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
                        struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
                        bch2_inode_opts_get(&e.io_opts, trans->c, &inode);
 
-                       ret = darray_push(&io_opts->d, e);
-                       if (ret)
-                               break;
-               }
-               bch2_trans_iter_exit(trans, &iter);
+                       darray_push(&io_opts->d, e);
+               }));
                io_opts->cur_inum = extent_k.k->p.inode;
        }
 
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 629ddbb5850f64dcfcbb5cdc098c2a343a45e0b3..ed366b35a1f2805db692d853875313bdab6c4154 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -534,7 +534,8 @@ static int bch2_set_may_go_rw(struct bch_fs *c)
        keys->gap = keys->nr;
 
        set_bit(BCH_FS_may_go_rw, &c->flags);
-       if (keys->nr || c->opts.fsck)
+
+       if (keys->nr || c->opts.fsck || !c->sb.clean)
                return bch2_fs_read_write_early(c);
        return 0;
 }
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index b2d216fa71824e8666dfac76226726251fe555bd..48307f79f6ad71a9e9baa30bc050aadf38965e69 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -1410,19 +1410,16 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
                goto err;
        }
 
-       for_each_btree_key(trans, iter, BTREE_ID_snapshots,
-                          POS_MIN, 0, k, ret) {
+       ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
+                                 POS_MIN, 0, k, ({
                if (k.k->type != KEY_TYPE_snapshot)
                        continue;
 
                snap = bkey_s_c_to_snapshot(k);
-               if (BCH_SNAPSHOT_DELETED(snap.v)) {
-                       ret = snapshot_list_add(c, &deleted, k.k->p.offset);
-                       if (ret)
-                               break;
-               }
-       }
-       bch2_trans_iter_exit(trans, &iter);
+               BCH_SNAPSHOT_DELETED(snap.v)
+                       ? snapshot_list_add(c, &deleted, k.k->p.offset)
+                       : 0;
+       }));
 
        if (ret) {
                bch_err_msg(c, ret, "walking snapshots");
@@ -1469,18 +1466,20 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
        bch2_trans_unlock(trans);
        down_write(&c->snapshot_create_lock);
 
-       for_each_btree_key(trans, iter, BTREE_ID_snapshots,
-                          POS_MIN, 0, k, ret) {
+       ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
+                                 POS_MIN, 0, k, ({
                u32 snapshot = k.k->p.offset;
                u32 equiv = bch2_snapshot_equiv(c, snapshot);
 
-               if (equiv != snapshot)
-                       snapshot_list_add(c, &deleted_interior, snapshot);
-       }
-       bch2_trans_iter_exit(trans, &iter);
+               equiv != snapshot
+                       ? snapshot_list_add(c, &deleted_interior, snapshot)
+                       : 0;
+       }));
 
-       if (ret)
+       if (ret) {
+               bch_err_msg(c, ret, "walking snapshots");
                goto err_create_lock;
+       }
 
        /*
         * Fixing children of deleted snapshots can't be done completely