bcachefs: Fix rebalance_work accounting
author Kent Overstreet <kent.overstreet@linux.dev>
Fri, 23 Aug 2024 19:35:22 +0000 (15:35 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sat, 24 Aug 2024 14:16:21 +0000 (10:16 -0400)
rebalance_work was keying off of the presence of rebalance_opts in the
extent - but that was incorrect: we keep those around after rebalance
for indirect extents, since the inode's options are not directly
available.

Fixes: 20ac515a9cc7 ("bcachefs: bch_acct_rebalance_work")
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs_format.h
fs/bcachefs/buckets.c
fs/bcachefs/extents.c
fs/bcachefs/extents.h
fs/bcachefs/sb-downgrade.c

index c75f2e0f32bb9696aed79075744351db6d7511e3..14ce726bf5a3cce27152e31c82e7a976bfb6005a 100644 (file)
@@ -677,7 +677,8 @@ struct bch_sb_field_ext {
        x(bucket_stripe_sectors,        BCH_VERSION(1,  8))             \
        x(disk_accounting_v2,           BCH_VERSION(1,  9))             \
        x(disk_accounting_v3,           BCH_VERSION(1, 10))             \
-       x(disk_accounting_inum,         BCH_VERSION(1, 11))
+       x(disk_accounting_inum,         BCH_VERSION(1, 11))             \
+       x(rebalance_work_acct_fix,      BCH_VERSION(1, 12))
 
 enum bcachefs_metadata_version {
        bcachefs_metadata_version_min = 9,
index be2bbd2486314f12c1348892358495d5e5b82f6d..a2274429e7f4ad6cfb7517d746908caf083117eb 100644 (file)
@@ -699,7 +699,8 @@ err:
 static int __trigger_extent(struct btree_trans *trans,
                            enum btree_id btree_id, unsigned level,
                            struct bkey_s_c k,
-                           enum btree_iter_update_trigger_flags flags)
+                           enum btree_iter_update_trigger_flags flags,
+                           s64 *replicas_sectors)
 {
        bool gc = flags & BTREE_TRIGGER_gc;
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
@@ -708,7 +709,6 @@ static int __trigger_extent(struct btree_trans *trans,
        enum bch_data_type data_type = bkey_is_btree_ptr(k.k)
                ? BCH_DATA_btree
                : BCH_DATA_user;
-       s64 replicas_sectors = 0;
        int ret = 0;
 
        struct disk_accounting_pos acc_replicas_key = {
@@ -739,7 +739,7 @@ static int __trigger_extent(struct btree_trans *trans,
                        if (ret)
                                return ret;
                } else if (!p.has_ec) {
-                       replicas_sectors       += disk_sectors;
+                       *replicas_sectors       += disk_sectors;
                        acc_replicas_key.replicas.devs[acc_replicas_key.replicas.nr_devs++] = p.ptr.dev;
                } else {
                        ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags);
@@ -777,7 +777,7 @@ static int __trigger_extent(struct btree_trans *trans,
        }
 
        if (acc_replicas_key.replicas.nr_devs) {
-               ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, &replicas_sectors, 1, gc);
+               ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, replicas_sectors, 1, gc);
                if (ret)
                        return ret;
        }
@@ -787,7 +787,7 @@ static int __trigger_extent(struct btree_trans *trans,
                        .type                   = BCH_DISK_ACCOUNTING_snapshot,
                        .snapshot.id            = k.k->p.snapshot,
                };
-               ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, &replicas_sectors, 1, gc);
+               ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, replicas_sectors, 1, gc);
                if (ret)
                        return ret;
        }
@@ -807,7 +807,7 @@ static int __trigger_extent(struct btree_trans *trans,
                        .type           = BCH_DISK_ACCOUNTING_btree,
                        .btree.id       = btree_id,
                };
-               ret = bch2_disk_accounting_mod(trans, &acc_btree_key, &replicas_sectors, 1, gc);
+               ret = bch2_disk_accounting_mod(trans, &acc_btree_key, replicas_sectors, 1, gc);
                if (ret)
                        return ret;
        } else {
@@ -819,22 +819,13 @@ static int __trigger_extent(struct btree_trans *trans,
                s64 v[3] = {
                        insert ? 1 : -1,
                        insert ? k.k->size : -((s64) k.k->size),
-                       replicas_sectors,
+                       *replicas_sectors,
                };
                ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc);
                if (ret)
                        return ret;
        }
 
-       if (bch2_bkey_rebalance_opts(k)) {
-               struct disk_accounting_pos acc = {
-                       .type           = BCH_DISK_ACCOUNTING_rebalance_work,
-               };
-               ret = bch2_disk_accounting_mod(trans, &acc, &replicas_sectors, 1, gc);
-               if (ret)
-                       return ret;
-       }
-
        return 0;
 }
 
@@ -843,6 +834,7 @@ int bch2_trigger_extent(struct btree_trans *trans,
                        struct bkey_s_c old, struct bkey_s new,
                        enum btree_iter_update_trigger_flags flags)
 {
+       struct bch_fs *c = trans->c;
        struct bkey_ptrs_c new_ptrs = bch2_bkey_ptrs_c(new.s_c);
        struct bkey_ptrs_c old_ptrs = bch2_bkey_ptrs_c(old);
        unsigned new_ptrs_bytes = (void *) new_ptrs.end - (void *) new_ptrs.start;
@@ -858,21 +850,53 @@ int bch2_trigger_extent(struct btree_trans *trans,
                    new_ptrs_bytes))
                return 0;
 
-       if (flags & BTREE_TRIGGER_transactional) {
-               struct bch_fs *c = trans->c;
-               int mod = (int) bch2_bkey_needs_rebalance(c, new.s_c) -
-                         (int) bch2_bkey_needs_rebalance(c, old);
+       if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
+               s64 old_replicas_sectors = 0, new_replicas_sectors = 0;
+
+               if (old.k->type) {
+                       int ret = __trigger_extent(trans, btree, level, old,
+                                                  flags & ~BTREE_TRIGGER_insert,
+                                                  &old_replicas_sectors);
+                       if (ret)
+                               return ret;
+               }
+
+               if (new.k->type) {
+                       int ret = __trigger_extent(trans, btree, level, new.s_c,
+                                                  flags & ~BTREE_TRIGGER_overwrite,
+                                                  &new_replicas_sectors);
+                       if (ret)
+                               return ret;
+               }
+
+               int need_rebalance_delta = 0;			/* -1, 0 or +1: did "has sectors needing rebalance" flip from old to new? */
+               s64 need_rebalance_sectors_delta = 0;		/* sectors needing rebalance: new key minus old key */
+
+               s64 s = bch2_bkey_sectors_need_rebalance(c, old);	/* subtract the old key's contribution */
+               need_rebalance_delta -= s != 0;
+               need_rebalance_sectors_delta -= s;
 
-               if (mod) {
+               s = bch2_bkey_sectors_need_rebalance(c, new.s_c);	/* add the new key's contribution; must query new, not old, or both deltas cancel to 0 */
+               need_rebalance_delta += s != 0;
+               need_rebalance_sectors_delta += s;
+
+               if ((flags & BTREE_TRIGGER_transactional) && need_rebalance_delta) {
                        int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
-                                                             new.k->p, mod > 0);
+                                                         new.k->p, need_rebalance_delta > 0);
                        if (ret)
                                return ret;
                }
-       }
 
-       if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc))
-               return trigger_run_overwrite_then_insert(__trigger_extent, trans, btree, level, old, new, flags);
+               if (need_rebalance_sectors_delta) {
+                       struct disk_accounting_pos acc = {
+                               .type           = BCH_DISK_ACCOUNTING_rebalance_work,
+                       };
+                       int ret = bch2_disk_accounting_mod(trans, &acc, &need_rebalance_sectors_delta, 1,
+                                                          flags & BTREE_TRIGGER_gc);
+                       if (ret)
+                               return ret;
+               }
+       }
 
        return 0;
 }
index 9406f82fc2550b3f1b9430a9ff10d40deb2a4617..e317df3644a1197db95d43fcc202ec34d1f95984 100644 (file)
@@ -1379,6 +1379,45 @@ bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k)
        return r != NULL;
 }
 
+static u64 __bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k,
+                                      unsigned target, unsigned compression)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+       u64 sectors = 0; /* running total of compressed_size over pointers that fail a check; returned to the caller */
+
+       if (compression) { /* compression check runs only when a compression option is set (nonzero) */
+               unsigned compression_type = bch2_compression_opt_to_type(compression);
+
+               bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+                       if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible ||
+                           p.ptr.unwritten) {
+                               sectors = 0; /* discard anything counted so far by this loop */
+                               goto incompressible; /* skip the rest of the compression check; the target check below still runs */
+                       }
+
+                       if (!p.ptr.cached && p.crc.compression_type != compression_type) /* cached pointers are never counted */
+                               sectors += p.crc.compressed_size;
+               }
+       }
+incompressible:
+       if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { /* target check runs only for a nonzero target that accepts user data */
+               bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+                       if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, target))
+                               sectors += p.crc.compressed_size; /* NOTE(review): a pointer failing both checks is added in both loops - confirm intended */
+       }
+
+       return sectors;
+}
+
+u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k)
+{
+       const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); /* NULL when the lookup finds no rebalance opts for k */
+
+       return r ? __bch2_bkey_sectors_need_rebalance(c, k, r->target, r->compression) : 0; /* no opts => 0 sectors; otherwise count using the key's own target/compression */
+}
+
 int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k,
                                  struct bch_io_opts *opts)
 {
index 1a6ddee48041d13f3aa0e4483d454373141d02de..709dd83183be1fe5961da72681861390dd49dc8e 100644 (file)
@@ -692,6 +692,7 @@ const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c);
 unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c,
                                       unsigned, unsigned);
 bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c);
+u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c);
 
 int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *,
                                  struct bch_io_opts *);
index 650a1f77ca4036f5b03fb8fa8d9b9b8a323f1add..c7e4cdd3f6a521d01a1705c07553073a38076503 100644 (file)
@@ -74,6 +74,9 @@
          BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted,   \
          BCH_FSCK_ERR_accounting_key_junk_at_end)              \
        x(disk_accounting_inum,                                 \
+         BIT_ULL(BCH_RECOVERY_PASS_check_allocations),         \
+         BCH_FSCK_ERR_accounting_mismatch)                     \
+       x(rebalance_work_acct_fix,                              \
          BIT_ULL(BCH_RECOVERY_PASS_check_allocations),         \
          BCH_FSCK_ERR_accounting_mismatch)
 
          BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong,      \
          BCH_FSCK_ERR_fs_usage_replicas_wrong,                 \
          BCH_FSCK_ERR_accounting_replicas_not_marked,          \
-         BCH_FSCK_ERR_bkey_version_in_future)
+         BCH_FSCK_ERR_bkey_version_in_future)                  \
+       x(rebalance_work_acct_fix,                              \
+         BIT_ULL(BCH_RECOVERY_PASS_check_allocations),         \
+         BCH_FSCK_ERR_accounting_mismatch)
 
 struct upgrade_downgrade_entry {
        u64             recovery_passes;