bcachefs: Refactor filesystem usage accounting
author Kent Overstreet <kent.overstreet@gmail.com>
Fri, 13 Nov 2020 23:36:33 +0000 (18:36 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:49 +0000 (17:08 -0400)
Various filesystem usage counters are kept in percpu counters, with one
set per in-flight journal buffer. Right now all the code that deals with
them assumes there are only two buffers/sets of counters, but the number
of journal bufs is getting increased to 4 in the next patch - so
refactor that code to not assume a constant.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
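The scheme this patch generalizes: each in-flight journal buffer owns one set of percpu usage counters, and a journal sequence number selects its buffer's set via the low bits. Because the buffer count stays a power of two, seq & JOURNAL_BUF_MASK replaces the hardcoded seq & 1. A standalone userspace sketch of just that indexing (illustrative names only, not bcachefs code):

#include <assert.h>
#include <stdint.h>

/* Sketch only: mirrors the JOURNAL_BUF_* macros added in journal_types.h;
 * nothing here is the real struct bch_fs. */
#define JOURNAL_BUF_BITS	1
#define JOURNAL_BUF_NR		(1U << JOURNAL_BUF_BITS)
#define JOURNAL_BUF_MASK	(JOURNAL_BUF_NR - 1)

static unsigned journal_seq_to_idx(uint64_t journal_seq)
{
	/* Valid for any power-of-two JOURNAL_BUF_NR, unlike the old "& 1": */
	return journal_seq & JOURNAL_BUF_MASK;
}

int main(void)
{
	assert(journal_seq_to_idx(6) == 0);	/* 6 % 2 == 0 */
	assert(journal_seq_to_idx(7) == 1);	/* 7 % 2 == 1 */
	/* Raising JOURNAL_BUF_BITS to 2 yields 4 buffers; callers that use
	 * JOURNAL_BUF_MASK need no other change. */
	return 0;
}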
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_gc.c
fs/bcachefs/buckets.c
fs/bcachefs/journal_types.h
fs/bcachefs/replicas.c
fs/bcachefs/super-io.c
fs/bcachefs/super.c

index 4fe3f9257752546c4927829c7691dc93572a3a8b..6db04dc9d2d3570c865e2d211007cc8f1bc4b976 100644 (file)
@@ -676,7 +676,7 @@ struct bch_fs {
 
        seqcount_t                      usage_lock;
        struct bch_fs_usage             *usage_base;
-       struct bch_fs_usage __percpu    *usage[2];
+       struct bch_fs_usage __percpu    *usage[JOURNAL_BUF_NR];
        struct bch_fs_usage __percpu    *usage_gc;
        u64 __percpu            *online_reserved;
 
index df018a2e463eb28d727d553e783a6510f8fe845b..5f5686466d7debcf30638a88cded92c2119430bf 100644 (file)
@@ -603,7 +603,6 @@ static int bch2_gc_done(struct bch_fs *c,
                struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
                struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
                struct stripe *dst, *src;
-               unsigned i;
 
                c->ec_stripes_heap.used = 0;
 
index 7cc31b0e02e4590f8c482317d18933c37c90c13d..4762c5465ef05da627c47351762bf003ffcb958c 100644 (file)
@@ -207,13 +207,13 @@ static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
 {
        return this_cpu_ptr(gc
                            ? c->usage_gc
-                           : c->usage[journal_seq & 1]);
+                           : c->usage[journal_seq & JOURNAL_BUF_MASK]);
 }
 
 u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v)
 {
        ssize_t offset = v - (u64 *) c->usage_base;
-       unsigned seq;
+       unsigned i, seq;
        u64 ret;
 
        BUG_ON(offset < 0 || offset >= fs_usage_u64s(c));
@@ -221,9 +221,10 @@ u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v)
 
        do {
                seq = read_seqcount_begin(&c->usage_lock);
-               ret = *v +
-                       percpu_u64_get((u64 __percpu *) c->usage[0] + offset) +
-                       percpu_u64_get((u64 __percpu *) c->usage[1] + offset);
+               ret = *v;
+
+               for (i = 0; i < ARRAY_SIZE(c->usage); i++)
+                       ret += percpu_u64_get((u64 __percpu *) c->usage[i] + offset);
        } while (read_seqcount_retry(&c->usage_lock, seq));
 
        return ret;
@@ -232,15 +233,20 @@ u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v)
 struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *c)
 {
        struct bch_fs_usage_online *ret;
-       unsigned seq, i, u64s;
+       unsigned seq, i, v, u64s = fs_usage_u64s(c) + 1;
+retry:
+       ret = kmalloc(u64s * sizeof(u64), GFP_NOFS);
+       if (unlikely(!ret))
+               return NULL;
 
        percpu_down_read(&c->mark_lock);
 
-       ret = kmalloc(sizeof(struct bch_fs_usage_online) +
-                     sizeof(u64) + c->replicas.nr, GFP_NOFS);
-       if (unlikely(!ret)) {
+       v = fs_usage_u64s(c) + 1;
+       if (unlikely(u64s != v)) {
+               u64s = v;
                percpu_up_read(&c->mark_lock);
-               return NULL;
+               kfree(ret);
+               goto retry;
        }
 
        ret->online_reserved = percpu_u64_get(c->online_reserved);
@@ -248,7 +254,7 @@ struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *c)
        u64s = fs_usage_u64s(c);
        do {
                seq = read_seqcount_begin(&c->usage_lock);
                memcpy(&ret->u, c->usage_base, u64s * sizeof(u64));
-               acc_u64s_percpu((u64 *) &ret->u, (u64 __percpu *) c->usage[0], u64s);
-               acc_u64s_percpu((u64 *) &ret->u, (u64 __percpu *) c->usage[1], u64s);
+               for (i = 0; i < ARRAY_SIZE(c->usage); i++)
+                       acc_u64s_percpu((u64 *) &ret->u, (u64 __percpu *) c->usage[i], u64s);
        } while (read_seqcount_retry(&c->usage_lock, seq));
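Both functions above read a consistent snapshot without taking a lock: they sum usage_base plus every per-buffer percpu delta inside a usage_lock seqcount read section, retrying if bch2_fs_usage_acc_to_base() folded deltas into the base concurrently. A hedged sketch of the pattern with a single counter (kernel-style; percpu_u64_get is the bcachefs helper from util.h):

/* Sketch only - the shape of the seqcount read loop, not bcachefs code: */
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/seqlock.h>

struct example_counters {
	seqcount_t	lock;
	u64		base;		/* folded totals */
	u64 __percpu	*delta[4];	/* one set per journal buffer */
};

static u64 example_read(struct example_counters *e)
{
	unsigned i, seq;
	u64 ret;

	do {
		seq = read_seqcount_begin(&e->lock);
		ret = e->base;
		for (i = 0; i < ARRAY_SIZE(e->delta); i++)
			ret += percpu_u64_get(e->delta[i]);
	} while (read_seqcount_retry(&e->lock, seq));

	return ret;
}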
index 6312a7f06d8798031ecd06af21e19d324606157a..7e328ccc0a8fa962dd092dd6550e7675db04e124 100644 (file)
 
 struct journal_res;
 
+#define JOURNAL_BUF_BITS       1
+#define JOURNAL_BUF_NR         (1U << JOURNAL_BUF_BITS)
+#define JOURNAL_BUF_MASK       (JOURNAL_BUF_NR - 1)
+
 /*
  * We put two of these in struct journal; we use them for writes to the
  * journal that are being staged or in flight.
index f46aa1d70e35df4a482ac8fcea57f8b0adb3fd62..85c97f67936a4f4a4d41c081d43f95c8ccef1716 100644 (file)
@@ -275,7 +275,7 @@ static void __replicas_table_update_pcpu(struct bch_fs_usage __percpu *dst_p,
 static int replicas_table_update(struct bch_fs *c,
                                 struct bch_replicas_cpu *new_r)
 {
-       struct bch_fs_usage __percpu *new_usage[2];
+       struct bch_fs_usage __percpu *new_usage[JOURNAL_BUF_NR];
        struct bch_fs_usage_online *new_scratch = NULL;
        struct bch_fs_usage __percpu *new_gc = NULL;
        struct bch_fs_usage *new_base = NULL;
@@ -283,7 +283,14 @@ static int replicas_table_update(struct bch_fs *c,
                sizeof(u64) * new_r->nr;
        unsigned scratch_bytes = sizeof(struct bch_fs_usage_online) +
                sizeof(u64) * new_r->nr;
-       int ret = -ENOMEM;
+       int ret = 0;
 
        memset(new_usage, 0, sizeof(new_usage));
 
+       for (i = 0; i < ARRAY_SIZE(new_usage); i++)
+               if (!(new_usage[i] = __alloc_percpu_gfp(bytes,
+                                       sizeof(u64), GFP_NOIO)))
+                       goto err;
 
@@ -295,10 +302,8 @@ static int replicas_table_update(struct bch_fs *c,
        if (!(new_base = kzalloc(bytes, GFP_NOIO)) ||
            !(new_scratch  = kmalloc(scratch_bytes, GFP_NOIO)) ||
            (c->usage_gc &&
-            !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_NOIO)))) {
-               bch_err(c, "error updating replicas table: memory allocation failure");
+            !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_NOIO))))
                goto err;
-       }
 
        for (i = 0; i < ARRAY_SIZE(new_usage); i++)
                if (c->usage[i])
@@ -317,14 +322,17 @@ static int replicas_table_update(struct bch_fs *c,
        swap(c->usage_scratch,  new_scratch);
        swap(c->usage_gc,       new_gc);
        swap(c->replicas,       *new_r);
-       ret = 0;
-err:
+out:
        free_percpu(new_gc);
        kfree(new_scratch);
-       free_percpu(new_usage[1]);
-       free_percpu(new_usage[0]);
+       for (i = 0; i < ARRAY_SIZE(new_usage); i++)
+               free_percpu(new_usage[i]);
        kfree(new_base);
        return ret;
+err:
+       bch_err(c, "error updating replicas table: memory allocation failure");
+       ret = -ENOMEM;
+       goto out;
 }
 
 static unsigned reserve_journal_replicas(struct bch_fs *c,
@@ -499,9 +507,7 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
                struct bch_replicas_cpu n;
 
                if (!__replicas_has_entry(&c->replicas_gc, e) &&
-                   (c->usage_base->replicas[i] ||
-                    percpu_u64_get(&c->usage[0]->replicas[i]) ||
-                    percpu_u64_get(&c->usage[1]->replicas[i]))) {
+                   bch2_fs_usage_read_one(c, &c->usage_base->replicas[i])) {
                        n = cpu_replicas_add_entry(&c->replicas_gc, e);
                        if (!n.entries) {
                                ret = -ENOSPC;
@@ -606,9 +612,7 @@ retry:
                        cpu_replicas_entry(&c->replicas, i);
 
                if (e->data_type == BCH_DATA_journal ||
-                   c->usage_base->replicas[i] ||
-                   percpu_u64_get(&c->usage[0]->replicas[i]) ||
-                   percpu_u64_get(&c->usage[1]->replicas[i]))
+                   bch2_fs_usage_read_one(c, &c->usage_base->replicas[i]))
                        memcpy(cpu_replicas_entry(&new, new.nr++),
                               e, new.entry_size);
        }
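The error-path rework above follows a common kernel pattern: the success path falls through a single out: label that frees whatever was not swapped into place, while err: logs once, sets the error code, and jumps back to out. A simplified sketch of that control flow (hypothetical names, not the full function):

/* Sketch of the out:/err: structure in replicas_table_update(): */
static int table_update_sketch(struct bch_fs *c, size_t bytes)
{
	void *new_a = NULL, *new_b = NULL;
	int ret = 0;

	if (!(new_a = kzalloc(bytes, GFP_NOIO)) ||
	    !(new_b = kzalloc(bytes, GFP_NOIO)))
		goto err;

	/* ... swap() the new tables with the old ones here ... */
out:
	/* After the swaps these hold the old tables; on error, the partial
	 * new ones. kfree(NULL) is a no-op, so partial failure is fine. */
	kfree(new_b);
	kfree(new_a);
	return ret;
err:
	bch_err(c, "error updating replicas table: memory allocation failure");
	ret = -ENOMEM;
	goto out;
}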
index 5406315340e14b355dde81da9e248db53a7c8415..e25ff75b97f3e3df066b8ddad577412c0c7ad50b 100644 (file)
@@ -998,7 +998,7 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
                for (i = 0; i < ARRAY_SIZE(c->usage); i++)
                        bch2_fs_usage_acc_to_base(c, i);
        } else {
-               bch2_fs_usage_acc_to_base(c, journal_seq & 1);
+               bch2_fs_usage_acc_to_base(c, journal_seq & JOURNAL_BUF_MASK);
        }
 
        {
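bch2_fs_usage_acc_to_base(c, idx) is the write side of the seqcount shown earlier: it folds buffer idx's percpu deltas into usage_base and zeroes them. The path above folds every buffer in one branch, or only the buffer selected by journal_seq & JOURNAL_BUF_MASK in the other. A hedged sketch of the fold (the real function lives in buckets.c and may differ in detail; acc_u64s_percpu and percpu_memset are bcachefs util.h helpers):

/* Sketch only - folding one buffer's percpu deltas into the base: */
static void usage_acc_to_base_sketch(struct bch_fs *c, unsigned idx)
{
	unsigned u64s = fs_usage_u64s(c);

	BUG_ON(idx >= ARRAY_SIZE(c->usage));

	preempt_disable();
	write_seqcount_begin(&c->usage_lock);

	/* usage_base += per-CPU sums of usage[idx], then clear usage[idx]: */
	acc_u64s_percpu((u64 *) c->usage_base,
			(u64 __percpu *) c->usage[idx], u64s);
	percpu_memset(c->usage[idx], 0, u64s * sizeof(u64));

	write_seqcount_end(&c->usage_lock);
	preempt_enable();
}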
index 98a875e08e9a960b716efeffa2c241ed39dab34e..f46b4b05b4aa75145267d5d5e835205437230443 100644 (file)
@@ -483,8 +483,8 @@ static void __bch2_fs_free(struct bch_fs *c)
        percpu_free_rwsem(&c->mark_lock);
        free_percpu(c->online_reserved);
        kfree(c->usage_scratch);
-       free_percpu(c->usage[1]);
-       free_percpu(c->usage[0]);
+       for (i = 0; i < ARRAY_SIZE(c->usage); i++)
+               free_percpu(c->usage[i]);
        kfree(c->usage_base);
 
        if (c->btree_iters_bufs)
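For reference, the percpu accumulation that both the read and fold paths rely on is just an element-wise sum over every possible CPU's copy. An illustrative sketch of acc_u64s_percpu()'s semantics (the real helper lives in fs/bcachefs/util.h):

/* Sketch only - dst[j] += src[j] summed over all CPUs: */
static void acc_u64s_percpu_sketch(u64 *dst, u64 __percpu *src, unsigned nr)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		u64 *p = per_cpu_ptr(src, cpu);
		unsigned i;

		for (i = 0; i < nr; i++)
			dst[i] += p[i];
	}
}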