bcachefs: RESERVE_stripe
author Kent Overstreet <kent.overstreet@linux.dev>
Thu, 2 Mar 2023 06:54:17 +0000 (01:54 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:55 +0000 (17:09 -0400)
Rework stripe creation path - new algorithm for deciding when to create
new stripes or reuse existing stripes.

We add a new allocation watermark, RESERVE_stripe, above RESERVE_none.
Then we always try to create a new stripe by doing RESERVE_stripe
allocations; if this fails, we reuse an existing stripe and allocate
buckets for it with the reserve watermark for the given write
(RESERVE_none or RESERVE_movinggc).

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_background.h
fs/bcachefs/alloc_types.h
fs/bcachefs/buckets.h
fs/bcachefs/ec.c
fs/bcachefs/errcode.h

index c9ff590ef978f7f17445c90055cb7f349ebf6501..324798396fc6b667f608e035a78033e2c035f456 100644 (file)
@@ -216,7 +216,7 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca,
        u64 free = max_t(s64, 0,
                           u.d[BCH_DATA_free].buckets
                         + u.d[BCH_DATA_need_discard].buckets
-                        - bch2_dev_buckets_reserved(ca, RESERVE_none));
+                        - bch2_dev_buckets_reserved(ca, RESERVE_stripe));
 
        return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
 }
index c8a45ea9d66121f61a0718da37bea2fd2e19b055..4d09bd20d8ec74e9711f3b4f1f51cb7d83b9b585 100644 (file)
@@ -22,7 +22,8 @@ struct ec_bucket_buf;
        x(btree_movinggc)               \
        x(btree)                        \
        x(movinggc)                     \
-       x(none)
+       x(none)                         \
+       x(stripe)
 
 enum alloc_reserve {
 #define x(name)        RESERVE_##name,
index 22721bfea41495be64a909cd0285d999f2b5fd20..d677b0225c52bf1d9388790e8680805a124f572e 100644 (file)
@@ -157,6 +157,9 @@ static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum alloc_reser
        switch (reserve) {
        case RESERVE_NR:
                unreachable();
+       case RESERVE_stripe:
+               reserved += ca->mi.nbuckets >> 6;
+               fallthrough;
        case RESERVE_none:
                reserved += ca->mi.nbuckets >> 6;
                fallthrough;
index d206da686da8570bdf0bfacb8a64dc524ae8a136..6bf14f975d930646adb5b074ca28e36f7ed250ba 100644 (file)
@@ -1569,6 +1569,17 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri
        BUG_ON(h->s->existing_stripe.size != h->blocksize);
        BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors);
 
+       /*
+        * Free buckets we initially allocated - they might conflict with
+        * blocks from the stripe we're reusing:
+        */
+       for_each_set_bit(i, h->s->blocks_gotten, h->s->new_stripe.key.v.nr_blocks) {
+               bch2_open_bucket_put(c, c->open_buckets + h->s->blocks[i]);
+               h->s->blocks[i] = 0;
+       }
+       memset(h->s->blocks_gotten, 0, sizeof(h->s->blocks_gotten));
+       memset(h->s->blocks_allocated, 0, sizeof(h->s->blocks_allocated));
+
        for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) {
                if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) {
                        __set_bit(i, h->s->blocks_gotten);
@@ -1649,8 +1660,8 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct ec_stripe_head *h;
+       bool waiting = false;
        int ret;
-       bool needs_stripe_new;
 
        h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, reserve);
        if (!h)
@@ -1658,8 +1669,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
        if (IS_ERR_OR_NULL(h))
                return h;
 
-       needs_stripe_new = !h->s;
-       if (needs_stripe_new) {
+       if (!h->s) {
                if (ec_new_stripe_alloc(c, h)) {
                        ret = -ENOMEM;
                        bch_err(c, "failed to allocate new stripe");
@@ -1670,30 +1680,53 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
                        BUG();
        }
 
-       /*
-        * Try reserve a new stripe before reusing an
-        * existing stripe. This will prevent unnecessary
-        * read amplification during write oriented workloads.
-        */
-       ret = 0;
-       if (!h->s->allocated && !h->s->res.sectors && !h->s->have_existing_stripe)
-               ret = __bch2_ec_stripe_head_reserve(trans, h);
-       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-               goto err;
+       if (h->s->allocated)
+               goto allocated;
 
-       if (ret && needs_stripe_new)
-               ret = __bch2_ec_stripe_head_reuse(trans, h);
-       if (ret)
+       if (h->s->have_existing_stripe)
+               goto alloc_existing;
+
+       /* First, try to allocate a full stripe: */
+       ret =   new_stripe_alloc_buckets(trans, h, RESERVE_stripe, NULL) ?:
+               __bch2_ec_stripe_head_reserve(trans, h);
+       if (!ret)
+               goto allocated;
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+           bch2_err_matches(ret, ENOMEM))
                goto err;
 
-       if (!h->s->allocated) {
-               ret = new_stripe_alloc_buckets(trans, h, reserve, cl);
-               if (ret)
+       /*
+        * Not enough buckets available for a full stripe: we must reuse an
+        * existing stripe:
+        */
+       while (1) {
+               ret = __bch2_ec_stripe_head_reuse(trans, h);
+               if (!ret)
+                       break;
+               if (ret == -BCH_ERR_ENOSPC_stripe_reuse && cl)
+                       ret = -BCH_ERR_stripe_alloc_blocked;
+               if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked)
                        goto err;
 
-               h->s->allocated = true;
+               /* XXX freelist_wait? */
+               closure_wait(&c->freelist_wait, cl);
+               waiting = true;
        }
 
+       if (waiting)
+               closure_wake_up(&c->freelist_wait);
+alloc_existing:
+       /*
+        * Retry allocating buckets, with the reserve watermark for this
+        * particular write:
+        */
+       ret = new_stripe_alloc_buckets(trans, h, reserve, cl);
+       if (ret)
+               goto err;
+allocated:
+       h->s->allocated = true;
+       BUG_ON(!h->s->idx);
+
        BUG_ON(trans->restarted);
        return h;
 err:
index 6129af6129c3992148ee8a926fd7f443f7d4f87e..283303db7dfda74919cc5c0d532972c30c6c47f0 100644 (file)
@@ -93,6 +93,7 @@
        x(BCH_ERR_operation_blocked,    journal_res_get_blocked)                \
        x(BCH_ERR_operation_blocked,    journal_preres_get_blocked)             \
        x(BCH_ERR_operation_blocked,    bucket_alloc_blocked)                   \
+       x(BCH_ERR_operation_blocked,    stripe_alloc_blocked)                   \
        x(BCH_ERR_invalid,              invalid_sb)                             \
        x(BCH_ERR_invalid_sb,           invalid_sb_magic)                       \
        x(BCH_ERR_invalid_sb,           invalid_sb_version)                     \