bcachefs: Scale down number of writepoints when low on space
authorKent Overstreet <kent.overstreet@gmail.com>
Mon, 5 Nov 2018 02:55:35 +0000 (21:55 -0500)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:11 +0000 (17:08 -0400)
This means we don't have to reserve space for them when calculating
filesystem capacity.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_background.c
fs/bcachefs/alloc_background.h
fs/bcachefs/alloc_foreground.c
fs/bcachefs/alloc_foreground.h
fs/bcachefs/alloc_types.h
fs/bcachefs/bcachefs.h
fs/bcachefs/buckets.c
fs/bcachefs/buckets.h
fs/bcachefs/super.c

index 88be5f4be4b1fe85d4f556e7a6c1d79c922ddd05..1eb39283e7e2c08ac95734efd8731545e96db75a 100644 (file)
@@ -975,6 +975,7 @@ void bch2_recalc_capacity(struct bch_fs *c)
 {
        struct bch_dev *ca;
        u64 capacity = 0, reserved_sectors = 0, gc_reserve;
+       unsigned bucket_size_max = 0;
        unsigned long ra_pages = 0;
        unsigned i, j;
 
@@ -1012,12 +1013,9 @@ void bch2_recalc_capacity(struct bch_fs *c)
 
                dev_reserve += ca->free_inc.size;
 
-               dev_reserve += ARRAY_SIZE(c->write_points);
-
                dev_reserve += 1;       /* btree write point */
                dev_reserve += 1;       /* copygc write point */
                dev_reserve += 1;       /* rebalance write point */
-               dev_reserve += WRITE_POINT_COUNT;
 
                dev_reserve *= ca->mi.bucket_size;
 
@@ -1027,6 +1025,9 @@ void bch2_recalc_capacity(struct bch_fs *c)
                                             ca->mi.first_bucket);
 
                reserved_sectors += dev_reserve * 2;
+
+               bucket_size_max = max_t(unsigned, bucket_size_max,
+                                       ca->mi.bucket_size);
        }
 
        gc_reserve = c->opts.gc_reserve_bytes
@@ -1039,6 +1040,8 @@ void bch2_recalc_capacity(struct bch_fs *c)
 
        c->capacity = capacity - reserved_sectors;
 
+       c->bucket_size_max = bucket_size_max;
+
        if (c->capacity) {
                bch2_io_timer_add(&c->io_clock[READ],
                                 &c->bucket_clock[READ].rescale);
@@ -1330,8 +1333,6 @@ not_enough:
         * invalidated on disk:
         */
        if (invalidating_data) {
-               BUG();
-               pr_info("holding writes");
                pr_debug("invalidating existing data");
                set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
        } else {
@@ -1391,40 +1392,12 @@ int bch2_fs_allocator_start(struct bch_fs *c)
        return bch2_alloc_write(c);
 }
 
-void bch2_fs_allocator_init(struct bch_fs *c)
+void bch2_fs_allocator_background_init(struct bch_fs *c)
 {
-       struct open_bucket *ob;
-       struct write_point *wp;
-
-       mutex_init(&c->write_points_hash_lock);
        spin_lock_init(&c->freelist_lock);
        bch2_bucket_clock_init(c, READ);
        bch2_bucket_clock_init(c, WRITE);
 
-       /* open bucket 0 is a sentinal NULL: */
-       spin_lock_init(&c->open_buckets[0].lock);
-
-       for (ob = c->open_buckets + 1;
-            ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) {
-               spin_lock_init(&ob->lock);
-               c->open_buckets_nr_free++;
-
-               ob->freelist = c->open_buckets_freelist;
-               c->open_buckets_freelist = ob - c->open_buckets;
-       }
-
-       writepoint_init(&c->btree_write_point, BCH_DATA_BTREE);
-       writepoint_init(&c->rebalance_write_point, BCH_DATA_USER);
-
-       for (wp = c->write_points;
-            wp < c->write_points + ARRAY_SIZE(c->write_points); wp++) {
-               writepoint_init(wp, BCH_DATA_USER);
-
-               wp->last_used   = sched_clock();
-               wp->write_point = (unsigned long) wp;
-               hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point));
-       }
-
        c->pd_controllers_update_seconds = 5;
        INIT_DELAYED_WORK(&c->pd_controllers_update, pd_controllers_update);
 }
index 6dbabe83cab77e4e7df415f5f63ce87965b563c7..245e037fbaea8e4f426c2ba2142410236284cd4b 100644 (file)
@@ -57,6 +57,6 @@ int bch2_dev_allocator_start(struct bch_dev *);
 
 int bch2_alloc_write(struct bch_fs *);
 int bch2_fs_allocator_start(struct bch_fs *);
-void bch2_fs_allocator_init(struct bch_fs *);
+void bch2_fs_allocator_background_init(struct bch_fs *);
 
 #endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
index 920d9ff3c53bf0069ad4cf7b79b5348081c11b65..df74e41ec89089190235af631b89274e9d7dc2cb 100644 (file)
@@ -492,7 +492,7 @@ void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
 
        mutex_lock(&wp->lock);
        open_bucket_for_each(c, &wp->ptrs, ob, i)
-               if (ob->ptr.dev == ca->dev_idx)
+               if (!ca || ob->ptr.dev == ca->dev_idx)
                        open_bucket_free_unused(c, wp, ob);
                else
                        ob_push(c, &ptrs, ob);
@@ -501,6 +501,15 @@ void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
        mutex_unlock(&wp->lock);
 }
 
+static inline struct hlist_head *writepoint_hash(struct bch_fs *c,
+                                                unsigned long write_point)
+{
+       unsigned hash =
+               hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash)));
+
+       return &c->write_points_hash[hash];
+}
+
 static struct write_point *__writepoint_find(struct hlist_head *head,
                                             unsigned long write_point)
 {
@@ -513,6 +522,53 @@ static struct write_point *__writepoint_find(struct hlist_head *head,
        return NULL;
 }
 
+static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
+{
+       u64 stranded    = c->write_points_nr * c->bucket_size_max;
+       u64 free        = bch2_fs_sectors_free(c, bch2_fs_usage_read(c));
+
+       return stranded * factor > free;
+}
+
+static bool try_increase_writepoints(struct bch_fs *c)
+{
+       struct write_point *wp;
+
+       if (c->write_points_nr == ARRAY_SIZE(c->write_points) ||
+           too_many_writepoints(c, 32))
+               return false;
+
+       wp = c->write_points + c->write_points_nr++;
+       hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point));
+       return true;
+}
+
+static bool try_decrease_writepoints(struct bch_fs *c,
+                                    unsigned old_nr)
+{
+       struct write_point *wp;
+
+       mutex_lock(&c->write_points_hash_lock);
+       if (c->write_points_nr < old_nr) {
+               mutex_unlock(&c->write_points_hash_lock);
+               return true;
+       }
+
+       if (c->write_points_nr == 1 ||
+           !too_many_writepoints(c, 8)) {
+               mutex_unlock(&c->write_points_hash_lock);
+               return false;
+       }
+
+       wp = c->write_points + --c->write_points_nr;
+
+       hlist_del_rcu(&wp->node);
+       mutex_unlock(&c->write_points_hash_lock);
+
+       bch2_writepoint_stop(c, NULL, wp);
+       return true;
+}
+
 static struct write_point *writepoint_find(struct bch_fs *c,
                                           unsigned long write_point)
 {
@@ -536,16 +592,22 @@ lock_wp:
                mutex_unlock(&wp->lock);
                goto restart_find;
        }
-
+restart_find_oldest:
        oldest = NULL;
        for (wp = c->write_points;
-            wp < c->write_points + ARRAY_SIZE(c->write_points);
-            wp++)
+            wp < c->write_points + c->write_points_nr; wp++)
                if (!oldest || time_before64(wp->last_used, oldest->last_used))
                        oldest = wp;
 
        mutex_lock(&oldest->lock);
        mutex_lock(&c->write_points_hash_lock);
+       if (oldest >= c->write_points + c->write_points_nr ||
+           try_increase_writepoints(c)) {
+               mutex_unlock(&c->write_points_hash_lock);
+               mutex_unlock(&oldest->lock);
+               goto restart_find_oldest;
+       }
+
        wp = __writepoint_find(head, write_point);
        if (wp && wp != oldest) {
                mutex_unlock(&c->write_points_hash_lock);
@@ -581,10 +643,12 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
        unsigned nr_effective = 0;
        struct open_buckets ptrs = { .nr = 0 };
        bool have_cache = false;
+       unsigned write_points_nr;
        int ret = 0, i;
 
        BUG_ON(!nr_replicas || !nr_replicas_required);
-
+retry:
+       write_points_nr = c->write_points_nr;
        wp = writepoint_find(c, write_point.v);
 
        if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
@@ -637,6 +701,11 @@ err:
        wp->ptrs = ptrs;
 
        mutex_unlock(&wp->lock);
+
+       if (ret == -ENOSPC &&
+           try_decrease_writepoints(c, write_points_nr))
+               goto retry;
+
        return ERR_PTR(ret);
 }
 
@@ -688,3 +757,37 @@ void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp)
 
        bch2_open_buckets_put(c, &ptrs);
 }
+
+void bch2_fs_allocator_foreground_init(struct bch_fs *c)
+{
+       struct open_bucket *ob;
+       struct write_point *wp;
+
+       mutex_init(&c->write_points_hash_lock);
+       c->write_points_nr = ARRAY_SIZE(c->write_points);
+
+       /* open bucket 0 is a sentinel NULL: */
+       spin_lock_init(&c->open_buckets[0].lock);
+
+       for (ob = c->open_buckets + 1;
+            ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) {
+               spin_lock_init(&ob->lock);
+               c->open_buckets_nr_free++;
+
+               ob->freelist = c->open_buckets_freelist;
+               c->open_buckets_freelist = ob - c->open_buckets;
+       }
+
+       writepoint_init(&c->btree_write_point, BCH_DATA_BTREE);
+       writepoint_init(&c->rebalance_write_point, BCH_DATA_USER);
+
+       for (wp = c->write_points;
+            wp < c->write_points + c->write_points_nr; wp++) {
+               writepoint_init(wp, BCH_DATA_USER);
+
+               wp->last_used   = sched_clock();
+               wp->write_point = (unsigned long) wp;
+               hlist_add_head_rcu(&wp->node,
+                                  writepoint_hash(c, wp->write_point));
+       }
+}
index 636fe686dc481370ee970dd67ab0fe7d16c81280..6672101cbe26af81f2eb13f5f985186679a19a7f 100644 (file)
@@ -91,15 +91,6 @@ void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);
 void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *,
                          struct write_point *);
 
-static inline struct hlist_head *writepoint_hash(struct bch_fs *c,
-                                                unsigned long write_point)
-{
-       unsigned hash =
-               hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash)));
-
-       return &c->write_points_hash[hash];
-}
-
 static inline struct write_point_specifier writepoint_hashed(unsigned long v)
 {
        return (struct write_point_specifier) { .v = v | 1 };
@@ -117,4 +108,6 @@ static inline void writepoint_init(struct write_point *wp,
        wp->type = type;
 }
 
+void bch2_fs_allocator_foreground_init(struct bch_fs *);
+
 #endif /* _BCACHEFS_ALLOC_FOREGROUND_H */
index e0306d68ae9fda49f058adf876edafc1b86b717c..2a9c6f0344ed9b6f4dbbaad9a3104fd3e93ac925 100644 (file)
@@ -46,7 +46,9 @@ typedef FIFO(long)    alloc_fifo;
 
 /* Enough for 16 cache devices, 2 tiers and some left over for pipelining */
 #define OPEN_BUCKETS_COUNT     256
-#define WRITE_POINT_COUNT      32
+
+#define WRITE_POINT_HASH_NR    32
+#define WRITE_POINT_MAX                32
 
 struct open_bucket {
        spinlock_t              lock;
index 95d505aaf82ffd98f9bd65b3c61b7e7adb41300c..5665b93f200b75ef855ed62778ccb9305ca9f691 100644 (file)
@@ -601,6 +601,7 @@ struct bch_fs {
         * and forces them to be revalidated
         */
        u32                     capacity_gen;
+       unsigned                bucket_size_max;
 
        atomic64_t              sectors_available;
 
@@ -630,9 +631,10 @@ struct bch_fs {
        struct write_point      btree_write_point;
        struct write_point      rebalance_write_point;
 
-       struct write_point      write_points[WRITE_POINT_COUNT];
-       struct hlist_head       write_points_hash[WRITE_POINT_COUNT];
+       struct write_point      write_points[WRITE_POINT_MAX];
+       struct hlist_head       write_points_hash[WRITE_POINT_HASH_NR];
        struct mutex            write_points_hash_lock;
+       unsigned                write_points_nr;
 
        /* GARBAGE COLLECTION */
        struct task_struct      *gc_thread;
index 6f40c4bd16ec1c9620df61481d1b24c736c57afb..cfbe3ed41d0e0da67f75366c23b5baed28e51ab0 100644 (file)
@@ -300,11 +300,6 @@ u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
        return min(c->capacity, __bch2_fs_sectors_used(c, stats));
 }
 
-static u64 bch2_fs_sectors_free(struct bch_fs *c, struct bch_fs_usage stats)
-{
-       return c->capacity - bch2_fs_sectors_used(c, stats);
-}
-
 static inline int is_unavailable_bucket(struct bucket_mark m)
 {
        return !is_available_bucket(m);
index e22c51972c3109ef22f89c568fa01fa5a6cb6af6..c40ffe862a0632abfe496ba0ca1a75d80dc298c6 100644 (file)
@@ -175,6 +175,12 @@ void bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
 
 u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
 
+static inline u64 bch2_fs_sectors_free(struct bch_fs *c,
+                                      struct bch_fs_usage stats)
+{
+       return c->capacity - bch2_fs_sectors_used(c, stats);
+}
+
 static inline bool is_available_bucket(struct bucket_mark mark)
 {
        return (!mark.owned_by_allocator &&
index 9d9d4fb8348b1ba2f3b9a263b24dca6d4d94871f..a2ee698970a80b7bf8548f96bd04dfd209443363 100644 (file)
@@ -524,7 +524,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
        for (i = 0; i < BCH_TIME_STAT_NR; i++)
                bch2_time_stats_init(&c->times[i]);
 
-       bch2_fs_allocator_init(c);
+       bch2_fs_allocator_background_init(c);
+       bch2_fs_allocator_foreground_init(c);
        bch2_fs_rebalance_init(c);
        bch2_fs_quota_init(c);