Commit | Line | Data |
---|---|---|
1c6fdbd8 KO |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Code for manipulating bucket marks for garbage collection. | |
4 | * | |
5 | * Copyright 2014 Datera, Inc. | |
1c6fdbd8 KO |
6 | */ |
7 | ||
8 | #include "bcachefs.h" | |
7b3f84ea | 9 | #include "alloc_background.h" |
a8c752bb | 10 | #include "backpointers.h" |
b35b1925 | 11 | #include "bset.h" |
1c6fdbd8 | 12 | #include "btree_gc.h" |
b35b1925 | 13 | #include "btree_update.h" |
1c6fdbd8 | 14 | #include "buckets.h" |
21aec962 | 15 | #include "buckets_waiting_for_journal.h" |
1d16c605 | 16 | #include "disk_accounting.h" |
cd575ddf | 17 | #include "ec.h" |
1c6fdbd8 | 18 | #include "error.h" |
3e52c222 | 19 | #include "inode.h" |
1c6fdbd8 | 20 | #include "movinggc.h" |
161d1383 | 21 | #include "rebalance.h" |
dfc276df | 22 | #include "recovery.h" |
eb73e777 | 23 | #include "recovery_passes.h" |
890b74f0 | 24 | #include "reflink.h" |
7ef2a73a | 25 | #include "replicas.h" |
14b393ee | 26 | #include "subvolume.h" |
1c6fdbd8 KO |
27 | #include "trace.h" |
28 | ||
29 | #include <linux/preempt.h> | |
30 | ||
ed80c569 | 31 | void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage) |
955ba7b5 KO |
32 | { |
33 | for (unsigned i = 0; i < BCH_DATA_NR; i++) | |
34 | usage->buckets[i] = percpu_u64_get(&ca->usage->d[i].buckets); | |
35 | } | |
36 | ||
37 | void bch2_dev_usage_full_read_fast(struct bch_dev *ca, struct bch_dev_usage_full *usage) | |
1c6fdbd8 | 38 | { |
f5095b9f | 39 | memset(usage, 0, sizeof(*usage)); |
345731a3 KO |
40 | acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage, |
41 | sizeof(struct bch_dev_usage_full) / sizeof(u64)); | |
1c6fdbd8 KO |
42 | } |
43 | ||
1c6fdbd8 KO |
44 | static u64 reserve_factor(u64 r) |
45 | { | |
46 | return r + (round_up(r, (1 << RESERVE_FACTOR)) >> RESERVE_FACTOR); | |
47 | } | |
48 | ||
768ac639 KO |
49 | static struct bch_fs_usage_short |
50 | __bch2_fs_usage_read_short(struct bch_fs *c) | |
51 | { | |
52 | struct bch_fs_usage_short ret; | |
53 | u64 data, reserved; | |
54 | ||
55 | ret.capacity = c->capacity - | |
8bb8d683 | 56 | percpu_u64_get(&c->usage->hidden); |
768ac639 | 57 | |
8bb8d683 KO |
58 | data = percpu_u64_get(&c->usage->data) + |
59 | percpu_u64_get(&c->usage->btree); | |
60 | reserved = percpu_u64_get(&c->usage->reserved) + | |
5e82a9a1 | 61 | percpu_u64_get(c->online_reserved); |
768ac639 KO |
62 | |
63 | ret.used = min(ret.capacity, data + reserve_factor(reserved)); | |
64 | ret.free = ret.capacity - ret.used; | |
65 | ||
8bb8d683 | 66 | ret.nr_inodes = percpu_u64_get(&c->usage->nr_inodes); |
768ac639 KO |
67 | |
68 | return ret; | |
69 | } | |
70 | ||
5663a415 KO |
71 | struct bch_fs_usage_short |
72 | bch2_fs_usage_read_short(struct bch_fs *c) | |
73 | { | |
5663a415 KO |
74 | struct bch_fs_usage_short ret; |
75 | ||
768ac639 KO |
76 | percpu_down_read(&c->mark_lock); |
77 | ret = __bch2_fs_usage_read_short(c); | |
78 | percpu_up_read(&c->mark_lock); | |
5663a415 KO |
79 | |
80 | return ret; | |
1c6fdbd8 KO |
81 | } |
82 | ||
1a9e219d KO |
83 | void bch2_dev_usage_to_text(struct printbuf *out, |
84 | struct bch_dev *ca, | |
955ba7b5 | 85 | struct bch_dev_usage_full *usage) |
ed0cd515 | 86 | { |
8ed4ba36 KO |
87 | if (out->nr_tabstops < 5) { |
88 | printbuf_tabstops_reset(out); | |
89 | printbuf_tabstop_push(out, 12); | |
90 | printbuf_tabstop_push(out, 16); | |
91 | printbuf_tabstop_push(out, 16); | |
92 | printbuf_tabstop_push(out, 16); | |
93 | printbuf_tabstop_push(out, 16); | |
94 | } | |
95 | ||
7423330e | 96 | prt_printf(out, "\tbuckets\rsectors\rfragmented\r\n"); |
ed0cd515 KO |
97 | |
98 | for (unsigned i = 0; i < BCH_DATA_NR; i++) { | |
e58f963c | 99 | bch2_prt_data_type(out, i); |
7423330e | 100 | prt_printf(out, "\t%llu\r%llu\r%llu\r\n", |
1a9e219d KO |
101 | usage->d[i].buckets, |
102 | usage->d[i].sectors, | |
103 | usage->d[i].fragmented); | |
ed0cd515 | 104 | } |
1a9e219d KO |
105 | |
106 | prt_printf(out, "capacity\t%llu\r\n", ca->mi.nbuckets); | |
ed0cd515 KO |
107 | } |
108 | ||
f9035b0c KO |
109 | static int bch2_check_fix_ptr(struct btree_trans *trans, |
110 | struct bkey_s_c k, | |
111 | struct extent_ptr_decoded p, | |
112 | const union bch_extent_entry *entry, | |
113 | bool *do_update) | |
f40d13f9 KO |
114 | { |
115 | struct bch_fs *c = trans->c; | |
f40d13f9 KO |
116 | struct printbuf buf = PRINTBUF; |
117 | int ret = 0; | |
118 | ||
f9035b0c KO |
119 | struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); |
120 | if (!ca) { | |
52df04f0 KO |
121 | if (fsck_err_on(p.ptr.dev != BCH_SB_MEMBER_INVALID, |
122 | trans, ptr_to_invalid_device, | |
123 | "pointer to missing device %u\n" | |
124 | "while marking %s", | |
125 | p.ptr.dev, | |
126 | (printbuf_reset(&buf), | |
127 | bch2_bkey_val_to_text(&buf, c, k), buf.buf))) | |
f9035b0c KO |
128 | *do_update = true; |
129 | return 0; | |
130 | } | |
f40d13f9 | 131 | |
f9035b0c | 132 | struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); |
9432e90d | 133 | if (!g) { |
a850bde6 | 134 | if (fsck_err(trans, ptr_to_invalid_device, |
9432e90d KO |
135 | "pointer to invalid bucket on device %u\n" |
136 | "while marking %s", | |
137 | p.ptr.dev, | |
138 | (printbuf_reset(&buf), | |
139 | bch2_bkey_val_to_text(&buf, c, k), buf.buf))) | |
140 | *do_update = true; | |
141 | goto out; | |
142 | } | |
143 | ||
f9035b0c KO |
144 | enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry); |
145 | ||
146 | if (fsck_err_on(!g->gen_valid, | |
a850bde6 | 147 | trans, ptr_to_missing_alloc_key, |
f9035b0c KO |
148 | "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n" |
149 | "while marking %s", | |
150 | p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), | |
151 | bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), | |
152 | p.ptr.gen, | |
153 | (printbuf_reset(&buf), | |
154 | bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { | |
155 | if (!p.ptr.cached) { | |
156 | g->gen_valid = true; | |
157 | g->gen = p.ptr.gen; | |
158 | } else { | |
9b133c0d | 159 | /* this pointer will be dropped */ |
f9035b0c | 160 | *do_update = true; |
9b133c0d | 161 | goto out; |
2f4b4a3b | 162 | } |
f9035b0c | 163 | } |
2f4b4a3b | 164 | |
9b133c0d KO |
165 | /* g->gen_valid == true */ |
166 | ||
f9035b0c | 167 | if (fsck_err_on(gen_cmp(p.ptr.gen, g->gen) > 0, |
a850bde6 | 168 | trans, ptr_gen_newer_than_bucket_gen, |
f9035b0c KO |
169 | "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n" |
170 | "while marking %s", | |
171 | p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), | |
172 | bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), | |
173 | p.ptr.gen, g->gen, | |
174 | (printbuf_reset(&buf), | |
175 | bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { | |
176 | if (!p.ptr.cached && | |
177 | (g->data_type != BCH_DATA_btree || | |
178 | data_type == BCH_DATA_btree)) { | |
9b133c0d | 179 | g->data_type = data_type; |
55f7962d | 180 | g->stripe_sectors = 0; |
f9035b0c KO |
181 | g->dirty_sectors = 0; |
182 | g->cached_sectors = 0; | |
f40d13f9 | 183 | } |
9b133c0d KO |
184 | |
185 | *do_update = true; | |
f9035b0c | 186 | } |
f40d13f9 | 187 | |
f9035b0c | 188 | if (fsck_err_on(gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX, |
a850bde6 | 189 | trans, ptr_gen_newer_than_bucket_gen, |
f9035b0c KO |
190 | "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" |
191 | "while marking %s", | |
192 | p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen, | |
193 | bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), | |
194 | p.ptr.gen, | |
195 | (printbuf_reset(&buf), | |
196 | bch2_bkey_val_to_text(&buf, c, k), buf.buf))) | |
197 | *do_update = true; | |
198 | ||
199 | if (fsck_err_on(!p.ptr.cached && gen_cmp(p.ptr.gen, g->gen) < 0, | |
a850bde6 | 200 | trans, stale_dirty_ptr, |
f9035b0c KO |
201 | "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n" |
202 | "while marking %s", | |
203 | p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), | |
204 | bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), | |
205 | p.ptr.gen, g->gen, | |
206 | (printbuf_reset(&buf), | |
207 | bch2_bkey_val_to_text(&buf, c, k), buf.buf))) | |
208 | *do_update = true; | |
209 | ||
210 | if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen) | |
211 | goto out; | |
212 | ||
213 | if (fsck_err_on(bucket_data_type_mismatch(g->data_type, data_type), | |
a850bde6 | 214 | trans, ptr_bucket_data_type_mismatch, |
f9035b0c KO |
215 | "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" |
216 | "while marking %s", | |
217 | p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen, | |
218 | bch2_data_type_str(g->data_type), | |
219 | bch2_data_type_str(data_type), | |
220 | (printbuf_reset(&buf), | |
221 | bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { | |
9b133c0d KO |
222 | if (!p.ptr.cached && |
223 | data_type == BCH_DATA_btree) { | |
d21262d4 KO |
224 | switch (g->data_type) { |
225 | case BCH_DATA_sb: | |
226 | bch_err(c, "btree and superblock in the same bucket - cannot repair"); | |
09b9c72b | 227 | ret = bch_err_throw(c, fsck_repair_unimplemented); |
d21262d4 KO |
228 | goto out; |
229 | case BCH_DATA_journal: | |
230 | ret = bch2_dev_journal_bucket_delete(ca, PTR_BUCKET_NR(ca, &p.ptr)); | |
231 | bch_err_msg(c, ret, "error deleting journal bucket %zu", | |
232 | PTR_BUCKET_NR(ca, &p.ptr)); | |
233 | if (ret) | |
234 | goto out; | |
235 | break; | |
236 | } | |
237 | ||
f9035b0c | 238 | g->data_type = data_type; |
55f7962d | 239 | g->stripe_sectors = 0; |
f9035b0c KO |
240 | g->dirty_sectors = 0; |
241 | g->cached_sectors = 0; | |
242 | } else { | |
243 | *do_update = true; | |
f40d13f9 | 244 | } |
f9035b0c KO |
245 | } |
246 | ||
247 | if (p.has_ec) { | |
248 | struct gc_stripe *m = genradix_ptr(&c->gc_stripes, p.ec.idx); | |
f40d13f9 | 249 | |
9432e90d | 250 | if (fsck_err_on(!m || !m->alive, |
a850bde6 | 251 | trans, ptr_to_missing_stripe, |
f9035b0c | 252 | "pointer to nonexistent stripe %llu\n" |
f40d13f9 | 253 | "while marking %s", |
f9035b0c | 254 | (u64) p.ec.idx, |
f40d13f9 KO |
255 | (printbuf_reset(&buf), |
256 | bch2_bkey_val_to_text(&buf, c, k), buf.buf))) | |
f9035b0c | 257 | *do_update = true; |
f40d13f9 | 258 | |
9432e90d | 259 | if (fsck_err_on(m && m->alive && !bch2_ptr_matches_stripe_m(m, p), |
a850bde6 | 260 | trans, ptr_to_incorrect_stripe, |
f9035b0c | 261 | "pointer does not match stripe %llu\n" |
f40d13f9 | 262 | "while marking %s", |
f9035b0c | 263 | (u64) p.ec.idx, |
f40d13f9 KO |
264 | (printbuf_reset(&buf), |
265 | bch2_bkey_val_to_text(&buf, c, k), buf.buf))) | |
f9035b0c KO |
266 | *do_update = true; |
267 | } | |
268 | out: | |
269 | fsck_err: | |
270 | bch2_dev_put(ca); | |
271 | printbuf_exit(&buf); | |
272 | return ret; | |
273 | } | |
f40d13f9 | 274 | |
f9035b0c KO |
275 | int bch2_check_fix_ptrs(struct btree_trans *trans, |
276 | enum btree_id btree, unsigned level, struct bkey_s_c k, | |
277 | enum btree_iter_update_trigger_flags flags) | |
278 | { | |
279 | struct bch_fs *c = trans->c; | |
280 | struct bkey_ptrs_c ptrs_c = bch2_bkey_ptrs_c(k); | |
281 | const union bch_extent_entry *entry_c; | |
282 | struct extent_ptr_decoded p = { 0 }; | |
283 | bool do_update = false; | |
284 | struct printbuf buf = PRINTBUF; | |
285 | int ret = 0; | |
f40d13f9 | 286 | |
66b7c51c KO |
287 | /* We don't yet do btree key updates correctly for when we're RW */ |
288 | BUG_ON(test_bit(BCH_FS_rw, &c->flags)); | |
289 | ||
f9035b0c KO |
290 | bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) { |
291 | ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update); | |
292 | if (ret) | |
293 | goto err; | |
f40d13f9 KO |
294 | } |
295 | ||
296 | if (do_update) { | |
f40d13f9 KO |
297 | struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); |
298 | ret = PTR_ERR_OR_ZERO(new); | |
299 | if (ret) | |
300 | goto err; | |
301 | ||
18dad454 KO |
302 | scoped_guard(rcu) |
303 | bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, !bch2_dev_exists(c, ptr->dev)); | |
2f4b4a3b | 304 | |
f40d13f9 KO |
305 | if (level) { |
306 | /* | |
307 | * We don't want to drop btree node pointers - if the | |
308 | * btree node isn't there anymore, the read path will | |
309 | * sort it out: | |
310 | */ | |
311 | struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); | |
18dad454 KO |
312 | scoped_guard(rcu) |
313 | bkey_for_each_ptr(ptrs, ptr) { | |
314 | struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev); | |
315 | ptr->gen = PTR_GC_BUCKET(ca, ptr)->gen; | |
316 | } | |
f40d13f9 KO |
317 | } else { |
318 | struct bkey_ptrs ptrs; | |
319 | union bch_extent_entry *entry; | |
427ba555 KO |
320 | |
321 | rcu_read_lock(); | |
f40d13f9 KO |
322 | restart_drop_ptrs: |
323 | ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); | |
324 | bkey_for_each_ptr_decode(bkey_i_to_s(new).k, ptrs, p, entry) { | |
2f4b4a3b | 325 | struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); |
f40d13f9 KO |
326 | struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); |
327 | enum bch_data_type data_type = bch2_bkey_ptr_data_type(bkey_i_to_s_c(new), p, entry); | |
328 | ||
329 | if ((p.ptr.cached && | |
330 | (!g->gen_valid || gen_cmp(p.ptr.gen, g->gen) > 0)) || | |
331 | (!p.ptr.cached && | |
332 | gen_cmp(p.ptr.gen, g->gen) < 0) || | |
333 | gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX || | |
334 | (g->data_type && | |
335 | g->data_type != data_type)) { | |
336 | bch2_bkey_drop_ptr(bkey_i_to_s(new), &entry->ptr); | |
337 | goto restart_drop_ptrs; | |
338 | } | |
339 | } | |
2f4b4a3b | 340 | rcu_read_unlock(); |
f40d13f9 KO |
341 | again: |
342 | ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); | |
343 | bkey_extent_entry_for_each(ptrs, entry) { | |
344 | if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr) { | |
345 | struct gc_stripe *m = genradix_ptr(&c->gc_stripes, | |
346 | entry->stripe_ptr.idx); | |
347 | union bch_extent_entry *next_ptr; | |
348 | ||
349 | bkey_extent_entry_for_each_from(ptrs, next_ptr, entry) | |
350 | if (extent_entry_type(next_ptr) == BCH_EXTENT_ENTRY_ptr) | |
351 | goto found; | |
352 | next_ptr = NULL; | |
353 | found: | |
354 | if (!next_ptr) { | |
355 | bch_err(c, "aieee, found stripe ptr with no data ptr"); | |
356 | continue; | |
357 | } | |
358 | ||
359 | if (!m || !m->alive || | |
360 | !__bch2_ptr_matches_stripe(&m->ptrs[entry->stripe_ptr.block], | |
361 | &next_ptr->ptr, | |
362 | m->sectors)) { | |
363 | bch2_bkey_extent_entry_drop(new, entry); | |
364 | goto again; | |
365 | } | |
366 | } | |
367 | } | |
368 | } | |
369 | ||
370 | if (0) { | |
371 | printbuf_reset(&buf); | |
372 | bch2_bkey_val_to_text(&buf, c, k); | |
373 | bch_info(c, "updated %s", buf.buf); | |
374 | ||
375 | printbuf_reset(&buf); | |
376 | bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(new)); | |
377 | bch_info(c, "new key %s", buf.buf); | |
378 | } | |
379 | ||
66b7c51c KO |
380 | if (!(flags & BTREE_TRIGGER_is_root)) { |
381 | struct btree_iter iter; | |
382 | bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level, | |
383 | BTREE_ITER_intent|BTREE_ITER_all_snapshots); | |
384 | ret = bch2_btree_iter_traverse(trans, &iter) ?: | |
385 | bch2_trans_update(trans, &iter, new, | |
386 | BTREE_UPDATE_internal_snapshot_node| | |
387 | BTREE_TRIGGER_norun); | |
388 | bch2_trans_iter_exit(trans, &iter); | |
389 | if (ret) | |
390 | goto err; | |
391 | ||
392 | if (level) | |
393 | bch2_btree_node_update_key_early(trans, btree, level - 1, k, new); | |
394 | } else { | |
395 | struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, | |
396 | jset_u64s(new->k.u64s)); | |
397 | ret = PTR_ERR_OR_ZERO(e); | |
398 | if (ret) | |
399 | goto err; | |
400 | ||
401 | journal_entry_set(e, | |
402 | BCH_JSET_ENTRY_btree_root, | |
403 | btree, level - 1, | |
404 | new, new->k.u64s); | |
405 | ||
406 | /* | |
407 | * no locking, we're single threaded and not rw yet, see | |
408 | * the big assertino above that we repeat here: | |
409 | */ | |
410 | BUG_ON(test_bit(BCH_FS_rw, &c->flags)); | |
f40d13f9 | 411 | |
66b7c51c KO |
412 | struct btree *b = bch2_btree_id_root(c, btree)->b; |
413 | bkey_copy(&b->key, new); | |
414 | } | |
f40d13f9 KO |
415 | } |
416 | err: | |
f40d13f9 KO |
417 | printbuf_exit(&buf); |
418 | return ret; | |
419 | } | |
420 | ||
6d77ce4a KO |
421 | static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf, |
422 | struct bkey_s_c k, bool insert, enum bch_sb_error_id id) | |
423 | { | |
424 | struct bch_fs *c = trans->c; | |
6d77ce4a KO |
425 | |
426 | prt_printf(buf, "\nwhile marking "); | |
427 | bch2_bkey_val_to_text(buf, c, k); | |
428 | prt_newline(buf); | |
429 | ||
20853251 | 430 | bool print = __bch2_count_fsck_err(c, id, buf); |
6d77ce4a | 431 | |
d4b30ed9 KO |
432 | int ret = bch2_run_explicit_recovery_pass(c, buf, |
433 | BCH_RECOVERY_PASS_check_allocations, 0); | |
6d77ce4a KO |
434 | |
435 | if (insert) { | |
6d77ce4a KO |
436 | bch2_trans_updates_to_text(buf, trans); |
437 | __bch2_inconsistent_error(c, buf); | |
ff875d4b KO |
438 | /* |
439 | * If we're in recovery, run_explicit_recovery_pass might give | |
440 | * us an error code for rewinding recovery | |
441 | */ | |
442 | if (!ret) | |
09b9c72b | 443 | ret = bch_err_throw(c, bucket_ref_update); |
ff875d4b KO |
444 | } else { |
445 | /* Always ignore overwrite errors, so that deletion works */ | |
446 | ret = 0; | |
6d77ce4a KO |
447 | } |
448 | ||
20853251 | 449 | if (print || insert) |
ebf561b2 | 450 | bch2_print_str(c, KERN_ERR, buf->buf); |
6d77ce4a KO |
451 | return ret; |
452 | } | |
453 | ||
07d7c4da KO |
454 | int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, |
455 | struct bkey_s_c k, | |
456 | const struct bch_extent_ptr *ptr, | |
457 | s64 sectors, enum bch_data_type ptr_data_type, | |
458 | u8 b_gen, u8 bucket_data_type, | |
459 | u32 *bucket_sectors) | |
932aa837 | 460 | { |
c1f59ef6 | 461 | struct bch_fs *c = trans->c; |
f443fa66 | 462 | size_t bucket_nr = PTR_BUCKET_NR(ca, ptr); |
fa8e94fa | 463 | struct printbuf buf = PRINTBUF; |
70e3e039 | 464 | bool inserting = sectors > 0; |
fa8e94fa | 465 | int ret = 0; |
9ef846a7 | 466 | |
70e3e039 KO |
467 | BUG_ON(!sectors); |
468 | ||
6d77ce4a KO |
469 | if (unlikely(gen_after(ptr->gen, b_gen))) { |
470 | bch2_log_msg_start(c, &buf); | |
471 | prt_printf(&buf, | |
472 | "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen", | |
f443fa66 | 473 | ptr->dev, bucket_nr, b_gen, |
e58f963c | 474 | bch2_data_type_str(bucket_data_type ?: ptr_data_type), |
6d77ce4a KO |
475 | ptr->gen); |
476 | ||
477 | ret = bucket_ref_update_err(trans, &buf, k, inserting, | |
478 | BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen); | |
70e3e039 | 479 | goto out; |
9ef846a7 KO |
480 | } |
481 | ||
6d77ce4a KO |
482 | if (unlikely(gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX)) { |
483 | bch2_log_msg_start(c, &buf); | |
484 | prt_printf(&buf, | |
485 | "bucket %u:%zu gen %u data type %s: ptr gen %u too stale", | |
f443fa66 | 486 | ptr->dev, bucket_nr, b_gen, |
e58f963c | 487 | bch2_data_type_str(bucket_data_type ?: ptr_data_type), |
6d77ce4a KO |
488 | ptr->gen); |
489 | ||
490 | ret = bucket_ref_update_err(trans, &buf, k, inserting, | |
491 | BCH_FSCK_ERR_ptr_too_stale); | |
70e3e039 | 492 | goto out; |
9ef846a7 KO |
493 | } |
494 | ||
70e3e039 KO |
495 | if (b_gen != ptr->gen && ptr->cached) { |
496 | ret = 1; | |
497 | goto out; | |
498 | } | |
499 | ||
6d77ce4a KO |
500 | if (unlikely(b_gen != ptr->gen)) { |
501 | bch2_log_msg_start(c, &buf); | |
502 | prt_printf(&buf, | |
503 | "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)", | |
f443fa66 | 504 | ptr->dev, bucket_nr, b_gen, |
6f692b16 | 505 | bucket_gen_get(ca, bucket_nr), |
e58f963c | 506 | bch2_data_type_str(bucket_data_type ?: ptr_data_type), |
6d77ce4a KO |
507 | ptr->gen); |
508 | ||
509 | ret = bucket_ref_update_err(trans, &buf, k, inserting, | |
510 | BCH_FSCK_ERR_stale_dirty_ptr); | |
c1f59ef6 | 511 | goto out; |
fa8e94fa | 512 | } |
9ef846a7 | 513 | |
6d77ce4a KO |
514 | if (unlikely(bucket_data_type_mismatch(bucket_data_type, ptr_data_type))) { |
515 | bch2_log_msg_start(c, &buf); | |
516 | prt_printf(&buf, "bucket %u:%zu gen %u different types of data in same bucket: %s, %s", | |
517 | ptr->dev, bucket_nr, b_gen, | |
518 | bch2_data_type_str(bucket_data_type), | |
519 | bch2_data_type_str(ptr_data_type)); | |
520 | ||
521 | ret = bucket_ref_update_err(trans, &buf, k, inserting, | |
522 | BCH_FSCK_ERR_ptr_bucket_data_type_mismatch); | |
70e3e039 | 523 | goto out; |
9ef846a7 KO |
524 | } |
525 | ||
6d77ce4a KO |
526 | if (unlikely((u64) *bucket_sectors + sectors > U32_MAX)) { |
527 | bch2_log_msg_start(c, &buf); | |
528 | prt_printf(&buf, | |
529 | "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX", | |
f443fa66 | 530 | ptr->dev, bucket_nr, b_gen, |
e58f963c | 531 | bch2_data_type_str(bucket_data_type ?: ptr_data_type), |
6d77ce4a KO |
532 | *bucket_sectors, sectors); |
533 | ||
534 | ret = bucket_ref_update_err(trans, &buf, k, inserting, | |
535 | BCH_FSCK_ERR_bucket_sector_count_overflow); | |
70e3e039 | 536 | sectors = -*bucket_sectors; |
6d77ce4a | 537 | goto out; |
9ef846a7 | 538 | } |
70e3e039 KO |
539 | |
540 | *bucket_sectors += sectors; | |
c1f59ef6 | 541 | out: |
fa8e94fa KO |
542 | printbuf_exit(&buf); |
543 | return ret; | |
39283c71 KO |
544 | } |
545 | ||
5b14ce35 | 546 | void bch2_trans_account_disk_usage_change(struct btree_trans *trans) |
35d5aff2 KO |
547 | { |
548 | struct bch_fs *c = trans->c; | |
5b14ce35 | 549 | u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0; |
35d5aff2 KO |
550 | static int warned_disk_usage = 0; |
551 | bool warn = false; | |
35d5aff2 | 552 | |
58e1ea4b | 553 | percpu_down_read(&c->mark_lock); |
5b14ce35 | 554 | struct bch_fs_usage_base *src = &trans->fs_usage_delta; |
35d5aff2 | 555 | |
5b14ce35 | 556 | s64 added = src->btree + src->data + src->reserved; |
35d5aff2 KO |
557 | |
558 | /* | |
559 | * Not allowed to reduce sectors_available except by getting a | |
560 | * reservation: | |
561 | */ | |
5b14ce35 | 562 | s64 should_not_have_added = added - (s64) disk_res_sectors; |
35d5aff2 | 563 | if (unlikely(should_not_have_added > 0)) { |
68573b93 | 564 | u64 old, new; |
290448ed | 565 | |
68573b93 | 566 | old = atomic64_read(&c->sectors_available); |
290448ed | 567 | do { |
290448ed | 568 | new = max_t(s64, 0, old - should_not_have_added); |
68573b93 UB |
569 | } while (!atomic64_try_cmpxchg(&c->sectors_available, |
570 | &old, new)); | |
290448ed | 571 | |
35d5aff2 KO |
572 | added -= should_not_have_added; |
573 | warn = true; | |
574 | } | |
575 | ||
576 | if (added > 0) { | |
577 | trans->disk_res->sectors -= added; | |
578 | this_cpu_sub(*c->online_reserved, added); | |
579 | } | |
580 | ||
fb23d57a KO |
581 | preempt_disable(); |
582 | struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); | |
583 | acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64)); | |
35d5aff2 | 584 | preempt_enable(); |
58e1ea4b | 585 | percpu_up_read(&c->mark_lock); |
35d5aff2 KO |
586 | |
587 | if (unlikely(warn) && !xchg(&warned_disk_usage, 1)) | |
ad5d3d82 | 588 | bch2_trans_inconsistent(trans, |
df94cb2e | 589 | "disk usage increased %lli more than %llu sectors reserved)", |
ad5d3d82 | 590 | should_not_have_added, disk_res_sectors); |
5b14ce35 KO |
591 | } |
592 | ||
1f34c21b | 593 | /* KEY_TYPE_extent: */ |
932aa837 | 594 | |
07d7c4da | 595 | static int __mark_pointer(struct btree_trans *trans, struct bch_dev *ca, |
1f34c21b | 596 | struct bkey_s_c k, |
55f7962d | 597 | const struct extent_ptr_decoded *p, |
1f34c21b | 598 | s64 sectors, enum bch_data_type ptr_data_type, |
f859bc94 KO |
599 | struct bch_alloc_v4 *a, |
600 | bool insert) | |
1f34c21b | 601 | { |
55f7962d KO |
602 | u32 *dst_sectors = p->has_ec ? &a->stripe_sectors : |
603 | !p->ptr.cached ? &a->dirty_sectors : | |
604 | &a->cached_sectors; | |
605 | int ret = bch2_bucket_ref_update(trans, ca, k, &p->ptr, sectors, ptr_data_type, | |
be11ae16 | 606 | a->gen, a->data_type, dst_sectors); |
1f34c21b KO |
607 | |
608 | if (ret) | |
609 | return ret; | |
f859bc94 KO |
610 | if (insert) |
611 | alloc_data_type_set(a, ptr_data_type); | |
1f34c21b KO |
612 | return 0; |
613 | } | |
614 | ||
615 | static int bch2_trigger_pointer(struct btree_trans *trans, | |
616 | enum btree_id btree_id, unsigned level, | |
617 | struct bkey_s_c k, struct extent_ptr_decoded p, | |
47d2080e | 618 | const union bch_extent_entry *entry, |
5dd8c60e KO |
619 | s64 *sectors, |
620 | enum btree_iter_update_trigger_flags flags) | |
39283c71 | 621 | { |
934137b0 | 622 | struct bch_fs *c = trans->c; |
5dd8c60e | 623 | bool insert = !(flags & BTREE_TRIGGER_overwrite); |
9432e90d | 624 | struct printbuf buf = PRINTBUF; |
07d7c4da KO |
625 | int ret = 0; |
626 | ||
aca7a26f | 627 | struct bkey_i_backpointer bp; |
7171b1fd KO |
628 | bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp); |
629 | ||
630 | *sectors = insert ? bp.v.bucket_len : -(s64) bp.v.bucket_len; | |
aca7a26f | 631 | |
07d7c4da KO |
632 | struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); |
633 | if (unlikely(!ca)) { | |
52df04f0 | 634 | if (insert && p.ptr.dev != BCH_SB_MEMBER_INVALID) |
09b9c72b | 635 | ret = bch_err_throw(c, trigger_pointer); |
07d7c4da KO |
636 | goto err; |
637 | } | |
638 | ||
aca7a26f | 639 | struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); |
eca5b56c KO |
640 | if (!bucket_valid(ca, bucket.offset)) { |
641 | if (insert) { | |
642 | bch2_dev_bucket_missing(ca, bucket.offset); | |
09b9c72b | 643 | ret = bch_err_throw(c, trigger_pointer); |
eca5b56c KO |
644 | } |
645 | goto err; | |
646 | } | |
3a63b32f | 647 | |
5dd8c60e | 648 | if (flags & BTREE_TRIGGER_transactional) { |
e0d5bc6a | 649 | struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); |
07d7c4da | 650 | ret = PTR_ERR_OR_ZERO(a) ?: |
f859bc94 | 651 | __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &a->v, insert); |
a8c752bb | 652 | if (ret) |
07d7c4da | 653 | goto err; |
1f34c21b | 654 | |
15800f3d KO |
655 | ret = bch2_bucket_backpointer_mod(trans, k, &bp, insert); |
656 | if (ret) | |
657 | goto err; | |
1f34c21b KO |
658 | } |
659 | ||
5dd8c60e | 660 | if (flags & BTREE_TRIGGER_gc) { |
be11ae16 | 661 | struct bucket *g = gc_bucket(ca, bucket.offset); |
9432e90d KO |
662 | if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s", |
663 | p.ptr.dev, | |
664 | (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { | |
09b9c72b | 665 | ret = bch_err_throw(c, trigger_pointer); |
49f2d182 | 666 | goto err; |
9432e90d KO |
667 | } |
668 | ||
1f34c21b | 669 | bucket_lock(g); |
be11ae16 | 670 | struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old; |
f859bc94 | 671 | ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new, insert); |
fb23d57a | 672 | alloc_to_bucket(g, new); |
1f34c21b | 673 | bucket_unlock(g); |
fb23d57a KO |
674 | |
675 | if (!ret) | |
676 | ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); | |
a8c752bb | 677 | } |
07d7c4da KO |
678 | err: |
679 | bch2_dev_put(ca); | |
9432e90d | 680 | printbuf_exit(&buf); |
07d7c4da | 681 | return ret; |
932aa837 KO |
682 | } |
683 | ||
5a82ec3f | 684 | static int bch2_trigger_stripe_ptr(struct btree_trans *trans, |
1f34c21b | 685 | struct bkey_s_c k, |
5a82ec3f | 686 | struct extent_ptr_decoded p, |
1f34c21b | 687 | enum bch_data_type data_type, |
5dd8c60e KO |
688 | s64 sectors, |
689 | enum btree_iter_update_trigger_flags flags) | |
1f34c21b | 690 | { |
09b9c72b KO |
691 | struct bch_fs *c = trans->c; |
692 | ||
5dd8c60e | 693 | if (flags & BTREE_TRIGGER_transactional) { |
5a82ec3f KO |
694 | struct btree_iter iter; |
695 | struct bkey_i_stripe *s = bch2_bkey_get_mut_typed(trans, &iter, | |
696 | BTREE_ID_stripes, POS(0, p.ec.idx), | |
5dd8c60e | 697 | BTREE_ITER_with_updates, stripe); |
5a82ec3f KO |
698 | int ret = PTR_ERR_OR_ZERO(s); |
699 | if (unlikely(ret)) { | |
700 | bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans, | |
701 | "pointer to nonexistent stripe %llu", | |
702 | (u64) p.ec.idx); | |
703 | goto err; | |
704 | } | |
1f34c21b | 705 | |
5a82ec3f KO |
706 | if (!bch2_ptr_matches_stripe(&s->v, p)) { |
707 | bch2_trans_inconsistent(trans, | |
708 | "stripe pointer doesn't match stripe %llu", | |
709 | (u64) p.ec.idx); | |
09b9c72b | 710 | ret = bch_err_throw(c, trigger_stripe_pointer); |
5a82ec3f KO |
711 | goto err; |
712 | } | |
1f34c21b | 713 | |
5a82ec3f KO |
714 | stripe_blockcount_set(&s->v, p.ec.block, |
715 | stripe_blockcount_get(&s->v, p.ec.block) + | |
716 | sectors); | |
1f34c21b | 717 | |
393a05a7 KO |
718 | struct disk_accounting_pos acc; |
719 | memset(&acc, 0, sizeof(acc)); | |
720 | acc.type = BCH_DISK_ACCOUNTING_replicas; | |
1d16c605 KO |
721 | bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i)); |
722 | acc.replicas.data_type = data_type; | |
fb23d57a | 723 | ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, false); |
5a82ec3f KO |
724 | err: |
725 | bch2_trans_iter_exit(trans, &iter); | |
726 | return ret; | |
1f34c21b KO |
727 | } |
728 | ||
5dd8c60e | 729 | if (flags & BTREE_TRIGGER_gc) { |
5a82ec3f KO |
730 | struct gc_stripe *m = genradix_ptr_alloc(&c->gc_stripes, p.ec.idx, GFP_KERNEL); |
731 | if (!m) { | |
732 | bch_err(c, "error allocating memory for gc_stripes, idx %llu", | |
733 | (u64) p.ec.idx); | |
09b9c72b | 734 | return bch_err_throw(c, ENOMEM_mark_stripe_ptr); |
5a82ec3f | 735 | } |
1f34c21b | 736 | |
cc297dfb | 737 | gc_stripe_lock(m); |
1f34c21b | 738 | |
5a82ec3f | 739 | if (!m || !m->alive) { |
cc297dfb | 740 | gc_stripe_unlock(m); |
5a82ec3f | 741 | struct printbuf buf = PRINTBUF; |
6d77ce4a KO |
742 | bch2_log_msg_start(c, &buf); |
743 | prt_printf(&buf, "pointer to nonexistent stripe %llu\n while marking ", | |
744 | (u64) p.ec.idx); | |
5a82ec3f | 745 | bch2_bkey_val_to_text(&buf, c, k); |
6d77ce4a | 746 | __bch2_inconsistent_error(c, &buf); |
ebf561b2 | 747 | bch2_print_str(c, KERN_ERR, buf.buf); |
5a82ec3f | 748 | printbuf_exit(&buf); |
09b9c72b | 749 | return bch_err_throw(c, trigger_stripe_pointer); |
5a82ec3f | 750 | } |
932aa837 | 751 | |
5a82ec3f | 752 | m->block_sectors[p.ec.block] += sectors; |
2a3731e3 | 753 | |
393a05a7 KO |
754 | struct disk_accounting_pos acc; |
755 | memset(&acc, 0, sizeof(acc)); | |
756 | acc.type = BCH_DISK_ACCOUNTING_replicas; | |
7f10fde3 | 757 | unsafe_memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e), "VLA"); |
cc297dfb | 758 | gc_stripe_unlock(m); |
932aa837 | 759 | |
fb23d57a KO |
760 | acc.replicas.data_type = data_type; |
761 | int ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, true); | |
762 | if (ret) | |
763 | return ret; | |
5a82ec3f | 764 | } |
990d42d1 | 765 | |
5a82ec3f | 766 | return 0; |
932aa837 KO |
767 | } |
768 | ||
4f9ec59f KO |
769 | static int __trigger_extent(struct btree_trans *trans, |
770 | enum btree_id btree_id, unsigned level, | |
5dd8c60e | 771 | struct bkey_s_c k, |
56e5c7f6 | 772 | enum btree_iter_update_trigger_flags flags) |
1f34c21b | 773 | { |
5dd8c60e | 774 | bool gc = flags & BTREE_TRIGGER_gc; |
1f34c21b KO |
775 | struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); |
776 | const union bch_extent_entry *entry; | |
777 | struct extent_ptr_decoded p; | |
1f34c21b KO |
778 | enum bch_data_type data_type = bkey_is_btree_ptr(k.k) |
779 | ? BCH_DATA_btree | |
780 | : BCH_DATA_user; | |
4f9ec59f | 781 | int ret = 0; |
1f34c21b | 782 | |
56e5c7f6 KO |
783 | s64 replicas_sectors = 0; |
784 | ||
393a05a7 KO |
785 | struct disk_accounting_pos acc_replicas_key; |
786 | memset(&acc_replicas_key, 0, sizeof(acc_replicas_key)); | |
787 | acc_replicas_key.type = BCH_DISK_ACCOUNTING_replicas; | |
788 | acc_replicas_key.replicas.data_type = data_type; | |
789 | acc_replicas_key.replicas.nr_devs = 0; | |
790 | acc_replicas_key.replicas.nr_required = 1; | |
1f34c21b | 791 | |
f4a584f4 | 792 | unsigned cur_compression_type = 0; |
bfcaa907 KO |
793 | u64 compression_acct[3] = { 1, 0, 0 }; |
794 | ||
1f34c21b | 795 | bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { |
8528bde1 | 796 | s64 disk_sectors = 0; |
47d2080e | 797 | ret = bch2_trigger_pointer(trans, btree_id, level, k, p, entry, &disk_sectors, flags); |
1f34c21b KO |
798 | if (ret < 0) |
799 | return ret; | |
800 | ||
801 | bool stale = ret > 0; | |
802 | ||
bfcaa907 KO |
803 | if (p.ptr.cached && stale) |
804 | continue; | |
805 | ||
1f34c21b | 806 | if (p.ptr.cached) { |
bfcaa907 KO |
807 | ret = bch2_mod_dev_cached_sectors(trans, p.ptr.dev, disk_sectors, gc); |
808 | if (ret) | |
809 | return ret; | |
1f34c21b | 810 | } else if (!p.has_ec) { |
56e5c7f6 | 811 | replicas_sectors += disk_sectors; |
fa1ab1b4 | 812 | replicas_entry_add_dev(&acc_replicas_key.replicas, p.ptr.dev); |
1f34c21b | 813 | } else { |
5a82ec3f | 814 | ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags); |
1f34c21b KO |
815 | if (ret) |
816 | return ret; | |
817 | ||
818 | /* | |
819 | * There may be other dirty pointers in this extent, but | |
820 | * if so they're not required for mounting if we have an | |
821 | * erasure coded pointer in this extent: | |
822 | */ | |
bfcaa907 KO |
823 | acc_replicas_key.replicas.nr_required = 0; |
824 | } | |
825 | ||
f4a584f4 KO |
826 | if (cur_compression_type && |
827 | cur_compression_type != p.crc.compression_type) { | |
bfcaa907 KO |
828 | if (flags & BTREE_TRIGGER_overwrite) |
829 | bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct)); | |
830 | ||
f4a584f4 KO |
831 | ret = bch2_disk_accounting_mod2(trans, gc, compression_acct, |
832 | compression, cur_compression_type); | |
bfcaa907 KO |
833 | if (ret) |
834 | return ret; | |
835 | ||
836 | compression_acct[0] = 1; | |
837 | compression_acct[1] = 0; | |
838 | compression_acct[2] = 0; | |
839 | } | |
840 | ||
f4a584f4 | 841 | cur_compression_type = p.crc.compression_type; |
bfcaa907 KO |
842 | if (p.crc.compression_type) { |
843 | compression_acct[1] += p.crc.uncompressed_size; | |
844 | compression_acct[2] += p.crc.compressed_size; | |
1f34c21b KO |
845 | } |
846 | } | |
847 | ||
bfcaa907 | 848 | if (acc_replicas_key.replicas.nr_devs) { |
56e5c7f6 | 849 | ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, &replicas_sectors, 1, gc); |
bfcaa907 KO |
850 | if (ret) |
851 | return ret; | |
852 | } | |
853 | ||
6675c376 | 854 | if (acc_replicas_key.replicas.nr_devs && !level && k.k->p.snapshot) { |
56e5c7f6 | 855 | ret = bch2_disk_accounting_mod2_nr(trans, gc, &replicas_sectors, 1, snapshot, k.k->p.snapshot); |
6675c376 KO |
856 | if (ret) |
857 | return ret; | |
858 | } | |
859 | ||
f4a584f4 | 860 | if (cur_compression_type) { |
bfcaa907 KO |
861 | if (flags & BTREE_TRIGGER_overwrite) |
862 | bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct)); | |
863 | ||
f4a584f4 KO |
864 | ret = bch2_disk_accounting_mod2(trans, gc, compression_acct, |
865 | compression, cur_compression_type); | |
4f9ec59f KO |
866 | if (ret) |
867 | return ret; | |
1f34c21b KO |
868 | } |
869 | ||
6af91147 | 870 | if (level) { |
56e5c7f6 | 871 | ret = bch2_disk_accounting_mod2_nr(trans, gc, &replicas_sectors, 1, btree, btree_id); |
6af91147 KO |
872 | if (ret) |
873 | return ret; | |
58474f76 KO |
874 | } else { |
875 | bool insert = !(flags & BTREE_TRIGGER_overwrite); | |
f4a584f4 | 876 | |
58474f76 KO |
877 | s64 v[3] = { |
878 | insert ? 1 : -1, | |
879 | insert ? k.k->size : -((s64) k.k->size), | |
56e5c7f6 | 880 | replicas_sectors, |
58474f76 | 881 | }; |
f4a584f4 | 882 | ret = bch2_disk_accounting_mod2(trans, gc, v, inum, k.k->p.inode); |
58474f76 KO |
883 | if (ret) |
884 | return ret; | |
6af91147 KO |
885 | } |
886 | ||
1f34c21b KO |
887 | return 0; |
888 | } | |
889 | ||
4f9ec59f | 890 | int bch2_trigger_extent(struct btree_trans *trans, |
f40d13f9 | 891 | enum btree_id btree, unsigned level, |
4f9ec59f | 892 | struct bkey_s_c old, struct bkey_s new, |
5dd8c60e | 893 | enum btree_iter_update_trigger_flags flags) |
932aa837 | 894 | { |
49aa7830 | 895 | struct bch_fs *c = trans->c; |
f5d4481c KO |
896 | struct bkey_ptrs_c new_ptrs = bch2_bkey_ptrs_c(new.s_c); |
897 | struct bkey_ptrs_c old_ptrs = bch2_bkey_ptrs_c(old); | |
898 | unsigned new_ptrs_bytes = (void *) new_ptrs.end - (void *) new_ptrs.start; | |
899 | unsigned old_ptrs_bytes = (void *) old_ptrs.end - (void *) old_ptrs.start; | |
900 | ||
f40d13f9 KO |
901 | if (unlikely(flags & BTREE_TRIGGER_check_repair)) |
902 | return bch2_check_fix_ptrs(trans, btree, level, new.s_c, flags); | |
903 | ||
f5d4481c KO |
904 | /* if pointers aren't changing - nothing to do: */ |
905 | if (new_ptrs_bytes == old_ptrs_bytes && | |
906 | !memcmp(new_ptrs.start, | |
907 | old_ptrs.start, | |
908 | new_ptrs_bytes)) | |
909 | return 0; | |
910 | ||
49aa7830 | 911 | if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) { |
49aa7830 KO |
912 | if (old.k->type) { |
913 | int ret = __trigger_extent(trans, btree, level, old, | |
56e5c7f6 | 914 | flags & ~BTREE_TRIGGER_insert); |
49aa7830 KO |
915 | if (ret) |
916 | return ret; | |
917 | } | |
932aa837 | 918 | |
49aa7830 KO |
919 | if (new.k->type) { |
920 | int ret = __trigger_extent(trans, btree, level, new.s_c, | |
56e5c7f6 | 921 | flags & ~BTREE_TRIGGER_overwrite); |
49aa7830 KO |
922 | if (ret) |
923 | return ret; | |
924 | } | |
925 | ||
926 | int need_rebalance_delta = 0; | |
f4a584f4 | 927 | s64 need_rebalance_sectors_delta[1] = { 0 }; |
49aa7830 KO |
928 | |
929 | s64 s = bch2_bkey_sectors_need_rebalance(c, old); | |
930 | need_rebalance_delta -= s != 0; | |
f4a584f4 | 931 | need_rebalance_sectors_delta[0] -= s; |
49aa7830 | 932 | |
7f12a963 | 933 | s = bch2_bkey_sectors_need_rebalance(c, new.s_c); |
49aa7830 | 934 | need_rebalance_delta += s != 0; |
f4a584f4 | 935 | need_rebalance_sectors_delta[0] += s; |
49aa7830 KO |
936 | |
937 | if ((flags & BTREE_TRIGGER_transactional) && need_rebalance_delta) { | |
506b1876 | 938 | int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, |
49aa7830 | 939 | new.k->p, need_rebalance_delta > 0); |
37954a27 KO |
940 | if (ret) |
941 | return ret; | |
932aa837 | 942 | } |
932aa837 | 943 | |
f4a584f4 KO |
944 | if (need_rebalance_sectors_delta[0]) { |
945 | int ret = bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc, | |
946 | need_rebalance_sectors_delta, rebalance_work); | |
49aa7830 KO |
947 | if (ret) |
948 | return ret; | |
949 | } | |
950 | } | |
932aa837 | 951 | |
1f34c21b | 952 | return 0; |
932aa837 KO |
953 | } |
954 | ||
6cacd0c4 KO |
955 | /* KEY_TYPE_reservation */ |
956 | ||
957 | static int __trigger_reservation(struct btree_trans *trans, | |
5dd8c60e KO |
958 | enum btree_id btree_id, unsigned level, struct bkey_s_c k, |
959 | enum btree_iter_update_trigger_flags flags) | |
297d8934 | 960 | { |
fb23d57a | 961 | if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) { |
f4a584f4 | 962 | s64 sectors[1] = { k.k->size }; |
297d8934 | 963 | |
fb23d57a | 964 | if (flags & BTREE_TRIGGER_overwrite) |
f4a584f4 | 965 | sectors[0] = -sectors[0]; |
6cacd0c4 | 966 | |
f4a584f4 KO |
967 | return bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc, sectors, |
968 | persistent_reserved, bkey_s_c_to_reservation(k).v->nr_replicas); | |
6cacd0c4 | 969 | } |
297d8934 | 970 | |
297d8934 KO |
971 | return 0; |
972 | } | |
973 | ||
6cacd0c4 KO |
974 | int bch2_trigger_reservation(struct btree_trans *trans, |
975 | enum btree_id btree_id, unsigned level, | |
976 | struct bkey_s_c old, struct bkey_s new, | |
5dd8c60e | 977 | enum btree_iter_update_trigger_flags flags) |
523f33ef | 978 | { |
6cacd0c4 | 979 | return trigger_run_overwrite_then_insert(__trigger_reservation, trans, btree_id, level, old, new, flags); |
523f33ef KO |
980 | } |
981 | ||
6cacd0c4 KO |
982 | /* Mark superblocks: */ |
983 | ||
bfcf840d | 984 | static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, |
c281db0f | 985 | struct bch_dev *ca, u64 b, |
bfcf840d KO |
986 | enum bch_data_type type, |
987 | unsigned sectors) | |
988 | { | |
eb73e777 | 989 | struct bch_fs *c = trans->c; |
67e0dd8f | 990 | struct btree_iter iter; |
bfcf840d KO |
991 | int ret = 0; |
992 | ||
c281db0f | 993 | struct bkey_i_alloc_v4 *a = |
abe2f470 | 994 | bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(ca->dev_idx, b)); |
3d48a7f8 KO |
995 | if (IS_ERR(a)) |
996 | return PTR_ERR(a); | |
bfcf840d | 997 | |
2640faeb | 998 | if (a->v.data_type && type && a->v.data_type != type) { |
bb36a129 KO |
999 | struct printbuf buf = PRINTBUF; |
1000 | bch2_log_msg_start(c, &buf); | |
1001 | prt_printf(&buf, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" | |
1002 | "while marking %s\n", | |
1003 | iter.pos.inode, iter.pos.offset, a->v.gen, | |
1004 | bch2_data_type_str(a->v.data_type), | |
1005 | bch2_data_type_str(type), | |
1006 | bch2_data_type_str(type)); | |
1007 | ||
ff875d4b | 1008 | bch2_count_fsck_err(c, bucket_metadata_type_mismatch, &buf); |
bb36a129 | 1009 | |
ff875d4b | 1010 | ret = bch2_run_explicit_recovery_pass(c, &buf, |
d4b30ed9 | 1011 | BCH_RECOVERY_PASS_check_allocations, 0); |
bb36a129 | 1012 | |
ff875d4b KO |
1013 | /* Always print, this is always fatal */ |
1014 | bch2_print_str(c, KERN_ERR, buf.buf); | |
bb36a129 | 1015 | printbuf_exit(&buf); |
ff875d4b | 1016 | if (!ret) |
09b9c72b | 1017 | ret = bch_err_throw(c, metadata_bucket_inconsistency); |
bbe682c7 | 1018 | goto err; |
bfcf840d KO |
1019 | } |
1020 | ||
bbe682c7 KO |
1021 | if (a->v.data_type != type || |
1022 | a->v.dirty_sectors != sectors) { | |
1023 | a->v.data_type = type; | |
1024 | a->v.dirty_sectors = sectors; | |
1025 | ret = bch2_trans_update(trans, &iter, &a->k_i, 0); | |
1026 | } | |
1027 | err: | |
67e0dd8f | 1028 | bch2_trans_iter_exit(trans, &iter); |
bfcf840d KO |
1029 | return ret; |
1030 | } | |
1031 | ||
fb23d57a | 1032 | static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *ca, |
5dd8c60e KO |
1033 | u64 b, enum bch_data_type data_type, unsigned sectors, |
1034 | enum btree_iter_update_trigger_flags flags) | |
c281db0f | 1035 | { |
fb23d57a KO |
1036 | struct bch_fs *c = trans->c; |
1037 | int ret = 0; | |
1038 | ||
c02eb9e8 | 1039 | struct bucket *g = gc_bucket(ca, b); |
9432e90d KO |
1040 | if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s", |
1041 | ca->dev_idx, bch2_data_type_str(data_type))) | |
49f2d182 | 1042 | goto err; |
c281db0f KO |
1043 | |
1044 | bucket_lock(g); | |
c02eb9e8 | 1045 | struct bch_alloc_v4 old = bucket_m_to_alloc(*g); |
c281db0f KO |
1046 | |
1047 | if (bch2_fs_inconsistent_on(g->data_type && | |
1048 | g->data_type != data_type, c, | |
1049 | "different types of data in same bucket: %s, %s", | |
1050 | bch2_data_type_str(g->data_type), | |
9432e90d | 1051 | bch2_data_type_str(data_type))) |
49f2d182 | 1052 | goto err_unlock; |
c281db0f KO |
1053 | |
1054 | if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c, | |
1055 | "bucket %u:%llu gen %u data type %s sector count overflow: %u + %u > bucket size", | |
1056 | ca->dev_idx, b, g->gen, | |
1057 | bch2_data_type_str(g->data_type ?: data_type), | |
9432e90d | 1058 | g->dirty_sectors, sectors)) |
49f2d182 | 1059 | goto err_unlock; |
c281db0f KO |
1060 | |
1061 | g->data_type = data_type; | |
1062 | g->dirty_sectors += sectors; | |
c02eb9e8 | 1063 | struct bch_alloc_v4 new = bucket_m_to_alloc(*g); |
fb23d57a | 1064 | bucket_unlock(g); |
fb23d57a KO |
1065 | ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); |
1066 | return ret; | |
9432e90d | 1067 | err_unlock: |
49f2d182 KO |
1068 | bucket_unlock(g); |
1069 | err: | |
09b9c72b | 1070 | return bch_err_throw(c, metadata_bucket_inconsistency); |
c281db0f KO |
1071 | } |
1072 | ||
bfcf840d | 1073 | int bch2_trans_mark_metadata_bucket(struct btree_trans *trans, |
5dd8c60e KO |
1074 | struct bch_dev *ca, u64 b, |
1075 | enum bch_data_type type, unsigned sectors, | |
1076 | enum btree_iter_update_trigger_flags flags) | |
bfcf840d | 1077 | { |
c281db0f KO |
1078 | BUG_ON(type != BCH_DATA_free && |
1079 | type != BCH_DATA_sb && | |
1080 | type != BCH_DATA_journal); | |
1081 | ||
1082 | /* | |
1083 | * Backup superblock might be past the end of our normal usable space: | |
1084 | */ | |
1085 | if (b >= ca->mi.nbuckets) | |
1086 | return 0; | |
1087 | ||
5dd8c60e | 1088 | if (flags & BTREE_TRIGGER_gc) |
fb23d57a | 1089 | return bch2_mark_metadata_bucket(trans, ca, b, type, sectors, flags); |
5dd8c60e | 1090 | else if (flags & BTREE_TRIGGER_transactional) |
c281db0f KO |
1091 | return commit_do(trans, NULL, NULL, 0, |
1092 | __bch2_trans_mark_metadata_bucket(trans, ca, b, type, sectors)); | |
1093 | else | |
1094 | BUG(); | |
bfcf840d KO |
1095 | } |
1096 | ||
1097 | static int bch2_trans_mark_metadata_sectors(struct btree_trans *trans, | |
5dd8c60e KO |
1098 | struct bch_dev *ca, u64 start, u64 end, |
1099 | enum bch_data_type type, u64 *bucket, unsigned *bucket_sectors, | |
1100 | enum btree_iter_update_trigger_flags flags) | |
bfcf840d | 1101 | { |
bfcf840d KO |
1102 | do { |
1103 | u64 b = sector_to_bucket(ca, start); | |
1104 | unsigned sectors = | |
1105 | min_t(u64, bucket_to_sector(ca, b + 1), end) - start; | |
1106 | ||
d62ab355 KO |
1107 | if (b != *bucket && *bucket_sectors) { |
1108 | int ret = bch2_trans_mark_metadata_bucket(trans, ca, *bucket, | |
c281db0f | 1109 | type, *bucket_sectors, flags); |
d62ab355 KO |
1110 | if (ret) |
1111 | return ret; | |
bfcf840d | 1112 | |
d62ab355 | 1113 | *bucket_sectors = 0; |
bfcf840d KO |
1114 | } |
1115 | ||
d62ab355 | 1116 | *bucket = b; |
bfcf840d KO |
1117 | *bucket_sectors += sectors; |
1118 | start += sectors; | |
d62ab355 | 1119 | } while (start < end); |
bfcf840d KO |
1120 | |
1121 | return 0; | |
1122 | } | |
1123 | ||
5dd8c60e KO |
1124 | static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *ca, |
1125 | enum btree_iter_update_trigger_flags flags) | |
bfcf840d | 1126 | { |
0e49d3ff KO |
1127 | struct bch_fs *c = trans->c; |
1128 | ||
1129 | mutex_lock(&c->sb_lock); | |
1130 | struct bch_sb_layout layout = ca->disk_sb.sb->layout; | |
1131 | mutex_unlock(&c->sb_lock); | |
1132 | ||
bfcf840d KO |
1133 | u64 bucket = 0; |
1134 | unsigned i, bucket_sectors = 0; | |
1135 | int ret; | |
1136 | ||
0e49d3ff KO |
1137 | for (i = 0; i < layout.nr_superblocks; i++) { |
1138 | u64 offset = le64_to_cpu(layout.sb_offset[i]); | |
bfcf840d KO |
1139 | |
1140 | if (offset == BCH_SB_SECTOR) { | |
d62ab355 | 1141 | ret = bch2_trans_mark_metadata_sectors(trans, ca, |
bfcf840d | 1142 | 0, BCH_SB_SECTOR, |
c281db0f | 1143 | BCH_DATA_sb, &bucket, &bucket_sectors, flags); |
bfcf840d KO |
1144 | if (ret) |
1145 | return ret; | |
1146 | } | |
1147 | ||
d62ab355 | 1148 | ret = bch2_trans_mark_metadata_sectors(trans, ca, offset, |
0e49d3ff | 1149 | offset + (1 << layout.sb_max_size_bits), |
c281db0f | 1150 | BCH_DATA_sb, &bucket, &bucket_sectors, flags); |
bfcf840d KO |
1151 | if (ret) |
1152 | return ret; | |
1153 | } | |
1154 | ||
1155 | if (bucket_sectors) { | |
d62ab355 | 1156 | ret = bch2_trans_mark_metadata_bucket(trans, ca, |
c281db0f | 1157 | bucket, BCH_DATA_sb, bucket_sectors, flags); |
bfcf840d KO |
1158 | if (ret) |
1159 | return ret; | |
1160 | } | |
1161 | ||
1162 | for (i = 0; i < ca->journal.nr; i++) { | |
d62ab355 | 1163 | ret = bch2_trans_mark_metadata_bucket(trans, ca, |
bfcf840d | 1164 | ca->journal.buckets[i], |
c281db0f | 1165 | BCH_DATA_journal, ca->mi.bucket_size, flags); |
bfcf840d KO |
1166 | if (ret) |
1167 | return ret; | |
1168 | } | |
1169 | ||
1170 | return 0; | |
1171 | } | |
1172 | ||
5dd8c60e KO |
1173 | int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca, |
1174 | enum btree_iter_update_trigger_flags flags) | |
bfcf840d | 1175 | { |
c281db0f KO |
1176 | int ret = bch2_trans_run(c, |
1177 | __bch2_trans_mark_dev_sb(trans, ca, flags)); | |
cf904c8d | 1178 | bch_err_fn(c, ret); |
1bb3c2a9 | 1179 | return ret; |
bfcf840d KO |
1180 | } |
1181 | ||
5dd8c60e KO |
1182 | int bch2_trans_mark_dev_sbs_flags(struct bch_fs *c, |
1183 | enum btree_iter_update_trigger_flags flags) | |
bbe682c7 | 1184 | { |
cca2c0d2 | 1185 | for_each_online_member(c, ca, BCH_DEV_READ_REF_trans_mark_dev_sbs) { |
c281db0f | 1186 | int ret = bch2_trans_mark_dev_sb(c, ca, flags); |
bbe682c7 | 1187 | if (ret) { |
cca2c0d2 | 1188 | enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_trans_mark_dev_sbs); |
bbe682c7 KO |
1189 | return ret; |
1190 | } | |
1191 | } | |
1192 | ||
1193 | return 0; | |
1194 | } | |
1195 | ||
c281db0f KO |
1196 | int bch2_trans_mark_dev_sbs(struct bch_fs *c) |
1197 | { | |
5dd8c60e | 1198 | return bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_transactional); |
c281db0f KO |
1199 | } |
1200 | ||
828552ca KO |
1201 | bool bch2_is_superblock_bucket(struct bch_dev *ca, u64 b) |
1202 | { | |
1203 | struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; | |
1204 | u64 b_offset = bucket_to_sector(ca, b); | |
1205 | u64 b_end = bucket_to_sector(ca, b + 1); | |
1206 | unsigned i; | |
1207 | ||
1208 | if (!b) | |
1209 | return true; | |
1210 | ||
1211 | for (i = 0; i < layout->nr_superblocks; i++) { | |
1212 | u64 offset = le64_to_cpu(layout->sb_offset[i]); | |
1213 | u64 end = offset + (1 << layout->sb_max_size_bits); | |
1214 | ||
1215 | if (!(offset >= b_end || end <= b_offset)) | |
1216 | return true; | |
1217 | } | |
1218 | ||
1219 | for (i = 0; i < ca->journal.nr; i++) | |
1220 | if (b == ca->journal.buckets[i]) | |
1221 | return true; | |
1222 | ||
1223 | return false; | |
1224 | } | |
1225 | ||
1c6fdbd8 KO |
1226 | /* Disk reservations: */ |
1227 | ||
1c6fdbd8 KO |
1228 | #define SECTORS_CACHE 1024 |
1229 | ||
7e94eeff | 1230 | int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, |
335d318e | 1231 | u64 sectors, enum bch_reservation_flags flags) |
1c6fdbd8 | 1232 | { |
5663a415 | 1233 | struct bch_fs_pcpu *pcpu; |
68573b93 | 1234 | u64 old, get; |
335d318e | 1235 | u64 sectors_available; |
1c6fdbd8 KO |
1236 | int ret; |
1237 | ||
9166b41d | 1238 | percpu_down_read(&c->mark_lock); |
1c6fdbd8 | 1239 | preempt_disable(); |
5663a415 | 1240 | pcpu = this_cpu_ptr(c->pcpu); |
1c6fdbd8 | 1241 | |
5663a415 | 1242 | if (sectors <= pcpu->sectors_available) |
1c6fdbd8 KO |
1243 | goto out; |
1244 | ||
68573b93 | 1245 | old = atomic64_read(&c->sectors_available); |
1c6fdbd8 | 1246 | do { |
1c6fdbd8 KO |
1247 | get = min((u64) sectors + SECTORS_CACHE, old); |
1248 | ||
1249 | if (get < sectors) { | |
1250 | preempt_enable(); | |
1c6fdbd8 KO |
1251 | goto recalculate; |
1252 | } | |
68573b93 UB |
1253 | } while (!atomic64_try_cmpxchg(&c->sectors_available, |
1254 | &old, old - get)); | |
1c6fdbd8 | 1255 | |
5663a415 | 1256 | pcpu->sectors_available += get; |
1c6fdbd8 KO |
1257 | |
1258 | out: | |
5663a415 | 1259 | pcpu->sectors_available -= sectors; |
5e82a9a1 | 1260 | this_cpu_add(*c->online_reserved, sectors); |
5663a415 | 1261 | res->sectors += sectors; |
1c6fdbd8 | 1262 | |
1c6fdbd8 | 1263 | preempt_enable(); |
9166b41d | 1264 | percpu_up_read(&c->mark_lock); |
1c6fdbd8 KO |
1265 | return 0; |
1266 | ||
1267 | recalculate: | |
fca1223c | 1268 | mutex_lock(&c->sectors_available_lock); |
39fbc5a4 | 1269 | |
fca1223c KO |
1270 | percpu_u64_set(&c->pcpu->sectors_available, 0); |
1271 | sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free); | |
1c6fdbd8 | 1272 | |
335d318e KO |
1273 | if (sectors_available && (flags & BCH_DISK_RESERVATION_PARTIAL)) |
1274 | sectors = min(sectors, sectors_available); | |
1275 | ||
1c6fdbd8 KO |
1276 | if (sectors <= sectors_available || |
1277 | (flags & BCH_DISK_RESERVATION_NOFAIL)) { | |
1278 | atomic64_set(&c->sectors_available, | |
1279 | max_t(s64, 0, sectors_available - sectors)); | |
5e82a9a1 | 1280 | this_cpu_add(*c->online_reserved, sectors); |
5663a415 | 1281 | res->sectors += sectors; |
1c6fdbd8 | 1282 | ret = 0; |
1c6fdbd8 KO |
1283 | } else { |
1284 | atomic64_set(&c->sectors_available, sectors_available); | |
09b9c72b | 1285 | ret = bch_err_throw(c, ENOSPC_disk_reservation); |
1c6fdbd8 KO |
1286 | } |
1287 | ||
fca1223c KO |
1288 | mutex_unlock(&c->sectors_available_lock); |
1289 | percpu_up_read(&c->mark_lock); | |
1c6fdbd8 | 1290 | |
1c6fdbd8 KO |
1291 | return ret; |
1292 | } | |
1293 | ||
1294 | /* Startup/shutdown: */ | |
1295 | ||
ffcbec60 KO |
1296 | void bch2_buckets_nouse_free(struct bch_fs *c) |
1297 | { | |
1298 | for_each_member_device(c, ca) { | |
1299 | kvfree_rcu_mightsleep(ca->buckets_nouse); | |
1300 | ca->buckets_nouse = NULL; | |
1301 | } | |
1302 | } | |
1303 | ||
1304 | int bch2_buckets_nouse_alloc(struct bch_fs *c) | |
1305 | { | |
1306 | for_each_member_device(c, ca) { | |
1307 | BUG_ON(ca->buckets_nouse); | |
1308 | ||
45414083 | 1309 | ca->buckets_nouse = bch2_kvmalloc(BITS_TO_LONGS(ca->mi.nbuckets) * |
ffcbec60 KO |
1310 | sizeof(unsigned long), |
1311 | GFP_KERNEL|__GFP_ZERO); | |
1312 | if (!ca->buckets_nouse) { | |
f295298b | 1313 | bch2_dev_put(ca); |
09b9c72b | 1314 | return bch_err_throw(c, ENOMEM_buckets_nouse); |
ffcbec60 KO |
1315 | } |
1316 | } | |
1317 | ||
1318 | return 0; | |
1319 | } | |
1320 | ||
a7860877 KO |
1321 | static void bucket_gens_free_rcu(struct rcu_head *rcu) |
1322 | { | |
1323 | struct bucket_gens *buckets = | |
1324 | container_of(rcu, struct bucket_gens, rcu); | |
1325 | ||
cb6fc943 | 1326 | kvfree(buckets); |
a7860877 KO |
1327 | } |
1328 | ||
1c6fdbd8 KO |
1329 | int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) |
1330 | { | |
a7860877 | 1331 | struct bucket_gens *bucket_gens = NULL, *old_bucket_gens = NULL; |
5735608c | 1332 | bool resize = ca->bucket_gens != NULL; |
65d48e35 | 1333 | int ret; |
1c6fdbd8 | 1334 | |
49f2d182 KO |
1335 | if (resize) |
1336 | lockdep_assert_held(&c->state_lock); | |
1337 | ||
1338 | if (resize && ca->buckets_nouse) | |
09b9c72b | 1339 | return bch_err_throw(c, no_resize_with_buckets_nouse); |
ffcbec60 | 1340 | |
45414083 KO |
1341 | bucket_gens = bch2_kvmalloc(struct_size(bucket_gens, b, nbuckets), |
1342 | GFP_KERNEL|__GFP_ZERO); | |
901ff655 | 1343 | if (!bucket_gens) { |
09b9c72b | 1344 | ret = bch_err_throw(c, ENOMEM_bucket_gens); |
65d48e35 KO |
1345 | goto err; |
1346 | } | |
1347 | ||
a7860877 KO |
1348 | bucket_gens->first_bucket = ca->mi.first_bucket; |
1349 | bucket_gens->nbuckets = nbuckets; | |
9c4acd19 KO |
1350 | bucket_gens->nbuckets_minus_first = |
1351 | bucket_gens->nbuckets - bucket_gens->first_bucket; | |
1c6fdbd8 | 1352 | |
a7860877 | 1353 | old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1); |
1c6fdbd8 KO |
1354 | |
1355 | if (resize) { | |
00246644 KO |
1356 | u64 copy = min(bucket_gens->nbuckets, |
1357 | old_bucket_gens->nbuckets); | |
a7860877 KO |
1358 | memcpy(bucket_gens->b, |
1359 | old_bucket_gens->b, | |
00246644 | 1360 | sizeof(bucket_gens->b[0]) * copy); |
1c6fdbd8 KO |
1361 | } |
1362 | ||
09b9c72b | 1363 | ret = bch2_bucket_bitmap_resize(ca, &ca->bucket_backpointer_mismatch, |
15f96932 | 1364 | ca->mi.nbuckets, nbuckets) ?: |
09b9c72b | 1365 | bch2_bucket_bitmap_resize(ca, &ca->bucket_backpointer_empty, |
15f96932 | 1366 | ca->mi.nbuckets, nbuckets); |
13ffcbae | 1367 | |
a7860877 | 1368 | rcu_assign_pointer(ca->bucket_gens, bucket_gens); |
a7860877 | 1369 | bucket_gens = old_bucket_gens; |
1c6fdbd8 | 1370 | |
5735608c KO |
1371 | nbuckets = ca->mi.nbuckets; |
1372 | ||
1c6fdbd8 KO |
1373 | ret = 0; |
1374 | err: | |
a7860877 | 1375 | if (bucket_gens) |
80bf2f34 | 1376 | call_rcu(&bucket_gens->rcu, bucket_gens_free_rcu); |
1c6fdbd8 KO |
1377 | |
1378 | return ret; | |
1379 | } | |
1380 | ||
1381 | void bch2_dev_buckets_free(struct bch_dev *ca) | |
1382 | { | |
cb6fc943 KO |
1383 | kvfree(ca->buckets_nouse); |
1384 | kvfree(rcu_dereference_protected(ca->bucket_gens, 1)); | |
f5095b9f | 1385 | free_percpu(ca->usage); |
1c6fdbd8 KO |
1386 | } |
1387 | ||
1388 | int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca) | |
1389 | { | |
955ba7b5 | 1390 | ca->usage = alloc_percpu(struct bch_dev_usage_full); |
f5095b9f | 1391 | if (!ca->usage) |
09b9c72b | 1392 | return bch_err_throw(c, ENOMEM_usage_init); |
1c6fdbd8 | 1393 | |
3e3e02e6 | 1394 | return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets); |
1c6fdbd8 | 1395 | } |