// SPDX-License-Identifier: GPL-2.0
/*
 * Code for manipulating bucket marks for garbage collection.
 *
 * Copyright 2014 Datera, Inc.
 *
 * Bucket states:
 * - free bucket: mark == 0
 *   The bucket contains no data and will not be read
 *
 * - allocator bucket: owned_by_allocator == 1
 *   The bucket is on a free list, or it is an open bucket
 *
 * - cached bucket: owned_by_allocator == 0 &&
 *		    dirty_sectors == 0 &&
 *		    cached_sectors > 0
 *   The bucket contains data but may be safely discarded as there are
 *   enough replicas of the data on other cache devices, or it has been
 *   written back to the backing device
 *
 * - dirty bucket: owned_by_allocator == 0 &&
 *		   dirty_sectors > 0
 *   The bucket contains data that we must not discard (either the only copy,
 *   or one of the 'main copies' for data requiring multiple replicas)
 *
 * - metadata bucket: owned_by_allocator == 0 && is_metadata == 1
 *   This is a btree node, journal or gen/prio bucket
 *
 * Lifecycle:
 *
 * bucket invalidated => bucket on freelist => open bucket =>
 *     [dirty bucket =>] cached bucket => bucket invalidated => ...
 *
 * Note that cache promotion can skip the dirty bucket step, as data
 * is copied from a deeper tier to a shallower tier, onto a cached
 * bucket.
 * Note also that a cached bucket can spontaneously become dirty --
 * see below.
 *
 * Only a traversal of the key space can determine whether a bucket is
 * truly dirty or cached.
 *
 * Transitions:
 *
 * - free => allocator: bucket was invalidated
 * - cached => allocator: bucket was invalidated
 *
 * - allocator => dirty: open bucket was filled up
 * - allocator => cached: open bucket was filled up
 * - allocator => metadata: metadata was allocated
 *
 * - dirty => cached: dirty sectors were copied to a deeper tier
 * - dirty => free: dirty sectors were overwritten or moved (copy gc)
 * - cached => free: cached sectors were overwritten
 *
 * - metadata => free: metadata was freed
 *
 * Oddities:
 * - cached => dirty: a device was removed so formerly replicated data
 *   is no longer sufficiently replicated
 * - free => cached: cannot happen
 * - free => dirty: cannot happen
 * - free => metadata: cannot happen
 */

#include "bcachefs.h"
#include "alloc_background.h"
#include "bset.h"
#include "btree_gc.h"
#include "btree_update.h"
#include "buckets.h"
#include "ec.h"
#include "error.h"
#include "movinggc.h"
#include "replicas.h"
#include "trace.h"

#include <linux/preempt.h>

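/*
 * Fold a per-replicas-entry sector delta into the matching summary counter
 * (btree/data/cached) of struct bch_fs_usage; reserved and hidden counters
 * are maintained elsewhere:
 */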
static inline void fs_usage_data_type_to_base(struct bch_fs_usage *fs_usage,
					      enum bch_data_type data_type,
					      s64 sectors)
{
	switch (data_type) {
	case BCH_DATA_btree:
		fs_usage->btree += sectors;
		break;
	case BCH_DATA_user:
	case BCH_DATA_parity:
		fs_usage->data += sectors;
		break;
	case BCH_DATA_cached:
		fs_usage->cached += sectors;
		break;
	default:
		break;
	}
}

/*
 * Clear journal_seq_valid for buckets for which it's not needed, to prevent
 * wraparound:
 */
void bch2_bucket_seq_cleanup(struct bch_fs *c)
{
	u64 journal_seq = atomic64_read(&c->journal.seq);
	u16 last_seq_ondisk = c->journal.last_seq_ondisk;
	struct bch_dev *ca;
	struct bucket_array *buckets;
	struct bucket *g;
	struct bucket_mark m;
	unsigned i;

	if (journal_seq - c->last_bucket_seq_cleanup <
	    (1U << (BUCKET_JOURNAL_SEQ_BITS - 2)))
		return;

	c->last_bucket_seq_cleanup = journal_seq;

	for_each_member_device(ca, c, i) {
		down_read(&ca->bucket_lock);
		buckets = bucket_array(ca);

		for_each_bucket(g, buckets) {
			bucket_cmpxchg(g, m, ({
				if (!m.journal_seq_valid ||
				    bucket_needs_journal_commit(m, last_seq_ondisk))
					break;

				m.journal_seq_valid = 0;
			}));
		}
		up_read(&ca->bucket_lock);
	}
}

void bch2_fs_usage_initialize(struct bch_fs *c)
{
	struct bch_fs_usage *usage;
	unsigned i;

	percpu_down_write(&c->mark_lock);
	usage = c->usage_base;

	for (i = 0; i < ARRAY_SIZE(c->usage); i++)
		bch2_fs_usage_acc_to_base(c, i);

	for (i = 0; i < BCH_REPLICAS_MAX; i++)
		usage->reserved += usage->persistent_reserved[i];

	for (i = 0; i < c->replicas.nr; i++) {
		struct bch_replicas_entry *e =
			cpu_replicas_entry(&c->replicas, i);

		fs_usage_data_type_to_base(usage, e->data_type, usage->replicas[i]);
	}

	percpu_up_write(&c->mark_lock);
}

void bch2_fs_usage_scratch_put(struct bch_fs *c, struct bch_fs_usage_online *fs_usage)
{
	if (fs_usage == c->usage_scratch)
		mutex_unlock(&c->usage_scratch_lock);
	else
		kfree(fs_usage);
}

struct bch_fs_usage_online *bch2_fs_usage_scratch_get(struct bch_fs *c)
{
	struct bch_fs_usage_online *ret;
	unsigned bytes = sizeof(struct bch_fs_usage_online) + sizeof(u64) *
		READ_ONCE(c->replicas.nr);
	ret = kzalloc(bytes, GFP_NOWAIT|__GFP_NOWARN);
	if (ret)
		return ret;

	if (mutex_trylock(&c->usage_scratch_lock))
		goto out_pool;

	ret = kzalloc(bytes, GFP_NOFS);
	if (ret)
		return ret;

	mutex_lock(&c->usage_scratch_lock);
out_pool:
	ret = c->usage_scratch;
	memset(ret, 0, bytes);
	return ret;
}

struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
{
	struct bch_dev_usage ret;

	memset(&ret, 0, sizeof(ret));
	acc_u64s_percpu((u64 *) &ret,
			(u64 __percpu *) ca->usage[0],
			sizeof(ret) / sizeof(u64));

	return ret;
}

static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
						unsigned journal_seq,
						bool gc)
{
	return this_cpu_ptr(gc
			    ? c->usage_gc
			    : c->usage[journal_seq & JOURNAL_BUF_MASK]);
}

u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v)
{
	ssize_t offset = v - (u64 *) c->usage_base;
	unsigned i, seq;
	u64 ret;

	BUG_ON(offset < 0 || offset >= fs_usage_u64s(c));
	percpu_rwsem_assert_held(&c->mark_lock);

	do {
		seq = read_seqcount_begin(&c->usage_lock);
		ret = *v;

		for (i = 0; i < ARRAY_SIZE(c->usage); i++)
			ret += percpu_u64_get((u64 __percpu *) c->usage[i] + offset);
	} while (read_seqcount_retry(&c->usage_lock, seq));

	return ret;
}

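/*
 * Snapshot the filesystem usage counters: allocate a buffer sized for the
 * current replicas table (retrying if the table grows underneath us), then
 * sum the base counters with each journal buffer's percpu deltas under the
 * usage seqcount:
 */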
struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *c)
{
	struct bch_fs_usage_online *ret;
	unsigned seq, i, v, u64s = fs_usage_u64s(c);
retry:
	ret = kmalloc(u64s * sizeof(u64), GFP_NOFS);
	if (unlikely(!ret))
		return NULL;

	percpu_down_read(&c->mark_lock);

	v = fs_usage_u64s(c);
	if (unlikely(u64s != v)) {
		u64s = v;
		percpu_up_read(&c->mark_lock);
		kfree(ret);
		goto retry;
	}

	ret->online_reserved = percpu_u64_get(c->online_reserved);

	u64s = fs_usage_u64s(c);
	do {
		seq = read_seqcount_begin(&c->usage_lock);
		memcpy(ret, c->usage_base, u64s * sizeof(u64));
		for (i = 0; i < ARRAY_SIZE(c->usage); i++)
			acc_u64s_percpu((u64 *) &ret->u, (u64 __percpu *) c->usage[i], u64s);
	} while (read_seqcount_retry(&c->usage_lock, seq));

	return ret;
}

void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx)
{
	unsigned u64s = fs_usage_u64s(c);

	BUG_ON(idx >= ARRAY_SIZE(c->usage));

	preempt_disable();
	write_seqcount_begin(&c->usage_lock);

	acc_u64s_percpu((u64 *) c->usage_base,
			(u64 __percpu *) c->usage[idx], u64s);
	percpu_memset(c->usage[idx], 0, u64s * sizeof(u64));

	write_seqcount_end(&c->usage_lock);
	preempt_enable();
}

void bch2_fs_usage_to_text(struct printbuf *out,
			   struct bch_fs *c,
			   struct bch_fs_usage_online *fs_usage)
{
	unsigned i;

	pr_buf(out, "capacity:\t\t\t%llu\n", c->capacity);

	pr_buf(out, "hidden:\t\t\t\t%llu\n",
	       fs_usage->u.hidden);
	pr_buf(out, "data:\t\t\t\t%llu\n",
	       fs_usage->u.data);
	pr_buf(out, "cached:\t\t\t\t%llu\n",
	       fs_usage->u.cached);
	pr_buf(out, "reserved:\t\t\t%llu\n",
	       fs_usage->u.reserved);
	pr_buf(out, "nr_inodes:\t\t\t%llu\n",
	       fs_usage->u.nr_inodes);
	pr_buf(out, "online reserved:\t\t%llu\n",
	       fs_usage->online_reserved);

	for (i = 0;
	     i < ARRAY_SIZE(fs_usage->u.persistent_reserved);
	     i++) {
		pr_buf(out, "%u replicas:\n", i + 1);
		pr_buf(out, "\treserved:\t\t%llu\n",
		       fs_usage->u.persistent_reserved[i]);
	}

	for (i = 0; i < c->replicas.nr; i++) {
		struct bch_replicas_entry *e =
			cpu_replicas_entry(&c->replicas, i);

		pr_buf(out, "\t");
		bch2_replicas_entry_to_text(out, e);
		pr_buf(out, ":\t%llu\n", fs_usage->u.replicas[i]);
	}
}

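/*
 * reserve_factor() inflates a sector count by roughly 1/64th (rounded up) to
 * leave slack for internal fragmentation; avail_factor() is approximately its
 * inverse, mapping raw capacity to what we report as usable. Illustrative
 * example: reserve_factor(1000) = 1000 + (1024 >> 6) = 1016, and
 * avail_factor(1016) = (1016 << 6) / 65 = 1000.
 */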
#define RESERVE_FACTOR	6

static u64 reserve_factor(u64 r)
{
	return r + (round_up(r, (1 << RESERVE_FACTOR)) >> RESERVE_FACTOR);
}

static u64 avail_factor(u64 r)
{
	return div_u64(r << RESERVE_FACTOR, (1 << RESERVE_FACTOR) + 1);
}

u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage_online *fs_usage)
{
	return min(fs_usage->u.hidden +
		   fs_usage->u.btree +
		   fs_usage->u.data +
		   reserve_factor(fs_usage->u.reserved +
				  fs_usage->online_reserved),
		   c->capacity);
}

static struct bch_fs_usage_short
__bch2_fs_usage_read_short(struct bch_fs *c)
{
	struct bch_fs_usage_short ret;
	u64 data, reserved;

	ret.capacity = c->capacity -
		bch2_fs_usage_read_one(c, &c->usage_base->hidden);

	data	= bch2_fs_usage_read_one(c, &c->usage_base->data) +
		bch2_fs_usage_read_one(c, &c->usage_base->btree);
	reserved = bch2_fs_usage_read_one(c, &c->usage_base->reserved) +
		percpu_u64_get(c->online_reserved);

	ret.used = min(ret.capacity, data + reserve_factor(reserved));
	ret.free = ret.capacity - ret.used;

	ret.nr_inodes = bch2_fs_usage_read_one(c, &c->usage_base->nr_inodes);

	return ret;
}

struct bch_fs_usage_short
bch2_fs_usage_read_short(struct bch_fs *c)
{
	struct bch_fs_usage_short ret;

	percpu_down_read(&c->mark_lock);
	ret = __bch2_fs_usage_read_short(c);
	percpu_up_read(&c->mark_lock);

	return ret;
}

static inline int is_unavailable_bucket(struct bucket_mark m)
{
	return !is_available_bucket(m);
}

static inline int is_fragmented_bucket(struct bucket_mark m,
				       struct bch_dev *ca)
{
	if (!m.owned_by_allocator &&
	    m.data_type == BCH_DATA_user &&
	    bucket_sectors_used(m))
		return max_t(int, 0, (int) ca->mi.bucket_size -
			     bucket_sectors_used(m));
	return 0;
}

static inline int is_stripe_data_bucket(struct bucket_mark m)
{
	return m.stripe && m.data_type != BCH_DATA_parity;
}

static inline int bucket_stripe_sectors(struct bucket_mark m)
{
	return is_stripe_data_bucket(m) ? m.dirty_sectors : 0;
}

static inline enum bch_data_type bucket_type(struct bucket_mark m)
{
	return m.cached_sectors && !m.dirty_sectors
		? BCH_DATA_cached
		: m.data_type;
}

static bool bucket_became_unavailable(struct bucket_mark old,
				      struct bucket_mark new)
{
	return is_available_bucket(old) &&
	       !is_available_bucket(new);
}

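/*
 * Apply a transaction's usage deltas to the in-memory filesystem usage.
 * Usage may only grow by what was reserved up front: if more was added than
 * the disk reservation covered, we warn, subtract the excess back out of
 * sectors_available, and return an error:
 */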
int bch2_fs_usage_apply(struct bch_fs *c,
			struct bch_fs_usage_online *src,
			struct disk_reservation *disk_res,
			unsigned journal_seq)
{
	struct bch_fs_usage *dst = fs_usage_ptr(c, journal_seq, false);
	s64 added = src->u.data + src->u.reserved;
	s64 should_not_have_added;
	int ret = 0;

	percpu_rwsem_assert_held(&c->mark_lock);

	/*
	 * Not allowed to reduce sectors_available except by getting a
	 * reservation:
	 */
	should_not_have_added = added - (s64) (disk_res ? disk_res->sectors : 0);
	if (WARN_ONCE(should_not_have_added > 0,
		      "disk usage increased by %lli more than reservation of %llu",
		      added, disk_res ? disk_res->sectors : 0)) {
		atomic64_sub(should_not_have_added, &c->sectors_available);
		added -= should_not_have_added;
		ret = -1;
	}

	if (added > 0) {
		disk_res->sectors	-= added;
		src->online_reserved	-= added;
	}

	this_cpu_add(*c->online_reserved, src->online_reserved);

	preempt_disable();
	acc_u64s((u64 *) dst, (u64 *) &src->u, fs_usage_u64s(c));
	preempt_enable();

	return ret;
}

static inline void account_bucket(struct bch_fs_usage *fs_usage,
				  struct bch_dev_usage *dev_usage,
				  enum bch_data_type type,
				  int nr, s64 size)
{
	if (type == BCH_DATA_sb || type == BCH_DATA_journal)
		fs_usage->hidden	+= size;

	dev_usage->buckets[type]	+= nr;
}

466 | ||
1c6fdbd8 | 467 | static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, |
9ca53b55 KO |
468 | struct bch_fs_usage *fs_usage, |
469 | struct bucket_mark old, struct bucket_mark new, | |
470 | bool gc) | |
1c6fdbd8 | 471 | { |
649a9b68 | 472 | struct bch_dev_usage *u; |
1c6fdbd8 | 473 | |
9166b41d | 474 | percpu_rwsem_assert_held(&c->mark_lock); |
1c6fdbd8 | 475 | |
1c6fdbd8 | 476 | preempt_disable(); |
649a9b68 | 477 | u = this_cpu_ptr(ca->usage[gc]); |
1c6fdbd8 | 478 | |
06b7345c | 479 | if (bucket_type(old)) |
649a9b68 | 480 | account_bucket(fs_usage, u, bucket_type(old), |
06b7345c | 481 | -1, -ca->mi.bucket_size); |
dfe9bfb3 | 482 | |
06b7345c | 483 | if (bucket_type(new)) |
649a9b68 | 484 | account_bucket(fs_usage, u, bucket_type(new), |
06b7345c | 485 | 1, ca->mi.bucket_size); |
1c6fdbd8 | 486 | |
649a9b68 | 487 | u->buckets_unavailable += |
1c6fdbd8 KO |
488 | is_unavailable_bucket(new) - is_unavailable_bucket(old); |
489 | ||
649a9b68 KO |
490 | u->buckets_ec += (int) new.stripe - (int) old.stripe; |
491 | u->sectors_ec += bucket_stripe_sectors(new) - | |
492 | bucket_stripe_sectors(old); | |
493 | ||
494 | u->sectors[old.data_type] -= old.dirty_sectors; | |
495 | u->sectors[new.data_type] += new.dirty_sectors; | |
89fd25be | 496 | u->sectors[BCH_DATA_cached] += |
1c6fdbd8 | 497 | (int) new.cached_sectors - (int) old.cached_sectors; |
649a9b68 | 498 | u->sectors_fragmented += |
1c6fdbd8 KO |
499 | is_fragmented_bucket(new, ca) - is_fragmented_bucket(old, ca); |
500 | preempt_enable(); | |
501 | ||
502 | if (!is_available_bucket(old) && is_available_bucket(new)) | |
503 | bch2_wake_allocator(ca); | |
1c6fdbd8 KO |
504 | } |
505 | ||
__flatten
void bch2_dev_usage_from_buckets(struct bch_fs *c)
{
	struct bch_dev *ca;
	struct bucket_mark old = { .v.counter = 0 };
	struct bucket_array *buckets;
	struct bucket *g;
	unsigned i;
	int cpu;

	c->usage_base->hidden = 0;

	for_each_member_device(ca, c, i) {
		for_each_possible_cpu(cpu)
			memset(per_cpu_ptr(ca->usage[0], cpu), 0,
			       sizeof(*ca->usage[0]));

		buckets = bucket_array(ca);

		for_each_bucket(g, buckets)
			bch2_dev_usage_update(c, ca, c->usage_base,
					      old, g->mark, false);
	}
}

static inline int update_replicas(struct bch_fs *c,
				  struct bch_fs_usage *fs_usage,
				  struct bch_replicas_entry *r,
				  s64 sectors)
{
	int idx = bch2_replicas_entry_idx(c, r);

	if (idx < 0)
		return -1;

	if (!fs_usage)
		return 0;

	fs_usage_data_type_to_base(fs_usage, r->data_type, sectors);
	fs_usage->replicas[idx] += sectors;
	return 0;
}

static inline void update_cached_sectors(struct bch_fs *c,
					 struct bch_fs_usage *fs_usage,
					 unsigned dev, s64 sectors)
{
	struct bch_replicas_padded r;

	bch2_replicas_entry_cached(&r.e, dev);

	update_replicas(c, fs_usage, &r.e, sectors);
}

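/*
 * Update paths running inside a btree transaction don't touch struct
 * bch_fs_usage directly; they accumulate (delta, replicas entry) records in a
 * flat, variable-length list hanging off the btree_trans, which is applied
 * later by bch2_replicas_delta_list_apply():
 */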
static struct replicas_delta_list *
replicas_deltas_realloc(struct btree_trans *trans, unsigned more)
{
	struct replicas_delta_list *d = trans->fs_usage_deltas;
	unsigned new_size = d ? (d->size + more) * 2 : 128;

	if (!d || d->used + more > d->size) {
		d = krealloc(d, sizeof(*d) + new_size, GFP_NOIO|__GFP_ZERO);
		BUG_ON(!d);

		d->size = new_size;
		trans->fs_usage_deltas = d;
	}
	return d;
}

static inline void update_replicas_list(struct btree_trans *trans,
					struct bch_replicas_entry *r,
					s64 sectors)
{
	struct replicas_delta_list *d;
	struct replicas_delta *n;
	unsigned b;

	if (!sectors)
		return;

	b = replicas_entry_bytes(r) + 8;
	d = replicas_deltas_realloc(trans, b);

	n = (void *) d->d + d->used;
	n->delta = sectors;
	memcpy((void *) n + offsetof(struct replicas_delta, r),
	       r, replicas_entry_bytes(r));
	d->used += b;
}

static inline void update_cached_sectors_list(struct btree_trans *trans,
					      unsigned dev, s64 sectors)
{
	struct bch_replicas_padded r;

	bch2_replicas_entry_cached(&r.e, dev);

	update_replicas_list(trans, &r.e, sectors);
}

static inline struct replicas_delta *
replicas_delta_next(struct replicas_delta *d)
{
	return (void *) d + replicas_entry_bytes(&d->r) + 8;
}

int bch2_replicas_delta_list_apply(struct bch_fs *c,
				   struct bch_fs_usage *fs_usage,
				   struct replicas_delta_list *r)
{
	struct replicas_delta *d = r->d;
	struct replicas_delta *top = (void *) r->d + r->used;
	unsigned i;

	for (d = r->d; d != top; d = replicas_delta_next(d))
		if (update_replicas(c, fs_usage, &d->r, d->delta)) {
			top = d;
			goto unwind;
		}

	if (!fs_usage)
		return 0;

	fs_usage->nr_inodes += r->nr_inodes;

	for (i = 0; i < BCH_REPLICAS_MAX; i++) {
		fs_usage->reserved += r->persistent_reserved[i];
		fs_usage->persistent_reserved[i] += r->persistent_reserved[i];
	}

	return 0;
unwind:
	for (d = r->d; d != top; d = replicas_delta_next(d))
		update_replicas(c, fs_usage, &d->r, -d->delta);
	return -1;
}

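/*
 * While gc is running, buckets may need to be marked twice: once in the
 * regular bucket arrays/usage counters, and again in the gc copies, but only
 * for positions gc has already visited (otherwise gc will mark them itself).
 * do_mark_fn() runs the given mark function for whichever of the two copies
 * applies, based on BTREE_TRIGGER_GC and gc_visited():
 */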
#define do_mark_fn(fn, c, pos, flags, ...)				\
({									\
	int gc, ret = 0;						\
									\
	percpu_rwsem_assert_held(&c->mark_lock);			\
									\
	for (gc = 0; gc < 2 && !ret; gc++)				\
		if (!gc == !(flags & BTREE_TRIGGER_GC) ||		\
		    (gc && gc_visited(c, pos)))				\
			ret = fn(c, __VA_ARGS__, gc);			\
	ret;								\
})

static int __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
				    size_t b, struct bucket_mark *ret,
				    bool gc)
{
	struct bch_fs_usage *fs_usage = fs_usage_ptr(c, 0, gc);
	struct bucket *g = __bucket(ca, b, gc);
	struct bucket_mark old, new;

	old = bucket_cmpxchg(g, new, ({
		BUG_ON(!is_available_bucket(new));

		new.owned_by_allocator	= true;
		new.data_type		= 0;
		new.cached_sectors	= 0;
		new.dirty_sectors	= 0;
		new.gen++;
	}));

	bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);

	if (old.cached_sectors)
		update_cached_sectors(c, fs_usage, ca->dev_idx,
				      -((s64) old.cached_sectors));

	if (!gc)
		*ret = old;
	return 0;
}

void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
			    size_t b, struct bucket_mark *old)
{
	do_mark_fn(__bch2_invalidate_bucket, c, gc_phase(GC_PHASE_START), 0,
		   ca, b, old);

	if (!old->owned_by_allocator && old->cached_sectors)
		trace_invalidate(ca, bucket_to_sector(ca, b),
				 old->cached_sectors);
}

static int __bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
				    size_t b, bool owned_by_allocator,
				    bool gc)
{
	struct bch_fs_usage *fs_usage = fs_usage_ptr(c, 0, gc);
	struct bucket *g = __bucket(ca, b, gc);
	struct bucket_mark old, new;

	old = bucket_cmpxchg(g, new, ({
		new.owned_by_allocator	= owned_by_allocator;
	}));

	bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);

	BUG_ON(!gc &&
	       !owned_by_allocator && !old.owned_by_allocator);

	return 0;
}

void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
			    size_t b, bool owned_by_allocator,
			    struct gc_pos pos, unsigned flags)
{
	preempt_disable();

	do_mark_fn(__bch2_mark_alloc_bucket, c, pos, flags,
		   ca, b, owned_by_allocator);

	preempt_enable();
}

static int bch2_mark_alloc(struct bch_fs *c,
			   struct bkey_s_c old, struct bkey_s_c new,
			   struct bch_fs_usage *fs_usage,
			   u64 journal_seq, unsigned flags)
{
	bool gc = flags & BTREE_TRIGGER_GC;
	struct bkey_alloc_unpacked u;
	struct bch_dev *ca;
	struct bucket *g;
	struct bucket_mark old_m, m;

	/* We don't do anything for deletions - do we?: */
	if (new.k->type != KEY_TYPE_alloc)
		return 0;

	/*
	 * alloc btree is read in by bch2_alloc_read, not gc:
	 */
	if ((flags & BTREE_TRIGGER_GC) &&
	    !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE))
		return 0;

	ca = bch_dev_bkey_exists(c, new.k->p.inode);

	if (new.k->p.offset >= ca->mi.nbuckets)
		return 0;

	g = __bucket(ca, new.k->p.offset, gc);
	u = bch2_alloc_unpack(new);

	old_m = bucket_cmpxchg(g, m, ({
		m.gen			= u.gen;
		m.data_type		= u.data_type;
		m.dirty_sectors		= u.dirty_sectors;
		m.cached_sectors	= u.cached_sectors;

		if (journal_seq) {
			m.journal_seq_valid	= 1;
			m.journal_seq		= journal_seq;
		}
	}));

	bch2_dev_usage_update(c, ca, fs_usage, old_m, m, gc);

	g->io_time[READ]	= u.read_time;
	g->io_time[WRITE]	= u.write_time;
	g->oldest_gen		= u.oldest_gen;
	g->gen_valid		= 1;

	/*
	 * need to know if we're getting called from the invalidate path or
	 * not:
	 */

	if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
	    old_m.cached_sectors) {
		update_cached_sectors(c, fs_usage, ca->dev_idx,
				      -old_m.cached_sectors);
		trace_invalidate(ca, bucket_to_sector(ca, new.k->p.offset),
				 old_m.cached_sectors);
	}

	return 0;
}

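/*
 * Saturating add for 16-bit bucket sector counts: clamps the result at
 * U16_MAX and evaluates to true if the addition overflowed:
 */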
#define checked_add(a, b)					\
({								\
	unsigned _res = (unsigned) (a) + (b);			\
	bool overflow = _res > U16_MAX;				\
	if (overflow)						\
		_res = U16_MAX;					\
	(a) = _res;						\
	overflow;						\
})

static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
				       size_t b, enum bch_data_type data_type,
				       unsigned sectors, bool gc)
{
	struct bucket *g = __bucket(ca, b, gc);
	struct bucket_mark old, new;
	bool overflow;

	BUG_ON(data_type != BCH_DATA_sb &&
	       data_type != BCH_DATA_journal);

	old = bucket_cmpxchg(g, new, ({
		new.data_type	= data_type;
		overflow	= checked_add(new.dirty_sectors, sectors);
	}));

	bch2_fs_inconsistent_on(old.data_type &&
				old.data_type != data_type, c,
		"different types of data in same bucket: %s, %s",
		bch2_data_types[old.data_type],
		bch2_data_types[data_type]);

	bch2_fs_inconsistent_on(overflow, c,
		"bucket %u:%zu gen %u data type %s sector count overflow: %u + %u > U16_MAX",
		ca->dev_idx, b, new.gen,
		bch2_data_types[old.data_type ?: data_type],
		old.dirty_sectors, sectors);

	if (c)
		bch2_dev_usage_update(c, ca, fs_usage_ptr(c, 0, gc),
				      old, new, gc);

	return 0;
}

void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
			       size_t b, enum bch_data_type type,
			       unsigned sectors, struct gc_pos pos,
			       unsigned flags)
{
	BUG_ON(type != BCH_DATA_sb &&
	       type != BCH_DATA_journal);

	preempt_disable();

	if (likely(c)) {
		do_mark_fn(__bch2_mark_metadata_bucket, c, pos, flags,
			   ca, b, type, sectors);
	} else {
		__bch2_mark_metadata_bucket(c, ca, b, type, sectors, 0);
	}

	preempt_enable();
}

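/*
 * For compressed extents, on-disk sectors are accounted in proportion to the
 * live (uncompressed) sectors: disk_sectors_scaled() scales a sector count by
 * n/d (rounding up), and the delta helpers below compute how the on-disk
 * count changes when part of an extent is overwritten or split:
 */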
static s64 disk_sectors_scaled(unsigned n, unsigned d, unsigned sectors)
{
	return DIV_ROUND_UP(sectors * n, d);
}

static s64 __ptr_disk_sectors_delta(unsigned old_size,
				    unsigned offset, s64 delta,
				    unsigned flags,
				    unsigned n, unsigned d)
{
	BUG_ON(!n || !d);

	if (flags & BTREE_TRIGGER_OVERWRITE_SPLIT) {
		BUG_ON(offset + -delta > old_size);

		return -disk_sectors_scaled(n, d, old_size) +
			disk_sectors_scaled(n, d, offset) +
			disk_sectors_scaled(n, d, old_size - offset + delta);
	} else if (flags & BTREE_TRIGGER_OVERWRITE) {
		BUG_ON(offset + -delta > old_size);

		return -disk_sectors_scaled(n, d, old_size) +
			disk_sectors_scaled(n, d, old_size + delta);
	} else {
		return disk_sectors_scaled(n, d, delta);
	}
}

static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
				  unsigned offset, s64 delta,
				  unsigned flags)
{
	return __ptr_disk_sectors_delta(p.crc.live_size,
					offset, delta, flags,
					p.crc.compressed_size,
					p.crc.uncompressed_size);
}

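/*
 * Consistency checks run before a pointer is marked: the pointer's generation
 * must be consistent with the bucket's, the bucket must not mix data types,
 * and the bucket's sector count must not overflow. Returns -EIO (after
 * logging an fsck error) on inconsistency, 1 for a stale cached pointer,
 * 0 otherwise:
 */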
static int check_bucket_ref(struct bch_fs *c, struct bkey_s_c k,
			    const struct bch_extent_ptr *ptr,
			    s64 sectors, enum bch_data_type ptr_data_type,
			    u8 bucket_gen, u8 bucket_data_type,
			    u16 dirty_sectors, u16 cached_sectors)
{
	size_t bucket_nr = PTR_BUCKET_NR(bch_dev_bkey_exists(c, ptr->dev), ptr);
	u16 bucket_sectors = !ptr->cached
		? dirty_sectors
		: cached_sectors;
	char buf[200];

	if (gen_after(ptr->gen, bucket_gen)) {
		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
			"bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
			"while marking %s",
			ptr->dev, bucket_nr, bucket_gen,
			bch2_data_types[bucket_data_type ?: ptr_data_type],
			ptr->gen,
			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
		return -EIO;
	}

	if (gen_cmp(bucket_gen, ptr->gen) > BUCKET_GC_GEN_MAX) {
		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
			"bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
			"while marking %s",
			ptr->dev, bucket_nr, bucket_gen,
			bch2_data_types[bucket_data_type ?: ptr_data_type],
			ptr->gen,
			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
		return -EIO;
	}

	if (bucket_gen != ptr->gen && !ptr->cached) {
		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
			"bucket %u:%zu gen %u data type %s: stale dirty ptr (gen %u)\n"
			"while marking %s",
			ptr->dev, bucket_nr, bucket_gen,
			bch2_data_types[bucket_data_type ?: ptr_data_type],
			ptr->gen,
			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
		return -EIO;
	}

	if (bucket_gen != ptr->gen)
		return 1;

	if (bucket_data_type && ptr_data_type &&
	    bucket_data_type != ptr_data_type) {
		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
			"bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
			"while marking %s",
			ptr->dev, bucket_nr, bucket_gen,
			bch2_data_types[bucket_data_type],
			bch2_data_types[ptr_data_type],
			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
		return -EIO;
	}

	if ((unsigned) (bucket_sectors + sectors) > U16_MAX) {
		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
			"bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n"
			"while marking %s",
			ptr->dev, bucket_nr, bucket_gen,
			bch2_data_types[bucket_data_type ?: ptr_data_type],
			bucket_sectors, sectors,
			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
		return -EIO;
	}

	return 0;
}

static int bucket_set_stripe(struct bch_fs *c, struct bkey_s_c k,
			     unsigned ptr_idx,
			     struct bch_fs_usage *fs_usage,
			     u64 journal_seq, unsigned flags,
			     bool enabled)
{
	const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
	unsigned nr_data = s->nr_blocks - s->nr_redundant;
	bool parity = ptr_idx >= nr_data;
	const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
	bool gc = flags & BTREE_TRIGGER_GC;
	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
	struct bucket *g = PTR_BUCKET(ca, ptr, gc);
	struct bucket_mark new, old;
	char buf[200];
	int ret;

	if (enabled)
		g->ec_redundancy = s->nr_redundant;

	old = bucket_cmpxchg(g, new, ({
		ret = check_bucket_ref(c, k, ptr, 0, 0, new.gen, new.data_type,
				       new.dirty_sectors, new.cached_sectors);
		if (ret)
			return ret;

		if (new.stripe && enabled)
			bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
				      "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s",
				      ptr->dev, PTR_BUCKET_NR(ca, ptr), new.gen,
				      (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));

		if (!new.stripe && !enabled)
			bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
				      "bucket %u:%zu gen %u: deleting stripe but not marked\n%s",
				      ptr->dev, PTR_BUCKET_NR(ca, ptr), new.gen,
				      (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));

		new.stripe			= enabled;

		if ((flags & BTREE_TRIGGER_GC) && parity) {
			new.data_type		= enabled ? BCH_DATA_parity : 0;
			new.dirty_sectors	= enabled ? le16_to_cpu(s->sectors): 0;
		}

		if (journal_seq) {
			new.journal_seq_valid	= 1;
			new.journal_seq		= journal_seq;
		}
	}));

	if (!enabled)
		g->ec_redundancy = 0;

	bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
	return 0;
}

static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k,
			  const struct bch_extent_ptr *ptr,
			  s64 sectors, enum bch_data_type ptr_data_type,
			  u8 bucket_gen, u8 *bucket_data_type,
			  u16 *dirty_sectors, u16 *cached_sectors)
{
	u16 *dst_sectors = !ptr->cached
		? dirty_sectors
		: cached_sectors;
	int ret = check_bucket_ref(c, k, ptr, sectors, ptr_data_type,
				   bucket_gen, *bucket_data_type,
				   *dirty_sectors, *cached_sectors);

	if (ret)
		return ret;

	*dst_sectors += sectors;
	*bucket_data_type = *dirty_sectors || *cached_sectors
		? ptr_data_type : 0;
	return 0;
}

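/*
 * Mark a single pointer: update the bucket's sector counts and data type via
 * a cmpxchg loop on the packed bucket mark, so it is safe against concurrent
 * markers (unless BTREE_TRIGGER_NOATOMIC indicates the caller is single
 * threaded, in which case the mark is stored directly):
 */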
static int bch2_mark_pointer(struct bch_fs *c, struct bkey_s_c k,
			     struct extent_ptr_decoded p,
			     s64 sectors, enum bch_data_type data_type,
			     struct bch_fs_usage *fs_usage,
			     u64 journal_seq, unsigned flags)
{
	bool gc = flags & BTREE_TRIGGER_GC;
	struct bucket_mark old, new;
	struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
	struct bucket *g = PTR_BUCKET(ca, &p.ptr, gc);
	u8 bucket_data_type;
	u64 v;
	int ret;

	v = atomic64_read(&g->_mark.v);
	do {
		new.v.counter = old.v.counter = v;
		bucket_data_type = new.data_type;

		ret = __mark_pointer(c, k, &p.ptr, sectors, data_type, new.gen,
				     &bucket_data_type,
				     &new.dirty_sectors,
				     &new.cached_sectors);
		if (ret)
			return ret;

		new.data_type = bucket_data_type;

		if (journal_seq) {
			new.journal_seq_valid = 1;
			new.journal_seq = journal_seq;
		}

		if (flags & BTREE_TRIGGER_NOATOMIC) {
			g->_mark = new;
			break;
		}
	} while ((v = atomic64_cmpxchg(&g->_mark.v,
				       old.v.counter,
				       new.v.counter)) != old.v.counter);

	bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);

	BUG_ON(!gc && bucket_became_unavailable(old, new));

	return 0;
}

static int bch2_mark_stripe_ptr(struct bch_fs *c,
				struct bch_extent_stripe_ptr p,
				enum bch_data_type data_type,
				struct bch_fs_usage *fs_usage,
				s64 sectors, unsigned flags)
{
	bool gc = flags & BTREE_TRIGGER_GC;
	struct bch_replicas_padded r;
	struct stripe *m;
	unsigned i, blocks_nonempty = 0;

	m = genradix_ptr(&c->stripes[gc], p.idx);

	spin_lock(&c->ec_stripes_heap_lock);

	if (!m || !m->alive) {
		spin_unlock(&c->ec_stripes_heap_lock);
		bch_err_ratelimited(c, "pointer to nonexistent stripe %llu",
				    (u64) p.idx);
		return -EIO;
	}

	m->block_sectors[p.block] += sectors;

	r = m->r;

	for (i = 0; i < m->nr_blocks; i++)
		blocks_nonempty += m->block_sectors[i] != 0;

	if (m->blocks_nonempty != blocks_nonempty) {
		m->blocks_nonempty = blocks_nonempty;
		if (!gc)
			bch2_stripes_heap_update(c, m, p.idx);
	}

	spin_unlock(&c->ec_stripes_heap_lock);

	r.e.data_type = data_type;
	update_replicas(c, fs_usage, &r.e, sectors);

	return 0;
}

static int bch2_mark_extent(struct bch_fs *c,
			    struct bkey_s_c old, struct bkey_s_c new,
			    unsigned offset, s64 sectors,
			    enum bch_data_type data_type,
			    struct bch_fs_usage *fs_usage,
			    unsigned journal_seq, unsigned flags)
{
	struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	struct bch_replicas_padded r;
	s64 dirty_sectors = 0;
	bool stale;
	int ret;

	r.e.data_type	= data_type;
	r.e.nr_devs	= 0;
	r.e.nr_required	= 1;

	BUG_ON(!sectors);

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		s64 disk_sectors = data_type == BCH_DATA_btree
			? sectors
			: ptr_disk_sectors_delta(p, offset, sectors, flags);

		ret = bch2_mark_pointer(c, k, p, disk_sectors, data_type,
					fs_usage, journal_seq, flags);
		if (ret < 0)
			return ret;

		stale = ret > 0;

		if (p.ptr.cached) {
			if (!stale)
				update_cached_sectors(c, fs_usage, p.ptr.dev,
						      disk_sectors);
		} else if (!p.has_ec) {
			dirty_sectors	       += disk_sectors;
			r.e.devs[r.e.nr_devs++]	= p.ptr.dev;
		} else {
			ret = bch2_mark_stripe_ptr(c, p.ec, data_type,
					fs_usage, disk_sectors, flags);
			if (ret)
				return ret;

			/*
			 * There may be other dirty pointers in this extent, but
			 * if so they're not required for mounting if we have an
			 * erasure coded pointer in this extent:
			 */
			r.e.nr_required = 0;
		}
	}

	if (r.e.nr_devs)
		update_replicas(c, fs_usage, &r.e, dirty_sectors);

	return 0;
}

static int bch2_mark_stripe(struct bch_fs *c,
			    struct bkey_s_c old, struct bkey_s_c new,
			    struct bch_fs_usage *fs_usage,
			    u64 journal_seq, unsigned flags)
{
	bool gc = flags & BTREE_TRIGGER_GC;
	size_t idx = new.k->p.offset;
	const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe
		? bkey_s_c_to_stripe(old).v : NULL;
	const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe
		? bkey_s_c_to_stripe(new).v : NULL;
	struct stripe *m = genradix_ptr(&c->stripes[gc], idx);
	unsigned i;
	int ret;

	if (!m || (old_s && !m->alive)) {
		bch_err_ratelimited(c, "error marking nonexistent stripe %zu",
				    idx);
		return -1;
	}

	if (!new_s) {
		/* Deleting: */
		for (i = 0; i < old_s->nr_blocks; i++) {
			ret = bucket_set_stripe(c, old, i, fs_usage,
						journal_seq, flags, false);
			if (ret)
				return ret;
		}

		if (!gc && m->on_heap) {
			spin_lock(&c->ec_stripes_heap_lock);
			bch2_stripes_heap_del(c, m, idx);
			spin_unlock(&c->ec_stripes_heap_lock);
		}

		if (gc)
			update_replicas(c, fs_usage, &m->r.e,
					-((s64) m->sectors * m->nr_redundant));

		memset(m, 0, sizeof(*m));
	} else {
		BUG_ON(old_s && new_s->nr_blocks != old_s->nr_blocks);
		BUG_ON(old_s && new_s->nr_redundant != old_s->nr_redundant);

		for (i = 0; i < new_s->nr_blocks; i++) {
			if (!old_s ||
			    memcmp(new_s->ptrs + i,
				   old_s->ptrs + i,
				   sizeof(struct bch_extent_ptr))) {

				if (old_s) {
					ret = bucket_set_stripe(c, old, i, fs_usage,
								journal_seq, flags, false);
					if (ret)
						return ret;
				}
				ret = bucket_set_stripe(c, new, i, fs_usage,
							journal_seq, flags, true);
				if (ret)
					return ret;
			}
		}

		m->alive	= true;
		m->sectors	= le16_to_cpu(new_s->sectors);
		m->algorithm	= new_s->algorithm;
		m->nr_blocks	= new_s->nr_blocks;
		m->nr_redundant	= new_s->nr_redundant;
		m->blocks_nonempty = 0;

		for (i = 0; i < new_s->nr_blocks; i++) {
			m->block_sectors[i] =
				stripe_blockcount_get(new_s, i);
			m->blocks_nonempty += !!m->block_sectors[i];
		}

		if (gc && old_s)
			update_replicas(c, fs_usage, &m->r.e,
					-((s64) m->sectors * m->nr_redundant));

		bch2_bkey_to_replicas(&m->r.e, new);

		if (gc)
			update_replicas(c, fs_usage, &m->r.e,
					((s64) m->sectors * m->nr_redundant));

		if (!gc) {
			spin_lock(&c->ec_stripes_heap_lock);
			bch2_stripes_heap_update(c, m, idx);
			spin_unlock(&c->ec_stripes_heap_lock);
		}
	}

	return 0;
}

static int bch2_mark_key_locked(struct bch_fs *c,
		   struct bkey_s_c old,
		   struct bkey_s_c new,
		   unsigned offset, s64 sectors,
		   struct bch_fs_usage *fs_usage,
		   u64 journal_seq, unsigned flags)
{
	struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old;
	int ret = 0;

	BUG_ON(!(flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)));

	preempt_disable();

	if (!fs_usage || (flags & BTREE_TRIGGER_GC))
		fs_usage = fs_usage_ptr(c, journal_seq,
					flags & BTREE_TRIGGER_GC);

	switch (k.k->type) {
	case KEY_TYPE_alloc:
		ret = bch2_mark_alloc(c, old, new, fs_usage, journal_seq, flags);
		break;
	case KEY_TYPE_btree_ptr:
	case KEY_TYPE_btree_ptr_v2:
		sectors = !(flags & BTREE_TRIGGER_OVERWRITE)
			?  c->opts.btree_node_size
			: -c->opts.btree_node_size;

		ret = bch2_mark_extent(c, old, new, offset, sectors,
				BCH_DATA_btree, fs_usage, journal_seq, flags);
		break;
	case KEY_TYPE_extent:
	case KEY_TYPE_reflink_v:
		ret = bch2_mark_extent(c, old, new, offset, sectors,
				BCH_DATA_user, fs_usage, journal_seq, flags);
		break;
	case KEY_TYPE_stripe:
		ret = bch2_mark_stripe(c, old, new, fs_usage, journal_seq, flags);
		break;
	case KEY_TYPE_inode:
		fs_usage->nr_inodes += new.k->type == KEY_TYPE_inode;
		fs_usage->nr_inodes -= old.k->type == KEY_TYPE_inode;
		break;
	case KEY_TYPE_reservation: {
		unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;

		sectors *= replicas;
		replicas = clamp_t(unsigned, replicas, 1,
				   ARRAY_SIZE(fs_usage->persistent_reserved));

		fs_usage->reserved				+= sectors;
		fs_usage->persistent_reserved[replicas - 1]	+= sectors;
		break;
	}
	}

	preempt_enable();

	return ret;
}

int bch2_mark_key(struct bch_fs *c, struct bkey_s_c new,
		  unsigned offset, s64 sectors,
		  struct bch_fs_usage *fs_usage,
		  u64 journal_seq, unsigned flags)
{
	struct bkey deleted;
	struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
	int ret;

	bkey_init(&deleted);

	percpu_down_read(&c->mark_lock);
	ret = bch2_mark_key_locked(c, old, new, offset, sectors,
				   fs_usage, journal_seq,
				   BTREE_TRIGGER_INSERT|flags);
	percpu_up_read(&c->mark_lock);

	return ret;
}

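/*
 * Trigger hook for transaction commits: mark the key being inserted, and for
 * extent btrees walk the node for existing keys the insert overlaps with,
 * computing the (offset, sectors) range of each overlap so the overwritten
 * keys' accounting can be backed out:
 */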
int bch2_mark_update(struct btree_trans *trans,
		     struct btree_iter *iter,
		     struct bkey_i *new,
		     struct bch_fs_usage *fs_usage,
		     unsigned flags)
{
	struct bch_fs *c = trans->c;
	struct btree *b = iter_l(iter)->b;
	struct btree_node_iter node_iter = iter_l(iter)->iter;
	struct bkey_packed *_old;
	struct bkey_s_c old;
	struct bkey unpacked;
	int ret = 0;

	if (unlikely(flags & BTREE_TRIGGER_NORUN))
		return 0;

	if (!btree_node_type_needs_gc(iter->btree_id))
		return 0;

	bkey_init(&unpacked);
	old = (struct bkey_s_c) { &unpacked, NULL };

	if (!btree_node_type_is_extents(iter->btree_id)) {
		/* iterators should be uptodate, shouldn't get errors here: */
		if (btree_iter_type(iter) != BTREE_ITER_CACHED) {
			old = bch2_btree_iter_peek_slot(iter);
			BUG_ON(bkey_err(old));
		} else {
			struct bkey_cached *ck = (void *) iter->l[0].b;

			if (ck->valid)
				old = bkey_i_to_s_c(ck->k);
		}

		if (old.k->type == new->k.type) {
			bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), 0, 0,
				fs_usage, trans->journal_res.seq,
				BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);

		} else {
			bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), 0, 0,
				fs_usage, trans->journal_res.seq,
				BTREE_TRIGGER_INSERT|flags);
			bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), 0, 0,
				fs_usage, trans->journal_res.seq,
				BTREE_TRIGGER_OVERWRITE|flags);
		}
	} else {
		BUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED);
		bch2_mark_key_locked(c, old, bkey_i_to_s_c(new),
			0, new->k.size,
			fs_usage, trans->journal_res.seq,
			BTREE_TRIGGER_INSERT|flags);

		while ((_old = bch2_btree_node_iter_peek(&node_iter, b))) {
			unsigned offset = 0;
			s64 sectors;

			old = bkey_disassemble(b, _old, &unpacked);
			sectors = -((s64) old.k->size);

			flags |= BTREE_TRIGGER_OVERWRITE;

			if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0)
				return 0;

			switch (bch2_extent_overlap(&new->k, old.k)) {
			case BCH_EXTENT_OVERLAP_ALL:
				offset = 0;
				sectors = -((s64) old.k->size);
				break;
			case BCH_EXTENT_OVERLAP_BACK:
				offset = bkey_start_offset(&new->k) -
					bkey_start_offset(old.k);
				sectors = bkey_start_offset(&new->k) -
					old.k->p.offset;
				break;
			case BCH_EXTENT_OVERLAP_FRONT:
				offset = 0;
				sectors = bkey_start_offset(old.k) -
					new->k.p.offset;
				break;
			case BCH_EXTENT_OVERLAP_MIDDLE:
				offset = bkey_start_offset(&new->k) -
					bkey_start_offset(old.k);
				sectors = -((s64) new->k.size);
				flags |= BTREE_TRIGGER_OVERWRITE_SPLIT;
				break;
			}

			BUG_ON(sectors >= 0);

			ret = bch2_mark_key_locked(c, old, bkey_i_to_s_c(new),
					offset, sectors, fs_usage,
					trans->journal_res.seq, flags) ?: 1;
			if (ret <= 0)
				break;

			bch2_btree_node_iter_advance(&node_iter, b);
		}
	}

	return ret;
}

void bch2_trans_fs_usage_apply(struct btree_trans *trans,
			       struct bch_fs_usage_online *fs_usage)
{
	struct bch_fs *c = trans->c;
	struct btree_insert_entry *i;
	static int warned_disk_usage = 0;
	u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
	char buf[200];

	if (!bch2_fs_usage_apply(c, fs_usage, trans->disk_res,
				 trans->journal_res.seq) ||
	    warned_disk_usage ||
	    xchg(&warned_disk_usage, 1))
		return;

	bch_err(c, "disk usage increased more than %llu sectors reserved",
		disk_res_sectors);

	trans_for_each_update(trans, i) {
		pr_err("while inserting");
		bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k));
		pr_err("%s", buf);
		pr_err("overlapping with");

		if (btree_iter_type(i->iter) != BTREE_ITER_CACHED) {
			struct btree *b = iter_l(i->iter)->b;
			struct btree_node_iter node_iter = iter_l(i->iter)->iter;
			struct bkey_packed *_k;

			while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) {
				struct bkey unpacked;
				struct bkey_s_c k;

				pr_info("_k %px format %u", _k, _k->format);
				k = bkey_disassemble(b, _k, &unpacked);

				if (btree_node_is_extents(b)
				    ? bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) <= 0
				    : bkey_cmp(i->k->k.p, k.k->p))
					break;

				bch2_bkey_val_to_text(&PBUF(buf), c, k);
				pr_err("%s", buf);

				bch2_btree_node_iter_advance(&node_iter, b);
			}
		} else {
			struct bkey_cached *ck = (void *) i->iter->l[0].b;

			if (ck->valid) {
				bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(ck->k));
				pr_err("%s", buf);
			}
		}
	}
}

932aa837 KO |
1545 | /* trans_mark: */ |
1546 | ||
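/*
 * The helpers below implement the transactional ("trans_mark") triggers:
 * rather than updating in-memory bucket marks directly, they record their
 * effects as further btree updates (alloc keys, stripe keys, reflink
 * refcounts) and replicas deltas queued on the current btree_trans, so the
 * accounting can be committed together with the update that caused it.
 * trans_get_update()/trans_get_key() look up a key for these triggers,
 * preferring a value already queued as an update in this transaction.
 */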
5d20ba48 KO |
1547 | static struct btree_iter *trans_get_update(struct btree_trans *trans, |
1548 | enum btree_id btree_id, struct bpos pos, | |
1549 | struct bkey_s_c *k) | |
932aa837 | 1550 | { |
e3d3a9d9 | 1551 | struct btree_insert_entry *i; |
932aa837 | 1552 | |
a7199432 | 1553 | trans_for_each_update(trans, i) |
36e9d698 | 1554 | if (i->iter->btree_id == btree_id && |
e3d3a9d9 KO |
1555 | (btree_node_type_is_extents(btree_id) |
1556 | ? bkey_cmp(pos, bkey_start_pos(&i->k->k)) >= 0 && | |
1557 | bkey_cmp(pos, i->k->k.p) < 0 | |
1558 | : !bkey_cmp(pos, i->iter->pos))) { | |
5d20ba48 KO |
1559 | *k = bkey_i_to_s_c(i->k); |
1560 | return i->iter; | |
932aa837 KO |
1561 | } |
1562 | ||
5d20ba48 KO |
1563 | return NULL; |
1564 | } | |
1565 | ||
1566 | static int trans_get_key(struct btree_trans *trans, | |
1567 | enum btree_id btree_id, struct bpos pos, | |
1568 | struct btree_iter **iter, | |
1569 | struct bkey_s_c *k) | |
1570 | { | |
1571 | unsigned flags = btree_id != BTREE_ID_ALLOC | |
1572 | ? BTREE_ITER_SLOTS | |
1573 | : BTREE_ITER_CACHED; | |
1574 | int ret; | |
1575 | ||
1576 | *iter = trans_get_update(trans, btree_id, pos, k); | |
1577 | if (*iter) | |
1578 | return 1; | |
1579 | ||
64bc0011 | 1580 | *iter = bch2_trans_get_iter(trans, btree_id, pos, |
5d20ba48 | 1581 | flags|BTREE_ITER_INTENT); |
5d20ba48 | 1582 | *k = __bch2_btree_iter_peek(*iter, flags); |
932aa837 KO |
1583 | ret = bkey_err(*k); |
1584 | if (ret) | |
1585 | bch2_trans_iter_put(trans, *iter); | |
1586 | return ret; | |
1587 | } | |
1588 | ||
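/*
 * Get an iterator to (and the current unpacked value of) the alloc key for
 * the bucket containing @ptr: prefer a matching update already queued in
 * this transaction; otherwise take a cached iterator and fall back to the
 * in-memory bucket mark via alloc_mem_to_key().
 */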
39283c71 KO |
1589 | static int bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter **_iter, |
1590 | const struct bch_extent_ptr *ptr, | |
1591 | struct bkey_alloc_unpacked *u) | |
932aa837 KO |
1592 | { |
1593 | struct bch_fs *c = trans->c; | |
39283c71 KO |
1594 | struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); |
1595 | struct bpos pos = POS(ptr->dev, PTR_BUCKET_NR(ca, ptr)); | |
255adc51 | 1596 | struct bucket *g; |
39283c71 KO |
1597 | struct btree_iter *iter; |
1598 | struct bkey_s_c k; | |
932aa837 KO |
1599 | int ret; |
1600 | ||
39283c71 | 1601 | iter = trans_get_update(trans, BTREE_ID_ALLOC, pos, &k); |
5d20ba48 | 1602 | if (iter) { |
39283c71 | 1603 | *u = bch2_alloc_unpack(k); |
67163cde | 1604 | } else { |
5d20ba48 KO |
1605 | iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, pos, |
1606 | BTREE_ITER_CACHED| | |
1607 | BTREE_ITER_CACHED_NOFILL| | |
1608 | BTREE_ITER_INTENT); | |
5d20ba48 | 1609 | ret = bch2_btree_iter_traverse(iter); |
39283c71 KO |
1610 | if (ret) { |
1611 | bch2_trans_iter_put(trans, iter); | |
1612 | return ret; | |
1613 | } | |
255adc51 | 1614 | |
5d20ba48 KO |
1615 | percpu_down_read(&c->mark_lock); |
1616 | g = bucket(ca, pos.offset); | |
39283c71 | 1617 | *u = alloc_mem_to_key(g, READ_ONCE(g->mark)); |
5d20ba48 KO |
1618 | percpu_up_read(&c->mark_lock); |
1619 | } | |
255adc51 | 1620 | |
39283c71 KO |
1621 | *_iter = iter; |
1622 | return 0; | |
1623 | } | |
1624 | ||
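/*
 * Apply a pointer's sector delta to the bucket it points into: update the
 * unpacked alloc information via __mark_pointer(), then repack it into a
 * new alloc key and queue that as an update in the current transaction.
 */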
1625 | static int bch2_trans_mark_pointer(struct btree_trans *trans, | |
1626 | struct bkey_s_c k, struct extent_ptr_decoded p, | |
1627 | s64 sectors, enum bch_data_type data_type) | |
1628 | { | |
1629 | struct bch_fs *c = trans->c; | |
1630 | struct btree_iter *iter; | |
1631 | struct bkey_alloc_unpacked u; | |
1632 | struct bkey_i_alloc *a; | |
1633 | int ret; | |
1634 | ||
1635 | ret = bch2_trans_start_alloc_update(trans, &iter, &p.ptr, &u); | |
1636 | if (ret) | |
1637 | return ret; | |
1638 | ||
1639 | ret = __mark_pointer(c, k, &p.ptr, sectors, data_type, u.gen, &u.data_type, | |
9ef846a7 KO |
1640 | &u.dirty_sectors, &u.cached_sectors); |
1641 | if (ret) | |
ef496cd2 | 1642 | goto out; |
932aa837 | 1643 | |
24326cd1 | 1644 | a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8); |
91052b9d | 1645 | ret = PTR_ERR_OR_ZERO(a); |
932aa837 KO |
1646 | if (ret) |
1647 | goto out; | |
1648 | ||
91052b9d | 1649 | bkey_alloc_init(&a->k_i); |
39283c71 | 1650 | a->k.p = iter->pos; |
932aa837 | 1651 | bch2_alloc_pack(a, u); |
24326cd1 | 1652 | bch2_trans_update(trans, iter, &a->k_i, 0); |
932aa837 KO |
1653 | out: |
1654 | bch2_trans_iter_put(trans, iter); | |
1655 | return ret; | |
1656 | } | |
1657 | ||
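/*
 * For a pointer carrying erasure coding information, adjust the block
 * sector count in the stripe key it refers to and account the sectors
 * against the stripe's replicas entry.
 */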
1658 | static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, | |
2a3731e3 | 1659 | struct extent_ptr_decoded p, |
af4d05c4 | 1660 | s64 sectors, enum bch_data_type data_type) |
932aa837 | 1661 | { |
76426098 | 1662 | struct bch_fs *c = trans->c; |
932aa837 KO |
1663 | struct btree_iter *iter; |
1664 | struct bkey_s_c k; | |
24326cd1 | 1665 | struct bkey_i_stripe *s; |
af4d05c4 | 1666 | struct bch_replicas_padded r; |
932aa837 KO |
1667 | int ret = 0; |
1668 | ||
2a3731e3 | 1669 | ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.ec.idx), &iter, &k); |
67163cde | 1670 | if (ret < 0) |
932aa837 KO |
1671 | return ret; |
1672 | ||
1673 | if (k.k->type != KEY_TYPE_stripe) { | |
76426098 KO |
1674 | bch2_fs_inconsistent(c, |
1675 | "pointer to nonexistent stripe %llu", | |
2a3731e3 KO |
1676 | (u64) p.ec.idx); |
1677 | ret = -EIO; | |
1678 | goto out; | |
1679 | } | |
1680 | ||
1681 | if (!bch2_ptr_matches_stripe(bkey_s_c_to_stripe(k).v, p)) { | |
1682 | bch2_fs_inconsistent(c, | |
1683 | "stripe pointer doesn't match stripe %llu", | |
1684 | (u64) p.ec.idx); | |
76426098 | 1685 | ret = -EIO; |
932aa837 KO |
1686 | goto out; |
1687 | } | |
1688 | ||
24326cd1 KO |
1689 | s = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); |
1690 | ret = PTR_ERR_OR_ZERO(s); | |
932aa837 KO |
1691 | if (ret) |
1692 | goto out; | |
1693 | ||
24326cd1 | 1694 | bkey_reassemble(&s->k_i, k); |
2a3731e3 KO |
1695 | stripe_blockcount_set(&s->v, p.ec.block, |
1696 | stripe_blockcount_get(&s->v, p.ec.block) + | |
43de7376 | 1697 | sectors); |
24326cd1 | 1698 | bch2_trans_update(trans, iter, &s->k_i, 0); |
af4d05c4 KO |
1699 | |
1700 | bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i)); | |
1701 | r.e.data_type = data_type; | |
1702 | update_replicas_list(trans, &r.e, sectors); | |
932aa837 KO |
1703 | out: |
1704 | bch2_trans_iter_put(trans, iter); | |
1705 | return ret; | |
1706 | } | |
1707 | ||
1708 | static int bch2_trans_mark_extent(struct btree_trans *trans, | |
2cbe5cfe KO |
1709 | struct bkey_s_c k, unsigned offset, |
1710 | s64 sectors, unsigned flags, | |
1711 | enum bch_data_type data_type) | |
932aa837 KO |
1712 | { |
1713 | struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); | |
1714 | const union bch_extent_entry *entry; | |
1715 | struct extent_ptr_decoded p; | |
1716 | struct bch_replicas_padded r; | |
1717 | s64 dirty_sectors = 0; | |
1718 | bool stale; | |
932aa837 KO |
1719 | int ret; |
1720 | ||
1721 | r.e.data_type = data_type; | |
1722 | r.e.nr_devs = 0; | |
1723 | r.e.nr_required = 1; | |
1724 | ||
1725 | BUG_ON(!sectors); | |
1726 | ||
1727 | bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { | |
89fd25be | 1728 | s64 disk_sectors = data_type == BCH_DATA_btree |
932aa837 | 1729 | ? sectors |
2cbe5cfe | 1730 | : ptr_disk_sectors_delta(p, offset, sectors, flags); |
932aa837 | 1731 | |
9ef846a7 | 1732 | ret = bch2_trans_mark_pointer(trans, k, p, disk_sectors, |
3838be78 | 1733 | data_type); |
932aa837 KO |
1734 | if (ret < 0) |
1735 | return ret; | |
1736 | ||
1737 | stale = ret > 0; | |
1738 | ||
1739 | if (p.ptr.cached) { | |
df5d4dae | 1740 | if (!stale) |
3838be78 | 1741 | update_cached_sectors_list(trans, p.ptr.dev, |
932aa837 | 1742 | disk_sectors); |
37954a27 | 1743 | } else if (!p.has_ec) { |
932aa837 KO |
1744 | dirty_sectors += disk_sectors; |
1745 | r.e.devs[r.e.nr_devs++] = p.ptr.dev; | |
1746 | } else { | |
2a3731e3 | 1747 | ret = bch2_trans_mark_stripe_ptr(trans, p, |
af4d05c4 | 1748 | disk_sectors, data_type); |
37954a27 KO |
1749 | if (ret) |
1750 | return ret; | |
932aa837 KO |
1751 | |
1752 | r.e.nr_required = 0; | |
1753 | } | |
1754 | } | |
1755 | ||
43de7376 KO |
1756 | if (r.e.nr_devs) |
1757 | update_replicas_list(trans, &r.e, dirty_sectors); | |
932aa837 KO |
1758 | |
1759 | return 0; | |
1760 | } | |
1761 | ||
719fe7fb KO |
1762 | static int bch2_trans_mark_stripe_alloc_ref(struct btree_trans *trans, |
1763 | const struct bch_extent_ptr *ptr, | |
1764 | s64 sectors, bool parity) | |
1765 | { | |
1766 | struct bkey_i_alloc *a; | |
1767 | struct btree_iter *iter; | |
1768 | struct bkey_alloc_unpacked u; | |
1769 | int ret; | |
1770 | ||
1771 | ret = bch2_trans_start_alloc_update(trans, &iter, ptr, &u); | |
1772 | if (ret) | |
1773 | return ret; | |
1774 | ||
1775 | if (parity) { | |
1776 | u.dirty_sectors += sectors; | |
1777 | u.data_type = u.dirty_sectors | |
1778 | ? BCH_DATA_parity | |
1779 | : 0; | |
1780 | } | |
1781 | ||
1782 | a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8); | |
1783 | ret = PTR_ERR_OR_ZERO(a); | |
1784 | if (ret) | |
1785 | goto err; | |
1786 | ||
1787 | bkey_alloc_init(&a->k_i); | |
1788 | a->k.p = iter->pos; | |
1789 | bch2_alloc_pack(a, u); | |
1790 | bch2_trans_update(trans, iter, &a->k_i, 0); | |
1791 | err: | |
1792 | bch2_trans_iter_put(trans, iter); | |
1793 | return ret; | |
1794 | } | |
1795 | ||
39283c71 | 1796 | static int bch2_trans_mark_stripe(struct btree_trans *trans, |
719fe7fb | 1797 | struct bkey_s_c old, struct bkey_s_c new, |
af4d05c4 | 1798 | unsigned flags) |
39283c71 | 1799 | { |
719fe7fb KO |
1800 | const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe |
1801 | ? bkey_s_c_to_stripe(old).v : NULL; | |
1802 | const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe | |
1803 | ? bkey_s_c_to_stripe(new).v : NULL; | |
af4d05c4 | 1804 | struct bch_replicas_padded r; |
39283c71 KO |
1805 | unsigned i; |
1806 | int ret = 0; | |
1807 | ||
1808 | /* | |
719fe7fb | 1809 | * If the pointers aren't changing, we don't need to do anything: |
39283c71 | 1810 | */ |
719fe7fb KO |
1811 | if (new_s && old_s && |
1812 | !memcmp(old_s->ptrs, new_s->ptrs, | |
1813 | new_s->nr_blocks * sizeof(struct bch_extent_ptr))) | |
1814 | return 0; | |
39283c71 | 1815 | |
719fe7fb KO |
1816 | if (new_s) { |
1817 | unsigned nr_data = new_s->nr_blocks - new_s->nr_redundant; | |
1818 | s64 sectors = le16_to_cpu(new_s->sectors); | |
af4d05c4 | 1819 | |
719fe7fb KO |
1820 | bch2_bkey_to_replicas(&r.e, new); |
1821 | update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant); | |
39283c71 | 1822 | |
719fe7fb KO |
1823 | for (i = 0; i < new_s->nr_blocks; i++) { |
1824 | bool parity = i >= nr_data; | |
1825 | ||
1826 | ret = bch2_trans_mark_stripe_alloc_ref(trans, | |
1827 | &new_s->ptrs[i], sectors, parity); | |
1828 | if (ret) | |
1829 | return ret; | |
af4d05c4 | 1830 | } |
719fe7fb | 1831 | } |
af4d05c4 | 1832 | |
719fe7fb KO |
1833 | if (old_s) { |
1834 | unsigned nr_data = old_s->nr_blocks - old_s->nr_redundant; | |
1835 | s64 sectors = -((s64) le16_to_cpu(old_s->sectors)); | |
1836 | ||
1837 | bch2_bkey_to_replicas(&r.e, old); | |
1838 | update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant); | |
1839 | ||
1840 | for (i = 0; i < old_s->nr_blocks; i++) { | |
1841 | bool parity = i >= nr_data; | |
1842 | ||
1843 | ret = bch2_trans_mark_stripe_alloc_ref(trans, | |
1844 | &old_s->ptrs[i], sectors, parity); | |
1845 | if (ret) | |
1846 | return ret; | |
1847 | } | |
39283c71 KO |
1848 | } |
1849 | ||
1850 | return ret; | |
1851 | } | |
1852 | ||
801a3de6 KO |
1853 | static __le64 *bkey_refcount(struct bkey_i *k) |
1854 | { | |
1855 | switch (k->k.type) { | |
1856 | case KEY_TYPE_reflink_v: | |
1857 | return &bkey_i_to_reflink_v(k)->v.refcount; | |
1858 | case KEY_TYPE_indirect_inline_data: | |
1859 | return &bkey_i_to_indirect_inline_data(k)->v.refcount; | |
1860 | default: | |
1861 | return NULL; | |
1862 | } | |
1863 | } | |
1864 | ||
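/*
 * Reflinked extents point into the reflink btree; marking one means
 * adjusting the refcounts of the indirect extents it covers.
 * __bch2_trans_mark_reflink_p() updates a single indirect extent (deleting
 * it when its refcount drops to zero) and returns how many sectors it
 * covered, so bch2_trans_mark_reflink_p() below can walk the range in
 * chunks.
 */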
76426098 KO |
1865 | static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, |
1866 | struct bkey_s_c_reflink_p p, | |
1867 | u64 idx, unsigned sectors, | |
1868 | unsigned flags) | |
1869 | { | |
1870 | struct bch_fs *c = trans->c; | |
1871 | struct btree_iter *iter; | |
76426098 | 1872 | struct bkey_s_c k; |
801a3de6 KO |
1873 | struct bkey_i *n; |
1874 | __le64 *refcount; | |
76426098 KO |
1875 | s64 ret; |
1876 | ||
1877 | ret = trans_get_key(trans, BTREE_ID_REFLINK, | |
1878 | POS(0, idx), &iter, &k); | |
67163cde | 1879 | if (ret < 0) |
76426098 KO |
1880 | return ret; |
1881 | ||
2d594dfb | 1882 | if ((flags & BTREE_TRIGGER_OVERWRITE) && |
76426098 KO |
1883 | (bkey_start_offset(k.k) < idx || |
1884 | k.k->p.offset > idx + sectors)) | |
1885 | goto out; | |
1886 | ||
2e70ce56 | 1887 | sectors = k.k->p.offset - idx; |
76426098 | 1888 | |
801a3de6 KO |
1889 | n = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); |
1890 | ret = PTR_ERR_OR_ZERO(n); | |
76426098 KO |
1891 | if (ret) |
1892 | goto err; | |
1893 | ||
801a3de6 KO |
1894 | bkey_reassemble(n, k); |
1895 | ||
1896 | refcount = bkey_refcount(n); | |
1897 | if (!refcount) { | |
1898 | bch2_fs_inconsistent(c, | |
1899 | "%llu:%llu len %u points to nonexistent indirect extent %llu", | |
1900 | p.k->p.inode, p.k->p.offset, p.k->size, idx); | |
1901 | ret = -EIO; | |
1902 | goto err; | |
1903 | } | |
76426098 | 1904 | |
801a3de6 | 1905 | le64_add_cpu(refcount, !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1); |
76426098 | 1906 | |
801a3de6 KO |
1907 | if (!*refcount) { |
1908 | n->k.type = KEY_TYPE_deleted; | |
1909 | set_bkey_val_u64s(&n->k, 0); | |
76426098 | 1910 | } |
24326cd1 | 1911 | |
2e70ce56 KO |
1912 | bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k)); |
1913 | BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK); | |
1914 | ||
801a3de6 | 1915 | bch2_trans_update(trans, iter, n, 0); |
76426098 | 1916 | out: |
2e70ce56 | 1917 | ret = sectors; |
76426098 KO |
1918 | err: |
1919 | bch2_trans_iter_put(trans, iter); | |
1920 | return ret; | |
1921 | } | |
1922 | ||
1923 | static int bch2_trans_mark_reflink_p(struct btree_trans *trans, | |
1924 | struct bkey_s_c_reflink_p p, unsigned offset, | |
1925 | s64 sectors, unsigned flags) | |
1926 | { | |
1927 | u64 idx = le64_to_cpu(p.v->idx) + offset; | |
1928 | s64 ret = 0; | |
1929 | ||
1930 | sectors = abs(sectors); | |
1931 | BUG_ON(offset + sectors > p.k->size); | |
1932 | ||
1933 | while (sectors) { | |
1934 | ret = __bch2_trans_mark_reflink_p(trans, p, idx, sectors, flags); | |
1935 | if (ret < 0) | |
1936 | break; | |
1937 | ||
1938 | idx += ret; | |
1939 | sectors = max_t(s64, 0LL, sectors - ret); | |
1940 | ret = 0; | |
1941 | } | |
1942 | ||
1943 | return ret; | |
1944 | } | |
1945 | ||
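/*
 * Transactional counterpart to bch2_mark_key(): dispatch on the key type
 * and queue the appropriate alloc/stripe/reflink updates and replicas
 * deltas for this insert or overwrite.
 */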
719fe7fb KO |
1946 | int bch2_trans_mark_key(struct btree_trans *trans, |
1947 | struct bkey_s_c old, | |
1948 | struct bkey_s_c new, | |
2cbe5cfe | 1949 | unsigned offset, s64 sectors, unsigned flags) |
932aa837 KO |
1950 | { |
1951 | struct bch_fs *c = trans->c; | |
719fe7fb KO |
1952 | struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old; |
1953 | struct replicas_delta_list *d; | |
1954 | ||
1955 | BUG_ON(!(flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE))); | |
932aa837 KO |
1956 | |
1957 | switch (k.k->type) { | |
1958 | case KEY_TYPE_btree_ptr: | |
548b3d20 | 1959 | case KEY_TYPE_btree_ptr_v2: |
2d594dfb | 1960 | sectors = !(flags & BTREE_TRIGGER_OVERWRITE) |
6e738539 KO |
1961 | ? c->opts.btree_node_size |
1962 | : -c->opts.btree_node_size; | |
1963 | ||
2cbe5cfe | 1964 | return bch2_trans_mark_extent(trans, k, offset, sectors, |
89fd25be | 1965 | flags, BCH_DATA_btree); |
932aa837 | 1966 | case KEY_TYPE_extent: |
76426098 | 1967 | case KEY_TYPE_reflink_v: |
2cbe5cfe | 1968 | return bch2_trans_mark_extent(trans, k, offset, sectors, |
89fd25be | 1969 | flags, BCH_DATA_user); |
39283c71 | 1970 | case KEY_TYPE_stripe: |
719fe7fb KO |
1971 | return bch2_trans_mark_stripe(trans, old, new, flags); |
1972 | case KEY_TYPE_inode: { | |
1973 | int nr = (new.k->type == KEY_TYPE_inode) - | |
1974 | (old.k->type == KEY_TYPE_inode); | |
1975 | ||
1976 | if (nr) { | |
1977 | d = replicas_deltas_realloc(trans, 0); | |
1978 | d->nr_inodes += nr; | |
1979 | } | |
3838be78 | 1980 | |
932aa837 | 1981 | return 0; |
719fe7fb | 1982 | } |
932aa837 KO |
1983 | case KEY_TYPE_reservation: { |
1984 | unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; | |
1985 | ||
4ee202e2 | 1986 | d = replicas_deltas_realloc(trans, 0); |
3838be78 | 1987 | |
932aa837 KO |
1988 | sectors *= replicas; |
1989 | replicas = clamp_t(unsigned, replicas, 1, | |
77d63522 | 1990 | ARRAY_SIZE(d->persistent_reserved)); |
932aa837 | 1991 | |
77d63522 | 1992 | d->persistent_reserved[replicas - 1] += sectors; |
932aa837 KO |
1993 | return 0; |
1994 | } | |
76426098 KO |
1995 | case KEY_TYPE_reflink_p: |
1996 | return bch2_trans_mark_reflink_p(trans, | |
1997 | bkey_s_c_to_reflink_p(k), | |
1998 | offset, sectors, flags); | |
932aa837 KO |
1999 | default: |
2000 | return 0; | |
2001 | } | |
2002 | } | |
2003 | ||
2004 | int bch2_trans_mark_update(struct btree_trans *trans, | |
88767d65 | 2005 | struct btree_iter *iter, |
719fe7fb | 2006 | struct bkey_i *new, |
2d594dfb | 2007 | unsigned flags) |
932aa837 | 2008 | { |
719fe7fb | 2009 | struct bkey_s_c old; |
932aa837 KO |
2010 | int ret; |
2011 | ||
2d594dfb KO |
2012 | if (unlikely(flags & BTREE_TRIGGER_NORUN)) |
2013 | return 0; | |
2014 | ||
932aa837 KO |
2015 | if (!btree_node_type_needs_gc(iter->btree_id)) |
2016 | return 0; | |
2017 | ||
719fe7fb KO |
2018 | if (!btree_node_type_is_extents(iter->btree_id)) { |
2019 | /* iterators should be uptodate, shouldn't get errors here: */ | |
2020 | if (btree_iter_type(iter) != BTREE_ITER_CACHED) { | |
2021 | old = bch2_btree_iter_peek_slot(iter); | |
2022 | BUG_ON(bkey_err(old)); | |
2023 | } else { | |
2024 | struct bkey_cached *ck = (void *) iter->l[0].b; | |
2ca88e5a | 2025 | |
719fe7fb KO |
2026 | BUG_ON(!ck->valid); |
2027 | old = bkey_i_to_s_c(ck->k); | |
2028 | } | |
2ca88e5a | 2029 | |
719fe7fb KO |
2030 | if (old.k->type == new->k.type) { |
2031 | ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0, | |
2032 | BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags); | |
2033 | } else { | |
2034 | ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0, | |
2035 | BTREE_TRIGGER_INSERT|flags) ?: | |
2036 | bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0, | |
2037 | BTREE_TRIGGER_OVERWRITE|flags); | |
2038 | } | |
2039 | } else { | |
2040 | struct btree *b = iter_l(iter)->b; | |
2041 | struct btree_node_iter node_iter = iter_l(iter)->iter; | |
2042 | struct bkey_packed *_old; | |
932aa837 | 2043 | struct bkey unpacked; |
932aa837 | 2044 | |
719fe7fb | 2045 | EBUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED); |
932aa837 | 2046 | |
719fe7fb KO |
2047 | bkey_init(&unpacked); |
2048 | old = (struct bkey_s_c) { &unpacked, NULL }; | |
2049 | ||
2050 | ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), | |
2051 | 0, new->k.size, | |
2052 | BTREE_TRIGGER_INSERT); | |
2053 | if (ret) | |
2054 | return ret; | |
2055 | ||
2056 | while ((_old = bch2_btree_node_iter_peek(&node_iter, b))) { | |
2057 | unsigned flags = BTREE_TRIGGER_OVERWRITE; | |
2058 | unsigned offset = 0; | |
2059 | s64 sectors; | |
2060 | ||
2061 | old = bkey_disassemble(b, _old, &unpacked); | |
2062 | sectors = -((s64) old.k->size); | |
2063 | ||
2064 | flags |= BTREE_TRIGGER_OVERWRITE; | |
2065 | ||
2066 | if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0) | |
2067 | return 0; | |
932aa837 | 2068 | |
719fe7fb | 2069 | switch (bch2_extent_overlap(&new->k, old.k)) { |
932aa837 | 2070 | case BCH_EXTENT_OVERLAP_ALL: |
2cbe5cfe | 2071 | offset = 0; |
719fe7fb | 2072 | sectors = -((s64) old.k->size); |
932aa837 KO |
2073 | break; |
2074 | case BCH_EXTENT_OVERLAP_BACK: | |
719fe7fb KO |
2075 | offset = bkey_start_offset(&new->k) - |
2076 | bkey_start_offset(old.k); | |
2077 | sectors = bkey_start_offset(&new->k) - | |
2078 | old.k->p.offset; | |
932aa837 KO |
2079 | break; |
2080 | case BCH_EXTENT_OVERLAP_FRONT: | |
2cbe5cfe | 2081 | offset = 0; |
719fe7fb KO |
2082 | sectors = bkey_start_offset(old.k) - |
2083 | new->k.p.offset; | |
932aa837 KO |
2084 | break; |
2085 | case BCH_EXTENT_OVERLAP_MIDDLE: | |
719fe7fb KO |
2086 | offset = bkey_start_offset(&new->k) - |
2087 | bkey_start_offset(old.k); | |
2088 | sectors = -((s64) new->k.size); | |
2d594dfb | 2089 | flags |= BTREE_TRIGGER_OVERWRITE_SPLIT; |
932aa837 KO |
2090 | break; |
2091 | } | |
2092 | ||
2093 | BUG_ON(sectors >= 0); | |
932aa837 | 2094 | |
719fe7fb KO |
2095 | ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), |
2096 | offset, sectors, flags); | |
2097 | if (ret) | |
2098 | return ret; | |
932aa837 | 2099 | |
719fe7fb KO |
2100 | bch2_btree_node_iter_advance(&node_iter, b); |
2101 | } | |
932aa837 KO |
2102 | } |
2103 | ||
719fe7fb | 2104 | return ret; |
932aa837 KO |
2105 | } |
2106 | ||
1c6fdbd8 KO |
2107 | /* Disk reservations: */ |
2108 | ||
1c6fdbd8 KO |
2109 | #define SECTORS_CACHE 1024 |
2110 | ||
2111 | int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, | |
2112 | unsigned sectors, int flags) | |
2113 | { | |
5663a415 | 2114 | struct bch_fs_pcpu *pcpu; |
1c6fdbd8 KO |
2115 | u64 old, v, get; |
2116 | s64 sectors_available; | |
2117 | int ret; | |
2118 | ||
9166b41d | 2119 | percpu_down_read(&c->mark_lock); |
1c6fdbd8 | 2120 | preempt_disable(); |
5663a415 | 2121 | pcpu = this_cpu_ptr(c->pcpu); |
1c6fdbd8 | 2122 | |
5663a415 | 2123 | if (sectors <= pcpu->sectors_available) |
1c6fdbd8 KO |
2124 | goto out; |
2125 | ||
2126 | v = atomic64_read(&c->sectors_available); | |
2127 | do { | |
2128 | old = v; | |
2129 | get = min((u64) sectors + SECTORS_CACHE, old); | |
2130 | ||
2131 | if (get < sectors) { | |
2132 | preempt_enable(); | |
1c6fdbd8 KO |
2133 | goto recalculate; |
2134 | } | |
2135 | } while ((v = atomic64_cmpxchg(&c->sectors_available, | |
2136 | old, old - get)) != old); | |
2137 | ||
5663a415 | 2138 | pcpu->sectors_available += get; |
1c6fdbd8 KO |
2139 | |
2140 | out: | |
5663a415 | 2141 | pcpu->sectors_available -= sectors; |
5e82a9a1 | 2142 | this_cpu_add(*c->online_reserved, sectors); |
5663a415 | 2143 | res->sectors += sectors; |
1c6fdbd8 | 2144 | |
1c6fdbd8 | 2145 | preempt_enable(); |
9166b41d | 2146 | percpu_up_read(&c->mark_lock); |
1c6fdbd8 KO |
2147 | return 0; |
2148 | ||
2149 | recalculate: | |
fca1223c | 2150 | mutex_lock(&c->sectors_available_lock); |
39fbc5a4 | 2151 | |
fca1223c KO |
2152 | percpu_u64_set(&c->pcpu->sectors_available, 0); |
2153 | sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free); | |
1c6fdbd8 KO |
2154 | |
2155 | if (sectors <= sectors_available || | |
2156 | (flags & BCH_DISK_RESERVATION_NOFAIL)) { | |
2157 | atomic64_set(&c->sectors_available, | |
2158 | max_t(s64, 0, sectors_available - sectors)); | |
5e82a9a1 | 2159 | this_cpu_add(*c->online_reserved, sectors); |
5663a415 | 2160 | res->sectors += sectors; |
1c6fdbd8 | 2161 | ret = 0; |
1c6fdbd8 KO |
2162 | } else { |
2163 | atomic64_set(&c->sectors_available, sectors_available); | |
2164 | ret = -ENOSPC; | |
2165 | } | |
2166 | ||
fca1223c KO |
2167 | mutex_unlock(&c->sectors_available_lock); |
2168 | percpu_up_read(&c->mark_lock); | |
1c6fdbd8 | 2169 | |
1c6fdbd8 KO |
2170 | return ret; |
2171 | } | |
2172 | ||
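/*
 * A minimal caller-side sketch (not taken from this file) of how the
 * reservation interface above is typically used: reserve sectors up front,
 * do the work, then drop whatever is left.  It assumes the
 * bch2_disk_reservation_put() helper declared in buckets.h and a
 * zero-initialized struct disk_reservation.
 */
static int example_reserve_and_write(struct bch_fs *c, unsigned sectors)
{
	struct disk_reservation res = { 0 };
	int ret;

	/* Fast path uses the per-cpu sectors_available cache filled above */
	ret = bch2_disk_reservation_add(c, &res, sectors, 0);
	if (ret)
		return ret;	/* -ENOSPC if the reservation couldn't be made */

	/*
	 * ... issue the write that consumes res.sectors ...
	 */

	/* Release whatever remains of the reservation */
	bch2_disk_reservation_put(c, &res);
	return 0;
}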
2173 | /* Startup/shutdown: */ | |
2174 | ||
2175 | static void buckets_free_rcu(struct rcu_head *rcu) | |
2176 | { | |
2177 | struct bucket_array *buckets = | |
2178 | container_of(rcu, struct bucket_array, rcu); | |
2179 | ||
2180 | kvpfree(buckets, | |
2181 | sizeof(struct bucket_array) + | |
2182 | buckets->nbuckets * sizeof(struct bucket)); | |
2183 | } | |
2184 | ||
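/*
 * Resize (or initially allocate) a device's bucket array and allocator
 * fifos/heap: allocate the new arrays, stop copygc, copy the old contents
 * across under the gc, bucket and mark locks when resizing, publish the
 * new bucket array with rcu_assign_pointer(), and free the old one via
 * RCU.
 */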
2185 | int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) | |
2186 | { | |
2187 | struct bucket_array *buckets = NULL, *old_buckets = NULL; | |
8eb7f3ee | 2188 | unsigned long *buckets_nouse = NULL; |
1c6fdbd8 KO |
2189 | alloc_fifo free[RESERVE_NR]; |
2190 | alloc_fifo free_inc; | |
2191 | alloc_heap alloc_heap; | |
1c6fdbd8 KO |
2192 | |
2193 | size_t btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE, | |
2194 | ca->mi.bucket_size / c->opts.btree_node_size); | |
2195 | /* XXX: these should be tunable */ | |
8b335bae | 2196 | size_t reserve_none = max_t(size_t, 1, nbuckets >> 9); |
3187aa8d | 2197 | size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 6); |
8b335bae | 2198 | size_t free_inc_nr = max(max_t(size_t, 1, nbuckets >> 12), |
d0cc3def | 2199 | btree_reserve * 2); |
e6d11615 | 2200 | bool resize = ca->buckets[0] != NULL; |
1c6fdbd8 KO |
2201 | int ret = -ENOMEM; |
2202 | unsigned i; | |
2203 | ||
2204 | memset(&free, 0, sizeof(free)); | |
2205 | memset(&free_inc, 0, sizeof(free_inc)); | |
2206 | memset(&alloc_heap, 0, sizeof(alloc_heap)); | |
1c6fdbd8 KO |
2207 | |
2208 | if (!(buckets = kvpmalloc(sizeof(struct bucket_array) + | |
2209 | nbuckets * sizeof(struct bucket), | |
2210 | GFP_KERNEL|__GFP_ZERO)) || | |
8eb7f3ee | 2211 | !(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) * |
1c6fdbd8 KO |
2212 | sizeof(unsigned long), |
2213 | GFP_KERNEL|__GFP_ZERO)) || | |
1c6fdbd8 KO |
2214 | !init_fifo(&free[RESERVE_MOVINGGC], |
2215 | copygc_reserve, GFP_KERNEL) || | |
2216 | !init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) || | |
b29e197a | 2217 | !init_fifo(&free_inc, free_inc_nr, GFP_KERNEL) || |
e6d11615 | 2218 | !init_heap(&alloc_heap, ALLOC_SCAN_BATCH(ca) << 1, GFP_KERNEL)) |
1c6fdbd8 KO |
2219 | goto err; |
2220 | ||
2221 | buckets->first_bucket = ca->mi.first_bucket; | |
2222 | buckets->nbuckets = nbuckets; | |
2223 | ||
e6d11615 | 2224 | bch2_copygc_stop(c); |
1c6fdbd8 KO |
2225 | |
2226 | if (resize) { | |
b9c3d139 | 2227 | down_write(&c->gc_lock); |
1c6fdbd8 | 2228 | down_write(&ca->bucket_lock); |
9166b41d | 2229 | percpu_down_write(&c->mark_lock); |
1c6fdbd8 KO |
2230 | } |
2231 | ||
2232 | old_buckets = bucket_array(ca); | |
2233 | ||
2234 | if (resize) { | |
2235 | size_t n = min(buckets->nbuckets, old_buckets->nbuckets); | |
2236 | ||
2237 | memcpy(buckets->b, | |
2238 | old_buckets->b, | |
2239 | n * sizeof(struct bucket)); | |
8eb7f3ee KO |
2240 | memcpy(buckets_nouse, |
2241 | ca->buckets_nouse, | |
1c6fdbd8 KO |
2242 | BITS_TO_LONGS(n) * sizeof(unsigned long)); |
2243 | } | |
2244 | ||
9ca53b55 | 2245 | rcu_assign_pointer(ca->buckets[0], buckets); |
1c6fdbd8 KO |
2246 | buckets = old_buckets; |
2247 | ||
8eb7f3ee | 2248 | swap(ca->buckets_nouse, buckets_nouse); |
1c6fdbd8 | 2249 | |
b9c3d139 | 2250 | if (resize) { |
9166b41d | 2251 | percpu_up_write(&c->mark_lock); |
b9c3d139 KO |
2252 | up_write(&c->gc_lock); |
2253 | } | |
1c6fdbd8 KO |
2254 | |
2255 | spin_lock(&c->freelist_lock); | |
2256 | for (i = 0; i < RESERVE_NR; i++) { | |
2257 | fifo_move(&free[i], &ca->free[i]); | |
2258 | swap(ca->free[i], free[i]); | |
2259 | } | |
2260 | fifo_move(&free_inc, &ca->free_inc); | |
2261 | swap(ca->free_inc, free_inc); | |
2262 | spin_unlock(&c->freelist_lock); | |
2263 | ||
2264 | /* with gc lock held, alloc_heap can't be in use: */ | |
2265 | swap(ca->alloc_heap, alloc_heap); | |
2266 | ||
1c6fdbd8 KO |
2267 | nbuckets = ca->mi.nbuckets; |
2268 | ||
1ada1606 | 2269 | if (resize) |
1c6fdbd8 | 2270 | up_write(&ca->bucket_lock); |
1c6fdbd8 | 2271 | |
1c6fdbd8 KO |
2272 | ret = 0; |
2273 | err: | |
1c6fdbd8 KO |
2274 | free_heap(&alloc_heap); |
2275 | free_fifo(&free_inc); | |
2276 | for (i = 0; i < RESERVE_NR; i++) | |
2277 | free_fifo(&free[i]); | |
8eb7f3ee | 2278 | kvpfree(buckets_nouse, |
1c6fdbd8 | 2279 | BITS_TO_LONGS(nbuckets) * sizeof(unsigned long)); |
1c6fdbd8 KO |
2280 | if (buckets) |
2281 | call_rcu(&old_buckets->rcu, buckets_free_rcu); | |
2282 | ||
2283 | return ret; | |
2284 | } | |
2285 | ||
2286 | void bch2_dev_buckets_free(struct bch_dev *ca) | |
2287 | { | |
2288 | unsigned i; | |
2289 | ||
1c6fdbd8 KO |
2290 | free_heap(&ca->alloc_heap); |
2291 | free_fifo(&ca->free_inc); | |
2292 | for (i = 0; i < RESERVE_NR; i++) | |
2293 | free_fifo(&ca->free[i]); | |
8eb7f3ee | 2294 | kvpfree(ca->buckets_nouse, |
1c6fdbd8 | 2295 | BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long)); |
9ca53b55 | 2296 | kvpfree(rcu_dereference_protected(ca->buckets[0], 1), |
1c6fdbd8 KO |
2297 | sizeof(struct bucket_array) + |
2298 | ca->mi.nbuckets * sizeof(struct bucket)); | |
2299 | ||
9ca53b55 | 2300 | free_percpu(ca->usage[0]); |
1c6fdbd8 KO |
2301 | } |
2302 | ||
2303 | int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca) | |
2304 | { | |
9ca53b55 | 2305 | if (!(ca->usage[0] = alloc_percpu(struct bch_dev_usage))) |
1c6fdbd8 KO |
2306 | return -ENOMEM; |
2307 | ||
2308 | return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets); |
2309 | } |