Commit | Line | Data |
---|---|---|
1c6fdbd8 KO |
1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* | |
3 | * Code for manipulating bucket marks for garbage collection. | |
4 | * | |
5 | * Copyright 2014 Datera, Inc. | |
6 | */ | |
7 | ||
8 | #ifndef _BUCKETS_H | |
9 | #define _BUCKETS_H | |
10 | ||
11 | #include "buckets_types.h" | |
f0ac7df2 | 12 | #include "extents.h" |
fb8e5b4c KO |
13 | #include "sb-members.h" |
14 | ||
15 | static inline size_t sector_to_bucket(const struct bch_dev *ca, sector_t s) | |
16 | { | |
17 | return div_u64(s, ca->mi.bucket_size); | |
18 | } | |
19 | ||
20 | static inline sector_t bucket_to_sector(const struct bch_dev *ca, size_t b) | |
21 | { | |
22 | return ((sector_t) b) * ca->mi.bucket_size; | |
23 | } | |
24 | ||
25 | static inline sector_t bucket_remainder(const struct bch_dev *ca, sector_t s) | |
26 | { | |
27 | u32 remainder; | |
28 | ||
29 | div_u64_rem(s, ca->mi.bucket_size, &remainder); | |
30 | return remainder; | |
31 | } | |
32 | ||
33 | static inline size_t sector_to_bucket_and_offset(const struct bch_dev *ca, sector_t s, | |
34 | u32 *offset) | |
35 | { | |
36 | return div_u64_rem(s, ca->mi.bucket_size, offset); | |
37 | } | |
1c6fdbd8 KO |
38 | |
/*
 * Iterate @_b over the elements of (_buckets)->b, starting at index
 * (_buckets)->first_bucket and stopping before index (_buckets)->nbuckets.
 * Note that @_buckets is evaluated more than once.
 */
#define for_each_bucket(_b, _buckets)					\
	for ((_b) = &(_buckets)->b[(_buckets)->first_bucket];		\
	     (_b) < &(_buckets)->b[(_buckets)->nbuckets];		\
	     (_b)++)
42 | ||
8c2d82a6 KO |
43 | /* |
44 | * Ugly hack alert: | |
45 | * | |
46 | * We need to cram a spinlock in a single byte, because that's what we have left | |
47 | * in struct bucket, and we care about the size of these - during fsck, we need | |
48 | * in memory state for every single bucket on every device. | |
49 | * | |
50 | * We used to do | |
51 | * while (xchg(&b->lock, 1)) cpu_relax(); | |
52 | * but, it turns out not all architectures support xchg on a single byte. | |
53 | * | |
54 | * So now we use bit_spin_lock(), with fun games since we can't burn a whole | |
55 | * ulong for this - we just need to make sure the lock bit always ends up in the | |
56 | * first byte. | |
57 | */ | |
58 | ||
59 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ | |
60 | #define BUCKET_LOCK_BITNR 0 | |
61 | #else | |
62 | #define BUCKET_LOCK_BITNR (BITS_PER_LONG - 1) | |
63 | #endif | |
64 | ||
65 | union ulong_byte_assert { | |
66 | ulong ulong; | |
67 | u8 byte; | |
68 | }; | |
69 | ||
66d90823 KO |
70 | static inline void bucket_unlock(struct bucket *b) |
71 | { | |
8c2d82a6 | 72 | BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte); |
73bbeaa2 KO |
73 | |
74 | clear_bit_unlock(BUCKET_LOCK_BITNR, (void *) &b->lock); | |
75 | wake_up_bit((void *) &b->lock, BUCKET_LOCK_BITNR); | |
66d90823 KO |
76 | } |
77 | ||
78 | static inline void bucket_lock(struct bucket *b) | |
79 | { | |
73bbeaa2 KO |
80 | wait_on_bit_lock((void *) &b->lock, BUCKET_LOCK_BITNR, |
81 | TASK_UNINTERRUPTIBLE); | |
66d90823 | 82 | } |
1c6fdbd8 | 83 | |
5735608c | 84 | static inline struct bucket_array *gc_bucket_array(struct bch_dev *ca) |
1c6fdbd8 | 85 | { |
5735608c | 86 | return rcu_dereference_check(ca->buckets_gc, |
1c6fdbd8 | 87 | !ca->fs || |
9166b41d | 88 | percpu_rwsem_is_held(&ca->fs->mark_lock) || |
1c6fdbd8 KO |
89 | lockdep_is_held(&ca->fs->gc_lock) || |
90 | lockdep_is_held(&ca->bucket_lock)); | |
91 | } | |
92 | ||
5735608c | 93 | static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b) |
1c6fdbd8 | 94 | { |
5735608c | 95 | struct bucket_array *buckets = gc_bucket_array(ca); |
1c6fdbd8 KO |
96 | |
97 | BUG_ON(b < buckets->first_bucket || b >= buckets->nbuckets); | |
98 | return buckets->b + b; | |
99 | } | |
100 | ||
a7860877 KO |
101 | static inline struct bucket_gens *bucket_gens(struct bch_dev *ca) |
102 | { | |
103 | return rcu_dereference_check(ca->bucket_gens, | |
104 | !ca->fs || | |
105 | percpu_rwsem_is_held(&ca->fs->mark_lock) || | |
106 | lockdep_is_held(&ca->fs->gc_lock) || | |
107 | lockdep_is_held(&ca->bucket_lock)); | |
a7860877 KO |
108 | } |
109 | ||
110 | static inline u8 *bucket_gen(struct bch_dev *ca, size_t b) | |
111 | { | |
112 | struct bucket_gens *gens = bucket_gens(ca); | |
113 | ||
114 | BUG_ON(b < gens->first_bucket || b >= gens->nbuckets); | |
115 | return gens->b + b; | |
116 | } | |
117 | ||
1c6fdbd8 KO |
118 | static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca, |
119 | const struct bch_extent_ptr *ptr) | |
120 | { | |
121 | return sector_to_bucket(ca, ptr->offset); | |
122 | } | |
123 | ||
3d48a7f8 KO |
124 | static inline struct bpos PTR_BUCKET_POS(const struct bch_fs *c, |
125 | const struct bch_extent_ptr *ptr) | |
126 | { | |
127 | struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); | |
128 | ||
129 | return POS(ptr->dev, PTR_BUCKET_NR(ca, ptr)); | |
130 | } | |
131 | ||
a8c752bb KO |
132 | static inline struct bpos PTR_BUCKET_POS_OFFSET(const struct bch_fs *c, |
133 | const struct bch_extent_ptr *ptr, | |
134 | u32 *bucket_offset) | |
135 | { | |
136 | struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); | |
137 | ||
138 | return POS(ptr->dev, sector_to_bucket_and_offset(ca, ptr->offset, bucket_offset)); | |
139 | } | |
140 | ||
47ac34ec KO |
141 | static inline struct bucket *PTR_GC_BUCKET(struct bch_dev *ca, |
142 | const struct bch_extent_ptr *ptr) | |
1c6fdbd8 | 143 | { |
47ac34ec | 144 | return gc_bucket(ca, PTR_BUCKET_NR(ca, ptr)); |
1c6fdbd8 KO |
145 | } |
146 | ||
06ab329c KO |
147 | static inline enum bch_data_type ptr_data_type(const struct bkey *k, |
148 | const struct bch_extent_ptr *ptr) | |
149 | { | |
f0ac7df2 | 150 | if (bkey_is_btree_ptr(k)) |
89fd25be | 151 | return BCH_DATA_btree; |
06ab329c | 152 | |
89fd25be | 153 | return ptr->cached ? BCH_DATA_cached : BCH_DATA_user; |
06ab329c KO |
154 | } |
155 | ||
a8c752bb KO |
156 | static inline s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p) |
157 | { | |
158 | EBUG_ON(sectors < 0); | |
159 | ||
160 | return crc_is_compressed(p.crc) | |
161 | ? DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size, | |
162 | p.crc.uncompressed_size) | |
163 | : sectors; | |
164 | } | |
165 | ||
1c6fdbd8 KO |
166 | static inline int gen_cmp(u8 a, u8 b) |
167 | { | |
168 | return (s8) (a - b); | |
169 | } | |
170 | ||
171 | static inline int gen_after(u8 a, u8 b) | |
172 | { | |
173 | int r = gen_cmp(a, b); | |
174 | ||
175 | return r > 0 ? r : 0; | |
176 | } | |
177 | ||
178 | /** | |
179 | * ptr_stale() - check if a pointer points into a bucket that has been | |
180 | * invalidated. | |
181 | */ | |
182 | static inline u8 ptr_stale(struct bch_dev *ca, | |
183 | const struct bch_extent_ptr *ptr) | |
184 | { | |
4b674b09 KO |
185 | u8 ret; |
186 | ||
187 | rcu_read_lock(); | |
a7860877 | 188 | ret = gen_after(*bucket_gen(ca, PTR_BUCKET_NR(ca, ptr)), ptr->gen); |
4b674b09 KO |
189 | rcu_read_unlock(); |
190 | ||
191 | return ret; | |
1c6fdbd8 KO |
192 | } |
193 | ||
1c6fdbd8 KO |
194 | /* Device usage: */ |
195 | ||
ed80c569 KO |
196 | void bch2_dev_usage_read_fast(struct bch_dev *, struct bch_dev_usage *); |
197 | static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca) | |
198 | { | |
199 | struct bch_dev_usage ret; | |
200 | ||
201 | bch2_dev_usage_read_fast(ca, &ret); | |
202 | return ret; | |
203 | } | |
204 | ||
822835ff | 205 | void bch2_dev_usage_init(struct bch_dev *); |
ed0cd515 | 206 | void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev_usage *); |
1c6fdbd8 | 207 | |
e53a961c | 208 | static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark) |
f25d8215 | 209 | { |
f25d8215 KO |
210 | s64 reserved = 0; |
211 | ||
e53a961c KO |
212 | switch (watermark) { |
213 | case BCH_WATERMARK_NR: | |
3764647b | 214 | BUG(); |
e53a961c | 215 | case BCH_WATERMARK_stripe: |
e84face6 KO |
216 | reserved += ca->mi.nbuckets >> 6; |
217 | fallthrough; | |
e53a961c | 218 | case BCH_WATERMARK_normal: |
f25d8215 KO |
219 | reserved += ca->mi.nbuckets >> 6; |
220 | fallthrough; | |
e53a961c | 221 | case BCH_WATERMARK_copygc: |
f25d8215 KO |
222 | reserved += ca->nr_btree_reserve; |
223 | fallthrough; | |
e53a961c | 224 | case BCH_WATERMARK_btree: |
f25d8215 KO |
225 | reserved += ca->nr_btree_reserve; |
226 | fallthrough; | |
e53a961c | 227 | case BCH_WATERMARK_btree_copygc: |
494036d8 | 228 | case BCH_WATERMARK_reclaim: |
e2a316b3 | 229 | case BCH_WATERMARK_interior_updates: |
f25d8215 | 230 | break; |
f25d8215 | 231 | } |
1c6fdbd8 | 232 | |
822835ff KO |
233 | return reserved; |
234 | } | |
1c6fdbd8 | 235 | |
30f0349d KO |
236 | static inline u64 dev_buckets_free(struct bch_dev *ca, |
237 | struct bch_dev_usage usage, | |
e53a961c | 238 | enum bch_watermark watermark) |
30f0349d KO |
239 | { |
240 | return max_t(s64, 0, | |
241 | usage.d[BCH_DATA_free].buckets - | |
242 | ca->nr_open_buckets - | |
e53a961c | 243 | bch2_dev_buckets_reserved(ca, watermark)); |
30f0349d KO |
244 | } |
245 | ||
822835ff KO |
246 | static inline u64 __dev_buckets_available(struct bch_dev *ca, |
247 | struct bch_dev_usage usage, | |
e53a961c | 248 | enum bch_watermark watermark) |
822835ff | 249 | { |
f25d8215 | 250 | return max_t(s64, 0, |
58aaa083 DH |
251 | usage.d[BCH_DATA_free].buckets |
252 | + usage.d[BCH_DATA_cached].buckets | |
253 | + usage.d[BCH_DATA_need_gc_gens].buckets | |
254 | + usage.d[BCH_DATA_need_discard].buckets | |
255 | - ca->nr_open_buckets | |
e53a961c | 256 | - bch2_dev_buckets_reserved(ca, watermark)); |
1c6fdbd8 KO |
257 | } |
258 | ||
f25d8215 | 259 | static inline u64 dev_buckets_available(struct bch_dev *ca, |
e53a961c | 260 | enum bch_watermark watermark) |
1c6fdbd8 | 261 | { |
e53a961c | 262 | return __dev_buckets_available(ca, bch2_dev_usage_read(ca), watermark); |
1c6fdbd8 KO |
263 | } |
264 | ||
265 | /* Filesystem usage: */ | |
266 | ||
962210b2 KO |
267 | static inline unsigned __fs_usage_u64s(unsigned nr_replicas) |
268 | { | |
269 | return sizeof(struct bch_fs_usage) / sizeof(u64) + nr_replicas; | |
270 | } | |
271 | ||
ecf37a4a | 272 | static inline unsigned fs_usage_u64s(struct bch_fs *c) |
7ef2a73a | 273 | { |
962210b2 KO |
274 | return __fs_usage_u64s(READ_ONCE(c->replicas.nr)); |
275 | } | |
276 | ||
277 | static inline unsigned __fs_usage_online_u64s(unsigned nr_replicas) | |
278 | { | |
279 | return sizeof(struct bch_fs_usage_online) / sizeof(u64) + nr_replicas; | |
280 | } | |
281 | ||
282 | static inline unsigned fs_usage_online_u64s(struct bch_fs *c) | |
283 | { | |
284 | return __fs_usage_online_u64s(READ_ONCE(c->replicas.nr)); | |
ecf37a4a | 285 | } |
7ef2a73a | 286 | |
180fb49d KO |
287 | static inline unsigned dev_usage_u64s(void) |
288 | { | |
289 | return sizeof(struct bch_dev_usage) / sizeof(u64); | |
290 | } | |
291 | ||
5e82a9a1 | 292 | u64 bch2_fs_usage_read_one(struct bch_fs *, u64 *); |
1c6fdbd8 | 293 | |
5e82a9a1 KO |
294 | struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *); |
295 | ||
296 | void bch2_fs_usage_acc_to_base(struct bch_fs *, unsigned); | |
297 | ||
298 | void bch2_fs_usage_to_text(struct printbuf *, | |
299 | struct bch_fs *, struct bch_fs_usage_online *); | |
300 | ||
301 | u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage_online *); | |
1c6fdbd8 | 302 | |
5663a415 KO |
303 | struct bch_fs_usage_short |
304 | bch2_fs_usage_read_short(struct bch_fs *); | |
b092dadd | 305 | |
6820ac2c KO |
306 | void bch2_dev_usage_update(struct bch_fs *, struct bch_dev *, |
307 | const struct bch_alloc_v4 *, | |
308 | const struct bch_alloc_v4 *, u64, bool); | |
f4f78779 KO |
309 | void bch2_dev_usage_update_m(struct bch_fs *, struct bch_dev *, |
310 | struct bucket *, struct bucket *); | |
6820ac2c | 311 | |
5663a415 KO |
312 | /* key/bucket marking: */ |
313 | ||
4dc5bb9a KO |
314 | static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c, |
315 | unsigned journal_seq, | |
316 | bool gc) | |
317 | { | |
318 | percpu_rwsem_assert_held(&c->mark_lock); | |
319 | BUG_ON(!gc && !journal_seq); | |
320 | ||
321 | return this_cpu_ptr(gc | |
322 | ? c->usage_gc | |
323 | : c->usage[journal_seq & JOURNAL_BUF_MASK]); | |
324 | } | |
325 | ||
f4f78779 KO |
326 | int bch2_update_replicas(struct bch_fs *, struct bkey_s_c, |
327 | struct bch_replicas_entry_v1 *, s64, | |
328 | unsigned, bool); | |
25f64e99 KO |
329 | int bch2_update_replicas_list(struct btree_trans *, |
330 | struct bch_replicas_entry_v1 *, s64); | |
331 | int bch2_update_cached_sectors_list(struct btree_trans *, unsigned, s64); | |
4dc5bb9a KO |
332 | int bch2_replicas_deltas_realloc(struct btree_trans *, unsigned); |
333 | ||
3e0745e2 | 334 | void bch2_fs_usage_initialize(struct bch_fs *); |
1c6fdbd8 | 335 | |
f4f78779 KO |
336 | int bch2_check_bucket_ref(struct btree_trans *, struct bkey_s_c, |
337 | const struct bch_extent_ptr *, | |
338 | s64, enum bch_data_type, u8, u8, u32); | |
339 | ||
c6b6d416 KO |
340 | int bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, |
341 | size_t, enum bch_data_type, unsigned, | |
342 | struct gc_pos, unsigned); | |
1c6fdbd8 | 343 | |
4f9ec59f KO |
344 | int bch2_trigger_extent(struct btree_trans *, enum btree_id, unsigned, |
345 | struct bkey_s_c, struct bkey_s, unsigned); | |
6cacd0c4 | 346 | int bch2_trigger_reservation(struct btree_trans *, enum btree_id, unsigned, |
ad00bce0 | 347 | struct bkey_s_c, struct bkey_s, unsigned); |
880e2275 | 348 | |
/*
 * Run trigger @_fn for an update as up to two calls: first for the old key
 * (if its type is nonzero) with BTREE_TRIGGER_INSERT cleared from @_flags,
 * then, if that did not fail, for the new key (if its type is nonzero) with
 * BTREE_TRIGGER_OVERWRITE cleared.
 *
 * Evaluates to 0, or to the first nonzero value returned by @_fn.
 *
 * Every argument except @_fn is parenthesized so that expression arguments
 * (e.g. "a | b" for @_flags) are masked as a whole, and the local result is
 * named _ret so it cannot capture a caller's "ret" used inside an argument.
 * Arguments may still be evaluated more than once.
 */
#define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\
({												\
	int _ret = 0;										\
												\
	if ((_old).k->type)									\
		_ret = _fn((_trans), (_btree_id), (_level), (_old),				\
			   (_flags) & ~BTREE_TRIGGER_INSERT);					\
	if (!_ret && (_new).k->type)								\
		_ret = _fn((_trans), (_btree_id), (_level), (_new).s_c,				\
			   (_flags) & ~BTREE_TRIGGER_OVERWRITE);				\
	_ret;											\
})
359 | ||
5b14ce35 KO |
360 | void bch2_trans_account_disk_usage_change(struct btree_trans *); |
361 | ||
920e69bc | 362 | void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *); |
502cfb35 | 363 | int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *); |
4d8100da | 364 | |
d62ab355 KO |
365 | int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *, |
366 | size_t, enum bch_data_type, unsigned); | |
367 | int bch2_trans_mark_dev_sb(struct bch_fs *, struct bch_dev *); | |
bbe682c7 | 368 | int bch2_trans_mark_dev_sbs(struct bch_fs *); |
bfcf840d | 369 | |
fb8e5b4c KO |
370 | static inline bool is_superblock_bucket(struct bch_dev *ca, u64 b) |
371 | { | |
372 | struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; | |
373 | u64 b_offset = bucket_to_sector(ca, b); | |
374 | u64 b_end = bucket_to_sector(ca, b + 1); | |
375 | unsigned i; | |
376 | ||
377 | if (!b) | |
378 | return true; | |
379 | ||
380 | for (i = 0; i < layout->nr_superblocks; i++) { | |
381 | u64 offset = le64_to_cpu(layout->sb_offset[i]); | |
382 | u64 end = offset + (1 << layout->sb_max_size_bits); | |
383 | ||
384 | if (!(offset >= b_end || end <= b_offset)) | |
385 | return true; | |
386 | } | |
387 | ||
388 | return false; | |
389 | } | |
390 | ||
e58f963c KO |
391 | static inline const char *bch2_data_type_str(enum bch_data_type type) |
392 | { | |
393 | return type < BCH_DATA_NR | |
394 | ? __bch2_data_types[type] | |
395 | : "(invalid data type)"; | |
396 | } | |
397 | ||
5663a415 | 398 | /* disk reservations: */ |
1c6fdbd8 | 399 | |
1c6fdbd8 KO |
400 | static inline void bch2_disk_reservation_put(struct bch_fs *c, |
401 | struct disk_reservation *res) | |
402 | { | |
7e94eeff KO |
403 | if (res->sectors) { |
404 | this_cpu_sub(*c->online_reserved, res->sectors); | |
405 | res->sectors = 0; | |
406 | } | |
1c6fdbd8 KO |
407 | } |
408 | ||
409 | #define BCH_DISK_RESERVATION_NOFAIL (1 << 0) | |
1c6fdbd8 | 410 | |
7e94eeff KO |
411 | int __bch2_disk_reservation_add(struct bch_fs *, |
412 | struct disk_reservation *, | |
413 | u64, int); | |
414 | ||
415 | static inline int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, | |
416 | u64 sectors, int flags) | |
417 | { | |
b2d1d56b | 418 | #ifdef __KERNEL__ |
7e94eeff KO |
419 | u64 old, new; |
420 | ||
421 | do { | |
422 | old = this_cpu_read(c->pcpu->sectors_available); | |
423 | if (sectors > old) | |
424 | return __bch2_disk_reservation_add(c, res, sectors, flags); | |
425 | ||
426 | new = old - sectors; | |
427 | } while (this_cpu_cmpxchg(c->pcpu->sectors_available, old, new) != old); | |
428 | ||
429 | this_cpu_add(*c->online_reserved, sectors); | |
430 | res->sectors += sectors; | |
431 | return 0; | |
b2d1d56b KO |
432 | #else |
433 | return __bch2_disk_reservation_add(c, res, sectors, flags); | |
434 | #endif | |
7e94eeff | 435 | } |
1c6fdbd8 KO |
436 | |
437 | static inline struct disk_reservation | |
438 | bch2_disk_reservation_init(struct bch_fs *c, unsigned nr_replicas) | |
439 | { | |
440 | return (struct disk_reservation) { | |
441 | .sectors = 0, | |
442 | #if 0 | |
443 | /* not used yet: */ | |
444 | .gen = c->capacity_gen, | |
445 | #endif | |
446 | .nr_replicas = nr_replicas, | |
447 | }; | |
448 | } | |
449 | ||
450 | static inline int bch2_disk_reservation_get(struct bch_fs *c, | |
451 | struct disk_reservation *res, | |
cd9f3dfe | 452 | u64 sectors, unsigned nr_replicas, |
1c6fdbd8 KO |
453 | int flags) |
454 | { | |
455 | *res = bch2_disk_reservation_init(c, nr_replicas); | |
456 | ||
457 | return bch2_disk_reservation_add(c, res, sectors * nr_replicas, flags); | |
458 | } | |
459 | ||
ed343411 DR |
460 | #define RESERVE_FACTOR 6 |
461 | ||
462 | static inline u64 avail_factor(u64 r) | |
463 | { | |
464 | return div_u64(r << RESERVE_FACTOR, (1 << RESERVE_FACTOR) + 1); | |
465 | } | |
466 | ||
1c6fdbd8 KO |
467 | int bch2_dev_buckets_resize(struct bch_fs *, struct bch_dev *, u64); |
468 | void bch2_dev_buckets_free(struct bch_dev *); | |
469 | int bch2_dev_buckets_alloc(struct bch_fs *, struct bch_dev *); | |
470 | ||
471 | #endif /* _BUCKETS_H */ |