Commit | Line | Data |
---|---|---|
1c6fdbd8 KO |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | #include "bcachefs.h" | |
5e82a9a1 | 4 | #include "buckets.h" |
2c5af169 | 5 | #include "journal.h" |
1c6fdbd8 KO |
6 | #include "replicas.h" |
7 | #include "super-io.h" | |
8 | ||
9 | static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *, | |
10 | struct bch_replicas_cpu *); | |
11 | ||
12 | /* Replicas tracking - in memory: */ | |
13 | ||
/*
 * Debug-only sanity checks on a replicas entry. The body is compiled out
 * unless CONFIG_BCACHEFS_DEBUG is defined.
 */
static void verify_replicas_entry(struct bch_replicas_entry *e)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	unsigned pos;

	BUG_ON(e->data_type >= BCH_DATA_NR);
	BUG_ON(!e->nr_devs);
	BUG_ON(e->nr_required > 1 &&
	       e->nr_required >= e->nr_devs);

	/* each device must be strictly greater than its predecessor */
	for (pos = 1; pos < e->nr_devs; pos++)
		BUG_ON(e->devs[pos - 1] >= e->devs[pos]);
#endif
}
28 | ||
7a920560 KO |
29 | static void replicas_entry_sort(struct bch_replicas_entry *e) |
30 | { | |
31 | bubble_sort(e->devs, e->nr_devs, u8_cmp); | |
32 | } | |
33 | ||
1c6fdbd8 KO |
34 | static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r) |
35 | { | |
36 | eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL); | |
37 | } | |
38 | ||
7ef2a73a KO |
39 | void bch2_replicas_entry_to_text(struct printbuf *out, |
40 | struct bch_replicas_entry *e) | |
1c6fdbd8 | 41 | { |
7a920560 | 42 | unsigned i; |
1c6fdbd8 | 43 | |
af9d3bc2 KO |
44 | pr_buf(out, "%s: %u/%u [", |
45 | bch2_data_types[e->data_type], | |
46 | e->nr_required, | |
47 | e->nr_devs); | |
1c6fdbd8 | 48 | |
7a920560 | 49 | for (i = 0; i < e->nr_devs; i++) |
319f9ac3 KO |
50 | pr_buf(out, i ? " %u" : "%u", e->devs[i]); |
51 | pr_buf(out, "]"); | |
1c6fdbd8 KO |
52 | } |
53 | ||
319f9ac3 KO |
54 | void bch2_cpu_replicas_to_text(struct printbuf *out, |
55 | struct bch_replicas_cpu *r) | |
1c6fdbd8 | 56 | { |
7a920560 | 57 | struct bch_replicas_entry *e; |
1c6fdbd8 | 58 | bool first = true; |
1c6fdbd8 KO |
59 | |
60 | for_each_cpu_replicas_entry(r, e) { | |
1c6fdbd8 | 61 | if (!first) |
319f9ac3 | 62 | pr_buf(out, " "); |
1c6fdbd8 KO |
63 | first = false; |
64 | ||
7ef2a73a | 65 | bch2_replicas_entry_to_text(out, e); |
1c6fdbd8 | 66 | } |
1c6fdbd8 KO |
67 | } |
68 | ||
103e2127 KO |
69 | static void extent_to_replicas(struct bkey_s_c k, |
70 | struct bch_replicas_entry *r) | |
71 | { | |
26609b61 KO |
72 | struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); |
73 | const union bch_extent_entry *entry; | |
74 | struct extent_ptr_decoded p; | |
103e2127 | 75 | |
26609b61 | 76 | r->nr_required = 1; |
af9d3bc2 | 77 | |
26609b61 KO |
78 | bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { |
79 | if (p.ptr.cached) | |
80 | continue; | |
cd575ddf | 81 | |
b2ca9903 KO |
82 | if (!p.has_ec) |
83 | r->devs[r->nr_devs++] = p.ptr.dev; | |
84 | else | |
df5d4dae | 85 | r->nr_required = 0; |
cd575ddf KO |
86 | } |
87 | } | |
88 | ||
89 | static void stripe_to_replicas(struct bkey_s_c k, | |
90 | struct bch_replicas_entry *r) | |
91 | { | |
26609b61 KO |
92 | struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); |
93 | const struct bch_extent_ptr *ptr; | |
cd575ddf | 94 | |
26609b61 | 95 | r->nr_required = s.v->nr_blocks - s.v->nr_redundant; |
cd575ddf | 96 | |
26609b61 KO |
97 | for (ptr = s.v->ptrs; |
98 | ptr < s.v->ptrs + s.v->nr_blocks; | |
99 | ptr++) | |
100 | r->devs[r->nr_devs++] = ptr->dev; | |
103e2127 KO |
101 | } |
102 | ||
932aa837 KO |
103 | void bch2_bkey_to_replicas(struct bch_replicas_entry *e, |
104 | struct bkey_s_c k) | |
103e2127 KO |
105 | { |
106 | e->nr_devs = 0; | |
107 | ||
26609b61 KO |
108 | switch (k.k->type) { |
109 | case KEY_TYPE_btree_ptr: | |
548b3d20 | 110 | case KEY_TYPE_btree_ptr_v2: |
89fd25be | 111 | e->data_type = BCH_DATA_btree; |
103e2127 KO |
112 | extent_to_replicas(k, e); |
113 | break; | |
26609b61 | 114 | case KEY_TYPE_extent: |
76426098 | 115 | case KEY_TYPE_reflink_v: |
89fd25be | 116 | e->data_type = BCH_DATA_user; |
103e2127 KO |
117 | extent_to_replicas(k, e); |
118 | break; | |
26609b61 | 119 | case KEY_TYPE_stripe: |
af4d05c4 | 120 | e->data_type = BCH_DATA_parity; |
cd575ddf KO |
121 | stripe_to_replicas(k, e); |
122 | break; | |
103e2127 KO |
123 | } |
124 | ||
125 | replicas_entry_sort(e); | |
126 | } | |
127 | ||
7ef2a73a KO |
128 | void bch2_devlist_to_replicas(struct bch_replicas_entry *e, |
129 | enum bch_data_type data_type, | |
130 | struct bch_devs_list devs) | |
1c6fdbd8 KO |
131 | { |
132 | unsigned i; | |
133 | ||
134 | BUG_ON(!data_type || | |
89fd25be | 135 | data_type == BCH_DATA_sb || |
1c6fdbd8 KO |
136 | data_type >= BCH_DATA_NR); |
137 | ||
7a920560 KO |
138 | e->data_type = data_type; |
139 | e->nr_devs = 0; | |
af9d3bc2 | 140 | e->nr_required = 1; |
1c6fdbd8 | 141 | |
7a920560 KO |
142 | for (i = 0; i < devs.nr; i++) |
143 | e->devs[e->nr_devs++] = devs.devs[i]; | |
1c6fdbd8 | 144 | |
7a920560 | 145 | replicas_entry_sort(e); |
1c6fdbd8 KO |
146 | } |
147 | ||
73e6ab95 | 148 | static struct bch_replicas_cpu |
1c6fdbd8 | 149 | cpu_replicas_add_entry(struct bch_replicas_cpu *old, |
7a920560 | 150 | struct bch_replicas_entry *new_entry) |
1c6fdbd8 | 151 | { |
73e6ab95 KO |
152 | unsigned i; |
153 | struct bch_replicas_cpu new = { | |
154 | .nr = old->nr + 1, | |
155 | .entry_size = max_t(unsigned, old->entry_size, | |
156 | replicas_entry_bytes(new_entry)), | |
157 | }; | |
1c6fdbd8 | 158 | |
7ef2a73a | 159 | BUG_ON(!new_entry->data_type); |
eab32c8e | 160 | verify_replicas_entry(new_entry); |
7ef2a73a | 161 | |
73e6ab95 KO |
162 | new.entries = kcalloc(new.nr, new.entry_size, GFP_NOIO); |
163 | if (!new.entries) | |
164 | return new; | |
1c6fdbd8 KO |
165 | |
166 | for (i = 0; i < old->nr; i++) | |
73e6ab95 | 167 | memcpy(cpu_replicas_entry(&new, i), |
1c6fdbd8 | 168 | cpu_replicas_entry(old, i), |
7a920560 | 169 | old->entry_size); |
1c6fdbd8 | 170 | |
73e6ab95 | 171 | memcpy(cpu_replicas_entry(&new, old->nr), |
7a920560 KO |
172 | new_entry, |
173 | replicas_entry_bytes(new_entry)); | |
1c6fdbd8 | 174 | |
73e6ab95 | 175 | bch2_cpu_replicas_sort(&new); |
1c6fdbd8 KO |
176 | return new; |
177 | } | |
178 | ||
7ef2a73a KO |
179 | static inline int __replicas_entry_idx(struct bch_replicas_cpu *r, |
180 | struct bch_replicas_entry *search) | |
181 | { | |
182 | int idx, entry_size = replicas_entry_bytes(search); | |
183 | ||
184 | if (unlikely(entry_size > r->entry_size)) | |
185 | return -1; | |
186 | ||
eab32c8e | 187 | verify_replicas_entry(search); |
7ef2a73a KO |
188 | |
189 | #define entry_cmp(_l, _r, size) memcmp(_l, _r, entry_size) | |
190 | idx = eytzinger0_find(r->entries, r->nr, r->entry_size, | |
191 | entry_cmp, search); | |
192 | #undef entry_cmp | |
193 | ||
194 | return idx < r->nr ? idx : -1; | |
195 | } | |
196 | ||
197 | int bch2_replicas_entry_idx(struct bch_fs *c, | |
198 | struct bch_replicas_entry *search) | |
199 | { | |
200 | replicas_entry_sort(search); | |
201 | ||
202 | return __replicas_entry_idx(&c->replicas, search); | |
203 | } | |
204 | ||
/* True if @search is present in table @r. */
static bool __replicas_has_entry(struct bch_replicas_cpu *r,
				 struct bch_replicas_entry *search)
{
	int idx = __replicas_entry_idx(r, search);

	return idx >= 0;
}
210 | ||
988e98cf KO |
211 | bool bch2_replicas_marked(struct bch_fs *c, |
212 | struct bch_replicas_entry *search) | |
1d25849c | 213 | { |
988e98cf KO |
214 | bool marked; |
215 | ||
7ef2a73a KO |
216 | if (!search->nr_devs) |
217 | return true; | |
218 | ||
eab32c8e | 219 | verify_replicas_entry(search); |
7ef2a73a | 220 | |
4d8100da | 221 | percpu_down_read(&c->mark_lock); |
988e98cf KO |
222 | marked = __replicas_has_entry(&c->replicas, search) && |
223 | (likely((!c->replicas_gc.entries)) || | |
224 | __replicas_has_entry(&c->replicas_gc, search)); | |
73e6ab95 | 225 | percpu_up_read(&c->mark_lock); |
1d25849c KO |
226 | |
227 | return marked; | |
228 | } | |
229 | ||
5e82a9a1 | 230 | static void __replicas_table_update(struct bch_fs_usage *dst, |
7ef2a73a | 231 | struct bch_replicas_cpu *dst_r, |
5e82a9a1 | 232 | struct bch_fs_usage *src, |
7ef2a73a KO |
233 | struct bch_replicas_cpu *src_r) |
234 | { | |
7ef2a73a KO |
235 | int src_idx, dst_idx; |
236 | ||
7ef2a73a KO |
237 | *dst = *src; |
238 | ||
239 | for (src_idx = 0; src_idx < src_r->nr; src_idx++) { | |
768ac639 | 240 | if (!src->replicas[src_idx]) |
7ef2a73a KO |
241 | continue; |
242 | ||
243 | dst_idx = __replicas_entry_idx(dst_r, | |
244 | cpu_replicas_entry(src_r, src_idx)); | |
245 | BUG_ON(dst_idx < 0); | |
246 | ||
768ac639 | 247 | dst->replicas[dst_idx] = src->replicas[src_idx]; |
7ef2a73a KO |
248 | } |
249 | } | |
250 | ||
5e82a9a1 KO |
251 | static void __replicas_table_update_pcpu(struct bch_fs_usage __percpu *dst_p, |
252 | struct bch_replicas_cpu *dst_r, | |
253 | struct bch_fs_usage __percpu *src_p, | |
254 | struct bch_replicas_cpu *src_r) | |
255 | { | |
256 | unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr; | |
257 | struct bch_fs_usage *dst, *src = (void *) | |
258 | bch2_acc_percpu_u64s((void *) src_p, src_nr); | |
259 | ||
260 | preempt_disable(); | |
261 | dst = this_cpu_ptr(dst_p); | |
262 | preempt_enable(); | |
263 | ||
264 | __replicas_table_update(dst, dst_r, src, src_r); | |
265 | } | |
266 | ||
7ef2a73a KO |
267 | /* |
268 | * Resize filesystem accounting: | |
269 | */ | |
270 | static int replicas_table_update(struct bch_fs *c, | |
271 | struct bch_replicas_cpu *new_r) | |
272 | { | |
f299d573 | 273 | struct bch_fs_usage __percpu *new_usage[JOURNAL_BUF_NR]; |
5e82a9a1 KO |
274 | struct bch_fs_usage_online *new_scratch = NULL; |
275 | struct bch_fs_usage __percpu *new_gc = NULL; | |
276 | struct bch_fs_usage *new_base = NULL; | |
277 | unsigned i, bytes = sizeof(struct bch_fs_usage) + | |
278 | sizeof(u64) * new_r->nr; | |
279 | unsigned scratch_bytes = sizeof(struct bch_fs_usage_online) + | |
7ef2a73a | 280 | sizeof(u64) * new_r->nr; |
f299d573 KO |
281 | int ret = 0; |
282 | ||
283 | memset(new_usage, 0, sizeof(new_usage)); | |
284 | ||
285 | for (i = 0; i < ARRAY_SIZE(new_usage); i++) | |
286 | if (!(new_usage[i] = __alloc_percpu_gfp(bytes, | |
287 | sizeof(u64), GFP_NOIO))) | |
288 | goto err; | |
7ef2a73a | 289 | |
5e82a9a1 | 290 | memset(new_usage, 0, sizeof(new_usage)); |
7ef2a73a | 291 | |
5e82a9a1 KO |
292 | for (i = 0; i < ARRAY_SIZE(new_usage); i++) |
293 | if (!(new_usage[i] = __alloc_percpu_gfp(bytes, | |
294 | sizeof(u64), GFP_NOIO))) | |
295 | goto err; | |
296 | ||
297 | if (!(new_base = kzalloc(bytes, GFP_NOIO)) || | |
298 | !(new_scratch = kmalloc(scratch_bytes, GFP_NOIO)) || | |
299 | (c->usage_gc && | |
f299d573 | 300 | !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_NOIO)))) |
5e82a9a1 | 301 | goto err; |
ecf37a4a | 302 | |
5e82a9a1 KO |
303 | for (i = 0; i < ARRAY_SIZE(new_usage); i++) |
304 | if (c->usage[i]) | |
305 | __replicas_table_update_pcpu(new_usage[i], new_r, | |
306 | c->usage[i], &c->replicas); | |
307 | if (c->usage_base) | |
308 | __replicas_table_update(new_base, new_r, | |
309 | c->usage_base, &c->replicas); | |
310 | if (c->usage_gc) | |
311 | __replicas_table_update_pcpu(new_gc, new_r, | |
312 | c->usage_gc, &c->replicas); | |
313 | ||
314 | for (i = 0; i < ARRAY_SIZE(new_usage); i++) | |
315 | swap(c->usage[i], new_usage[i]); | |
316 | swap(c->usage_base, new_base); | |
ecf37a4a | 317 | swap(c->usage_scratch, new_scratch); |
5e82a9a1 | 318 | swap(c->usage_gc, new_gc); |
ecf37a4a | 319 | swap(c->replicas, *new_r); |
f299d573 | 320 | out: |
5e82a9a1 | 321 | free_percpu(new_gc); |
4d8100da | 322 | kfree(new_scratch); |
ecf37a4a KO |
323 | free_percpu(new_usage[1]); |
324 | free_percpu(new_usage[0]); | |
5e82a9a1 | 325 | kfree(new_base); |
7ef2a73a | 326 | return ret; |
f299d573 KO |
327 | err: |
328 | bch_err(c, "error updating replicas table: memory allocation failure"); | |
329 | ret = -ENOMEM; | |
330 | goto out; | |
7ef2a73a KO |
331 | } |
332 | ||
2c5af169 KO |
333 | static unsigned reserve_journal_replicas(struct bch_fs *c, |
334 | struct bch_replicas_cpu *r) | |
335 | { | |
336 | struct bch_replicas_entry *e; | |
337 | unsigned journal_res_u64s = 0; | |
338 | ||
339 | /* nr_inodes: */ | |
340 | journal_res_u64s += | |
341 | DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)); | |
342 | ||
343 | /* key_version: */ | |
344 | journal_res_u64s += | |
345 | DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)); | |
346 | ||
3577df5f KO |
347 | /* persistent_reserved: */ |
348 | journal_res_u64s += | |
349 | DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)) * | |
350 | BCH_REPLICAS_MAX; | |
351 | ||
2c5af169 KO |
352 | for_each_cpu_replicas_entry(r, e) |
353 | journal_res_u64s += | |
3577df5f | 354 | DIV_ROUND_UP(sizeof(struct jset_entry_data_usage) + |
2c5af169 KO |
355 | e->nr_devs, sizeof(u64)); |
356 | return journal_res_u64s; | |
357 | } | |
358 | ||
1c6fdbd8 KO |
359 | noinline |
360 | static int bch2_mark_replicas_slowpath(struct bch_fs *c, | |
7a920560 | 361 | struct bch_replicas_entry *new_entry) |
1c6fdbd8 | 362 | { |
73e6ab95 | 363 | struct bch_replicas_cpu new_r, new_gc; |
1e1a31c4 | 364 | int ret = 0; |
1c6fdbd8 | 365 | |
eab32c8e KO |
366 | verify_replicas_entry(new_entry); |
367 | ||
73e6ab95 KO |
368 | memset(&new_r, 0, sizeof(new_r)); |
369 | memset(&new_gc, 0, sizeof(new_gc)); | |
370 | ||
1c6fdbd8 KO |
371 | mutex_lock(&c->sb_lock); |
372 | ||
73e6ab95 KO |
373 | if (c->replicas_gc.entries && |
374 | !__replicas_has_entry(&c->replicas_gc, new_entry)) { | |
375 | new_gc = cpu_replicas_add_entry(&c->replicas_gc, new_entry); | |
376 | if (!new_gc.entries) | |
1c6fdbd8 KO |
377 | goto err; |
378 | } | |
379 | ||
73e6ab95 KO |
380 | if (!__replicas_has_entry(&c->replicas, new_entry)) { |
381 | new_r = cpu_replicas_add_entry(&c->replicas, new_entry); | |
382 | if (!new_r.entries) | |
1c6fdbd8 KO |
383 | goto err; |
384 | ||
73e6ab95 | 385 | ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r); |
1c6fdbd8 KO |
386 | if (ret) |
387 | goto err; | |
2c5af169 KO |
388 | |
389 | bch2_journal_entry_res_resize(&c->journal, | |
390 | &c->replicas_journal_res, | |
391 | reserve_journal_replicas(c, &new_r)); | |
1c6fdbd8 KO |
392 | } |
393 | ||
73e6ab95 KO |
394 | if (!new_r.entries && |
395 | !new_gc.entries) | |
396 | goto out; | |
397 | ||
1c6fdbd8 KO |
398 | /* allocations done, now commit: */ |
399 | ||
73e6ab95 | 400 | if (new_r.entries) |
1c6fdbd8 KO |
401 | bch2_write_super(c); |
402 | ||
403 | /* don't update in memory replicas until changes are persistent */ | |
73e6ab95 KO |
404 | percpu_down_write(&c->mark_lock); |
405 | if (new_r.entries) | |
7ef2a73a | 406 | ret = replicas_table_update(c, &new_r); |
73e6ab95 KO |
407 | if (new_gc.entries) |
408 | swap(new_gc, c->replicas_gc); | |
409 | percpu_up_write(&c->mark_lock); | |
410 | out: | |
1c6fdbd8 | 411 | mutex_unlock(&c->sb_lock); |
73e6ab95 KO |
412 | |
413 | kfree(new_r.entries); | |
414 | kfree(new_gc.entries); | |
415 | ||
1c6fdbd8 | 416 | return ret; |
1e1a31c4 KO |
417 | err: |
418 | bch_err(c, "error adding replicas entry: memory allocation failure"); | |
419 | ret = -ENOMEM; | |
420 | goto out; | |
1c6fdbd8 KO |
421 | } |
422 | ||
/*
 * Returns 0 if @r is already marked. Otherwise: -1 when @check is set
 * (query only), else the result of the slow path that adds the entry.
 */
static int __bch2_mark_replicas(struct bch_fs *c,
				struct bch_replicas_entry *r,
				bool check)
{
	if (likely(bch2_replicas_marked(c, r)))
		return 0;

	if (check)
		return -1;

	return bch2_mark_replicas_slowpath(c, r);
}
431 | ||
/* Ensure @r is marked, adding it if necessary. Returns 0 or an error. */
int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry *r)
{
	const bool check_only = false;

	return __bch2_mark_replicas(c, r, check_only);
}
436 | ||
437 | static int __bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k, | |
438 | bool check) | |
1c6fdbd8 | 439 | { |
e47c0171 | 440 | struct bch_replicas_padded search; |
7ef2a73a KO |
441 | struct bch_devs_list cached = bch2_bkey_cached_devs(k); |
442 | unsigned i; | |
988e98cf | 443 | int ret; |
1c6fdbd8 | 444 | |
7ef2a73a KO |
445 | for (i = 0; i < cached.nr; i++) { |
446 | bch2_replicas_entry_cached(&search.e, cached.devs[i]); | |
7a920560 | 447 | |
988e98cf KO |
448 | ret = __bch2_mark_replicas(c, &search.e, check); |
449 | if (ret) | |
450 | return ret; | |
7ef2a73a | 451 | } |
1c6fdbd8 | 452 | |
932aa837 | 453 | bch2_bkey_to_replicas(&search.e, k); |
1c6fdbd8 | 454 | |
af4d05c4 KO |
455 | ret = __bch2_mark_replicas(c, &search.e, check); |
456 | if (ret) | |
457 | return ret; | |
458 | ||
459 | if (search.e.data_type == BCH_DATA_parity) { | |
460 | search.e.data_type = BCH_DATA_cached; | |
461 | ret = __bch2_mark_replicas(c, &search.e, check); | |
462 | if (ret) | |
463 | return ret; | |
464 | ||
465 | search.e.data_type = BCH_DATA_user; | |
466 | ret = __bch2_mark_replicas(c, &search.e, check); | |
467 | if (ret) | |
468 | return ret; | |
469 | } | |
470 | ||
471 | return 0; | |
4d8100da KO |
472 | } |
473 | ||
474 | bool bch2_bkey_replicas_marked(struct bch_fs *c, | |
988e98cf | 475 | struct bkey_s_c k) |
4d8100da | 476 | { |
988e98cf | 477 | return __bch2_mark_bkey_replicas(c, k, true) == 0; |
1c6fdbd8 KO |
478 | } |
479 | ||
26609b61 | 480 | int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k) |
1c6fdbd8 | 481 | { |
988e98cf | 482 | return __bch2_mark_bkey_replicas(c, k, false); |
1c6fdbd8 KO |
483 | } |
484 | ||
485 | int bch2_replicas_gc_end(struct bch_fs *c, int ret) | |
486 | { | |
7ef2a73a KO |
487 | unsigned i; |
488 | ||
1c6fdbd8 KO |
489 | lockdep_assert_held(&c->replicas_gc_lock); |
490 | ||
491 | mutex_lock(&c->sb_lock); | |
5e82a9a1 | 492 | percpu_down_write(&c->mark_lock); |
1c6fdbd8 | 493 | |
7ef2a73a KO |
494 | /* |
495 | * this is kind of crappy; the replicas gc mechanism needs to be ripped | |
496 | * out | |
497 | */ | |
498 | ||
499 | for (i = 0; i < c->replicas.nr; i++) { | |
500 | struct bch_replicas_entry *e = | |
501 | cpu_replicas_entry(&c->replicas, i); | |
502 | struct bch_replicas_cpu n; | |
7ef2a73a | 503 | |
5e82a9a1 | 504 | if (!__replicas_has_entry(&c->replicas_gc, e) && |
f299d573 | 505 | bch2_fs_usage_read_one(c, &c->usage_base->replicas[i])) { |
5e82a9a1 KO |
506 | n = cpu_replicas_add_entry(&c->replicas_gc, e); |
507 | if (!n.entries) { | |
508 | ret = -ENOSPC; | |
509 | goto err; | |
510 | } | |
511 | ||
512 | swap(n, c->replicas_gc); | |
513 | kfree(n.entries); | |
7ef2a73a | 514 | } |
7ef2a73a KO |
515 | } |
516 | ||
73e6ab95 | 517 | if (bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc)) { |
1c6fdbd8 KO |
518 | ret = -ENOSPC; |
519 | goto err; | |
520 | } | |
521 | ||
5e82a9a1 | 522 | ret = replicas_table_update(c, &c->replicas_gc); |
73e6ab95 | 523 | err: |
73e6ab95 KO |
524 | kfree(c->replicas_gc.entries); |
525 | c->replicas_gc.entries = NULL; | |
5e82a9a1 | 526 | |
73e6ab95 | 527 | percpu_up_write(&c->mark_lock); |
1c6fdbd8 | 528 | |
5e82a9a1 KO |
529 | if (!ret) |
530 | bch2_write_super(c); | |
531 | ||
1c6fdbd8 | 532 | mutex_unlock(&c->sb_lock); |
5e82a9a1 | 533 | |
1c6fdbd8 | 534 | return ret; |
1c6fdbd8 KO |
535 | } |
536 | ||
537 | int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) | |
538 | { | |
7a920560 | 539 | struct bch_replicas_entry *e; |
73e6ab95 | 540 | unsigned i = 0; |
1c6fdbd8 KO |
541 | |
542 | lockdep_assert_held(&c->replicas_gc_lock); | |
543 | ||
544 | mutex_lock(&c->sb_lock); | |
73e6ab95 | 545 | BUG_ON(c->replicas_gc.entries); |
1c6fdbd8 | 546 | |
73e6ab95 KO |
547 | c->replicas_gc.nr = 0; |
548 | c->replicas_gc.entry_size = 0; | |
1c6fdbd8 | 549 | |
73e6ab95 KO |
550 | for_each_cpu_replicas_entry(&c->replicas, e) |
551 | if (!((1 << e->data_type) & typemask)) { | |
552 | c->replicas_gc.nr++; | |
553 | c->replicas_gc.entry_size = | |
554 | max_t(unsigned, c->replicas_gc.entry_size, | |
555 | replicas_entry_bytes(e)); | |
556 | } | |
557 | ||
558 | c->replicas_gc.entries = kcalloc(c->replicas_gc.nr, | |
559 | c->replicas_gc.entry_size, | |
560 | GFP_NOIO); | |
561 | if (!c->replicas_gc.entries) { | |
1c6fdbd8 | 562 | mutex_unlock(&c->sb_lock); |
1e1a31c4 | 563 | bch_err(c, "error allocating c->replicas_gc"); |
1c6fdbd8 KO |
564 | return -ENOMEM; |
565 | } | |
566 | ||
73e6ab95 | 567 | for_each_cpu_replicas_entry(&c->replicas, e) |
1c6fdbd8 | 568 | if (!((1 << e->data_type) & typemask)) |
73e6ab95 KO |
569 | memcpy(cpu_replicas_entry(&c->replicas_gc, i++), |
570 | e, c->replicas_gc.entry_size); | |
1c6fdbd8 | 571 | |
73e6ab95 | 572 | bch2_cpu_replicas_sort(&c->replicas_gc); |
1c6fdbd8 KO |
573 | mutex_unlock(&c->sb_lock); |
574 | ||
575 | return 0; | |
576 | } | |
577 | ||
ae0ff7b8 KO |
578 | int bch2_replicas_gc2(struct bch_fs *c) |
579 | { | |
580 | struct bch_replicas_cpu new = { 0 }; | |
581 | unsigned i, nr; | |
582 | int ret = 0; | |
583 | ||
584 | bch2_journal_meta(&c->journal); | |
585 | retry: | |
586 | nr = READ_ONCE(c->replicas.nr); | |
587 | new.entry_size = READ_ONCE(c->replicas.entry_size); | |
588 | new.entries = kcalloc(nr, new.entry_size, GFP_KERNEL); | |
1e1a31c4 KO |
589 | if (!new.entries) { |
590 | bch_err(c, "error allocating c->replicas_gc"); | |
ae0ff7b8 | 591 | return -ENOMEM; |
1e1a31c4 | 592 | } |
ae0ff7b8 KO |
593 | |
594 | mutex_lock(&c->sb_lock); | |
595 | percpu_down_write(&c->mark_lock); | |
596 | ||
597 | if (nr != c->replicas.nr || | |
598 | new.entry_size != c->replicas.entry_size) { | |
599 | percpu_up_write(&c->mark_lock); | |
600 | mutex_unlock(&c->sb_lock); | |
601 | kfree(new.entries); | |
602 | goto retry; | |
603 | } | |
604 | ||
605 | for (i = 0; i < c->replicas.nr; i++) { | |
606 | struct bch_replicas_entry *e = | |
607 | cpu_replicas_entry(&c->replicas, i); | |
608 | ||
89fd25be | 609 | if (e->data_type == BCH_DATA_journal || |
53ef2c5c KO |
610 | c->usage_base->replicas[i] || |
611 | percpu_u64_get(&c->usage[0]->replicas[i]) || | |
612 | percpu_u64_get(&c->usage[1]->replicas[i]) || | |
613 | percpu_u64_get(&c->usage[2]->replicas[i]) || | |
614 | percpu_u64_get(&c->usage[3]->replicas[i])) | |
ae0ff7b8 KO |
615 | memcpy(cpu_replicas_entry(&new, new.nr++), |
616 | e, new.entry_size); | |
617 | } | |
618 | ||
619 | bch2_cpu_replicas_sort(&new); | |
620 | ||
621 | if (bch2_cpu_replicas_to_sb_replicas(c, &new)) { | |
622 | ret = -ENOSPC; | |
623 | goto err; | |
624 | } | |
625 | ||
626 | ret = replicas_table_update(c, &new); | |
627 | err: | |
628 | kfree(new.entries); | |
629 | ||
630 | percpu_up_write(&c->mark_lock); | |
631 | ||
632 | if (!ret) | |
633 | bch2_write_super(c); | |
634 | ||
635 | mutex_unlock(&c->sb_lock); | |
636 | ||
637 | return ret; | |
638 | } | |
639 | ||
42b72e0b KO |
640 | int bch2_replicas_set_usage(struct bch_fs *c, |
641 | struct bch_replicas_entry *r, | |
642 | u64 sectors) | |
643 | { | |
644 | int ret, idx = bch2_replicas_entry_idx(c, r); | |
645 | ||
646 | if (idx < 0) { | |
647 | struct bch_replicas_cpu n; | |
648 | ||
649 | n = cpu_replicas_add_entry(&c->replicas, r); | |
650 | if (!n.entries) | |
651 | return -ENOMEM; | |
652 | ||
653 | ret = replicas_table_update(c, &n); | |
654 | if (ret) | |
655 | return ret; | |
656 | ||
657 | kfree(n.entries); | |
658 | ||
659 | idx = bch2_replicas_entry_idx(c, r); | |
660 | BUG_ON(ret < 0); | |
661 | } | |
662 | ||
5e82a9a1 | 663 | c->usage_base->replicas[idx] = sectors; |
42b72e0b KO |
664 | |
665 | return 0; | |
666 | } | |
667 | ||
1c6fdbd8 KO |
668 | /* Replicas tracking - superblock: */ |
669 | ||
73e6ab95 KO |
670 | static int |
671 | __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r, | |
672 | struct bch_replicas_cpu *cpu_r) | |
1c6fdbd8 | 673 | { |
7a920560 | 674 | struct bch_replicas_entry *e, *dst; |
af9d3bc2 | 675 | unsigned nr = 0, entry_size = 0, idx = 0; |
1c6fdbd8 | 676 | |
af9d3bc2 KO |
677 | for_each_replicas_entry(sb_r, e) { |
678 | entry_size = max_t(unsigned, entry_size, | |
679 | replicas_entry_bytes(e)); | |
680 | nr++; | |
681 | } | |
1c6fdbd8 | 682 | |
73e6ab95 KO |
683 | cpu_r->entries = kcalloc(nr, entry_size, GFP_NOIO); |
684 | if (!cpu_r->entries) | |
685 | return -ENOMEM; | |
1c6fdbd8 KO |
686 | |
687 | cpu_r->nr = nr; | |
688 | cpu_r->entry_size = entry_size; | |
689 | ||
af9d3bc2 KO |
690 | for_each_replicas_entry(sb_r, e) { |
691 | dst = cpu_replicas_entry(cpu_r, idx++); | |
692 | memcpy(dst, e, replicas_entry_bytes(e)); | |
693 | replicas_entry_sort(dst); | |
694 | } | |
1c6fdbd8 | 695 | |
73e6ab95 | 696 | return 0; |
af9d3bc2 KO |
697 | } |
698 | ||
73e6ab95 KO |
699 | static int |
700 | __bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r, | |
701 | struct bch_replicas_cpu *cpu_r) | |
af9d3bc2 KO |
702 | { |
703 | struct bch_replicas_entry_v0 *e; | |
af9d3bc2 KO |
704 | unsigned nr = 0, entry_size = 0, idx = 0; |
705 | ||
706 | for_each_replicas_entry(sb_r, e) { | |
707 | entry_size = max_t(unsigned, entry_size, | |
708 | replicas_entry_bytes(e)); | |
709 | nr++; | |
710 | } | |
711 | ||
712 | entry_size += sizeof(struct bch_replicas_entry) - | |
713 | sizeof(struct bch_replicas_entry_v0); | |
714 | ||
73e6ab95 KO |
715 | cpu_r->entries = kcalloc(nr, entry_size, GFP_NOIO); |
716 | if (!cpu_r->entries) | |
717 | return -ENOMEM; | |
af9d3bc2 KO |
718 | |
719 | cpu_r->nr = nr; | |
720 | cpu_r->entry_size = entry_size; | |
721 | ||
722 | for_each_replicas_entry(sb_r, e) { | |
723 | struct bch_replicas_entry *dst = | |
724 | cpu_replicas_entry(cpu_r, idx++); | |
725 | ||
726 | dst->data_type = e->data_type; | |
727 | dst->nr_devs = e->nr_devs; | |
728 | dst->nr_required = 1; | |
729 | memcpy(dst->devs, e->devs, e->nr_devs); | |
730 | replicas_entry_sort(dst); | |
731 | } | |
1c6fdbd8 | 732 | |
73e6ab95 | 733 | return 0; |
1c6fdbd8 KO |
734 | } |
735 | ||
736 | int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c) | |
737 | { | |
af9d3bc2 KO |
738 | struct bch_sb_field_replicas *sb_v1; |
739 | struct bch_sb_field_replicas_v0 *sb_v0; | |
73e6ab95 KO |
740 | struct bch_replicas_cpu new_r = { 0, 0, NULL }; |
741 | int ret = 0; | |
1c6fdbd8 | 742 | |
af9d3bc2 | 743 | if ((sb_v1 = bch2_sb_get_replicas(c->disk_sb.sb))) |
73e6ab95 | 744 | ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r); |
af9d3bc2 | 745 | else if ((sb_v0 = bch2_sb_get_replicas_v0(c->disk_sb.sb))) |
73e6ab95 | 746 | ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r); |
af9d3bc2 | 747 | |
73e6ab95 | 748 | if (ret) |
1c6fdbd8 KO |
749 | return -ENOMEM; |
750 | ||
73e6ab95 KO |
751 | bch2_cpu_replicas_sort(&new_r); |
752 | ||
753 | percpu_down_write(&c->mark_lock); | |
2c5af169 | 754 | |
7ef2a73a | 755 | ret = replicas_table_update(c, &new_r); |
73e6ab95 | 756 | percpu_up_write(&c->mark_lock); |
af9d3bc2 | 757 | |
73e6ab95 | 758 | kfree(new_r.entries); |
1c6fdbd8 KO |
759 | |
760 | return 0; | |
761 | } | |
762 | ||
af9d3bc2 KO |
763 | static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c, |
764 | struct bch_replicas_cpu *r) | |
765 | { | |
766 | struct bch_sb_field_replicas_v0 *sb_r; | |
767 | struct bch_replicas_entry_v0 *dst; | |
768 | struct bch_replicas_entry *src; | |
769 | size_t bytes; | |
770 | ||
771 | bytes = sizeof(struct bch_sb_field_replicas); | |
772 | ||
773 | for_each_cpu_replicas_entry(r, src) | |
774 | bytes += replicas_entry_bytes(src) - 1; | |
775 | ||
776 | sb_r = bch2_sb_resize_replicas_v0(&c->disk_sb, | |
777 | DIV_ROUND_UP(bytes, sizeof(u64))); | |
778 | if (!sb_r) | |
779 | return -ENOSPC; | |
780 | ||
781 | bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas); | |
782 | sb_r = bch2_sb_get_replicas_v0(c->disk_sb.sb); | |
783 | ||
784 | memset(&sb_r->entries, 0, | |
785 | vstruct_end(&sb_r->field) - | |
786 | (void *) &sb_r->entries); | |
787 | ||
788 | dst = sb_r->entries; | |
789 | for_each_cpu_replicas_entry(r, src) { | |
790 | dst->data_type = src->data_type; | |
791 | dst->nr_devs = src->nr_devs; | |
792 | memcpy(dst->devs, src->devs, src->nr_devs); | |
793 | ||
794 | dst = replicas_entry_next(dst); | |
795 | ||
796 | BUG_ON((void *) dst > vstruct_end(&sb_r->field)); | |
797 | } | |
798 | ||
799 | return 0; | |
800 | } | |
801 | ||
1c6fdbd8 KO |
802 | static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c, |
803 | struct bch_replicas_cpu *r) | |
804 | { | |
805 | struct bch_sb_field_replicas *sb_r; | |
7a920560 | 806 | struct bch_replicas_entry *dst, *src; |
af9d3bc2 | 807 | bool need_v1 = false; |
7a920560 | 808 | size_t bytes; |
1c6fdbd8 KO |
809 | |
810 | bytes = sizeof(struct bch_sb_field_replicas); | |
811 | ||
af9d3bc2 | 812 | for_each_cpu_replicas_entry(r, src) { |
7a920560 | 813 | bytes += replicas_entry_bytes(src); |
af9d3bc2 KO |
814 | if (src->nr_required != 1) |
815 | need_v1 = true; | |
816 | } | |
817 | ||
818 | if (!need_v1) | |
819 | return bch2_cpu_replicas_to_sb_replicas_v0(c, r); | |
1c6fdbd8 KO |
820 | |
821 | sb_r = bch2_sb_resize_replicas(&c->disk_sb, | |
7a920560 | 822 | DIV_ROUND_UP(bytes, sizeof(u64))); |
1c6fdbd8 KO |
823 | if (!sb_r) |
824 | return -ENOSPC; | |
825 | ||
af9d3bc2 KO |
826 | bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0); |
827 | sb_r = bch2_sb_get_replicas(c->disk_sb.sb); | |
828 | ||
1c6fdbd8 KO |
829 | memset(&sb_r->entries, 0, |
830 | vstruct_end(&sb_r->field) - | |
831 | (void *) &sb_r->entries); | |
832 | ||
7a920560 KO |
833 | dst = sb_r->entries; |
834 | for_each_cpu_replicas_entry(r, src) { | |
835 | memcpy(dst, src, replicas_entry_bytes(src)); | |
1c6fdbd8 | 836 | |
7a920560 | 837 | dst = replicas_entry_next(dst); |
1c6fdbd8 | 838 | |
7a920560 | 839 | BUG_ON((void *) dst > vstruct_end(&sb_r->field)); |
1c6fdbd8 KO |
840 | } |
841 | ||
842 | return 0; | |
843 | } | |
844 | ||
7a920560 KO |
845 | static const char *check_dup_replicas_entries(struct bch_replicas_cpu *cpu_r) |
846 | { | |
847 | unsigned i; | |
848 | ||
849 | sort_cmp_size(cpu_r->entries, | |
850 | cpu_r->nr, | |
851 | cpu_r->entry_size, | |
852 | memcmp, NULL); | |
853 | ||
854 | for (i = 0; i + 1 < cpu_r->nr; i++) { | |
855 | struct bch_replicas_entry *l = | |
856 | cpu_replicas_entry(cpu_r, i); | |
857 | struct bch_replicas_entry *r = | |
858 | cpu_replicas_entry(cpu_r, i + 1); | |
859 | ||
860 | BUG_ON(memcmp(l, r, cpu_r->entry_size) > 0); | |
861 | ||
862 | if (!memcmp(l, r, cpu_r->entry_size)) | |
863 | return "duplicate replicas entry"; | |
864 | } | |
865 | ||
866 | return NULL; | |
867 | } | |
868 | ||
1c6fdbd8 KO |
869 | static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_field *f) |
870 | { | |
871 | struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas); | |
872 | struct bch_sb_field_members *mi = bch2_sb_get_members(sb); | |
73e6ab95 | 873 | struct bch_replicas_cpu cpu_r = { .entries = NULL }; |
1c6fdbd8 KO |
874 | struct bch_replicas_entry *e; |
875 | const char *err; | |
876 | unsigned i; | |
877 | ||
878 | for_each_replicas_entry(sb_r, e) { | |
879 | err = "invalid replicas entry: invalid data type"; | |
880 | if (e->data_type >= BCH_DATA_NR) | |
881 | goto err; | |
882 | ||
883 | err = "invalid replicas entry: no devices"; | |
7a920560 | 884 | if (!e->nr_devs) |
1c6fdbd8 KO |
885 | goto err; |
886 | ||
af9d3bc2 | 887 | err = "invalid replicas entry: bad nr_required"; |
eab32c8e KO |
888 | if (e->nr_required > 1 && |
889 | e->nr_required >= e->nr_devs) | |
1c6fdbd8 KO |
890 | goto err; |
891 | ||
892 | err = "invalid replicas entry: invalid device"; | |
7a920560 | 893 | for (i = 0; i < e->nr_devs; i++) |
1c6fdbd8 KO |
894 | if (!bch2_dev_exists(sb, mi, e->devs[i])) |
895 | goto err; | |
896 | } | |
897 | ||
898 | err = "cannot allocate memory"; | |
73e6ab95 | 899 | if (__bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r)) |
1c6fdbd8 KO |
900 | goto err; |
901 | ||
73e6ab95 | 902 | err = check_dup_replicas_entries(&cpu_r); |
1c6fdbd8 | 903 | err: |
73e6ab95 | 904 | kfree(cpu_r.entries); |
1c6fdbd8 KO |
905 | return err; |
906 | } | |
907 | ||
319f9ac3 KO |
908 | static void bch2_sb_replicas_to_text(struct printbuf *out, |
909 | struct bch_sb *sb, | |
910 | struct bch_sb_field *f) | |
1c6fdbd8 | 911 | { |
319f9ac3 | 912 | struct bch_sb_field_replicas *r = field_to_type(f, replicas); |
1c6fdbd8 KO |
913 | struct bch_replicas_entry *e; |
914 | bool first = true; | |
1c6fdbd8 | 915 | |
1c6fdbd8 KO |
916 | for_each_replicas_entry(r, e) { |
917 | if (!first) | |
319f9ac3 | 918 | pr_buf(out, " "); |
1c6fdbd8 KO |
919 | first = false; |
920 | ||
7ef2a73a | 921 | bch2_replicas_entry_to_text(out, e); |
1c6fdbd8 | 922 | } |
1c6fdbd8 KO |
923 | } |
924 | ||
319f9ac3 KO |
/* Superblock field ops for the replicas section: validation and text output handlers. */
925 | const struct bch_sb_field_ops bch_sb_field_ops_replicas = { | |
926 | .validate = bch2_sb_validate_replicas, | |
927 | .to_text = bch2_sb_replicas_to_text, | |
928 | }; | |
929 | ||
af9d3bc2 KO |
930 | static const char *bch2_sb_validate_replicas_v0(struct bch_sb *sb, struct bch_sb_field *f) |
931 | { | |
932 | struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0); | |
933 | struct bch_sb_field_members *mi = bch2_sb_get_members(sb); | |
73e6ab95 | 934 | struct bch_replicas_cpu cpu_r = { .entries = NULL }; |
af9d3bc2 KO |
935 | struct bch_replicas_entry_v0 *e; |
936 | const char *err; | |
937 | unsigned i; | |
938 | ||
939 | for_each_replicas_entry_v0(sb_r, e) { | |
940 | err = "invalid replicas entry: invalid data type"; | |
941 | if (e->data_type >= BCH_DATA_NR) | |
942 | goto err; | |
943 | ||
944 | err = "invalid replicas entry: no devices"; | |
945 | if (!e->nr_devs) | |
946 | goto err; | |
947 | ||
948 | err = "invalid replicas entry: invalid device"; | |
949 | for (i = 0; i < e->nr_devs; i++) | |
950 | if (!bch2_dev_exists(sb, mi, e->devs[i])) | |
951 | goto err; | |
952 | } | |
953 | ||
954 | err = "cannot allocate memory"; | |
73e6ab95 | 955 | if (__bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r)) |
af9d3bc2 KO |
956 | goto err; |
957 | ||
73e6ab95 | 958 | err = check_dup_replicas_entries(&cpu_r); |
af9d3bc2 | 959 | err: |
73e6ab95 | 960 | kfree(cpu_r.entries); |
af9d3bc2 KO |
961 | return err; |
962 | } | |
963 | ||
/* Superblock field ops for the replicas_v0 section: only a validation handler is set. */
964 | const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = { | |
965 | .validate = bch2_sb_validate_replicas_v0, | |
966 | }; | |
967 | ||
1c6fdbd8 KO |
968 | /* Query replicas: */ |
969 | ||
1c6fdbd8 KO |
970 | struct replicas_status __bch2_replicas_status(struct bch_fs *c, |
971 | struct bch_devs_mask online_devs) | |
972 | { | |
973 | struct bch_sb_field_members *mi; | |
7a920560 | 974 | struct bch_replicas_entry *e; |
7a920560 | 975 | unsigned i, nr_online, nr_offline; |
1c6fdbd8 KO |
976 | struct replicas_status ret; |
977 | ||
978 | memset(&ret, 0, sizeof(ret)); | |
979 | ||
980 | for (i = 0; i < ARRAY_SIZE(ret.replicas); i++) | |
af9d3bc2 | 981 | ret.replicas[i].redundancy = INT_MAX; |
1c6fdbd8 KO |
982 | |
983 | mi = bch2_sb_get_members(c->disk_sb.sb); | |
1c6fdbd8 | 984 | |
73e6ab95 KO |
985 | percpu_down_read(&c->mark_lock); |
986 | ||
987 | for_each_cpu_replicas_entry(&c->replicas, e) { | |
1c6fdbd8 KO |
988 | if (e->data_type >= ARRAY_SIZE(ret.replicas)) |
989 | panic("e %p data_type %u\n", e, e->data_type); | |
990 | ||
991 | nr_online = nr_offline = 0; | |
992 | ||
7a920560 KO |
993 | for (i = 0; i < e->nr_devs; i++) { |
994 | BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi, | |
995 | e->devs[i])); | |
1c6fdbd8 | 996 | |
7a920560 | 997 | if (test_bit(e->devs[i], online_devs.d)) |
1c6fdbd8 KO |
998 | nr_online++; |
999 | else | |
1000 | nr_offline++; | |
1001 | } | |
1002 | ||
af9d3bc2 KO |
1003 | ret.replicas[e->data_type].redundancy = |
1004 | min(ret.replicas[e->data_type].redundancy, | |
1005 | (int) nr_online - (int) e->nr_required); | |
1c6fdbd8 KO |
1006 | |
1007 | ret.replicas[e->data_type].nr_offline = | |
1008 | max(ret.replicas[e->data_type].nr_offline, | |
1009 | nr_offline); | |
1010 | } | |
1011 | ||
73e6ab95 | 1012 | percpu_up_read(&c->mark_lock); |
1c6fdbd8 | 1013 | |
af9d3bc2 KO |
1014 | for (i = 0; i < ARRAY_SIZE(ret.replicas); i++) |
1015 | if (ret.replicas[i].redundancy == INT_MAX) | |
1016 | ret.replicas[i].redundancy = 0; | |
1017 | ||
1c6fdbd8 KO |
1018 | return ret; |
1019 | } | |
1020 | ||
1021 | struct replicas_status bch2_replicas_status(struct bch_fs *c) | |
1022 | { | |
1023 | return __bch2_replicas_status(c, bch2_online_devs(c)); | |
1024 | } | |
1025 | ||
1026 | static bool have_enough_devs(struct replicas_status s, | |
1027 | enum bch_data_type type, | |
1028 | bool force_if_degraded, | |
1029 | bool force_if_lost) | |
1030 | { | |
1031 | return (!s.replicas[type].nr_offline || force_if_degraded) && | |
af9d3bc2 | 1032 | (s.replicas[type].redundancy >= 0 || force_if_lost); |
1c6fdbd8 KO |
1033 | } |
1034 | ||
1035 | bool bch2_have_enough_devs(struct replicas_status s, unsigned flags) | |
1036 | { | |
89fd25be | 1037 | return (have_enough_devs(s, BCH_DATA_journal, |
1c6fdbd8 KO |
1038 | flags & BCH_FORCE_IF_METADATA_DEGRADED, |
1039 | flags & BCH_FORCE_IF_METADATA_LOST) && | |
89fd25be | 1040 | have_enough_devs(s, BCH_DATA_btree, |
1c6fdbd8 KO |
1041 | flags & BCH_FORCE_IF_METADATA_DEGRADED, |
1042 | flags & BCH_FORCE_IF_METADATA_LOST) && | |
89fd25be | 1043 | have_enough_devs(s, BCH_DATA_user, |
1c6fdbd8 KO |
1044 | flags & BCH_FORCE_IF_DATA_DEGRADED, |
1045 | flags & BCH_FORCE_IF_DATA_LOST)); | |
1046 | } | |
1047 | ||
af9d3bc2 | 1048 | int bch2_replicas_online(struct bch_fs *c, bool meta) |
1c6fdbd8 KO |
1049 | { |
1050 | struct replicas_status s = bch2_replicas_status(c); | |
1051 | ||
af9d3bc2 | 1052 | return (meta |
89fd25be KO |
1053 | ? min(s.replicas[BCH_DATA_journal].redundancy, |
1054 | s.replicas[BCH_DATA_btree].redundancy) | |
1055 | : s.replicas[BCH_DATA_user].redundancy) + 1; | |
1c6fdbd8 KO |
1056 | } |
1057 | ||
1058 | unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca) | |
1059 | { | |
7a920560 | 1060 | struct bch_replicas_entry *e; |
7a920560 | 1061 | unsigned i, ret = 0; |
1c6fdbd8 | 1062 | |
73e6ab95 | 1063 | percpu_down_read(&c->mark_lock); |
1c6fdbd8 | 1064 | |
73e6ab95 | 1065 | for_each_cpu_replicas_entry(&c->replicas, e) |
7a920560 KO |
1066 | for (i = 0; i < e->nr_devs; i++) |
1067 | if (e->devs[i] == ca->dev_idx) | |
1068 | ret |= 1 << e->data_type; | |
1069 | ||
73e6ab95 | 1070 | percpu_up_read(&c->mark_lock); |
1c6fdbd8 KO |
1071 | |
1072 | return ret; | |
1073 | } | |
2c5af169 KO |
1074 | |
1075 | int bch2_fs_replicas_init(struct bch_fs *c) | |
1076 | { | |
1077 | c->journal.entry_u64s_reserved += | |
1078 | reserve_journal_replicas(c, &c->replicas); | |
ecf37a4a KO |
1079 | |
1080 | return replicas_table_update(c, &c->replicas); | |
2c5af169 | 1081 | } |