Commit | Line | Data |
---|---|---|
1c6fdbd8 KO |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | #include "bcachefs.h" | |
8e3f913e | 4 | #include "alloc_background.h" |
7b3f84ea | 5 | #include "alloc_foreground.h" |
8e3f913e | 6 | #include "backpointers.h" |
07a1006a | 7 | #include "bkey_buf.h" |
1c6fdbd8 KO |
8 | #include "btree_gc.h" |
9 | #include "btree_update.h" | |
7ef2a73a | 10 | #include "btree_update_interior.h" |
8e3f913e | 11 | #include "btree_write_buffer.h" |
4628529f | 12 | #include "disk_groups.h" |
961b2d62 | 13 | #include "ec.h" |
d4bf5eec | 14 | #include "errcode.h" |
8e3f913e | 15 | #include "error.h" |
1c6fdbd8 | 16 | #include "inode.h" |
1809b8cb KO |
17 | #include "io_read.h" |
18 | #include "io_write.h" | |
1c6fdbd8 KO |
19 | #include "journal_reclaim.h" |
20 | #include "keylist.h" | |
21 | #include "move.h" | |
22 | #include "replicas.h" | |
84809057 | 23 | #include "snapshot.h" |
1c6fdbd8 KO |
24 | #include "super-io.h" |
25 | #include "trace.h" | |
26 | ||
27 | #include <linux/ioprio.h> | |
28 | #include <linux/kthread.h> | |
29 | ||
01e95645 KO |
30 | const char * const bch2_data_ops_strs[] = { |
31 | #define x(t, n, ...) [n] = #t, | |
32 | BCH_DATA_OPS() | |
33 | #undef x | |
34 | NULL | |
35 | }; | |
36 | ||
5a21764d KO |
37 | static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k) |
38 | { | |
39 | if (trace_move_extent_enabled()) { | |
40 | struct printbuf buf = PRINTBUF; | |
41 | ||
42 | bch2_bkey_val_to_text(&buf, c, k); | |
43 | trace_move_extent(c, buf.buf); | |
44 | printbuf_exit(&buf); | |
45 | } | |
46 | } | |
47 | ||
48 | static void trace_move_extent_read2(struct bch_fs *c, struct bkey_s_c k) | |
49 | { | |
50 | if (trace_move_extent_read_enabled()) { | |
51 | struct printbuf buf = PRINTBUF; | |
52 | ||
53 | bch2_bkey_val_to_text(&buf, c, k); | |
54 | trace_move_extent_read(c, buf.buf); | |
55 | printbuf_exit(&buf); | |
56 | } | |
57 | } | |
58 | ||
1c6fdbd8 | 59 | struct moving_io { |
b9fa375b KO |
60 | struct list_head read_list; |
61 | struct list_head io_list; | |
8fcdf814 KO |
62 | struct move_bucket_in_flight *b; |
63 | struct closure cl; | |
64 | bool read_completed; | |
1c6fdbd8 | 65 | |
8fcdf814 KO |
66 | unsigned read_sectors; |
67 | unsigned write_sectors; | |
1c6fdbd8 | 68 | |
8fcdf814 | 69 | struct bch_read_bio rbio; |
1c6fdbd8 | 70 | |
8fcdf814 | 71 | struct data_update write; |
1c6fdbd8 | 72 | /* Must be last since it is variable size */ |
62286a08 | 73 | struct bio_vec bi_inline_vecs[]; |
1c6fdbd8 KO |
74 | }; |
75 | ||
9f311f21 | 76 | static void move_free(struct moving_io *io) |
1c6fdbd8 | 77 | { |
1c6fdbd8 | 78 | struct moving_context *ctxt = io->write.ctxt; |
1c6fdbd8 | 79 | |
8fcdf814 KO |
80 | if (io->b) |
81 | atomic_dec(&io->b->count); | |
82 | ||
7f5c5d20 | 83 | bch2_data_update_exit(&io->write); |
b9fa375b KO |
84 | |
85 | mutex_lock(&ctxt->lock); | |
86 | list_del(&io->io_list); | |
1c6fdbd8 | 87 | wake_up(&ctxt->wait); |
b9fa375b KO |
88 | mutex_unlock(&ctxt->lock); |
89 | ||
1c6fdbd8 KO |
90 | kfree(io); |
91 | } | |
92 | ||
9f311f21 | 93 | static void move_write_done(struct bch_write_op *op) |
1c6fdbd8 | 94 | { |
9f311f21 KO |
95 | struct moving_io *io = container_of(op, struct moving_io, write.op); |
96 | struct moving_context *ctxt = io->write.ctxt; | |
1c6fdbd8 | 97 | |
8e3f913e KO |
98 | if (io->write.op.error) |
99 | ctxt->write_error = true; | |
100 | ||
1c6fdbd8 | 101 | atomic_sub(io->write_sectors, &io->write.ctxt->write_sectors); |
c782c583 | 102 | atomic_dec(&io->write.ctxt->write_ios); |
9f311f21 KO |
103 | move_free(io); |
104 | closure_put(&ctxt->cl); | |
1c6fdbd8 KO |
105 | } |
106 | ||
9f311f21 | 107 | static void move_write(struct moving_io *io) |
1c6fdbd8 | 108 | { |
1c6fdbd8 | 109 | if (unlikely(io->rbio.bio.bi_status || io->rbio.hole)) { |
9f311f21 | 110 | move_free(io); |
1c6fdbd8 KO |
111 | return; |
112 | } | |
113 | ||
9f311f21 | 114 | closure_get(&io->write.ctxt->cl); |
1c6fdbd8 | 115 | atomic_add(io->write_sectors, &io->write.ctxt->write_sectors); |
c782c583 | 116 | atomic_inc(&io->write.ctxt->write_ios); |
9f311f21 | 117 | |
7f5c5d20 | 118 | bch2_data_update_read_done(&io->write, io->rbio.pick.crc); |
1c6fdbd8 KO |
119 | } |
120 | ||
7ffb6a7e | 121 | struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt) |
1c6fdbd8 KO |
122 | { |
123 | struct moving_io *io = | |
b9fa375b | 124 | list_first_entry_or_null(&ctxt->reads, struct moving_io, read_list); |
1c6fdbd8 KO |
125 | |
126 | return io && io->read_completed ? io : NULL; | |
127 | } | |
128 | ||
129 | static void move_read_endio(struct bio *bio) | |
130 | { | |
131 | struct moving_io *io = container_of(bio, struct moving_io, rbio.bio); | |
132 | struct moving_context *ctxt = io->write.ctxt; | |
133 | ||
134 | atomic_sub(io->read_sectors, &ctxt->read_sectors); | |
c782c583 | 135 | atomic_dec(&ctxt->read_ios); |
1c6fdbd8 KO |
136 | io->read_completed = true; |
137 | ||
f61816d0 | 138 | wake_up(&ctxt->wait); |
1c6fdbd8 KO |
139 | closure_put(&ctxt->cl); |
140 | } | |
141 | ||
63316903 | 142 | void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt) |
1c6fdbd8 KO |
143 | { |
144 | struct moving_io *io; | |
145 | ||
7ffb6a7e | 146 | while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) { |
f82755e4 | 147 | bch2_trans_unlock_long(ctxt->trans); |
b9fa375b | 148 | list_del(&io->read_list); |
9f311f21 | 149 | move_write(io); |
1c6fdbd8 KO |
150 | } |
151 | } | |
152 | ||
63316903 | 153 | void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt) |
1c6fdbd8 KO |
154 | { |
155 | unsigned sectors_pending = atomic_read(&ctxt->write_sectors); | |
156 | ||
63316903 | 157 | move_ctxt_wait_event(ctxt, |
1c6fdbd8 KO |
158 | !atomic_read(&ctxt->write_sectors) || |
159 | atomic_read(&ctxt->write_sectors) != sectors_pending); | |
160 | } | |
161 | ||
0c069781 | 162 | void bch2_moving_ctxt_flush_all(struct moving_context *ctxt) |
50e029c6 KO |
163 | { |
164 | move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); | |
165 | bch2_trans_unlock_long(ctxt->trans); | |
166 | closure_sync(&ctxt->cl); | |
167 | } | |
168 | ||
0337cc7e KO |
169 | void bch2_moving_ctxt_exit(struct moving_context *ctxt) |
170 | { | |
63316903 | 171 | struct bch_fs *c = ctxt->trans->c; |
b9fa375b | 172 | |
50e029c6 | 173 | bch2_moving_ctxt_flush_all(ctxt); |
c782c583 | 174 | |
0337cc7e | 175 | EBUG_ON(atomic_read(&ctxt->write_sectors)); |
c782c583 KO |
176 | EBUG_ON(atomic_read(&ctxt->write_ios)); |
177 | EBUG_ON(atomic_read(&ctxt->read_sectors)); | |
178 | EBUG_ON(atomic_read(&ctxt->read_ios)); | |
0337cc7e | 179 | |
b9fa375b KO |
180 | mutex_lock(&c->moving_context_lock); |
181 | list_del(&ctxt->list); | |
182 | mutex_unlock(&c->moving_context_lock); | |
63316903 KO |
183 | |
184 | bch2_trans_put(ctxt->trans); | |
185 | memset(ctxt, 0, sizeof(*ctxt)); | |
0337cc7e KO |
186 | } |
187 | ||
188 | void bch2_moving_ctxt_init(struct moving_context *ctxt, | |
189 | struct bch_fs *c, | |
190 | struct bch_ratelimit *rate, | |
191 | struct bch_move_stats *stats, | |
192 | struct write_point_specifier wp, | |
193 | bool wait_on_copygc) | |
194 | { | |
195 | memset(ctxt, 0, sizeof(*ctxt)); | |
196 | ||
63316903 | 197 | ctxt->trans = bch2_trans_get(c); |
b9fa375b | 198 | ctxt->fn = (void *) _RET_IP_; |
0337cc7e KO |
199 | ctxt->rate = rate; |
200 | ctxt->stats = stats; | |
201 | ctxt->wp = wp; | |
202 | ctxt->wait_on_copygc = wait_on_copygc; | |
203 | ||
0337cc7e | 204 | closure_init_stack(&ctxt->cl); |
b9fa375b KO |
205 | |
206 | mutex_init(&ctxt->lock); | |
0337cc7e | 207 | INIT_LIST_HEAD(&ctxt->reads); |
b9fa375b | 208 | INIT_LIST_HEAD(&ctxt->ios); |
0337cc7e KO |
209 | init_waitqueue_head(&ctxt->wait); |
210 | ||
b9fa375b KO |
211 | mutex_lock(&c->moving_context_lock); |
212 | list_add(&ctxt->list, &c->moving_context_list); | |
213 | mutex_unlock(&c->moving_context_lock); | |
96a363a7 | 214 | } |
b9fa375b | 215 | |
96a363a7 KO |
216 | void bch2_move_stats_exit(struct bch_move_stats *stats, struct bch_fs *c) |
217 | { | |
218 | trace_move_data(c, stats); | |
0337cc7e KO |
219 | } |
220 | ||
01e95645 | 221 | void bch2_move_stats_init(struct bch_move_stats *stats, const char *name) |
0337cc7e KO |
222 | { |
223 | memset(stats, 0, sizeof(*stats)); | |
96a363a7 | 224 | stats->data_type = BCH_DATA_user; |
0337cc7e KO |
225 | scnprintf(stats->name, sizeof(stats->name), "%s", name); |
226 | } | |
227 | ||
63316903 | 228 | int bch2_move_extent(struct moving_context *ctxt, |
a0bfe3b0 | 229 | struct move_bucket_in_flight *bucket_in_flight, |
63316903 | 230 | struct btree_iter *iter, |
a0bfe3b0 | 231 | struct bkey_s_c k, |
63316903 | 232 | struct bch_io_opts io_opts, |
a0bfe3b0 | 233 | struct data_update_opts data_opts) |
1c6fdbd8 | 234 | { |
63316903 | 235 | struct btree_trans *trans = ctxt->trans; |
f30dd860 | 236 | struct bch_fs *c = trans->c; |
99aaf570 | 237 | struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); |
1c6fdbd8 | 238 | struct moving_io *io; |
1742237b KO |
239 | const union bch_extent_entry *entry; |
240 | struct extent_ptr_decoded p; | |
99aaf570 | 241 | unsigned sectors = k.k->size, pages; |
1c6fdbd8 KO |
242 | int ret = -ENOMEM; |
243 | ||
96a363a7 KO |
244 | if (ctxt->stats) |
245 | ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos); | |
5a21764d KO |
246 | trace_move_extent2(c, k); |
247 | ||
1be88797 KO |
248 | bch2_data_update_opts_normalize(k, &data_opts); |
249 | ||
250 | if (!data_opts.rewrite_ptrs && | |
251 | !data_opts.extra_replicas) { | |
252 | if (data_opts.kill_ptrs) | |
253 | return bch2_extent_drop_ptrs(trans, iter, k, data_opts); | |
254 | return 0; | |
255 | } | |
256 | ||
a8b3a677 KO |
257 | /* |
258 | * Before memory allocations & taking nocow locks in | |
259 | * bch2_data_update_init(): | |
260 | */ | |
261 | bch2_trans_unlock(trans); | |
262 | ||
1c6fdbd8 | 263 | /* write path might have to decompress data: */ |
99aaf570 | 264 | bkey_for_each_ptr_decode(k.k, ptrs, p, entry) |
1742237b | 265 | sectors = max_t(unsigned, sectors, p.crc.uncompressed_size); |
1c6fdbd8 KO |
266 | |
267 | pages = DIV_ROUND_UP(sectors, PAGE_SECTORS); | |
268 | io = kzalloc(sizeof(struct moving_io) + | |
269 | sizeof(struct bio_vec) * pages, GFP_KERNEL); | |
270 | if (!io) | |
271 | goto err; | |
272 | ||
b9fa375b | 273 | INIT_LIST_HEAD(&io->io_list); |
1c6fdbd8 | 274 | io->write.ctxt = ctxt; |
99aaf570 KO |
275 | io->read_sectors = k.k->size; |
276 | io->write_sectors = k.k->size; | |
1c6fdbd8 KO |
277 | |
278 | bio_init(&io->write.op.wbio.bio, NULL, io->bi_inline_vecs, pages, 0); | |
279 | bio_set_prio(&io->write.op.wbio.bio, | |
280 | IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); | |
281 | ||
282 | if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, sectors << 9, | |
283 | GFP_KERNEL)) | |
284 | goto err_free; | |
285 | ||
b50dd792 KO |
286 | io->rbio.c = c; |
287 | io->rbio.opts = io_opts; | |
1c6fdbd8 KO |
288 | bio_init(&io->rbio.bio, NULL, io->bi_inline_vecs, pages, 0); |
289 | io->rbio.bio.bi_vcnt = pages; | |
290 | bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); | |
291 | io->rbio.bio.bi_iter.bi_size = sectors << 9; | |
292 | ||
293 | io->rbio.bio.bi_opf = REQ_OP_READ; | |
99aaf570 | 294 | io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k); |
1c6fdbd8 KO |
295 | io->rbio.bio.bi_end_io = move_read_endio; |
296 | ||
7d9f8468 | 297 | ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp, |
a0bfe3b0 | 298 | io_opts, data_opts, iter->btree_id, k); |
7d9f8468 | 299 | if (ret) |
1c6fdbd8 KO |
300 | goto err_free_pages; |
301 | ||
2f528663 KO |
302 | io->write.op.end_io = move_write_done; |
303 | ||
a0bfe3b0 KO |
304 | if (ctxt->rate) |
305 | bch2_ratelimit_increment(ctxt->rate, k.k->size); | |
306 | ||
2f528663 KO |
307 | if (ctxt->stats) { |
308 | atomic64_inc(&ctxt->stats->keys_moved); | |
309 | atomic64_add(k.k->size, &ctxt->stats->sectors_moved); | |
310 | } | |
311 | ||
8fcdf814 KO |
312 | if (bucket_in_flight) { |
313 | io->b = bucket_in_flight; | |
314 | atomic_inc(&io->b->count); | |
315 | } | |
316 | ||
104c6974 | 317 | this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size); |
674cfc26 | 318 | this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size); |
5a21764d | 319 | trace_move_extent_read2(c, k); |
b9fa375b KO |
320 | |
321 | mutex_lock(&ctxt->lock); | |
1c6fdbd8 | 322 | atomic_add(io->read_sectors, &ctxt->read_sectors); |
c782c583 | 323 | atomic_inc(&ctxt->read_ios); |
b9fa375b KO |
324 | |
325 | list_add_tail(&io->read_list, &ctxt->reads); | |
326 | list_add_tail(&io->io_list, &ctxt->ios); | |
327 | mutex_unlock(&ctxt->lock); | |
1c6fdbd8 KO |
328 | |
329 | /* | |
330 | * dropped by move_read_endio() - guards against use after free of | |
331 | * ctxt when doing wakeup | |
332 | */ | |
333 | closure_get(&ctxt->cl); | |
5ff75ccb KO |
334 | bch2_read_extent(trans, &io->rbio, |
335 | bkey_start_pos(k.k), | |
a0bfe3b0 | 336 | iter->btree_id, k, 0, |
1c6fdbd8 KO |
337 | BCH_READ_NODECODE| |
338 | BCH_READ_LAST_FRAGMENT); | |
339 | return 0; | |
340 | err_free_pages: | |
341 | bio_free_pages(&io->write.op.wbio.bio); | |
342 | err_free: | |
343 | kfree(io); | |
344 | err: | |
7d9f8468 KO |
345 | if (ret == -BCH_ERR_data_update_done) |
346 | return 0; | |
347 | ||
1b1bd0fd KO |
348 | if (bch2_err_matches(ret, EROFS) || |
349 | bch2_err_matches(ret, BCH_ERR_transaction_restart)) | |
350 | return ret; | |
351 | ||
74644030 KO |
352 | count_event(c, move_extent_start_fail); |
353 | ||
ae4d612c KO |
354 | if (trace_move_extent_start_fail_enabled()) { |
355 | struct printbuf buf = PRINTBUF; | |
356 | ||
357 | bch2_bkey_val_to_text(&buf, c, k); | |
358 | prt_str(&buf, ": "); | |
359 | prt_str(&buf, bch2_err_str(ret)); | |
360 | trace_move_extent_start_fail(c, buf.buf); | |
361 | printbuf_exit(&buf); | |
362 | } | |
1c6fdbd8 KO |
363 | return ret; |
364 | } | |
365 | ||
84809057 KO |
366 | struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, |
367 | struct per_snapshot_io_opts *io_opts, | |
368 | struct bkey_s_c extent_k) | |
369 | { | |
370 | struct bch_fs *c = trans->c; | |
371 | u32 restart_count = trans->restart_count; | |
372 | int ret = 0; | |
373 | ||
374 | if (io_opts->cur_inum != extent_k.k->p.inode) { | |
84809057 KO |
375 | io_opts->d.nr = 0; |
376 | ||
5028b907 KO |
377 | ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), |
378 | BTREE_ITER_ALL_SNAPSHOTS, k, ({ | |
84809057 KO |
379 | if (k.k->p.offset != extent_k.k->p.inode) |
380 | break; | |
381 | ||
382 | if (!bkey_is_inode(k.k)) | |
383 | continue; | |
384 | ||
385 | struct bch_inode_unpacked inode; | |
386 | BUG_ON(bch2_inode_unpack(k, &inode)); | |
387 | ||
388 | struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot }; | |
389 | bch2_inode_opts_get(&e.io_opts, trans->c, &inode); | |
390 | ||
27b2df98 KO |
391 | darray_push(&io_opts->d, e); |
392 | })); | |
84809057 KO |
393 | io_opts->cur_inum = extent_k.k->p.inode; |
394 | } | |
395 | ||
396 | ret = ret ?: trans_was_restarted(trans, restart_count); | |
397 | if (ret) | |
398 | return ERR_PTR(ret); | |
399 | ||
defd9e39 | 400 | if (extent_k.k->p.snapshot) |
84809057 KO |
401 | darray_for_each(io_opts->d, i) |
402 | if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) | |
403 | return &i->io_opts; | |
84809057 KO |
404 | |
405 | return &io_opts->fs_io_opts; | |
406 | } | |
407 | ||
a0bfe3b0 KO |
408 | int bch2_move_get_io_opts_one(struct btree_trans *trans, |
409 | struct bch_io_opts *io_opts, | |
410 | struct bkey_s_c extent_k) | |
883d9701 | 411 | { |
67e0dd8f | 412 | struct btree_iter iter; |
883d9701 KO |
413 | struct bkey_s_c k; |
414 | int ret; | |
415 | ||
84809057 KO |
416 | /* reflink btree? */ |
417 | if (!extent_k.k->p.inode) { | |
418 | *io_opts = bch2_opts_to_inode_opts(trans->c->opts); | |
419 | return 0; | |
420 | } | |
421 | ||
422 | k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, | |
423 | SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot), | |
424 | BTREE_ITER_CACHED); | |
883d9701 | 425 | ret = bkey_err(k); |
84809057 KO |
426 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) |
427 | return ret; | |
883d9701 | 428 | |
84809057 KO |
429 | if (!ret && bkey_is_inode(k.k)) { |
430 | struct bch_inode_unpacked inode; | |
431 | bch2_inode_unpack(k, &inode); | |
432 | bch2_inode_opts_get(io_opts, trans->c, &inode); | |
433 | } else { | |
434 | *io_opts = bch2_opts_to_inode_opts(trans->c->opts); | |
443d2760 KO |
435 | } |
436 | ||
67e0dd8f | 437 | bch2_trans_iter_exit(trans, &iter); |
84809057 | 438 | return 0; |
883d9701 KO |
439 | } |
440 | ||
63316903 | 441 | int bch2_move_ratelimit(struct moving_context *ctxt) |
c91996c5 | 442 | { |
63316903 | 443 | struct bch_fs *c = ctxt->trans->c; |
415e5107 | 444 | bool is_kthread = current->flags & PF_KTHREAD; |
c91996c5 DH |
445 | u64 delay; |
446 | ||
50e029c6 KO |
447 | if (ctxt->wait_on_copygc && c->copygc_running) { |
448 | bch2_moving_ctxt_flush_all(ctxt); | |
c91996c5 DH |
449 | wait_event_killable(c->copygc_running_wq, |
450 | !c->copygc_running || | |
415e5107 | 451 | (is_kthread && kthread_should_stop())); |
c91996c5 DH |
452 | } |
453 | ||
454 | do { | |
0337cc7e | 455 | delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0; |
c91996c5 | 456 | |
415e5107 | 457 | if (is_kthread && kthread_should_stop()) |
c91996c5 | 458 | return 1; |
c91996c5 DH |
459 | |
460 | if (delay) | |
261af2f1 | 461 | move_ctxt_wait_event_timeout(ctxt, |
415e5107 KO |
462 | freezing(current) || |
463 | (is_kthread && kthread_should_stop()), | |
261af2f1 | 464 | delay); |
c91996c5 DH |
465 | |
466 | if (unlikely(freezing(current))) { | |
50e029c6 | 467 | bch2_moving_ctxt_flush_all(ctxt); |
c91996c5 DH |
468 | try_to_freeze(); |
469 | } | |
470 | } while (delay); | |
471 | ||
c782c583 KO |
472 | /* |
473 | * XXX: these limits really ought to be per device, SSDs and hard drives | |
474 | * will want different limits | |
475 | */ | |
63316903 | 476 | move_ctxt_wait_event(ctxt, |
c782c583 KO |
477 | atomic_read(&ctxt->write_sectors) < c->opts.move_bytes_in_flight >> 9 && |
478 | atomic_read(&ctxt->read_sectors) < c->opts.move_bytes_in_flight >> 9 && | |
479 | atomic_read(&ctxt->write_ios) < c->opts.move_ios_in_flight && | |
480 | atomic_read(&ctxt->read_ios) < c->opts.move_ios_in_flight); | |
c91996c5 DH |
481 | |
482 | return 0; | |
483 | } | |
484 | ||
63316903 KO |
485 | static int bch2_move_data_btree(struct moving_context *ctxt, |
486 | struct bpos start, | |
487 | struct bpos end, | |
488 | move_pred_fn pred, void *arg, | |
489 | enum btree_id btree_id) | |
1c6fdbd8 | 490 | { |
63316903 KO |
491 | struct btree_trans *trans = ctxt->trans; |
492 | struct bch_fs *c = trans->c; | |
84809057 KO |
493 | struct per_snapshot_io_opts snapshot_io_opts; |
494 | struct bch_io_opts *io_opts; | |
07a1006a | 495 | struct bkey_buf sk; |
67e0dd8f | 496 | struct btree_iter iter; |
1c6fdbd8 | 497 | struct bkey_s_c k; |
7f5c5d20 | 498 | struct data_update_opts data_opts; |
1c6fdbd8 KO |
499 | int ret = 0, ret2; |
500 | ||
84809057 | 501 | per_snapshot_io_opts_init(&snapshot_io_opts, c); |
07a1006a | 502 | bch2_bkey_buf_init(&sk); |
424eb881 | 503 | |
2f528663 KO |
504 | if (ctxt->stats) { |
505 | ctxt->stats->data_type = BCH_DATA_user; | |
d5eade93 | 506 | ctxt->stats->pos = BBPOS(btree_id, start); |
2f528663 | 507 | } |
424eb881 | 508 | |
6bd68ec2 | 509 | bch2_trans_iter_init(trans, &iter, btree_id, start, |
6fed42bb KO |
510 | BTREE_ITER_PREFETCH| |
511 | BTREE_ITER_ALL_SNAPSHOTS); | |
1c6fdbd8 | 512 | |
0337cc7e KO |
513 | if (ctxt->rate) |
514 | bch2_ratelimit_reset(ctxt->rate); | |
1c6fdbd8 | 515 | |
63316903 | 516 | while (!bch2_move_ratelimit(ctxt)) { |
6bd68ec2 | 517 | bch2_trans_begin(trans); |
700c25b3 | 518 | |
67e0dd8f | 519 | k = bch2_btree_iter_peek(&iter); |
1c6fdbd8 KO |
520 | if (!k.k) |
521 | break; | |
8ede9910 | 522 | |
0f238367 | 523 | ret = bkey_err(k); |
549d173c | 524 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) |
8ede9910 | 525 | continue; |
1c6fdbd8 KO |
526 | if (ret) |
527 | break; | |
8ede9910 | 528 | |
e88a75eb | 529 | if (bkey_ge(bkey_start_pos(k.k), end)) |
1c6fdbd8 KO |
530 | break; |
531 | ||
2f528663 | 532 | if (ctxt->stats) |
d5eade93 | 533 | ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos); |
8ede9910 | 534 | |
8d84260e | 535 | if (!bkey_extent_is_direct_data(k.k)) |
1c6fdbd8 KO |
536 | goto next_nondata; |
537 | ||
84809057 KO |
538 | io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, k); |
539 | ret = PTR_ERR_OR_ZERO(io_opts); | |
8e3f913e KO |
540 | if (ret) |
541 | continue; | |
1c6fdbd8 | 542 | |
7f5c5d20 | 543 | memset(&data_opts, 0, sizeof(data_opts)); |
84809057 | 544 | if (!pred(c, arg, k, io_opts, &data_opts)) |
1c6fdbd8 | 545 | goto next; |
1c6fdbd8 | 546 | |
eb331fe5 KO |
547 | /* |
548 | * The iterator gets unlocked by __bch2_read_extent - need to | |
549 | * save a copy of @k elsewhere: | |
3e3e02e6 | 550 | */ |
07a1006a | 551 | bch2_bkey_buf_reassemble(&sk, c, k); |
35189e09 | 552 | k = bkey_i_to_s_c(sk.k); |
1c6fdbd8 | 553 | |
63316903 | 554 | ret2 = bch2_move_extent(ctxt, NULL, &iter, k, *io_opts, data_opts); |
1c6fdbd8 | 555 | if (ret2) { |
549d173c | 556 | if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) |
f0e70018 | 557 | continue; |
f0e70018 | 558 | |
1c6fdbd8 KO |
559 | if (ret2 == -ENOMEM) { |
560 | /* memory allocation failure, wait for some IO to finish */ | |
63316903 | 561 | bch2_move_ctxt_wait_for_io(ctxt); |
1c6fdbd8 KO |
562 | continue; |
563 | } | |
564 | ||
565 | /* XXX signal failure */ | |
566 | goto next; | |
567 | } | |
1c6fdbd8 | 568 | next: |
2f528663 KO |
569 | if (ctxt->stats) |
570 | atomic64_add(k.k->size, &ctxt->stats->sectors_seen); | |
1c6fdbd8 | 571 | next_nondata: |
67e0dd8f | 572 | bch2_btree_iter_advance(&iter); |
1c6fdbd8 | 573 | } |
50dc0f69 | 574 | |
6bd68ec2 | 575 | bch2_trans_iter_exit(trans, &iter); |
07a1006a | 576 | bch2_bkey_buf_exit(&sk, c); |
84809057 | 577 | per_snapshot_io_opts_exit(&snapshot_io_opts); |
76426098 KO |
578 | |
579 | return ret; | |
580 | } | |
581 | ||
63316903 | 582 | int __bch2_move_data(struct moving_context *ctxt, |
a0bfe3b0 KO |
583 | struct bbpos start, |
584 | struct bbpos end, | |
585 | move_pred_fn pred, void *arg) | |
76426098 | 586 | { |
63316903 | 587 | struct bch_fs *c = ctxt->trans->c; |
1889ad5a | 588 | enum btree_id id; |
40a53b92 | 589 | int ret = 0; |
76426098 | 590 | |
a0bfe3b0 KO |
591 | for (id = start.btree; |
592 | id <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1); | |
1889ad5a | 593 | id++) { |
d5eade93 | 594 | ctxt->stats->pos = BBPOS(id, POS_MIN); |
1889ad5a | 595 | |
a0bfe3b0 KO |
596 | if (!btree_type_has_ptrs(id) || |
597 | !bch2_btree_id_root(c, id)->b) | |
1889ad5a KO |
598 | continue; |
599 | ||
63316903 | 600 | ret = bch2_move_data_btree(ctxt, |
a0bfe3b0 KO |
601 | id == start.btree ? start.pos : POS_MIN, |
602 | id == end.btree ? end.pos : POS_MAX, | |
0337cc7e | 603 | pred, arg, id); |
1889ad5a KO |
604 | if (ret) |
605 | break; | |
606 | } | |
607 | ||
a0bfe3b0 KO |
608 | return ret; |
609 | } | |
610 | ||
611 | int bch2_move_data(struct bch_fs *c, | |
612 | struct bbpos start, | |
613 | struct bbpos end, | |
614 | struct bch_ratelimit *rate, | |
615 | struct bch_move_stats *stats, | |
616 | struct write_point_specifier wp, | |
617 | bool wait_on_copygc, | |
618 | move_pred_fn pred, void *arg) | |
619 | { | |
620 | ||
a0bfe3b0 KO |
621 | struct moving_context ctxt; |
622 | int ret; | |
623 | ||
624 | bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); | |
63316903 | 625 | ret = __bch2_move_data(&ctxt, start, end, pred, arg); |
0337cc7e | 626 | bch2_moving_ctxt_exit(&ctxt); |
1c6fdbd8 | 627 | |
1c6fdbd8 KO |
628 | return ret; |
629 | } | |
630 | ||
74529338 | 631 | int bch2_evacuate_bucket(struct moving_context *ctxt, |
8fcdf814 | 632 | struct move_bucket_in_flight *bucket_in_flight, |
8e3f913e KO |
633 | struct bpos bucket, int gen, |
634 | struct data_update_opts _data_opts) | |
635 | { | |
63316903 KO |
636 | struct btree_trans *trans = ctxt->trans; |
637 | struct bch_fs *c = trans->c; | |
415e5107 | 638 | bool is_kthread = current->flags & PF_KTHREAD; |
8e3f913e | 639 | struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); |
8e3f913e KO |
640 | struct btree_iter iter; |
641 | struct bkey_buf sk; | |
642 | struct bch_backpointer bp; | |
643 | struct bch_alloc_v4 a_convert; | |
644 | const struct bch_alloc_v4 *a; | |
645 | struct bkey_s_c k; | |
646 | struct data_update_opts data_opts; | |
647 | unsigned dirty_sectors, bucket_size; | |
80c33085 | 648 | u64 fragmentation; |
62a03559 | 649 | struct bpos bp_pos = POS_MIN; |
8e3f913e KO |
650 | int ret = 0; |
651 | ||
5a21764d KO |
652 | trace_bucket_evacuate(c, &bucket); |
653 | ||
8e3f913e | 654 | bch2_bkey_buf_init(&sk); |
8e3f913e | 655 | |
3e36e572 KO |
656 | /* |
657 | * We're not run in a context that handles transaction restarts: | |
658 | */ | |
659 | bch2_trans_begin(trans); | |
660 | ||
80c33085 | 661 | bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, |
8e3f913e | 662 | bucket, BTREE_ITER_CACHED); |
80c33085 | 663 | ret = lockrestart_do(trans, |
8e3f913e | 664 | bkey_err(k = bch2_btree_iter_peek_slot(&iter))); |
80c33085 | 665 | bch2_trans_iter_exit(trans, &iter); |
8e3f913e | 666 | |
cf904c8d KO |
667 | bch_err_msg(c, ret, "looking up alloc key"); |
668 | if (ret) | |
8e3f913e | 669 | goto err; |
8e3f913e KO |
670 | |
671 | a = bch2_alloc_to_v4(k, &a_convert); | |
dafff7e5 | 672 | dirty_sectors = bch2_bucket_sectors_dirty(*a); |
8e3f913e | 673 | bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size; |
80c33085 | 674 | fragmentation = a->fragmentation_lru; |
8e3f913e | 675 | |
cb13f471 KO |
676 | ret = bch2_btree_write_buffer_tryflush(trans); |
677 | bch_err_msg(c, ret, "flushing btree write buffer"); | |
678 | if (ret) | |
8e3f913e | 679 | goto err; |
8e3f913e | 680 | |
63316903 | 681 | while (!(ret = bch2_move_ratelimit(ctxt))) { |
415e5107 KO |
682 | if (is_kthread && kthread_should_stop()) |
683 | break; | |
684 | ||
80c33085 | 685 | bch2_trans_begin(trans); |
8e3f913e | 686 | |
80c33085 | 687 | ret = bch2_get_next_backpointer(trans, bucket, gen, |
62a03559 | 688 | &bp_pos, &bp, |
53b1c6f4 | 689 | BTREE_ITER_CACHED); |
8e3f913e KO |
690 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) |
691 | continue; | |
692 | if (ret) | |
693 | goto err; | |
62a03559 | 694 | if (bkey_eq(bp_pos, POS_MAX)) |
8e3f913e KO |
695 | break; |
696 | ||
697 | if (!bp.level) { | |
62a03559 | 698 | k = bch2_backpointer_get_key(trans, &iter, bp_pos, bp, 0); |
8e3f913e KO |
699 | ret = bkey_err(k); |
700 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) | |
701 | continue; | |
702 | if (ret) | |
703 | goto err; | |
704 | if (!k.k) | |
705 | goto next; | |
706 | ||
707 | bch2_bkey_buf_reassemble(&sk, c, k); | |
708 | k = bkey_i_to_s_c(sk.k); | |
709 | ||
84809057 | 710 | ret = bch2_move_get_io_opts_one(trans, &io_opts, k); |
8e3f913e | 711 | if (ret) { |
80c33085 | 712 | bch2_trans_iter_exit(trans, &iter); |
8e3f913e KO |
713 | continue; |
714 | } | |
715 | ||
716 | data_opts = _data_opts; | |
717 | data_opts.target = io_opts.background_target; | |
718 | data_opts.rewrite_ptrs = 0; | |
719 | ||
0beebd92 | 720 | unsigned i = 0; |
8e3f913e | 721 | bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) { |
3f5d3fb4 | 722 | if (ptr->dev == bucket.inode) { |
8e3f913e | 723 | data_opts.rewrite_ptrs |= 1U << i; |
3f5d3fb4 KO |
724 | if (ptr->cached) { |
725 | bch2_trans_iter_exit(trans, &iter); | |
726 | goto next; | |
727 | } | |
728 | } | |
8e3f913e KO |
729 | i++; |
730 | } | |
731 | ||
63316903 KO |
732 | ret = bch2_move_extent(ctxt, bucket_in_flight, |
733 | &iter, k, io_opts, data_opts); | |
80c33085 | 734 | bch2_trans_iter_exit(trans, &iter); |
8e3f913e KO |
735 | |
736 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) | |
737 | continue; | |
738 | if (ret == -ENOMEM) { | |
739 | /* memory allocation failure, wait for some IO to finish */ | |
63316903 | 740 | bch2_move_ctxt_wait_for_io(ctxt); |
8e3f913e KO |
741 | continue; |
742 | } | |
743 | if (ret) | |
744 | goto err; | |
745 | ||
2f528663 KO |
746 | if (ctxt->stats) |
747 | atomic64_add(k.k->size, &ctxt->stats->sectors_seen); | |
8e3f913e KO |
748 | } else { |
749 | struct btree *b; | |
750 | ||
62a03559 | 751 | b = bch2_backpointer_get_node(trans, &iter, bp_pos, bp); |
8e3f913e KO |
752 | ret = PTR_ERR_OR_ZERO(b); |
753 | if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) | |
754 | continue; | |
755 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) | |
756 | continue; | |
757 | if (ret) | |
758 | goto err; | |
759 | if (!b) | |
760 | goto next; | |
761 | ||
80c33085 KO |
762 | ret = bch2_btree_node_rewrite(trans, &iter, b, 0); |
763 | bch2_trans_iter_exit(trans, &iter); | |
8e3f913e KO |
764 | |
765 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) | |
766 | continue; | |
767 | if (ret) | |
768 | goto err; | |
769 | ||
770 | if (ctxt->rate) | |
771 | bch2_ratelimit_increment(ctxt->rate, | |
772 | c->opts.btree_node_size >> 9); | |
2f528663 KO |
773 | if (ctxt->stats) { |
774 | atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_seen); | |
775 | atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_moved); | |
776 | } | |
8e3f913e KO |
777 | } |
778 | next: | |
62a03559 | 779 | bp_pos = bpos_nosnap_successor(bp_pos); |
8e3f913e KO |
780 | } |
781 | ||
80c33085 | 782 | trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, fragmentation, ret); |
8e3f913e | 783 | err: |
8e3f913e KO |
784 | bch2_bkey_buf_exit(&sk, c); |
785 | return ret; | |
786 | } | |
787 | ||
7f5c5d20 KO |
788 | typedef bool (*move_btree_pred)(struct bch_fs *, void *, |
789 | struct btree *, struct bch_io_opts *, | |
790 | struct data_update_opts *); | |
1889ad5a | 791 | |
1c6fdbd8 | 792 | static int bch2_move_btree(struct bch_fs *c, |
3c843a67 KO |
793 | struct bbpos start, |
794 | struct bbpos end, | |
1889ad5a | 795 | move_btree_pred pred, void *arg, |
1c6fdbd8 KO |
796 | struct bch_move_stats *stats) |
797 | { | |
1889ad5a | 798 | bool kthread = (current->flags & PF_KTHREAD) != 0; |
1c6fdbd8 | 799 | struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); |
96a363a7 KO |
800 | struct moving_context ctxt; |
801 | struct btree_trans *trans; | |
67e0dd8f | 802 | struct btree_iter iter; |
1c6fdbd8 | 803 | struct btree *b; |
3c843a67 | 804 | enum btree_id btree; |
7f5c5d20 | 805 | struct data_update_opts data_opts; |
1c6fdbd8 KO |
806 | int ret = 0; |
807 | ||
96a363a7 KO |
808 | bch2_moving_ctxt_init(&ctxt, c, NULL, stats, |
809 | writepoint_ptr(&c->btree_write_point), | |
810 | true); | |
811 | trans = ctxt.trans; | |
424eb881 | 812 | |
89fd25be | 813 | stats->data_type = BCH_DATA_btree; |
1c6fdbd8 | 814 | |
3c843a67 KO |
815 | for (btree = start.btree; |
816 | btree <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1); | |
817 | btree ++) { | |
818 | stats->pos = BBPOS(btree, POS_MIN); | |
424eb881 | 819 | |
3c843a67 | 820 | if (!bch2_btree_id_root(c, btree)->b) |
faa6cb6c KO |
821 | continue; |
822 | ||
3c843a67 | 823 | bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, 0, 0, |
4b09ef12 | 824 | BTREE_ITER_PREFETCH); |
d355c6f4 | 825 | retry: |
b71717da | 826 | ret = 0; |
6bd68ec2 | 827 | while (bch2_trans_begin(trans), |
d355c6f4 KO |
828 | (b = bch2_btree_iter_peek_node(&iter)) && |
829 | !(ret = PTR_ERR_OR_ZERO(b))) { | |
1889ad5a | 830 | if (kthread && kthread_should_stop()) |
7b7278bb | 831 | break; |
1889ad5a | 832 | |
3c843a67 KO |
833 | if ((cmp_int(btree, end.btree) ?: |
834 | bpos_cmp(b->key.k.p, end.pos)) > 0) | |
1889ad5a KO |
835 | break; |
836 | ||
d5eade93 | 837 | stats->pos = BBPOS(iter.btree_id, iter.pos); |
424eb881 | 838 | |
7f5c5d20 | 839 | if (!pred(c, arg, b, &io_opts, &data_opts)) |
1c6fdbd8 | 840 | goto next; |
1c6fdbd8 | 841 | |
6bd68ec2 | 842 | ret = bch2_btree_node_rewrite(trans, &iter, b, 0) ?: ret; |
549d173c | 843 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) |
f3cf0999 KO |
844 | continue; |
845 | if (ret) | |
846 | break; | |
1c6fdbd8 | 847 | next: |
4b09ef12 | 848 | bch2_btree_iter_next_node(&iter); |
1c6fdbd8 | 849 | } |
549d173c | 850 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) |
d355c6f4 KO |
851 | goto retry; |
852 | ||
6bd68ec2 | 853 | bch2_trans_iter_exit(trans, &iter); |
1c6fdbd8 | 854 | |
7b7278bb KO |
855 | if (kthread && kthread_should_stop()) |
856 | break; | |
1c6fdbd8 | 857 | } |
7b7278bb | 858 | |
96a363a7 KO |
859 | bch_err_fn(c, ret); |
860 | bch2_moving_ctxt_exit(&ctxt); | |
c0960603 | 861 | bch2_btree_interior_updates_flush(c); |
23af498c | 862 | |
1c6fdbd8 KO |
863 | return ret; |
864 | } | |
865 | ||
7f5c5d20 KO |
866 | static bool rereplicate_pred(struct bch_fs *c, void *arg, |
867 | struct bkey_s_c k, | |
868 | struct bch_io_opts *io_opts, | |
869 | struct data_update_opts *data_opts) | |
1c6fdbd8 | 870 | { |
26609b61 | 871 | unsigned nr_good = bch2_bkey_durability(c, k); |
e8bde78a KO |
872 | unsigned replicas = bkey_is_btree_ptr(k.k) |
873 | ? c->opts.metadata_replicas | |
874 | : io_opts->data_replicas; | |
1c6fdbd8 KO |
875 | |
876 | if (!nr_good || nr_good >= replicas) | |
7f5c5d20 | 877 | return false; |
1c6fdbd8 KO |
878 | |
879 | data_opts->target = 0; | |
7f5c5d20 | 880 | data_opts->extra_replicas = replicas - nr_good; |
26609b61 | 881 | data_opts->btree_insert_flags = 0; |
7f5c5d20 | 882 | return true; |
1c6fdbd8 KO |
883 | } |
884 | ||
7f5c5d20 KO |
885 | static bool migrate_pred(struct bch_fs *c, void *arg, |
886 | struct bkey_s_c k, | |
887 | struct bch_io_opts *io_opts, | |
888 | struct data_update_opts *data_opts) | |
1c6fdbd8 | 889 | { |
7f5c5d20 | 890 | struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); |
1c6fdbd8 | 891 | struct bch_ioctl_data *op = arg; |
7f5c5d20 | 892 | unsigned i = 0; |
1c6fdbd8 | 893 | |
7f5c5d20 | 894 | data_opts->rewrite_ptrs = 0; |
1c6fdbd8 | 895 | data_opts->target = 0; |
7f5c5d20 | 896 | data_opts->extra_replicas = 0; |
1c6fdbd8 | 897 | data_opts->btree_insert_flags = 0; |
7f5c5d20 KO |
898 | |
899 | bkey_for_each_ptr(ptrs, ptr) { | |
900 | if (ptr->dev == op->migrate.dev) | |
901 | data_opts->rewrite_ptrs |= 1U << i; | |
902 | i++; | |
903 | } | |
904 | ||
3e3e02e6 | 905 | return data_opts->rewrite_ptrs != 0; |
1c6fdbd8 KO |
906 | } |
907 | ||
7f5c5d20 KO |
908 | static bool rereplicate_btree_pred(struct bch_fs *c, void *arg, |
909 | struct btree *b, | |
910 | struct bch_io_opts *io_opts, | |
911 | struct data_update_opts *data_opts) | |
1889ad5a KO |
912 | { |
913 | return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); | |
914 | } | |
915 | ||
7f5c5d20 KO |
916 | static bool migrate_btree_pred(struct bch_fs *c, void *arg, |
917 | struct btree *b, | |
918 | struct bch_io_opts *io_opts, | |
919 | struct data_update_opts *data_opts) | |
1889ad5a KO |
920 | { |
921 | return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); | |
922 | } | |
923 | ||
e01dacf7 KO |
924 | static bool bformat_needs_redo(struct bkey_format *f) |
925 | { | |
926 | unsigned i; | |
927 | ||
928 | for (i = 0; i < f->nr_fields; i++) { | |
929 | unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i]; | |
930 | u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1)); | |
931 | u64 field_offset = le64_to_cpu(f->field_offset[i]); | |
932 | ||
933 | if (f->bits_per_field[i] > unpacked_bits) | |
934 | return true; | |
935 | ||
936 | if ((f->bits_per_field[i] == unpacked_bits) && field_offset) | |
937 | return true; | |
938 | ||
939 | if (((field_offset + ((1ULL << f->bits_per_field[i]) - 1)) & | |
940 | unpacked_mask) < | |
941 | field_offset) | |
942 | return true; | |
943 | } | |
944 | ||
945 | return false; | |
946 | } | |
947 | ||
7f5c5d20 KO |
948 | static bool rewrite_old_nodes_pred(struct bch_fs *c, void *arg, |
949 | struct btree *b, | |
950 | struct bch_io_opts *io_opts, | |
951 | struct data_update_opts *data_opts) | |
1889ad5a KO |
952 | { |
953 | if (b->version_ondisk != c->sb.version || | |
e01dacf7 KO |
954 | btree_node_need_rewrite(b) || |
955 | bformat_needs_redo(&b->format)) { | |
1889ad5a | 956 | data_opts->target = 0; |
7f5c5d20 | 957 | data_opts->extra_replicas = 0; |
1889ad5a | 958 | data_opts->btree_insert_flags = 0; |
7f5c5d20 | 959 | return true; |
1889ad5a KO |
960 | } |
961 | ||
7f5c5d20 | 962 | return false; |
1889ad5a KO |
963 | } |
964 | ||
a4805d66 KO |
965 | int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats) |
966 | { | |
967 | int ret; | |
968 | ||
969 | ret = bch2_move_btree(c, | |
3c843a67 KO |
970 | BBPOS_MIN, |
971 | BBPOS_MAX, | |
a4805d66 KO |
972 | rewrite_old_nodes_pred, c, stats); |
973 | if (!ret) { | |
974 | mutex_lock(&c->sb_lock); | |
c0ebe3e4 KO |
975 | c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); |
976 | c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); | |
a4805d66 KO |
977 | c->disk_sb.sb->version_min = c->disk_sb.sb->version; |
978 | bch2_write_super(c); | |
979 | mutex_unlock(&c->sb_lock); | |
980 | } | |
981 | ||
96a363a7 | 982 | bch_err_fn(c, ret); |
a4805d66 KO |
983 | return ret; |
984 | } | |
985 | ||
ba11c7d6 KO |
986 | static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg, |
987 | struct bkey_s_c k, | |
988 | struct bch_io_opts *io_opts, | |
989 | struct data_update_opts *data_opts) | |
990 | { | |
991 | unsigned durability = bch2_bkey_durability(c, k); | |
992 | unsigned replicas = bkey_is_btree_ptr(k.k) | |
993 | ? c->opts.metadata_replicas | |
994 | : io_opts->data_replicas; | |
995 | const union bch_extent_entry *entry; | |
996 | struct extent_ptr_decoded p; | |
997 | unsigned i = 0; | |
998 | ||
999 | bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) { | |
1000 | unsigned d = bch2_extent_ptr_durability(c, &p); | |
1001 | ||
1002 | if (d && durability - d >= replicas) { | |
1003 | data_opts->kill_ptrs |= BIT(i); | |
1004 | durability -= d; | |
1005 | } | |
1006 | ||
1007 | i++; | |
1008 | } | |
1009 | ||
1010 | return data_opts->kill_ptrs != 0; | |
1011 | } | |
1012 | ||
1013 | static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg, | |
1014 | struct btree *b, | |
1015 | struct bch_io_opts *io_opts, | |
1016 | struct data_update_opts *data_opts) | |
1017 | { | |
1018 | return drop_extra_replicas_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); | |
1019 | } | |
1020 | ||
1c6fdbd8 KO |
1021 | int bch2_data_job(struct bch_fs *c, |
1022 | struct bch_move_stats *stats, | |
1023 | struct bch_ioctl_data op) | |
1024 | { | |
3c843a67 KO |
1025 | struct bbpos start = BBPOS(op.start_btree, op.start_pos); |
1026 | struct bbpos end = BBPOS(op.end_btree, op.end_pos); | |
1c6fdbd8 KO |
1027 | int ret = 0; |
1028 | ||
01e95645 KO |
1029 | if (op.op >= BCH_DATA_OP_NR) |
1030 | return -EINVAL; | |
1031 | ||
1032 | bch2_move_stats_init(stats, bch2_data_ops_strs[op.op]); | |
1033 | ||
1c6fdbd8 | 1034 | switch (op.op) { |
01e95645 | 1035 | case BCH_DATA_OP_rereplicate: |
89fd25be | 1036 | stats->data_type = BCH_DATA_journal; |
1c6fdbd8 | 1037 | ret = bch2_journal_flush_device_pins(&c->journal, -1); |
3c843a67 | 1038 | ret = bch2_move_btree(c, start, end, |
1889ad5a | 1039 | rereplicate_btree_pred, c, stats) ?: ret; |
3c843a67 | 1040 | ret = bch2_move_data(c, start, end, |
0337cc7e KO |
1041 | NULL, |
1042 | stats, | |
1043 | writepoint_hashed((unsigned long) current), | |
1044 | true, | |
1045 | rereplicate_pred, c) ?: ret; | |
ae0ff7b8 | 1046 | ret = bch2_replicas_gc2(c) ?: ret; |
1c6fdbd8 | 1047 | break; |
01e95645 | 1048 | case BCH_DATA_OP_migrate: |
1c6fdbd8 KO |
1049 | if (op.migrate.dev >= c->sb.nr_devices) |
1050 | return -EINVAL; | |
1051 | ||
89fd25be | 1052 | stats->data_type = BCH_DATA_journal; |
1c6fdbd8 | 1053 | ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); |
3c843a67 | 1054 | ret = bch2_move_btree(c, start, end, |
1889ad5a | 1055 | migrate_btree_pred, &op, stats) ?: ret; |
3c843a67 | 1056 | ret = bch2_move_data(c, start, end, |
0337cc7e KO |
1057 | NULL, |
1058 | stats, | |
1059 | writepoint_hashed((unsigned long) current), | |
1060 | true, | |
1061 | migrate_pred, &op) ?: ret; | |
ae0ff7b8 | 1062 | ret = bch2_replicas_gc2(c) ?: ret; |
1c6fdbd8 | 1063 | break; |
01e95645 | 1064 | case BCH_DATA_OP_rewrite_old_nodes: |
a4805d66 | 1065 | ret = bch2_scan_old_btree_nodes(c, stats); |
1889ad5a | 1066 | break; |
ba11c7d6 KO |
1067 | case BCH_DATA_OP_drop_extra_replicas: |
1068 | ret = bch2_move_btree(c, start, end, | |
1069 | drop_extra_replicas_btree_pred, c, stats) ?: ret; | |
1070 | ret = bch2_move_data(c, start, end, NULL, stats, | |
1071 | writepoint_hashed((unsigned long) current), | |
1072 | true, | |
1073 | drop_extra_replicas_pred, c) ?: ret; | |
1074 | ret = bch2_replicas_gc2(c) ?: ret; | |
1075 | break; | |
1c6fdbd8 KO |
1076 | default: |
1077 | ret = -EINVAL; | |
1078 | } | |
1079 | ||
01e95645 | 1080 | bch2_move_stats_exit(stats, c); |
1c6fdbd8 KO |
1081 | return ret; |
1082 | } | |
b9fa375b | 1083 | |
96a363a7 | 1084 | void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats) |
b9fa375b | 1085 | { |
96a363a7 KO |
1086 | prt_printf(out, "%s: data type=%s pos=", |
1087 | stats->name, | |
1088 | bch2_data_types[stats->data_type]); | |
1089 | bch2_bbpos_to_text(out, stats->pos); | |
1090 | prt_newline(out); | |
1091 | printbuf_indent_add(out, 2); | |
b9fa375b | 1092 | |
96a363a7 KO |
1093 | prt_str(out, "keys moved: "); |
1094 | prt_u64(out, atomic64_read(&stats->keys_moved)); | |
9d2a7bd8 KO |
1095 | prt_newline(out); |
1096 | ||
96a363a7 KO |
1097 | prt_str(out, "keys raced: "); |
1098 | prt_u64(out, atomic64_read(&stats->keys_raced)); | |
1099 | prt_newline(out); | |
1100 | ||
1101 | prt_str(out, "bytes seen: "); | |
1102 | prt_human_readable_u64(out, atomic64_read(&stats->sectors_seen) << 9); | |
1103 | prt_newline(out); | |
1104 | ||
1105 | prt_str(out, "bytes moved: "); | |
1106 | prt_human_readable_u64(out, atomic64_read(&stats->sectors_moved) << 9); | |
b9fa375b | 1107 | prt_newline(out); |
96a363a7 KO |
1108 | |
1109 | prt_str(out, "bytes raced: "); | |
1110 | prt_human_readable_u64(out, atomic64_read(&stats->sectors_raced) << 9); | |
1111 | prt_newline(out); | |
1112 | ||
1113 | printbuf_indent_sub(out, 2); | |
1114 | } | |
1115 | ||
1116 | static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt) | |
1117 | { | |
1118 | struct moving_io *io; | |
1119 | ||
1120 | bch2_move_stats_to_text(out, ctxt->stats); | |
b9fa375b KO |
1121 | printbuf_indent_add(out, 2); |
1122 | ||
9d2a7bd8 | 1123 | prt_printf(out, "reads: ios %u/%u sectors %u/%u", |
b9fa375b | 1124 | atomic_read(&ctxt->read_ios), |
9d2a7bd8 KO |
1125 | c->opts.move_ios_in_flight, |
1126 | atomic_read(&ctxt->read_sectors), | |
1127 | c->opts.move_bytes_in_flight >> 9); | |
b9fa375b KO |
1128 | prt_newline(out); |
1129 | ||
9d2a7bd8 | 1130 | prt_printf(out, "writes: ios %u/%u sectors %u/%u", |
b9fa375b | 1131 | atomic_read(&ctxt->write_ios), |
9d2a7bd8 KO |
1132 | c->opts.move_ios_in_flight, |
1133 | atomic_read(&ctxt->write_sectors), | |
1134 | c->opts.move_bytes_in_flight >> 9); | |
b9fa375b KO |
1135 | prt_newline(out); |
1136 | ||
1137 | printbuf_indent_add(out, 2); | |
1138 | ||
1139 | mutex_lock(&ctxt->lock); | |
9d2a7bd8 | 1140 | list_for_each_entry(io, &ctxt->ios, io_list) |
b9fa375b | 1141 | bch2_write_op_to_text(out, &io->write.op); |
b9fa375b KO |
1142 | mutex_unlock(&ctxt->lock); |
1143 | ||
1144 | printbuf_indent_sub(out, 4); | |
1145 | } | |
1146 | ||
1147 | void bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c) | |
1148 | { | |
1149 | struct moving_context *ctxt; | |
1150 | ||
1151 | mutex_lock(&c->moving_context_lock); | |
1152 | list_for_each_entry(ctxt, &c->moving_context_list, list) | |
9d2a7bd8 | 1153 | bch2_moving_ctxt_to_text(out, c, ctxt); |
b9fa375b KO |
1154 | mutex_unlock(&c->moving_context_lock); |
1155 | } | |
1156 | ||
1157 | void bch2_fs_move_init(struct bch_fs *c) | |
1158 | { | |
1159 | INIT_LIST_HEAD(&c->moving_context_list); | |
1160 | mutex_init(&c->moving_context_lock); | |
b9fa375b | 1161 | } |