Commit | Line | Data |
---|---|---|
1c6fdbd8 KO |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | #include "bcachefs.h" | |
8e3f913e | 4 | #include "alloc_background.h" |
7b3f84ea | 5 | #include "alloc_foreground.h" |
8e3f913e | 6 | #include "backpointers.h" |
07a1006a | 7 | #include "bkey_buf.h" |
1c6fdbd8 | 8 | #include "btree_gc.h" |
ec4edd7b | 9 | #include "btree_io.h" |
1c6fdbd8 | 10 | #include "btree_update.h" |
7ef2a73a | 11 | #include "btree_update_interior.h" |
8e3f913e | 12 | #include "btree_write_buffer.h" |
189c176c | 13 | #include "compress.h" |
4628529f | 14 | #include "disk_groups.h" |
961b2d62 | 15 | #include "ec.h" |
d4bf5eec | 16 | #include "errcode.h" |
8e3f913e | 17 | #include "error.h" |
1c6fdbd8 | 18 | #include "inode.h" |
1809b8cb KO |
19 | #include "io_read.h" |
20 | #include "io_write.h" | |
1c6fdbd8 KO |
21 | #include "journal_reclaim.h" |
22 | #include "keylist.h" | |
23 | #include "move.h" | |
161d1383 | 24 | #include "rebalance.h" |
80c6352c | 25 | #include "reflink.h" |
1c6fdbd8 | 26 | #include "replicas.h" |
84809057 | 27 | #include "snapshot.h" |
1c6fdbd8 KO |
28 | #include "super-io.h" |
29 | #include "trace.h" | |
30 | ||
31 | #include <linux/ioprio.h> | |
32 | #include <linux/kthread.h> | |
33 | ||
01e95645 KO |
34 | const char * const bch2_data_ops_strs[] = { |
35 | #define x(t, n, ...) [n] = #t, | |
36 | BCH_DATA_OPS() | |
37 | #undef x | |
38 | NULL | |
39 | }; | |
40 | ||
060ff4b7 KO |
41 | struct evacuate_bucket_arg { |
42 | struct bpos bucket; | |
43 | int gen; | |
44 | struct data_update_opts data_opts; | |
45 | }; | |
46 | ||
47 | static bool evacuate_bucket_pred(struct bch_fs *, void *, | |
48 | enum btree_id, struct bkey_s_c, | |
49 | struct bch_io_opts *, | |
50 | struct data_update_opts *); | |
51 | ||
52 | static noinline void | |
53 | trace_io_move2(struct bch_fs *c, struct bkey_s_c k, | |
54 | struct bch_io_opts *io_opts, | |
55 | struct data_update_opts *data_opts) | |
5a21764d | 56 | { |
060ff4b7 | 57 | struct printbuf buf = PRINTBUF; |
5a21764d | 58 | |
060ff4b7 KO |
59 | bch2_bkey_val_to_text(&buf, c, k); |
60 | prt_newline(&buf); | |
61 | bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts); | |
62 | trace_io_move(c, buf.buf); | |
63 | printbuf_exit(&buf); | |
5a21764d KO |
64 | } |
65 | ||
060ff4b7 | 66 | static noinline void trace_io_move_read2(struct bch_fs *c, struct bkey_s_c k) |
5a21764d | 67 | { |
060ff4b7 | 68 | struct printbuf buf = PRINTBUF; |
5a21764d | 69 | |
060ff4b7 KO |
70 | bch2_bkey_val_to_text(&buf, c, k); |
71 | trace_io_move_read(c, buf.buf); | |
72 | printbuf_exit(&buf); | |
73 | } | |
74 | ||
75 | static noinline void | |
76 | trace_io_move_pred2(struct bch_fs *c, struct bkey_s_c k, | |
77 | struct bch_io_opts *io_opts, | |
78 | struct data_update_opts *data_opts, | |
79 | move_pred_fn pred, void *_arg, bool p) | |
80 | { | |
81 | struct printbuf buf = PRINTBUF; | |
82 | ||
83 | prt_printf(&buf, "%ps: %u", pred, p); | |
84 | ||
85 | if (pred == evacuate_bucket_pred) { | |
86 | struct evacuate_bucket_arg *arg = _arg; | |
87 | prt_printf(&buf, " gen=%u", arg->gen); | |
5a21764d | 88 | } |
060ff4b7 KO |
89 | |
90 | prt_newline(&buf); | |
91 | bch2_bkey_val_to_text(&buf, c, k); | |
92 | prt_newline(&buf); | |
93 | bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts); | |
94 | trace_io_move_pred(c, buf.buf); | |
95 | printbuf_exit(&buf); | |
5a21764d KO |
96 | } |
97 | ||
c7897b50 KO |
98 | static noinline void |
99 | trace_io_move_evacuate_bucket2(struct bch_fs *c, struct bpos bucket, int gen) | |
100 | { | |
101 | struct printbuf buf = PRINTBUF; | |
102 | ||
103 | prt_printf(&buf, "bucket: "); | |
104 | bch2_bpos_to_text(&buf, bucket); | |
105 | prt_printf(&buf, " gen: %i\n", gen); | |
106 | ||
107 | trace_io_move_evacuate_bucket(c, buf.buf); | |
108 | printbuf_exit(&buf); | |
109 | } | |
110 | ||
1c6fdbd8 | 111 | struct moving_io { |
b9fa375b KO |
112 | struct list_head read_list; |
113 | struct list_head io_list; | |
49188a93 | 114 | struct move_bucket *b; |
8fcdf814 KO |
115 | struct closure cl; |
116 | bool read_completed; | |
1c6fdbd8 | 117 | |
8fcdf814 KO |
118 | unsigned read_sectors; |
119 | unsigned write_sectors; | |
1c6fdbd8 | 120 | |
8fcdf814 | 121 | struct data_update write; |
1c6fdbd8 KO |
122 | }; |
123 | ||
9f311f21 | 124 | static void move_free(struct moving_io *io) |
1c6fdbd8 | 125 | { |
1c6fdbd8 | 126 | struct moving_context *ctxt = io->write.ctxt; |
1c6fdbd8 | 127 | |
8fcdf814 KO |
128 | if (io->b) |
129 | atomic_dec(&io->b->count); | |
130 | ||
b9fa375b KO |
131 | mutex_lock(&ctxt->lock); |
132 | list_del(&io->io_list); | |
1c6fdbd8 | 133 | wake_up(&ctxt->wait); |
b9fa375b KO |
134 | mutex_unlock(&ctxt->lock); |
135 | ||
f269ae55 KO |
136 | if (!io->write.data_opts.scrub) { |
137 | bch2_data_update_exit(&io->write); | |
138 | } else { | |
139 | bch2_bio_free_pages_pool(io->write.op.c, &io->write.op.wbio.bio); | |
140 | kfree(io->write.bvecs); | |
141 | } | |
1c6fdbd8 KO |
142 | kfree(io); |
143 | } | |
144 | ||
9f311f21 | 145 | static void move_write_done(struct bch_write_op *op) |
1c6fdbd8 | 146 | { |
9f311f21 | 147 | struct moving_io *io = container_of(op, struct moving_io, write.op); |
fb8a9a32 | 148 | struct bch_fs *c = op->c; |
9f311f21 | 149 | struct moving_context *ctxt = io->write.ctxt; |
1c6fdbd8 | 150 | |
fb8a9a32 KO |
151 | if (op->error) { |
152 | if (trace_io_move_write_fail_enabled()) { | |
153 | struct printbuf buf = PRINTBUF; | |
154 | ||
155 | bch2_write_op_to_text(&buf, op); | |
fb8a9a32 KO |
156 | trace_io_move_write_fail(c, buf.buf); |
157 | printbuf_exit(&buf); | |
158 | } | |
159 | this_cpu_inc(c->counters[BCH_COUNTER_io_move_write_fail]); | |
160 | ||
8e3f913e | 161 | ctxt->write_error = true; |
fb8a9a32 | 162 | } |
8e3f913e | 163 | |
fb8a9a32 KO |
164 | atomic_sub(io->write_sectors, &ctxt->write_sectors); |
165 | atomic_dec(&ctxt->write_ios); | |
9f311f21 KO |
166 | move_free(io); |
167 | closure_put(&ctxt->cl); | |
1c6fdbd8 KO |
168 | } |
169 | ||
9f311f21 | 170 | static void move_write(struct moving_io *io) |
1c6fdbd8 | 171 | { |
cb8336ca | 172 | struct bch_fs *c = io->write.op.c; |
f269ae55 | 173 | struct moving_context *ctxt = io->write.ctxt; |
cb8336ca | 174 | struct bch_read_bio *rbio = &io->write.rbio; |
f269ae55 KO |
175 | |
176 | if (ctxt->stats) { | |
cb8336ca | 177 | if (rbio->bio.bi_status) |
f269ae55 KO |
178 | atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9, |
179 | &ctxt->stats->sectors_error_uncorrected); | |
cb8336ca | 180 | else if (rbio->saw_error) |
f269ae55 KO |
181 | atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9, |
182 | &ctxt->stats->sectors_error_corrected); | |
183 | } | |
184 | ||
cb8336ca KO |
185 | /* |
186 | * If the extent has been bitrotted, we're going to have to give it a | |
187 | * new checksum in order to move it - but the poison bit will ensure | |
188 | * that userspace still gets the appropriate error. | |
189 | */ | |
190 | if (unlikely(rbio->ret == -BCH_ERR_data_read_csum_err && | |
191 | (bch2_bkey_extent_flags(bkey_i_to_s_c(io->write.k.k)) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)))) { | |
192 | struct bch_extent_crc_unpacked crc = rbio->pick.crc; | |
193 | struct nonce nonce = extent_nonce(rbio->version, crc); | |
194 | ||
195 | rbio->pick.crc.csum = bch2_checksum_bio(c, rbio->pick.crc.csum_type, | |
196 | nonce, &rbio->bio); | |
197 | rbio->ret = 0; | |
198 | } | |
199 | ||
200 | if (unlikely(rbio->ret || io->write.data_opts.scrub)) { | |
9f311f21 | 201 | move_free(io); |
1c6fdbd8 KO |
202 | return; |
203 | } | |
204 | ||
157ea583 | 205 | if (trace_io_move_write_enabled()) { |
fa3185af KO |
206 | struct printbuf buf = PRINTBUF; |
207 | ||
208 | bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k)); | |
157ea583 | 209 | trace_io_move_write(c, buf.buf); |
fa3185af KO |
210 | printbuf_exit(&buf); |
211 | } | |
212 | ||
9f311f21 | 213 | closure_get(&io->write.ctxt->cl); |
1c6fdbd8 | 214 | atomic_add(io->write_sectors, &io->write.ctxt->write_sectors); |
c782c583 | 215 | atomic_inc(&io->write.ctxt->write_ios); |
9f311f21 | 216 | |
6f7111f8 | 217 | bch2_data_update_read_done(&io->write); |
1c6fdbd8 KO |
218 | } |
219 | ||
7ffb6a7e | 220 | struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt) |
1c6fdbd8 KO |
221 | { |
222 | struct moving_io *io = | |
b9fa375b | 223 | list_first_entry_or_null(&ctxt->reads, struct moving_io, read_list); |
1c6fdbd8 KO |
224 | |
225 | return io && io->read_completed ? io : NULL; | |
226 | } | |
227 | ||
228 | static void move_read_endio(struct bio *bio) | |
229 | { | |
a70bd976 | 230 | struct moving_io *io = container_of(bio, struct moving_io, write.rbio.bio); |
1c6fdbd8 KO |
231 | struct moving_context *ctxt = io->write.ctxt; |
232 | ||
233 | atomic_sub(io->read_sectors, &ctxt->read_sectors); | |
c782c583 | 234 | atomic_dec(&ctxt->read_ios); |
1c6fdbd8 KO |
235 | io->read_completed = true; |
236 | ||
f61816d0 | 237 | wake_up(&ctxt->wait); |
1c6fdbd8 KO |
238 | closure_put(&ctxt->cl); |
239 | } | |
240 | ||
63316903 | 241 | void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt) |
1c6fdbd8 KO |
242 | { |
243 | struct moving_io *io; | |
244 | ||
7ffb6a7e | 245 | while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) { |
f82755e4 | 246 | bch2_trans_unlock_long(ctxt->trans); |
b9fa375b | 247 | list_del(&io->read_list); |
9f311f21 | 248 | move_write(io); |
1c6fdbd8 KO |
249 | } |
250 | } | |
251 | ||
63316903 | 252 | void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt) |
1c6fdbd8 KO |
253 | { |
254 | unsigned sectors_pending = atomic_read(&ctxt->write_sectors); | |
255 | ||
63316903 | 256 | move_ctxt_wait_event(ctxt, |
1c6fdbd8 KO |
257 | !atomic_read(&ctxt->write_sectors) || |
258 | atomic_read(&ctxt->write_sectors) != sectors_pending); | |
259 | } | |
260 | ||
0c069781 | 261 | void bch2_moving_ctxt_flush_all(struct moving_context *ctxt) |
50e029c6 KO |
262 | { |
263 | move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); | |
264 | bch2_trans_unlock_long(ctxt->trans); | |
265 | closure_sync(&ctxt->cl); | |
266 | } | |
267 | ||
0337cc7e KO |
268 | void bch2_moving_ctxt_exit(struct moving_context *ctxt) |
269 | { | |
63316903 | 270 | struct bch_fs *c = ctxt->trans->c; |
b9fa375b | 271 | |
50e029c6 | 272 | bch2_moving_ctxt_flush_all(ctxt); |
c782c583 | 273 | |
0337cc7e | 274 | EBUG_ON(atomic_read(&ctxt->write_sectors)); |
c782c583 KO |
275 | EBUG_ON(atomic_read(&ctxt->write_ios)); |
276 | EBUG_ON(atomic_read(&ctxt->read_sectors)); | |
277 | EBUG_ON(atomic_read(&ctxt->read_ios)); | |
0337cc7e | 278 | |
b9fa375b KO |
279 | mutex_lock(&c->moving_context_lock); |
280 | list_del(&ctxt->list); | |
281 | mutex_unlock(&c->moving_context_lock); | |
63316903 | 282 | |
69785001 KO |
283 | /* |
284 | * Generally, releasing a transaction within a transaction restart means | |
285 | * an unhandled transaction restart: but this can happen legitimately | |
286 | * within the move code, e.g. when bch2_move_ratelimit() tells us to | |
287 | * exit before we've retried | |
288 | */ | |
289 | bch2_trans_begin(ctxt->trans); | |
63316903 KO |
290 | bch2_trans_put(ctxt->trans); |
291 | memset(ctxt, 0, sizeof(*ctxt)); | |
0337cc7e KO |
292 | } |
293 | ||
294 | void bch2_moving_ctxt_init(struct moving_context *ctxt, | |
295 | struct bch_fs *c, | |
296 | struct bch_ratelimit *rate, | |
297 | struct bch_move_stats *stats, | |
298 | struct write_point_specifier wp, | |
299 | bool wait_on_copygc) | |
300 | { | |
301 | memset(ctxt, 0, sizeof(*ctxt)); | |
302 | ||
63316903 | 303 | ctxt->trans = bch2_trans_get(c); |
b9fa375b | 304 | ctxt->fn = (void *) _RET_IP_; |
0337cc7e KO |
305 | ctxt->rate = rate; |
306 | ctxt->stats = stats; | |
307 | ctxt->wp = wp; | |
308 | ctxt->wait_on_copygc = wait_on_copygc; | |
309 | ||
0337cc7e | 310 | closure_init_stack(&ctxt->cl); |
b9fa375b KO |
311 | |
312 | mutex_init(&ctxt->lock); | |
0337cc7e | 313 | INIT_LIST_HEAD(&ctxt->reads); |
b9fa375b | 314 | INIT_LIST_HEAD(&ctxt->ios); |
0337cc7e KO |
315 | init_waitqueue_head(&ctxt->wait); |
316 | ||
b9fa375b KO |
317 | mutex_lock(&c->moving_context_lock); |
318 | list_add(&ctxt->list, &c->moving_context_list); | |
319 | mutex_unlock(&c->moving_context_lock); | |
96a363a7 | 320 | } |
b9fa375b | 321 | |
96a363a7 KO |
322 | void bch2_move_stats_exit(struct bch_move_stats *stats, struct bch_fs *c) |
323 | { | |
324 | trace_move_data(c, stats); | |
0337cc7e KO |
325 | } |
326 | ||
01e95645 | 327 | void bch2_move_stats_init(struct bch_move_stats *stats, const char *name) |
0337cc7e KO |
328 | { |
329 | memset(stats, 0, sizeof(*stats)); | |
96a363a7 | 330 | stats->data_type = BCH_DATA_user; |
0337cc7e KO |
331 | scnprintf(stats->name, sizeof(stats->name), "%s", name); |
332 | } | |
333 | ||
63316903 | 334 | int bch2_move_extent(struct moving_context *ctxt, |
49188a93 | 335 | struct move_bucket *bucket_in_flight, |
63316903 | 336 | struct btree_iter *iter, |
a0bfe3b0 | 337 | struct bkey_s_c k, |
63316903 | 338 | struct bch_io_opts io_opts, |
a0bfe3b0 | 339 | struct data_update_opts data_opts) |
1c6fdbd8 | 340 | { |
63316903 | 341 | struct btree_trans *trans = ctxt->trans; |
f30dd860 | 342 | struct bch_fs *c = trans->c; |
1c6fdbd8 KO |
343 | int ret = -ENOMEM; |
344 | ||
060ff4b7 KO |
345 | if (trace_io_move_enabled()) |
346 | trace_io_move2(c, k, &io_opts, &data_opts); | |
157ea583 | 347 | this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size); |
189c176c | 348 | |
96a363a7 KO |
349 | if (ctxt->stats) |
350 | ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos); | |
5a21764d | 351 | |
1be88797 KO |
352 | bch2_data_update_opts_normalize(k, &data_opts); |
353 | ||
354 | if (!data_opts.rewrite_ptrs && | |
f269ae55 KO |
355 | !data_opts.extra_replicas && |
356 | !data_opts.scrub) { | |
1be88797 | 357 | if (data_opts.kill_ptrs) |
a34eef6d | 358 | return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts); |
1be88797 KO |
359 | return 0; |
360 | } | |
361 | ||
f02d1532 KO |
362 | struct moving_io *io = allocate_dropping_locks(trans, ret, |
363 | kzalloc(sizeof(struct moving_io), _gfp)); | |
1c6fdbd8 KO |
364 | if (!io) |
365 | goto err; | |
366 | ||
f02d1532 KO |
367 | if (ret) |
368 | goto err_free; | |
369 | ||
b9fa375b | 370 | INIT_LIST_HEAD(&io->io_list); |
1c6fdbd8 | 371 | io->write.ctxt = ctxt; |
99aaf570 KO |
372 | io->read_sectors = k.k->size; |
373 | io->write_sectors = k.k->size; | |
1c6fdbd8 | 374 | |
f269ae55 KO |
375 | if (!data_opts.scrub) { |
376 | ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp, | |
377 | &io_opts, data_opts, iter->btree_id, k); | |
378 | if (ret) | |
379 | goto err_free; | |
380 | ||
381 | io->write.op.end_io = move_write_done; | |
382 | } else { | |
383 | bch2_bkey_buf_init(&io->write.k); | |
384 | bch2_bkey_buf_reassemble(&io->write.k, c, k); | |
385 | ||
386 | io->write.op.c = c; | |
387 | io->write.data_opts = data_opts; | |
388 | ||
f02d1532 KO |
389 | bch2_trans_unlock(trans); |
390 | ||
f269ae55 KO |
391 | ret = bch2_data_update_bios_init(&io->write, c, &io_opts); |
392 | if (ret) | |
393 | goto err_free; | |
394 | } | |
6f7111f8 KO |
395 | |
396 | io->write.rbio.bio.bi_end_io = move_read_endio; | |
397 | io->write.rbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0); | |
1c6fdbd8 | 398 | |
a0bfe3b0 KO |
399 | if (ctxt->rate) |
400 | bch2_ratelimit_increment(ctxt->rate, k.k->size); | |
401 | ||
2f528663 KO |
402 | if (ctxt->stats) { |
403 | atomic64_inc(&ctxt->stats->keys_moved); | |
404 | atomic64_add(k.k->size, &ctxt->stats->sectors_moved); | |
405 | } | |
406 | ||
8fcdf814 KO |
407 | if (bucket_in_flight) { |
408 | io->b = bucket_in_flight; | |
409 | atomic_inc(&io->b->count); | |
410 | } | |
411 | ||
060ff4b7 KO |
412 | if (trace_io_move_read_enabled()) |
413 | trace_io_move_read2(c, k); | |
b9fa375b KO |
414 | |
415 | mutex_lock(&ctxt->lock); | |
1c6fdbd8 | 416 | atomic_add(io->read_sectors, &ctxt->read_sectors); |
c782c583 | 417 | atomic_inc(&ctxt->read_ios); |
b9fa375b KO |
418 | |
419 | list_add_tail(&io->read_list, &ctxt->reads); | |
420 | list_add_tail(&io->io_list, &ctxt->ios); | |
421 | mutex_unlock(&ctxt->lock); | |
1c6fdbd8 KO |
422 | |
423 | /* | |
424 | * dropped by move_read_endio() - guards against use after free of | |
425 | * ctxt when doing wakeup | |
426 | */ | |
427 | closure_get(&ctxt->cl); | |
f269ae55 KO |
428 | __bch2_read_extent(trans, &io->write.rbio, |
429 | io->write.rbio.bio.bi_iter, | |
430 | bkey_start_pos(k.k), | |
431 | iter->btree_id, k, 0, | |
432 | NULL, | |
f269ae55 KO |
433 | BCH_READ_last_fragment, |
434 | data_opts.scrub ? data_opts.read_dev : -1); | |
1c6fdbd8 | 435 | return 0; |
1c6fdbd8 KO |
436 | err_free: |
437 | kfree(io); | |
438 | err: | |
1b1bd0fd KO |
439 | if (bch2_err_matches(ret, EROFS) || |
440 | bch2_err_matches(ret, BCH_ERR_transaction_restart)) | |
441 | return ret; | |
442 | ||
157ea583 | 443 | count_event(c, io_move_start_fail); |
74644030 | 444 | |
157ea583 | 445 | if (trace_io_move_start_fail_enabled()) { |
ae4d612c KO |
446 | struct printbuf buf = PRINTBUF; |
447 | ||
448 | bch2_bkey_val_to_text(&buf, c, k); | |
449 | prt_str(&buf, ": "); | |
450 | prt_str(&buf, bch2_err_str(ret)); | |
157ea583 | 451 | trace_io_move_start_fail(c, buf.buf); |
ae4d612c KO |
452 | printbuf_exit(&buf); |
453 | } | |
327971ce KO |
454 | |
455 | if (bch2_err_matches(ret, BCH_ERR_data_update_done)) | |
456 | return 0; | |
1c6fdbd8 KO |
457 | return ret; |
458 | } | |
459 | ||
84b9f171 | 460 | struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, |
84809057 | 461 | struct per_snapshot_io_opts *io_opts, |
80c6352c | 462 | struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */ |
6aa0bd0f | 463 | struct btree_iter *extent_iter, |
84809057 KO |
464 | struct bkey_s_c extent_k) |
465 | { | |
466 | struct bch_fs *c = trans->c; | |
467 | u32 restart_count = trans->restart_count; | |
6aa0bd0f | 468 | struct bch_io_opts *opts_ret = &io_opts->fs_io_opts; |
84809057 KO |
469 | int ret = 0; |
470 | ||
3484840e KO |
471 | if (extent_iter->min_depth) |
472 | return opts_ret; | |
473 | ||
6aa0bd0f KO |
474 | if (extent_k.k->type == KEY_TYPE_reflink_v) |
475 | goto out; | |
476 | ||
80c6352c | 477 | if (io_opts->cur_inum != extent_pos.inode) { |
84809057 KO |
478 | io_opts->d.nr = 0; |
479 | ||
80c6352c | 480 | ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_pos.inode), |
5dd8c60e | 481 | BTREE_ITER_all_snapshots, k, ({ |
80c6352c | 482 | if (k.k->p.offset != extent_pos.inode) |
84809057 KO |
483 | break; |
484 | ||
485 | if (!bkey_is_inode(k.k)) | |
486 | continue; | |
487 | ||
488 | struct bch_inode_unpacked inode; | |
644457ed KO |
489 | _ret3 = bch2_inode_unpack(k, &inode); |
490 | if (_ret3) | |
491 | break; | |
84809057 KO |
492 | |
493 | struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot }; | |
494 | bch2_inode_opts_get(&e.io_opts, trans->c, &inode); | |
495 | ||
27b2df98 KO |
496 | darray_push(&io_opts->d, e); |
497 | })); | |
80c6352c | 498 | io_opts->cur_inum = extent_pos.inode; |
84809057 KO |
499 | } |
500 | ||
501 | ret = ret ?: trans_was_restarted(trans, restart_count); | |
502 | if (ret) | |
503 | return ERR_PTR(ret); | |
504 | ||
defd9e39 | 505 | if (extent_k.k->p.snapshot) |
84809057 | 506 | darray_for_each(io_opts->d, i) |
6aa0bd0f KO |
507 | if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) { |
508 | opts_ret = &i->io_opts; | |
509 | break; | |
510 | } | |
511 | out: | |
161d1383 | 512 | ret = bch2_get_update_rebalance_opts(trans, opts_ret, extent_iter, extent_k); |
6aa0bd0f KO |
513 | if (ret) |
514 | return ERR_PTR(ret); | |
515 | return opts_ret; | |
84809057 KO |
516 | } |
517 | ||
a0bfe3b0 KO |
518 | int bch2_move_get_io_opts_one(struct btree_trans *trans, |
519 | struct bch_io_opts *io_opts, | |
6aa0bd0f | 520 | struct btree_iter *extent_iter, |
a0bfe3b0 | 521 | struct bkey_s_c extent_k) |
883d9701 | 522 | { |
6aa0bd0f KO |
523 | struct bch_fs *c = trans->c; |
524 | ||
525 | *io_opts = bch2_opts_to_inode_opts(c->opts); | |
883d9701 | 526 | |
84809057 | 527 | /* reflink btree? */ |
6aa0bd0f KO |
528 | if (!extent_k.k->p.inode) |
529 | goto out; | |
84809057 | 530 | |
6aa0bd0f KO |
531 | struct btree_iter inode_iter; |
532 | struct bkey_s_c inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, | |
84809057 | 533 | SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot), |
5dd8c60e | 534 | BTREE_ITER_cached); |
6aa0bd0f | 535 | int ret = bkey_err(inode_k); |
84809057 KO |
536 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) |
537 | return ret; | |
883d9701 | 538 | |
6aa0bd0f | 539 | if (!ret && bkey_is_inode(inode_k.k)) { |
84809057 | 540 | struct bch_inode_unpacked inode; |
6aa0bd0f KO |
541 | bch2_inode_unpack(inode_k, &inode); |
542 | bch2_inode_opts_get(io_opts, c, &inode); | |
443d2760 | 543 | } |
6aa0bd0f | 544 | bch2_trans_iter_exit(trans, &inode_iter); |
060ff4b7 | 545 | /* seem to be spinning here? */ |
6aa0bd0f | 546 | out: |
161d1383 | 547 | return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k); |
883d9701 KO |
548 | } |
549 | ||
63316903 | 550 | int bch2_move_ratelimit(struct moving_context *ctxt) |
c91996c5 | 551 | { |
63316903 | 552 | struct bch_fs *c = ctxt->trans->c; |
415e5107 | 553 | bool is_kthread = current->flags & PF_KTHREAD; |
c91996c5 DH |
554 | u64 delay; |
555 | ||
50e029c6 KO |
556 | if (ctxt->wait_on_copygc && c->copygc_running) { |
557 | bch2_moving_ctxt_flush_all(ctxt); | |
c91996c5 DH |
558 | wait_event_killable(c->copygc_running_wq, |
559 | !c->copygc_running || | |
415e5107 | 560 | (is_kthread && kthread_should_stop())); |
c91996c5 DH |
561 | } |
562 | ||
563 | do { | |
0337cc7e | 564 | delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0; |
c91996c5 | 565 | |
415e5107 | 566 | if (is_kthread && kthread_should_stop()) |
c91996c5 | 567 | return 1; |
c91996c5 DH |
568 | |
569 | if (delay) | |
261af2f1 | 570 | move_ctxt_wait_event_timeout(ctxt, |
415e5107 KO |
571 | freezing(current) || |
572 | (is_kthread && kthread_should_stop()), | |
261af2f1 | 573 | delay); |
c91996c5 DH |
574 | |
575 | if (unlikely(freezing(current))) { | |
50e029c6 | 576 | bch2_moving_ctxt_flush_all(ctxt); |
c91996c5 DH |
577 | try_to_freeze(); |
578 | } | |
579 | } while (delay); | |
580 | ||
c782c583 KO |
581 | /* |
582 | * XXX: these limits really ought to be per device, SSDs and hard drives | |
583 | * will want different limits | |
584 | */ | |
63316903 | 585 | move_ctxt_wait_event(ctxt, |
c782c583 KO |
586 | atomic_read(&ctxt->write_sectors) < c->opts.move_bytes_in_flight >> 9 && |
587 | atomic_read(&ctxt->read_sectors) < c->opts.move_bytes_in_flight >> 9 && | |
588 | atomic_read(&ctxt->write_ios) < c->opts.move_ios_in_flight && | |
589 | atomic_read(&ctxt->read_ios) < c->opts.move_ios_in_flight); | |
c91996c5 DH |
590 | |
591 | return 0; | |
592 | } | |
593 | ||
9314e2fb KO |
594 | /* |
595 | * Move requires non extents iterators, and there's also no need for it to | |
596 | * signal indirect_extent_missing_error: | |
597 | */ | |
598 | static struct bkey_s_c bch2_lookup_indirect_extent_for_move(struct btree_trans *trans, | |
599 | struct btree_iter *iter, | |
600 | struct bkey_s_c_reflink_p p) | |
601 | { | |
602 | if (unlikely(REFLINK_P_ERROR(p.v))) | |
603 | return bkey_s_c_null; | |
604 | ||
605 | struct bpos reflink_pos = POS(0, REFLINK_P_IDX(p.v)); | |
606 | ||
607 | bch2_trans_iter_init(trans, iter, | |
608 | BTREE_ID_reflink, reflink_pos, | |
609 | BTREE_ITER_not_extents); | |
610 | ||
9180ad2e | 611 | struct bkey_s_c k = bch2_btree_iter_peek(trans, iter); |
9314e2fb KO |
612 | if (!k.k || bkey_err(k)) { |
613 | bch2_trans_iter_exit(trans, iter); | |
614 | return k; | |
615 | } | |
616 | ||
617 | if (bkey_lt(reflink_pos, bkey_start_pos(k.k))) { | |
618 | bch2_trans_iter_exit(trans, iter); | |
619 | return bkey_s_c_null; | |
620 | } | |
621 | ||
622 | return k; | |
623 | } | |
624 | ||
3484840e KO |
625 | int bch2_move_data_btree(struct moving_context *ctxt, |
626 | struct bpos start, | |
627 | struct bpos end, | |
628 | move_pred_fn pred, void *arg, | |
629 | enum btree_id btree_id, unsigned level) | |
1c6fdbd8 | 630 | { |
63316903 KO |
631 | struct btree_trans *trans = ctxt->trans; |
632 | struct bch_fs *c = trans->c; | |
84809057 KO |
633 | struct per_snapshot_io_opts snapshot_io_opts; |
634 | struct bch_io_opts *io_opts; | |
07a1006a | 635 | struct bkey_buf sk; |
80c6352c | 636 | struct btree_iter iter, reflink_iter = {}; |
1c6fdbd8 | 637 | struct bkey_s_c k; |
7f5c5d20 | 638 | struct data_update_opts data_opts; |
80c6352c KO |
639 | /* |
640 | * If we're moving a single file, also process reflinked data it points | |
641 | * to (this includes propagating changed io_opts from the inode to the | |
642 | * extent): | |
643 | */ | |
644 | bool walk_indirect = start.inode == end.inode; | |
1c6fdbd8 KO |
645 | int ret = 0, ret2; |
646 | ||
84809057 | 647 | per_snapshot_io_opts_init(&snapshot_io_opts, c); |
07a1006a | 648 | bch2_bkey_buf_init(&sk); |
424eb881 | 649 | |
2f528663 KO |
650 | if (ctxt->stats) { |
651 | ctxt->stats->data_type = BCH_DATA_user; | |
d5eade93 | 652 | ctxt->stats->pos = BBPOS(btree_id, start); |
2f528663 | 653 | } |
424eb881 | 654 | |
fe27298b | 655 | retry_root: |
319fef29 | 656 | bch2_trans_begin(trans); |
fe27298b KO |
657 | |
658 | if (level == bch2_btree_id_root(c, btree_id)->level + 1) { | |
659 | bch2_trans_node_iter_init(trans, &iter, btree_id, start, 0, level - 1, | |
660 | BTREE_ITER_prefetch| | |
661 | BTREE_ITER_not_extents| | |
662 | BTREE_ITER_all_snapshots); | |
663 | struct btree *b = bch2_btree_iter_peek_node(trans, &iter); | |
664 | ret = PTR_ERR_OR_ZERO(b); | |
665 | if (ret) | |
666 | goto root_err; | |
667 | ||
668 | if (b != btree_node_root(c, b)) { | |
669 | bch2_trans_iter_exit(trans, &iter); | |
670 | goto retry_root; | |
671 | } | |
672 | ||
673 | k = bkey_i_to_s_c(&b->key); | |
674 | ||
675 | io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, | |
676 | iter.pos, &iter, k); | |
677 | ret = PTR_ERR_OR_ZERO(io_opts); | |
678 | if (ret) | |
679 | goto root_err; | |
680 | ||
681 | memset(&data_opts, 0, sizeof(data_opts)); | |
682 | if (!pred(c, arg, iter.btree_id, k, io_opts, &data_opts)) | |
683 | goto out; | |
684 | ||
685 | ||
686 | if (!data_opts.scrub) | |
687 | ret = bch2_btree_node_rewrite_pos(trans, btree_id, level, | |
688 | k.k->p, data_opts.target, 0); | |
689 | else | |
690 | ret = bch2_btree_node_scrub(trans, btree_id, level, k, data_opts.read_dev); | |
691 | ||
692 | root_err: | |
693 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { | |
694 | bch2_trans_iter_exit(trans, &iter); | |
695 | goto retry_root; | |
696 | } | |
697 | ||
698 | goto out; | |
699 | } | |
700 | ||
3484840e KO |
701 | bch2_trans_node_iter_init(trans, &iter, btree_id, start, 0, level, |
702 | BTREE_ITER_prefetch| | |
703 | BTREE_ITER_not_extents| | |
704 | BTREE_ITER_all_snapshots); | |
1c6fdbd8 | 705 | |
0337cc7e KO |
706 | if (ctxt->rate) |
707 | bch2_ratelimit_reset(ctxt->rate); | |
1c6fdbd8 | 708 | |
63316903 | 709 | while (!bch2_move_ratelimit(ctxt)) { |
80c6352c KO |
710 | struct btree_iter *extent_iter = &iter; |
711 | ||
6bd68ec2 | 712 | bch2_trans_begin(trans); |
700c25b3 | 713 | |
9180ad2e | 714 | k = bch2_btree_iter_peek(trans, &iter); |
1c6fdbd8 KO |
715 | if (!k.k) |
716 | break; | |
8ede9910 | 717 | |
0f238367 | 718 | ret = bkey_err(k); |
549d173c | 719 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) |
8ede9910 | 720 | continue; |
1c6fdbd8 KO |
721 | if (ret) |
722 | break; | |
8ede9910 | 723 | |
3484840e | 724 | if (bkey_gt(bkey_start_pos(k.k), end)) |
1c6fdbd8 KO |
725 | break; |
726 | ||
2f528663 | 727 | if (ctxt->stats) |
d5eade93 | 728 | ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos); |
8ede9910 | 729 | |
80c6352c KO |
730 | if (walk_indirect && |
731 | k.k->type == KEY_TYPE_reflink_p && | |
732 | REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)) { | |
733 | struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); | |
80c6352c KO |
734 | |
735 | bch2_trans_iter_exit(trans, &reflink_iter); | |
9314e2fb | 736 | k = bch2_lookup_indirect_extent_for_move(trans, &reflink_iter, p); |
80c6352c KO |
737 | ret = bkey_err(k); |
738 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) | |
739 | continue; | |
740 | if (ret) | |
741 | break; | |
742 | ||
9314e2fb | 743 | if (!k.k) |
80c6352c KO |
744 | goto next_nondata; |
745 | ||
746 | /* | |
747 | * XXX: reflink pointers may point to multiple indirect | |
748 | * extents, so don't advance past the entire reflink | |
749 | * pointer - need to fixup iter->k | |
750 | */ | |
751 | extent_iter = &reflink_iter; | |
752 | } | |
753 | ||
8d84260e | 754 | if (!bkey_extent_is_direct_data(k.k)) |
1c6fdbd8 KO |
755 | goto next_nondata; |
756 | ||
80c6352c KO |
757 | io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, |
758 | iter.pos, extent_iter, k); | |
84809057 | 759 | ret = PTR_ERR_OR_ZERO(io_opts); |
8e3f913e KO |
760 | if (ret) |
761 | continue; | |
1c6fdbd8 | 762 | |
7f5c5d20 | 763 | memset(&data_opts, 0, sizeof(data_opts)); |
7a274285 | 764 | if (!pred(c, arg, extent_iter->btree_id, k, io_opts, &data_opts)) |
1c6fdbd8 | 765 | goto next; |
1c6fdbd8 | 766 | |
eb331fe5 KO |
767 | /* |
768 | * The iterator gets unlocked by __bch2_read_extent - need to | |
769 | * save a copy of @k elsewhere: | |
3e3e02e6 | 770 | */ |
07a1006a | 771 | bch2_bkey_buf_reassemble(&sk, c, k); |
35189e09 | 772 | k = bkey_i_to_s_c(sk.k); |
1c6fdbd8 | 773 | |
3484840e KO |
774 | if (!level) |
775 | ret2 = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts); | |
776 | else if (!data_opts.scrub) | |
777 | ret2 = bch2_btree_node_rewrite_pos(trans, btree_id, level, | |
778 | k.k->p, data_opts.target, 0); | |
779 | else | |
780 | ret2 = bch2_btree_node_scrub(trans, btree_id, level, k, data_opts.read_dev); | |
781 | ||
1c6fdbd8 | 782 | if (ret2) { |
549d173c | 783 | if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) |
f0e70018 | 784 | continue; |
f0e70018 | 785 | |
6f7111f8 | 786 | if (bch2_err_matches(ret2, ENOMEM)) { |
1c6fdbd8 | 787 | /* memory allocation failure, wait for some IO to finish */ |
63316903 | 788 | bch2_move_ctxt_wait_for_io(ctxt); |
1c6fdbd8 KO |
789 | continue; |
790 | } | |
791 | ||
792 | /* XXX signal failure */ | |
793 | goto next; | |
794 | } | |
1c6fdbd8 | 795 | next: |
2f528663 KO |
796 | if (ctxt->stats) |
797 | atomic64_add(k.k->size, &ctxt->stats->sectors_seen); | |
1c6fdbd8 | 798 | next_nondata: |
3484840e KO |
799 | if (!bch2_btree_iter_advance(trans, &iter)) |
800 | break; | |
1c6fdbd8 | 801 | } |
fe27298b | 802 | out: |
80c6352c | 803 | bch2_trans_iter_exit(trans, &reflink_iter); |
6bd68ec2 | 804 | bch2_trans_iter_exit(trans, &iter); |
07a1006a | 805 | bch2_bkey_buf_exit(&sk, c); |
84809057 | 806 | per_snapshot_io_opts_exit(&snapshot_io_opts); |
76426098 KO |
807 | |
808 | return ret; | |
809 | } | |
810 | ||
63316903 | 811 | int __bch2_move_data(struct moving_context *ctxt, |
a0bfe3b0 KO |
812 | struct bbpos start, |
813 | struct bbpos end, | |
814 | move_pred_fn pred, void *arg) | |
76426098 | 815 | { |
63316903 | 816 | struct bch_fs *c = ctxt->trans->c; |
1889ad5a | 817 | enum btree_id id; |
40a53b92 | 818 | int ret = 0; |
76426098 | 819 | |
a0bfe3b0 KO |
820 | for (id = start.btree; |
821 | id <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1); | |
1889ad5a | 822 | id++) { |
d5eade93 | 823 | ctxt->stats->pos = BBPOS(id, POS_MIN); |
1889ad5a | 824 | |
a0bfe3b0 KO |
825 | if (!btree_type_has_ptrs(id) || |
826 | !bch2_btree_id_root(c, id)->b) | |
1889ad5a KO |
827 | continue; |
828 | ||
63316903 | 829 | ret = bch2_move_data_btree(ctxt, |
a0bfe3b0 KO |
830 | id == start.btree ? start.pos : POS_MIN, |
831 | id == end.btree ? end.pos : POS_MAX, | |
3484840e | 832 | pred, arg, id, 0); |
1889ad5a KO |
833 | if (ret) |
834 | break; | |
835 | } | |
836 | ||
a0bfe3b0 KO |
837 | return ret; |
838 | } | |
839 | ||
840 | int bch2_move_data(struct bch_fs *c, | |
841 | struct bbpos start, | |
842 | struct bbpos end, | |
843 | struct bch_ratelimit *rate, | |
844 | struct bch_move_stats *stats, | |
845 | struct write_point_specifier wp, | |
846 | bool wait_on_copygc, | |
847 | move_pred_fn pred, void *arg) | |
848 | { | |
a0bfe3b0 | 849 | struct moving_context ctxt; |
a0bfe3b0 KO |
850 | |
851 | bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); | |
987fdbdb | 852 | int ret = __bch2_move_data(&ctxt, start, end, pred, arg); |
0337cc7e | 853 | bch2_moving_ctxt_exit(&ctxt); |
1c6fdbd8 | 854 | |
1c6fdbd8 KO |
855 | return ret; |
856 | } | |
857 | ||
/*
 * __bch2_move_data_phys() - move data by physical location: walk the
 * backpointers btree for buckets [@bucket_start, @bucket_end) on device
 * @dev and move/rewrite/scrub whatever @pred selects.
 *
 * @bucket_in_flight:	tracking handle for copygc, may be NULL
 * @data_types:		bitmask of BCH_DATA_* types to consider
 * @copygc:		passed to backpointer-mismatch repair
 *
 * Extents are moved via bch2_move_extent(); interior btree nodes are
 * either rewritten in place or scrubbed, depending on data_opts.scrub.
 * Buckets skipped over are still checked for backpointer mismatches.
 */
static int __bch2_move_data_phys(struct moving_context *ctxt,
			struct move_bucket *bucket_in_flight,
			unsigned dev,
			u64 bucket_start,
			u64 bucket_end,
			unsigned data_types,
			bool copygc,
			move_pred_fn pred, void *arg)
{
	struct btree_trans *trans = ctxt->trans;
	struct bch_fs *c = trans->c;
	bool is_kthread = current->flags & PF_KTHREAD;
	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
	struct btree_iter iter = {}, bp_iter = {};
	struct bkey_buf sk;
	struct bkey_s_c k;
	struct bkey_buf last_flushed;
	/* next bucket still needing a backpointer-mismatch check: */
	u64 check_mismatch_done = bucket_start;
	int ret = 0;

	/* Device may have been removed; treat that as "nothing to do". */
	struct bch_dev *ca = bch2_dev_tryget(c, dev);
	if (!ca)
		return 0;

	bucket_end = min(bucket_end, ca->mi.nbuckets);

	struct bpos bp_start	= bucket_pos_to_bp_start(ca, POS(dev, bucket_start));
	struct bpos bp_end	= bucket_pos_to_bp_end(ca, POS(dev, bucket_end));

	bch2_bkey_buf_init(&last_flushed);
	bkey_init(&last_flushed.k->k);
	bch2_bkey_buf_init(&sk);

	/*
	 * We're not run in a context that handles transaction restarts:
	 */
	bch2_trans_begin(trans);

	bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, bp_start, 0);

	/* Backpointer updates may still be buffered; flush so we see them. */
	ret = bch2_btree_write_buffer_tryflush(trans);
	if (!bch2_err_matches(ret, EROFS))
		bch_err_msg(c, ret, "flushing btree write buffer");
	if (ret)
		goto err;

	while (!(ret = bch2_move_ratelimit(ctxt))) {
		if (is_kthread && kthread_should_stop())
			break;

		bch2_trans_begin(trans);

		k = bch2_btree_iter_peek(trans, &bp_iter);
		ret = bkey_err(k);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			goto err;

		if (!k.k || bkey_gt(k.k->p, bp_end))
			break;

		/*
		 * Catch up mismatch checks for every bucket we've passed
		 * before processing this backpointer:
		 */
		if (check_mismatch_done < bp_pos_to_bucket(ca, k.k->p).offset) {
			while (check_mismatch_done < bp_pos_to_bucket(ca, k.k->p).offset) {
				bch2_check_bucket_backpointer_mismatch(trans, ca, check_mismatch_done++,
								       copygc, &last_flushed);
			}
			continue;
		}

		if (k.k->type != KEY_TYPE_backpointer)
			goto next;

		struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);

		if (ctxt->stats)
			ctxt->stats->offset = bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT;

		if (!(data_types & BIT(bp.v->data_type)))
			goto next;

		/* Stripe keys are handled by the ec code, not moved here: */
		if (!bp.v->level && bp.v->btree_id == BTREE_ID_stripes)
			goto next;

		/* Resolve the backpointer to the key it points at: */
		k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed);
		ret = bkey_err(k);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			goto err;
		if (!k.k)
			goto next;

		/* Leaf-level data: look up per-inode io options first. */
		if (!bp.v->level) {
			ret = bch2_move_get_io_opts_one(trans, &io_opts, &iter, k);
			if (ret) {
				bch2_trans_iter_exit(trans, &iter);
				continue;
			}
		}

		struct data_update_opts data_opts = {};
		bool p = pred(c, arg, bp.v->btree_id, k, &io_opts, &data_opts);

		if (trace_io_move_pred_enabled())
			trace_io_move_pred2(c, k, &io_opts, &data_opts,
					    pred, arg, p);

		if (!p) {
			bch2_trans_iter_exit(trans, &iter);
			goto next;
		}

		if (data_opts.scrub &&
		    !bch2_dev_idx_is_online(c, data_opts.read_dev)) {
			bch2_trans_iter_exit(trans, &iter);
			ret = bch_err_throw(c, device_offline);
			break;
		}

		/* Copy the key: @iter may be invalidated below. */
		bch2_bkey_buf_reassemble(&sk, c, k);
		k = bkey_i_to_s_c(sk.k);

		/* move_extent will drop locks */
		unsigned sectors = bp.v->bucket_len;

		if (!bp.v->level)
			ret = bch2_move_extent(ctxt, bucket_in_flight, &iter, k, io_opts, data_opts);
		else if (!data_opts.scrub)
			ret = bch2_btree_node_rewrite_pos(trans, bp.v->btree_id, bp.v->level,
							  k.k->p, data_opts.target, 0);
		else
			ret = bch2_btree_node_scrub(trans, bp.v->btree_id, bp.v->level, k, data_opts.read_dev);

		bch2_trans_iter_exit(trans, &iter);

		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		/*
		 * NOTE(review): bare -ENOMEM comparison here, while the
		 * extent path above (and the rest of this file) uses
		 * bch2_err_matches(ret, ENOMEM) — confirm whether private
		 * ENOMEM error codes should also trigger the wait.
		 */
		if (ret == -ENOMEM) {
			/* memory allocation failure, wait for some IO to finish */
			bch2_move_ctxt_wait_for_io(ctxt);
			continue;
		}
		if (ret)
			goto err;

		if (ctxt->stats)
			atomic64_add(sectors, &ctxt->stats->sectors_seen);
next:
		bch2_btree_iter_advance(trans, &bp_iter);
	}

	/* Finish mismatch checks for buckets past the last backpointer: */
	while (check_mismatch_done < bucket_end)
		bch2_check_bucket_backpointer_mismatch(trans, ca, check_mismatch_done++,
						       copygc, &last_flushed);
err:
	bch2_trans_iter_exit(trans, &bp_iter);
	bch2_bkey_buf_exit(&sk, c);
	bch2_bkey_buf_exit(&last_flushed, c);
	bch2_dev_put(ca);
	return ret;
}
1020 | ||
ecedc87c KO |
1021 | int bch2_move_data_phys(struct bch_fs *c, |
1022 | unsigned dev, | |
1023 | u64 start, | |
1024 | u64 end, | |
1025 | unsigned data_types, | |
1026 | struct bch_ratelimit *rate, | |
1027 | struct bch_move_stats *stats, | |
1028 | struct write_point_specifier wp, | |
1029 | bool wait_on_copygc, | |
1030 | move_pred_fn pred, void *arg) | |
f269ae55 KO |
1031 | { |
1032 | struct moving_context ctxt; | |
1033 | ||
1034 | bch2_trans_run(c, bch2_btree_write_buffer_flush_sync(trans)); | |
1035 | ||
1036 | bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); | |
3a2a0d08 KO |
1037 | if (ctxt.stats) { |
1038 | ctxt.stats->phys = true; | |
1039 | ctxt.stats->data_type = (int) DATA_PROGRESS_DATA_TYPE_phys; | |
1040 | } | |
f269ae55 | 1041 | |
39cea302 KO |
1042 | int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end, |
1043 | data_types, false, pred, arg); | |
f269ae55 KO |
1044 | bch2_moving_ctxt_exit(&ctxt); |
1045 | ||
1046 | return ret; | |
1047 | } | |
1048 | ||
7a274285 KO |
1049 | static bool evacuate_bucket_pred(struct bch_fs *c, void *_arg, |
1050 | enum btree_id btree, struct bkey_s_c k, | |
987fdbdb KO |
1051 | struct bch_io_opts *io_opts, |
1052 | struct data_update_opts *data_opts) | |
1053 | { | |
1054 | struct evacuate_bucket_arg *arg = _arg; | |
1055 | ||
1056 | *data_opts = arg->data_opts; | |
1057 | ||
1058 | unsigned i = 0; | |
1059 | bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) { | |
1060 | if (ptr->dev == arg->bucket.inode && | |
1061 | (arg->gen < 0 || arg->gen == ptr->gen) && | |
1062 | !ptr->cached) | |
1063 | data_opts->rewrite_ptrs |= BIT(i); | |
1064 | i++; | |
1065 | } | |
1066 | ||
1067 | return data_opts->rewrite_ptrs != 0; | |
1068 | } | |
1069 | ||
/*
 * bch2_evacuate_bucket() - move all data out of a single bucket.
 *
 * @gen:	expected bucket generation, or negative to match any gen
 * @data_opts:	base data_update_opts copied into each move
 *
 * Thin wrapper around __bch2_move_data_phys() covering exactly one bucket,
 * with all data types selected (~0) and copygc semantics enabled.
 */
int bch2_evacuate_bucket(struct moving_context *ctxt,
			 struct move_bucket *bucket_in_flight,
			 struct bpos bucket, int gen,
			 struct data_update_opts data_opts)
{
	struct bch_fs *c = ctxt->trans->c;
	struct evacuate_bucket_arg arg = { bucket, gen, data_opts, };

	count_event(c, io_move_evacuate_bucket);
	if (trace_io_move_evacuate_bucket_enabled())
		trace_io_move_evacuate_bucket2(c, bucket, gen);

	return __bch2_move_data_phys(ctxt, bucket_in_flight,
				     bucket.inode,
				     bucket.offset,
				     bucket.offset + 1,
				     ~0,
				     true,
				     evacuate_bucket_pred, &arg);
}
1090 | ||
/* Predicate deciding whether a given btree node should be moved/rewritten. */
typedef bool (*move_btree_pred)(struct bch_fs *, void *,
				struct btree *, struct bch_io_opts *,
				struct data_update_opts *);
/*
 * bch2_move_btree() - walk btree nodes in [start, end] and rewrite those
 * selected by @pred.
 *
 * Iterates every live btree in the range at the node level; each selected
 * node is rewritten via bch2_btree_node_rewrite().  Transaction restarts
 * re-run the inner loop from the current iterator position (the `retry`
 * label).  Stops early if the calling kthread is asked to stop.
 */
static int bch2_move_btree(struct bch_fs *c,
			   struct bbpos start,
			   struct bbpos end,
			   move_btree_pred pred, void *arg,
			   struct bch_move_stats *stats)
{
	bool kthread = (current->flags & PF_KTHREAD) != 0;
	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
	struct moving_context ctxt;
	struct btree_trans *trans;
	struct btree_iter iter;
	struct btree *b;
	enum btree_id btree;
	struct data_update_opts data_opts;
	int ret = 0;

	/* Btree nodes are written through the dedicated btree write point: */
	bch2_moving_ctxt_init(&ctxt, c, NULL, stats,
			      writepoint_ptr(&c->btree_write_point),
			      true);
	trans = ctxt.trans;

	stats->data_type = BCH_DATA_btree;

	for (btree = start.btree;
	     btree <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1);
	     btree ++) {
		stats->pos = BBPOS(btree, POS_MIN);

		if (!bch2_btree_id_root(c, btree)->b)
			continue;

		bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, 0, 0,
					  BTREE_ITER_prefetch);
retry:
		ret = 0;
		while (bch2_trans_begin(trans),
		       (b = bch2_btree_iter_peek_node(trans, &iter)) &&
		       !(ret = PTR_ERR_OR_ZERO(b))) {
			if (kthread && kthread_should_stop())
				break;

			/* Past the end of the requested range? */
			if ((cmp_int(btree, end.btree) ?:
			     bpos_cmp(b->key.k.p, end.pos)) > 0)
				break;

			stats->pos = BBPOS(iter.btree_id, iter.pos);

			if (!pred(c, arg, b, &io_opts, &data_opts))
				goto next;

			ret = bch2_btree_node_rewrite(trans, &iter, b, 0, 0) ?: ret;
			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				continue;
			if (ret)
				break;
next:
			bch2_btree_iter_next_node(trans, &iter);
		}
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			goto retry;

		bch2_trans_iter_exit(trans, &iter);

		if (kthread && kthread_should_stop())
			break;
	}

	bch_err_fn(c, ret);
	bch2_moving_ctxt_exit(&ctxt);
	/* Wait for the rewrites to actually hit disk: */
	bch2_btree_interior_updates_flush(c);

	return ret;
}
1168 | ||
/*
 * rereplicate_pred() - select keys that need additional replicas.
 *
 * Kills dirty pointers to missing/zero-durability devices, and requests
 * extra replicas when the key's current durability is below the configured
 * replica count (metadata_replicas for btree pointers, io_opts data
 * replicas otherwise).  nr_good == 0 keys are skipped — nothing left to
 * copy from.
 */
static bool rereplicate_pred(struct bch_fs *c, void *arg,
			     enum btree_id btree, struct bkey_s_c k,
			     struct bch_io_opts *io_opts,
			     struct data_update_opts *data_opts)
{
	unsigned nr_good = bch2_bkey_durability(c, k);
	unsigned replicas = bkey_is_btree_ptr(k.k)
		? c->opts.metadata_replicas
		: io_opts->data_replicas;

	/* rcu protects the device lookup below: */
	guard(rcu)();
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	unsigned i = 0;
	bkey_for_each_ptr(ptrs, ptr) {
		struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
		if (!ptr->cached &&
		    (!ca || !ca->mi.durability))
			data_opts->kill_ptrs |= BIT(i);
		i++;
	}

	if (!data_opts->kill_ptrs &&
	    (!nr_good || nr_good >= replicas))
		return false;

	data_opts->target		= 0;
	data_opts->extra_replicas	= replicas - nr_good;
	data_opts->btree_insert_flags	= 0;
	return true;
}
1199 | ||
7f5c5d20 | 1200 | static bool migrate_pred(struct bch_fs *c, void *arg, |
7a274285 | 1201 | enum btree_id btree, struct bkey_s_c k, |
7f5c5d20 KO |
1202 | struct bch_io_opts *io_opts, |
1203 | struct data_update_opts *data_opts) | |
1c6fdbd8 | 1204 | { |
7f5c5d20 | 1205 | struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); |
1c6fdbd8 | 1206 | struct bch_ioctl_data *op = arg; |
7f5c5d20 | 1207 | unsigned i = 0; |
1c6fdbd8 | 1208 | |
7f5c5d20 | 1209 | data_opts->rewrite_ptrs = 0; |
1c6fdbd8 | 1210 | data_opts->target = 0; |
7f5c5d20 | 1211 | data_opts->extra_replicas = 0; |
1c6fdbd8 | 1212 | data_opts->btree_insert_flags = 0; |
7f5c5d20 KO |
1213 | |
1214 | bkey_for_each_ptr(ptrs, ptr) { | |
1215 | if (ptr->dev == op->migrate.dev) | |
1216 | data_opts->rewrite_ptrs |= 1U << i; | |
1217 | i++; | |
1218 | } | |
1219 | ||
3e3e02e6 | 1220 | return data_opts->rewrite_ptrs != 0; |
1c6fdbd8 KO |
1221 | } |
1222 | ||
/* Btree-node flavor of rereplicate_pred(): applies it to the node's key. */
static bool rereplicate_btree_pred(struct bch_fs *c, void *arg,
				   struct btree *b,
				   struct bch_io_opts *io_opts,
				   struct data_update_opts *data_opts)
{
	return rereplicate_pred(c, arg, b->c.btree_id, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}
1230 | ||
0ec5b3b7 KO |
1231 | /* |
1232 | * Ancient versions of bcachefs produced packed formats which could represent | |
1233 | * keys that the in memory format cannot represent; this checks for those | |
1234 | * formats so we can get rid of them. | |
1235 | */ | |
e01dacf7 KO |
1236 | static bool bformat_needs_redo(struct bkey_format *f) |
1237 | { | |
61692c78 KO |
1238 | for (unsigned i = 0; i < f->nr_fields; i++) |
1239 | if (bch2_bkey_format_field_overflows(f, i)) | |
e01dacf7 KO |
1240 | return true; |
1241 | ||
e01dacf7 KO |
1242 | return false; |
1243 | } | |
1244 | ||
7f5c5d20 KO |
1245 | static bool rewrite_old_nodes_pred(struct bch_fs *c, void *arg, |
1246 | struct btree *b, | |
1247 | struct bch_io_opts *io_opts, | |
1248 | struct data_update_opts *data_opts) | |
1889ad5a KO |
1249 | { |
1250 | if (b->version_ondisk != c->sb.version || | |
e01dacf7 KO |
1251 | btree_node_need_rewrite(b) || |
1252 | bformat_needs_redo(&b->format)) { | |
1889ad5a | 1253 | data_opts->target = 0; |
7f5c5d20 | 1254 | data_opts->extra_replicas = 0; |
1889ad5a | 1255 | data_opts->btree_insert_flags = 0; |
7f5c5d20 | 1256 | return true; |
1889ad5a KO |
1257 | } |
1258 | ||
7f5c5d20 | 1259 | return false; |
1889ad5a KO |
1260 | } |
1261 | ||
/*
 * bch2_scan_old_btree_nodes() - rewrite all btree nodes in old formats,
 * then record completion in the superblock compat bits
 * (extents_above_btree_updates_done, bformat_overflow_done) and bump
 * version_min to the current version.
 */
int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
{
	int ret;

	ret = bch2_move_btree(c,
			      BBPOS_MIN,
			      BBPOS_MAX,
			      rewrite_old_nodes_pred, c, stats);
	if (!ret) {
		mutex_lock(&c->sb_lock);
		c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
		c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
		c->disk_sb.sb->version_min = c->disk_sb.sb->version;
		bch2_write_super(c);
		mutex_unlock(&c->sb_lock);
	}

	bch_err_fn(c, ret);
	return ret;
}
1282 | ||
/*
 * drop_extra_replicas_pred() - select keys with more durability than the
 * configured replica count, killing pointers until durability would drop
 * to exactly the target.  A pointer is only killed if the remaining
 * durability (durability - d) still meets @replicas.
 */
static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg,
				     enum btree_id btree, struct bkey_s_c k,
				     struct bch_io_opts *io_opts,
				     struct data_update_opts *data_opts)
{
	unsigned durability = bch2_bkey_durability(c, k);
	unsigned replicas = bkey_is_btree_ptr(k.k)
		? c->opts.metadata_replicas
		: io_opts->data_replicas;
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	unsigned i = 0;

	/* rcu protects device lookups inside ptr durability calculation: */
	guard(rcu)();
	bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) {
		unsigned d = bch2_extent_ptr_durability(c, &p);

		if (d && durability - d >= replicas) {
			data_opts->kill_ptrs |= BIT(i);
			durability -= d;
		}

		i++;
	}

	return data_opts->kill_ptrs != 0;
}
1310 | ||
/* Btree-node flavor of drop_extra_replicas_pred(): applies it to the node's key. */
static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg,
					   struct btree *b,
					   struct bch_io_opts *io_opts,
					   struct data_update_opts *data_opts)
{
	return drop_extra_replicas_pred(c, arg, b->c.btree_id, bkey_i_to_s_c(&b->key),
					io_opts, data_opts);
}
1319 | ||
f269ae55 | 1320 | static bool scrub_pred(struct bch_fs *c, void *_arg, |
7a274285 | 1321 | enum btree_id btree, struct bkey_s_c k, |
f269ae55 KO |
1322 | struct bch_io_opts *io_opts, |
1323 | struct data_update_opts *data_opts) | |
1324 | { | |
1325 | struct bch_ioctl_data *arg = _arg; | |
1326 | ||
1327 | if (k.k->type != KEY_TYPE_btree_ptr_v2) { | |
1328 | struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); | |
1329 | const union bch_extent_entry *entry; | |
1330 | struct extent_ptr_decoded p; | |
1331 | bkey_for_each_ptr_decode(k.k, ptrs, p, entry) | |
1332 | if (p.ptr.dev == arg->migrate.dev) { | |
1333 | if (!p.crc.csum_type) | |
1334 | return false; | |
1335 | break; | |
1336 | } | |
1337 | } | |
1338 | ||
1339 | data_opts->scrub = true; | |
1340 | data_opts->read_dev = arg->migrate.dev; | |
1341 | return true; | |
1342 | } | |
1343 | ||
/*
 * bch2_data_job() - dispatch a BCH_IOCTL_DATA operation.
 *
 * Validates op.op, initializes @stats with the op name, runs the requested
 * job (scrub / rereplicate / migrate / rewrite_old_nodes /
 * drop_extra_replicas), and tears stats down again.  Per-step errors are
 * accumulated with `?: ret` so later steps still run but the first error
 * is reported.
 */
int bch2_data_job(struct bch_fs *c,
		  struct bch_move_stats *stats,
		  struct bch_ioctl_data op)
{
	struct bbpos start	= BBPOS(op.start_btree, op.start_pos);
	struct bbpos end	= BBPOS(op.end_btree, op.end_pos);
	int ret = 0;

	if (op.op >= BCH_DATA_OP_NR)
		return -EINVAL;

	bch2_move_stats_init(stats, bch2_data_ops_strs[op.op]);

	switch (op.op) {
	case BCH_DATA_OP_scrub:
		/*
		 * prevent tests from spuriously failing, make sure we see all
		 * btree nodes that need to be repaired
		 */
		bch2_btree_interior_updates_flush(c);

		ret = bch2_move_data_phys(c, op.scrub.dev, 0, U64_MAX,
					  op.scrub.data_types,
					  NULL,
					  stats,
					  writepoint_hashed((unsigned long) current),
					  false,
					  scrub_pred, &op) ?: ret;
		break;

	case BCH_DATA_OP_rereplicate:
		stats->data_type = BCH_DATA_journal;
		ret = bch2_journal_flush_device_pins(&c->journal, -1);
		ret = bch2_move_btree(c, start, end,
				      rereplicate_btree_pred, c, stats) ?: ret;
		ret = bch2_move_data(c, start, end,
				     NULL,
				     stats,
				     writepoint_hashed((unsigned long) current),
				     true,
				     rereplicate_pred, c) ?: ret;
		ret = bch2_replicas_gc2(c) ?: ret;
		break;
	case BCH_DATA_OP_migrate:
		/*
		 * NOTE(review): this early return skips the
		 * bch2_move_stats_exit() below even though stats were already
		 * initialized — confirm whether that leaks anything
		 * stats_init allocated.
		 */
		if (op.migrate.dev >= c->sb.nr_devices)
			return -EINVAL;

		stats->data_type = BCH_DATA_journal;
		ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
		ret = bch2_move_data_phys(c, op.migrate.dev, 0, U64_MAX,
					  ~0,
					  NULL,
					  stats,
					  writepoint_hashed((unsigned long) current),
					  true,
					  migrate_pred, &op) ?: ret;
		bch2_btree_interior_updates_flush(c);
		ret = bch2_replicas_gc2(c) ?: ret;
		break;
	case BCH_DATA_OP_rewrite_old_nodes:
		ret = bch2_scan_old_btree_nodes(c, stats);
		break;
	case BCH_DATA_OP_drop_extra_replicas:
		ret = bch2_move_btree(c, start, end,
				      drop_extra_replicas_btree_pred, c, stats) ?: ret;
		ret = bch2_move_data(c, start, end, NULL, stats,
				     writepoint_hashed((unsigned long) current),
				     true,
				     drop_extra_replicas_pred, c) ?: ret;
		ret = bch2_replicas_gc2(c) ?: ret;
		break;
	default:
		ret = -EINVAL;
	}

	bch2_move_stats_exit(stats, c);
	return ret;
}
b9fa375b | 1422 | |
/*
 * bch2_move_stats_to_text() - render move statistics into a printbuf:
 * header line (name, data type, position), then indented key/byte
 * counters.  Sector counts are shifted to bytes (<< 9) and printed
 * human-readable.
 */
void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats)
{
	prt_printf(out, "%s: data type==", stats->name);
	bch2_prt_data_type(out, stats->data_type);
	prt_str(out, " pos=");
	bch2_bbpos_to_text(out, stats->pos);
	prt_newline(out);
	printbuf_indent_add(out, 2);

	prt_printf(out, "keys moved:\t%llu\n",	atomic64_read(&stats->keys_moved));
	prt_printf(out, "keys raced:\t%llu\n",	atomic64_read(&stats->keys_raced));
	prt_printf(out, "bytes seen:\t");
	prt_human_readable_u64(out, atomic64_read(&stats->sectors_seen) << 9);
	prt_newline(out);

	prt_printf(out, "bytes moved:\t");
	prt_human_readable_u64(out, atomic64_read(&stats->sectors_moved) << 9);
	prt_newline(out);

	prt_printf(out, "bytes raced:\t");
	prt_human_readable_u64(out, atomic64_read(&stats->sectors_raced) << 9);
	prt_newline(out);

	printbuf_indent_sub(out, 2);
}
1448 | ||
/*
 * bch2_moving_ctxt_to_text() - render one moving_context: its stats,
 * current read/write IO counts against the configured in-flight limits,
 * then (under ctxt->lock) each in-flight data update.
 */
static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt)
{
	if (!out->nr_tabstops)
		printbuf_tabstop_push(out, 32);

	bch2_move_stats_to_text(out, ctxt->stats);
	printbuf_indent_add(out, 2);

	prt_printf(out, "reads: ios %u/%u sectors %u/%u\n",
		   atomic_read(&ctxt->read_ios),
		   c->opts.move_ios_in_flight,
		   atomic_read(&ctxt->read_sectors),
		   c->opts.move_bytes_in_flight >> 9);

	prt_printf(out, "writes: ios %u/%u sectors %u/%u\n",
		   atomic_read(&ctxt->write_ios),
		   c->opts.move_ios_in_flight,
		   atomic_read(&ctxt->write_sectors),
		   c->opts.move_bytes_in_flight >> 9);

	printbuf_indent_add(out, 2);

	/* ctxt->lock guards the ios list: */
	mutex_lock(&ctxt->lock);
	struct moving_io *io;
	list_for_each_entry(io, &ctxt->ios, io_list)
		bch2_data_update_inflight_to_text(out, &io->write);
	mutex_unlock(&ctxt->lock);

	/* undo both indent_add(2) calls above: */
	printbuf_indent_sub(out, 4);
}
1479 | ||
/* Render every registered moving_context in the filesystem, under the list lock. */
void bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c)
{
	struct moving_context *ctxt;

	mutex_lock(&c->moving_context_lock);
	list_for_each_entry(ctxt, &c->moving_context_list, list)
		bch2_moving_ctxt_to_text(out, c, ctxt);
	mutex_unlock(&c->moving_context_lock);
}
1489 | ||
/* One-time init of the per-fs moving_context list and its lock. */
void bch2_fs_move_init(struct bch_fs *c)
{
	INIT_LIST_HEAD(&c->moving_context_list);
	mutex_init(&c->moving_context_lock);
}