powercap: intel_rapl_tpmi: Enable PMU support
[linux-block.git] / fs / bcachefs / backpointers.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include "bcachefs.h"
3 #include "bbpos.h"
4 #include "alloc_background.h"
5 #include "backpointers.h"
6 #include "bkey_buf.h"
7 #include "btree_cache.h"
8 #include "btree_update.h"
9 #include "btree_update_interior.h"
10 #include "btree_write_buffer.h"
11 #include "checksum.h"
12 #include "error.h"
13
14 #include <linux/mm.h>
15
16 static bool extent_matches_bp(struct bch_fs *c,
17                               enum btree_id btree_id, unsigned level,
18                               struct bkey_s_c k,
19                               struct bpos bucket,
20                               struct bch_backpointer bp)
21 {
22         struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
23         const union bch_extent_entry *entry;
24         struct extent_ptr_decoded p;
25
26         bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
27                 struct bpos bucket2;
28                 struct bch_backpointer bp2;
29
30                 if (p.ptr.cached)
31                         continue;
32
33                 bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bucket2, &bp2);
34                 if (bpos_eq(bucket, bucket2) &&
35                     !memcmp(&bp, &bp2, sizeof(bp)))
36                         return true;
37         }
38
39         return false;
40 }
41
42 int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
43                              enum bkey_invalid_flags flags,
44                              struct printbuf *err)
45 {
46         struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
47
48         /* these will be caught by fsck */
49         if (!bch2_dev_exists2(c, bp.k->p.inode))
50                 return 0;
51
52         struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
53         int ret = 0;
54
55         bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
56                          c, err,
57                          backpointer_pos_wrong,
58                          "backpointer at wrong pos");
59 fsck_err:
60         return ret;
61 }
62
63 void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
64 {
65         prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
66                bch2_btree_id_str(bp->btree_id),
67                bp->level,
68                (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
69                (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
70                bp->bucket_len);
71         bch2_bpos_to_text(out, bp->pos);
72 }
73
74 void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
75 {
76         if (bch2_dev_exists2(c, k.k->p.inode)) {
77                 prt_str(out, "bucket=");
78                 bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p));
79                 prt_str(out, " ");
80         }
81
82         bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
83 }
84
85 void bch2_backpointer_swab(struct bkey_s k)
86 {
87         struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);
88
89         bp.v->bucket_offset     = swab40(bp.v->bucket_offset);
90         bp.v->bucket_len        = swab32(bp.v->bucket_len);
91         bch2_bpos_swab(&bp.v->pos);
92 }
93
94 static noinline int backpointer_mod_err(struct btree_trans *trans,
95                                         struct bch_backpointer bp,
96                                         struct bkey_s_c bp_k,
97                                         struct bkey_s_c orig_k,
98                                         bool insert)
99 {
100         struct bch_fs *c = trans->c;
101         struct printbuf buf = PRINTBUF;
102
103         if (insert) {
104                 prt_printf(&buf, "existing backpointer found when inserting ");
105                 bch2_backpointer_to_text(&buf, &bp);
106                 prt_newline(&buf);
107                 printbuf_indent_add(&buf, 2);
108
109                 prt_printf(&buf, "found ");
110                 bch2_bkey_val_to_text(&buf, c, bp_k);
111                 prt_newline(&buf);
112
113                 prt_printf(&buf, "for ");
114                 bch2_bkey_val_to_text(&buf, c, orig_k);
115
116                 bch_err(c, "%s", buf.buf);
117         } else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
118                 prt_printf(&buf, "backpointer not found when deleting");
119                 prt_newline(&buf);
120                 printbuf_indent_add(&buf, 2);
121
122                 prt_printf(&buf, "searching for ");
123                 bch2_backpointer_to_text(&buf, &bp);
124                 prt_newline(&buf);
125
126                 prt_printf(&buf, "got ");
127                 bch2_bkey_val_to_text(&buf, c, bp_k);
128                 prt_newline(&buf);
129
130                 prt_printf(&buf, "for ");
131                 bch2_bkey_val_to_text(&buf, c, orig_k);
132
133                 bch_err(c, "%s", buf.buf);
134         }
135
136         printbuf_exit(&buf);
137
138         if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
139                 return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0;
140         } else {
141                 return 0;
142         }
143 }
144
145 int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
146                                 struct bpos bucket,
147                                 struct bch_backpointer bp,
148                                 struct bkey_s_c orig_k,
149                                 bool insert)
150 {
151         struct btree_iter bp_iter;
152         struct bkey_s_c k;
153         struct bkey_i_backpointer *bp_k;
154         int ret;
155
156         bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
157         ret = PTR_ERR_OR_ZERO(bp_k);
158         if (ret)
159                 return ret;
160
161         bkey_backpointer_init(&bp_k->k_i);
162         bp_k->k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
163         bp_k->v = bp;
164
165         if (!insert) {
166                 bp_k->k.type = KEY_TYPE_deleted;
167                 set_bkey_val_u64s(&bp_k->k, 0);
168         }
169
170         k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
171                                bp_k->k.p,
172                                BTREE_ITER_INTENT|
173                                BTREE_ITER_SLOTS|
174                                BTREE_ITER_WITH_UPDATES);
175         ret = bkey_err(k);
176         if (ret)
177                 goto err;
178
179         if (insert
180             ? k.k->type
181             : (k.k->type != KEY_TYPE_backpointer ||
182                memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) {
183                 ret = backpointer_mod_err(trans, bp, k, orig_k, insert);
184                 if (ret)
185                         goto err;
186         }
187
188         ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
189 err:
190         bch2_trans_iter_exit(trans, &bp_iter);
191         return ret;
192 }
193
194 /*
195  * Find the next backpointer >= *bp_offset:
196  */
197 int bch2_get_next_backpointer(struct btree_trans *trans,
198                               struct bpos bucket, int gen,
199                               struct bpos *bp_pos,
200                               struct bch_backpointer *bp,
201                               unsigned iter_flags)
202 {
203         struct bch_fs *c = trans->c;
204         struct bpos bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
205         struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL };
206         struct bkey_s_c k;
207         int ret = 0;
208
209         if (bpos_ge(*bp_pos, bp_end_pos))
210                 goto done;
211
212         if (gen >= 0) {
213                 k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
214                                        bucket, BTREE_ITER_CACHED|iter_flags);
215                 ret = bkey_err(k);
216                 if (ret)
217                         goto out;
218
219                 if (k.k->type != KEY_TYPE_alloc_v4 ||
220                     bkey_s_c_to_alloc_v4(k).v->gen != gen)
221                         goto done;
222         }
223
224         *bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(c, bucket, 0));
225
226         for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
227                                      *bp_pos, iter_flags, k, ret) {
228                 if (bpos_ge(k.k->p, bp_end_pos))
229                         break;
230
231                 *bp_pos = k.k->p;
232                 *bp = *bkey_s_c_to_backpointer(k).v;
233                 goto out;
234         }
235 done:
236         *bp_pos = SPOS_MAX;
237 out:
238         bch2_trans_iter_exit(trans, &bp_iter);
239         bch2_trans_iter_exit(trans, &alloc_iter);
240         return ret;
241 }
242
243 static void backpointer_not_found(struct btree_trans *trans,
244                                   struct bpos bp_pos,
245                                   struct bch_backpointer bp,
246                                   struct bkey_s_c k)
247 {
248         struct bch_fs *c = trans->c;
249         struct printbuf buf = PRINTBUF;
250         struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
251
252         /*
253          * If we're using the btree write buffer, the backpointer we were
254          * looking at may have already been deleted - failure to find what it
255          * pointed to is not an error:
256          */
257         if (likely(!bch2_backpointers_no_use_write_buffer))
258                 return;
259
260         prt_printf(&buf, "backpointer doesn't match %s it points to:\n  ",
261                    bp.level ? "btree node" : "extent");
262         prt_printf(&buf, "bucket: ");
263         bch2_bpos_to_text(&buf, bucket);
264         prt_printf(&buf, "\n  ");
265
266         prt_printf(&buf, "backpointer pos: ");
267         bch2_bpos_to_text(&buf, bp_pos);
268         prt_printf(&buf, "\n  ");
269
270         bch2_backpointer_to_text(&buf, &bp);
271         prt_printf(&buf, "\n  ");
272         bch2_bkey_val_to_text(&buf, c, k);
273         if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers)
274                 bch_err_ratelimited(c, "%s", buf.buf);
275         else
276                 bch2_trans_inconsistent(trans, "%s", buf.buf);
277
278         printbuf_exit(&buf);
279 }
280
281 struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
282                                          struct btree_iter *iter,
283                                          struct bpos bp_pos,
284                                          struct bch_backpointer bp,
285                                          unsigned iter_flags)
286 {
287         if (likely(!bp.level)) {
288                 struct bch_fs *c = trans->c;
289                 struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
290                 struct bkey_s_c k;
291
292                 bch2_trans_node_iter_init(trans, iter,
293                                           bp.btree_id,
294                                           bp.pos,
295                                           0, 0,
296                                           iter_flags);
297                 k = bch2_btree_iter_peek_slot(iter);
298                 if (bkey_err(k)) {
299                         bch2_trans_iter_exit(trans, iter);
300                         return k;
301                 }
302
303                 if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
304                         return k;
305
306                 bch2_trans_iter_exit(trans, iter);
307                 backpointer_not_found(trans, bp_pos, bp, k);
308                 return bkey_s_c_null;
309         } else {
310                 struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);
311
312                 if (IS_ERR_OR_NULL(b)) {
313                         bch2_trans_iter_exit(trans, iter);
314                         return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
315                 }
316                 return bkey_i_to_s_c(&b->key);
317         }
318 }
319
320 struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
321                                         struct btree_iter *iter,
322                                         struct bpos bp_pos,
323                                         struct bch_backpointer bp)
324 {
325         struct bch_fs *c = trans->c;
326         struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
327         struct btree *b;
328
329         BUG_ON(!bp.level);
330
331         bch2_trans_node_iter_init(trans, iter,
332                                   bp.btree_id,
333                                   bp.pos,
334                                   0,
335                                   bp.level - 1,
336                                   0);
337         b = bch2_btree_iter_peek_node(iter);
338         if (IS_ERR_OR_NULL(b))
339                 goto err;
340
341         BUG_ON(b->c.level != bp.level - 1);
342
343         if (extent_matches_bp(c, bp.btree_id, bp.level,
344                               bkey_i_to_s_c(&b->key),
345                               bucket, bp))
346                 return b;
347
348         if (btree_node_will_make_reachable(b)) {
349                 b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
350         } else {
351                 backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
352                 b = NULL;
353         }
354 err:
355         bch2_trans_iter_exit(trans, iter);
356         return b;
357 }
358
359 static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
360                                         struct bkey_s_c k)
361 {
362         struct bch_fs *c = trans->c;
363         struct btree_iter alloc_iter = { NULL };
364         struct bkey_s_c alloc_k;
365         struct printbuf buf = PRINTBUF;
366         int ret = 0;
367
368         if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
369                         backpointer_to_missing_device,
370                         "backpointer for missing device:\n%s",
371                         (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
372                 ret = bch2_btree_delete_at(trans, bp_iter, 0);
373                 goto out;
374         }
375
376         alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
377                                      bp_pos_to_bucket(c, k.k->p), 0);
378         ret = bkey_err(alloc_k);
379         if (ret)
380                 goto out;
381
382         if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c,
383                         backpointer_to_missing_alloc,
384                         "backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
385                         alloc_iter.pos.inode, alloc_iter.pos.offset,
386                         (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
387                 ret = bch2_btree_delete_at(trans, bp_iter, 0);
388                 goto out;
389         }
390 out:
391 fsck_err:
392         bch2_trans_iter_exit(trans, &alloc_iter);
393         printbuf_exit(&buf);
394         return ret;
395 }
396
397 /* verify that every backpointer has a corresponding alloc key */
398 int bch2_check_btree_backpointers(struct bch_fs *c)
399 {
400         int ret = bch2_trans_run(c,
401                 for_each_btree_key_commit(trans, iter,
402                         BTREE_ID_backpointers, POS_MIN, 0, k,
403                         NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
404                   bch2_check_btree_backpointer(trans, &iter, k)));
405         bch_err_fn(c, ret);
406         return ret;
407 }
408
409 static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
410 {
411         return bpos_eq(l.k->p, r.k->p) &&
412                 bkey_bytes(l.k) == bkey_bytes(r.k) &&
413                 !memcmp(l.v, r.v, bkey_val_bytes(l.k));
414 }
415
416 struct extents_to_bp_state {
417         struct bpos     bucket_start;
418         struct bpos     bucket_end;
419         struct bkey_buf last_flushed;
420 };
421
422 static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
423                                struct bkey_s_c extent, unsigned dev)
424 {
425         struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent);
426         int ret = PTR_ERR_OR_ZERO(n);
427         if (ret)
428                 return ret;
429
430         bch2_bkey_drop_device(bkey_i_to_s(n), dev);
431         return bch2_btree_insert_trans(trans, btree, n, 0);
432 }
433
434 static int check_extent_checksum(struct btree_trans *trans,
435                                  enum btree_id btree, struct bkey_s_c extent,
436                                  enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
437 {
438         struct bch_fs *c = trans->c;
439         struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
440         const union bch_extent_entry *entry;
441         struct extent_ptr_decoded p;
442         struct printbuf buf = PRINTBUF;
443         void *data_buf = NULL;
444         struct bio *bio = NULL;
445         size_t bytes;
446         int ret = 0;
447
448         if (bkey_is_btree_ptr(extent.k))
449                 return false;
450
451         bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
452                 if (p.ptr.dev == dev)
453                         goto found;
454         BUG();
455 found:
456         if (!p.crc.csum_type)
457                 return false;
458
459         bytes = p.crc.compressed_size << 9;
460
461         struct bch_dev *ca = bch_dev_bkey_exists(c, dev);
462         if (!bch2_dev_get_ioref(ca, READ))
463                 return false;
464
465         data_buf = kvmalloc(bytes, GFP_KERNEL);
466         if (!data_buf) {
467                 ret = -ENOMEM;
468                 goto err;
469         }
470
471         bio = bio_alloc(ca->disk_sb.bdev, 1, REQ_OP_READ, GFP_KERNEL);
472         bio->bi_iter.bi_sector = p.ptr.offset;
473         bch2_bio_map(bio, data_buf, bytes);
474         ret = submit_bio_wait(bio);
475         if (ret)
476                 goto err;
477
478         prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
479         prt_printf(&buf, "\n  %s ", bch2_btree_id_str(btree));
480         bch2_bkey_val_to_text(&buf, c, extent);
481         prt_printf(&buf, "\n  %s ", bch2_btree_id_str(o_btree));
482         bch2_bkey_val_to_text(&buf, c, extent2);
483
484         struct nonce nonce = extent_nonce(extent.k->version, p.crc);
485         struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
486         if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
487                         c, dup_backpointer_to_bad_csum_extent,
488                         "%s", buf.buf))
489                 ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
490 fsck_err:
491 err:
492         if (bio)
493                 bio_put(bio);
494         kvfree(data_buf);
495         percpu_ref_put(&ca->io_ref);
496         printbuf_exit(&buf);
497         return ret;
498 }
499
500 static int check_bp_exists(struct btree_trans *trans,
501                            struct extents_to_bp_state *s,
502                            struct bpos bucket,
503                            struct bch_backpointer bp,
504                            struct bkey_s_c orig_k)
505 {
506         struct bch_fs *c = trans->c;
507         struct btree_iter bp_iter = {};
508         struct btree_iter other_extent_iter = {};
509         struct printbuf buf = PRINTBUF;
510         struct bkey_s_c bp_k;
511         struct bkey_buf tmp;
512         int ret;
513
514         bch2_bkey_buf_init(&tmp);
515
516         if (!bch2_dev_bucket_exists(c, bucket)) {
517                 prt_str(&buf, "extent for nonexistent device:bucket ");
518                 bch2_bpos_to_text(&buf, bucket);
519                 prt_str(&buf, "\n  ");
520                 bch2_bkey_val_to_text(&buf, c, orig_k);
521                 bch_err(c, "%s", buf.buf);
522                 return -BCH_ERR_fsck_repair_unimplemented;
523         }
524
525         if (bpos_lt(bucket, s->bucket_start) ||
526             bpos_gt(bucket, s->bucket_end))
527                 return 0;
528
529         bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
530                                   bucket_pos_to_bp(c, bucket, bp.bucket_offset),
531                                   0);
532         ret = bkey_err(bp_k);
533         if (ret)
534                 goto err;
535
536         if (bp_k.k->type != KEY_TYPE_backpointer ||
537             memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
538                 bch2_bkey_buf_reassemble(&tmp, c, orig_k);
539
540                 if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) {
541                         if (bp.level) {
542                                 bch2_trans_unlock(trans);
543                                 bch2_btree_interior_updates_flush(c);
544                         }
545
546                         ret = bch2_btree_write_buffer_flush_sync(trans);
547                         if (ret)
548                                 goto err;
549
550                         bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k);
551                         ret = -BCH_ERR_transaction_restart_write_buffer_flush;
552                         goto out;
553                 }
554
555                 goto check_existing_bp;
556         }
557 out:
558 err:
559 fsck_err:
560         bch2_trans_iter_exit(trans, &other_extent_iter);
561         bch2_trans_iter_exit(trans, &bp_iter);
562         bch2_bkey_buf_exit(&tmp, c);
563         printbuf_exit(&buf);
564         return ret;
565 check_existing_bp:
566         /* Do we have a backpointer for a different extent? */
567         if (bp_k.k->type != KEY_TYPE_backpointer)
568                 goto missing;
569
570         struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v;
571
572         struct bkey_s_c other_extent =
573                 bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0);
574         ret = bkey_err(other_extent);
575         if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
576                 ret = 0;
577         if (ret)
578                 goto err;
579
580         if (!other_extent.k)
581                 goto missing;
582
583         if (bch2_extents_match(orig_k, other_extent)) {
584                 printbuf_reset(&buf);
585                 prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n  ");
586                 bch2_bkey_val_to_text(&buf, c, orig_k);
587                 prt_str(&buf, "\n  ");
588                 bch2_bkey_val_to_text(&buf, c, other_extent);
589                 bch_err(c, "%s", buf.buf);
590
591                 if (other_extent.k->size <= orig_k.k->size) {
592                         ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode);
593                         if (ret)
594                                 goto err;
595                         goto out;
596                 } else {
597                         ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode);
598                         if (ret)
599                                 goto err;
600                         goto missing;
601                 }
602         }
603
604         ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode);
605         if (ret < 0)
606                 goto err;
607         if (ret) {
608                 ret = 0;
609                 goto missing;
610         }
611
612         ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode);
613         if (ret < 0)
614                 goto err;
615         if (ret) {
616                 ret = 0;
617                 goto out;
618         }
619
620         printbuf_reset(&buf);
621         prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n  ", bucket.inode);
622         bch2_bkey_val_to_text(&buf, c, orig_k);
623         prt_str(&buf, "\n  ");
624         bch2_bkey_val_to_text(&buf, c, other_extent);
625         bch_err(c, "%s", buf.buf);
626         ret = -BCH_ERR_fsck_repair_unimplemented;
627         goto err;
628 missing:
629         printbuf_reset(&buf);
630         prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
631                bch2_btree_id_str(bp.btree_id), bp.level);
632         bch2_bkey_val_to_text(&buf, c, orig_k);
633         prt_printf(&buf, "\n  got:   ");
634         bch2_bkey_val_to_text(&buf, c, bp_k);
635
636         struct bkey_i_backpointer n_bp_k;
637         bkey_backpointer_init(&n_bp_k.k_i);
638         n_bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
639         n_bp_k.v = bp;
640         prt_printf(&buf, "\n  want:  ");
641         bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i));
642
643         if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
644                 ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);
645
646         goto out;
647 }
648
649 static int check_extent_to_backpointers(struct btree_trans *trans,
650                                         struct extents_to_bp_state *s,
651                                         enum btree_id btree, unsigned level,
652                                         struct bkey_s_c k)
653 {
654         struct bch_fs *c = trans->c;
655         struct bkey_ptrs_c ptrs;
656         const union bch_extent_entry *entry;
657         struct extent_ptr_decoded p;
658         int ret;
659
660         ptrs = bch2_bkey_ptrs_c(k);
661         bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
662                 struct bpos bucket_pos;
663                 struct bch_backpointer bp;
664
665                 if (p.ptr.cached)
666                         continue;
667
668                 bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bucket_pos, &bp);
669
670                 ret = check_bp_exists(trans, s, bucket_pos, bp, k);
671                 if (ret)
672                         return ret;
673         }
674
675         return 0;
676 }
677
678 static int check_btree_root_to_backpointers(struct btree_trans *trans,
679                                             struct extents_to_bp_state *s,
680                                             enum btree_id btree_id,
681                                             int *level)
682 {
683         struct bch_fs *c = trans->c;
684         struct btree_iter iter;
685         struct btree *b;
686         struct bkey_s_c k;
687         int ret;
688 retry:
689         bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
690                                   0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
691         b = bch2_btree_iter_peek_node(&iter);
692         ret = PTR_ERR_OR_ZERO(b);
693         if (ret)
694                 goto err;
695
696         if (b != btree_node_root(c, b)) {
697                 bch2_trans_iter_exit(trans, &iter);
698                 goto retry;
699         }
700
701         *level = b->c.level;
702
703         k = bkey_i_to_s_c(&b->key);
704         ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k);
705 err:
706         bch2_trans_iter_exit(trans, &iter);
707         return ret;
708 }
709
710 static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
711 {
712         return (struct bbpos) {
713                 .btree  = bp.btree_id,
714                 .pos    = bp.pos,
715         };
716 }
717
718 static u64 mem_may_pin_bytes(struct bch_fs *c)
719 {
720         struct sysinfo i;
721         si_meminfo(&i);
722
723         u64 mem_bytes = i.totalram * i.mem_unit;
724         return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
725 }
726
727 static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
728 {
729         return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
730 }
731
732 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
733                                         u64 btree_leaf_mask,
734                                         u64 btree_interior_mask,
735                                         struct bbpos start, struct bbpos *end)
736 {
737         struct bch_fs *c = trans->c;
738         s64 mem_may_pin = mem_may_pin_bytes(c);
739         int ret = 0;
740
741         btree_interior_mask |= btree_leaf_mask;
742
743         c->btree_cache.pinned_nodes_leaf_mask           = btree_leaf_mask;
744         c->btree_cache.pinned_nodes_interior_mask       = btree_interior_mask;
745         c->btree_cache.pinned_nodes_start               = start;
746         c->btree_cache.pinned_nodes_end                 = *end = BBPOS_MAX;
747
748         for (enum btree_id btree = start.btree;
749              btree < BTREE_ID_NR && !ret;
750              btree++) {
751                 unsigned depth = ((1U << btree) & btree_leaf_mask) ? 0 : 1;
752                 struct btree_iter iter;
753                 struct btree *b;
754
755                 if (!((1U << btree) & btree_leaf_mask) &&
756                     !((1U << btree) & btree_interior_mask))
757                         continue;
758
759                 __for_each_btree_node(trans, iter, btree,
760                                       btree == start.btree ? start.pos : POS_MIN,
761                                       0, depth, BTREE_ITER_PREFETCH, b, ret) {
762                         mem_may_pin -= btree_buf_bytes(b);
763                         if (mem_may_pin <= 0) {
764                                 c->btree_cache.pinned_nodes_end = *end =
765                                         BBPOS(btree, b->key.k.p);
766                                 bch2_trans_iter_exit(trans, &iter);
767                                 return 0;
768                         }
769                 }
770                 bch2_trans_iter_exit(trans, &iter);
771         }
772
773         return ret;
774 }
775
776 static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
777                                                    struct extents_to_bp_state *s)
778 {
779         struct bch_fs *c = trans->c;
780         int ret = 0;
781
782         for (enum btree_id btree_id = 0;
783              btree_id < btree_id_nr_alive(c);
784              btree_id++) {
785                 int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;
786
787                 ret = commit_do(trans, NULL, NULL,
788                                 BCH_TRANS_COMMIT_no_enospc,
789                                 check_btree_root_to_backpointers(trans, s, btree_id, &level));
790                 if (ret)
791                         return ret;
792
793                 while (level >= depth) {
794                         struct btree_iter iter;
795                         bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
796                                                   level,
797                                                   BTREE_ITER_PREFETCH);
798                         while (1) {
799                                 bch2_trans_begin(trans);
800
801                                 struct bkey_s_c k = bch2_btree_iter_peek(&iter);
802                                 if (!k.k)
803                                         break;
804                                 ret = bkey_err(k) ?:
805                                         check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
806                                         bch2_trans_commit(trans, NULL, NULL,
807                                                           BCH_TRANS_COMMIT_no_enospc);
808                                 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
809                                         ret = 0;
810                                         continue;
811                                 }
812                                 if (ret)
813                                         break;
814                                 if (bpos_eq(iter.pos, SPOS_MAX))
815                                         break;
816                                 bch2_btree_iter_advance(&iter);
817                         }
818                         bch2_trans_iter_exit(trans, &iter);
819
820                         if (ret)
821                                 return ret;
822
823                         --level;
824                 }
825         }
826
827         return 0;
828 }
829
830 int bch2_check_extents_to_backpointers(struct bch_fs *c)
831 {
832         struct btree_trans *trans = bch2_trans_get(c);
833         struct extents_to_bp_state s = { .bucket_start = POS_MIN };
834         int ret;
835
836         bch2_bkey_buf_init(&s.last_flushed);
837         bkey_init(&s.last_flushed.k->k);
838
839         while (1) {
840                 struct bbpos end;
841                 ret = bch2_get_btree_in_memory_pos(trans,
842                                 BIT_ULL(BTREE_ID_backpointers),
843                                 BIT_ULL(BTREE_ID_backpointers),
844                                 BBPOS(BTREE_ID_backpointers, s.bucket_start), &end);
845                 if (ret)
846                         break;
847
848                 s.bucket_end = end.pos;
849
850                 if ( bpos_eq(s.bucket_start, POS_MIN) &&
851                     !bpos_eq(s.bucket_end, SPOS_MAX))
852                         bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
853                                     __func__, btree_nodes_fit_in_ram(c));
854
855                 if (!bpos_eq(s.bucket_start, POS_MIN) ||
856                     !bpos_eq(s.bucket_end, SPOS_MAX)) {
857                         struct printbuf buf = PRINTBUF;
858
859                         prt_str(&buf, "check_extents_to_backpointers(): ");
860                         bch2_bpos_to_text(&buf, s.bucket_start);
861                         prt_str(&buf, "-");
862                         bch2_bpos_to_text(&buf, s.bucket_end);
863
864                         bch_verbose(c, "%s", buf.buf);
865                         printbuf_exit(&buf);
866                 }
867
868                 ret = bch2_check_extents_to_backpointers_pass(trans, &s);
869                 if (ret || bpos_eq(s.bucket_end, SPOS_MAX))
870                         break;
871
872                 s.bucket_start = bpos_successor(s.bucket_end);
873         }
874         bch2_trans_put(trans);
875         bch2_bkey_buf_exit(&s.last_flushed, c);
876
877         c->btree_cache.pinned_nodes_leaf_mask = 0;
878         c->btree_cache.pinned_nodes_interior_mask = 0;
879
880         bch_err_fn(c, ret);
881         return ret;
882 }
883
884 static int check_one_backpointer(struct btree_trans *trans,
885                                  struct bbpos start,
886                                  struct bbpos end,
887                                  struct bkey_s_c_backpointer bp,
888                                  struct bpos *last_flushed_pos)
889 {
890         struct bch_fs *c = trans->c;
891         struct btree_iter iter;
892         struct bbpos pos = bp_to_bbpos(*bp.v);
893         struct bkey_s_c k;
894         struct printbuf buf = PRINTBUF;
895         int ret;
896
897         if (bbpos_cmp(pos, start) < 0 ||
898             bbpos_cmp(pos, end) > 0)
899                 return 0;
900
901         k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0);
902         ret = bkey_err(k);
903         if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
904                 return 0;
905         if (ret)
906                 return ret;
907
908         if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
909                 *last_flushed_pos = bp.k->p;
910                 ret = bch2_btree_write_buffer_flush_sync(trans) ?:
911                         -BCH_ERR_transaction_restart_write_buffer_flush;
912                 goto out;
913         }
914
915         if (fsck_err_on(!k.k, c,
916                         backpointer_to_missing_ptr,
917                         "backpointer for missing %s\n  %s",
918                         bp.v->level ? "btree node" : "extent",
919                         (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
920                 ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
921                 goto out;
922         }
923 out:
924 fsck_err:
925         bch2_trans_iter_exit(trans, &iter);
926         printbuf_exit(&buf);
927         return ret;
928 }
929
930 static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
931                                                    struct bbpos start,
932                                                    struct bbpos end)
933 {
934         struct bpos last_flushed_pos = SPOS_MAX;
935
936         return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
937                                   POS_MIN, BTREE_ITER_PREFETCH, k,
938                                   NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
939                 check_one_backpointer(trans, start, end,
940                                       bkey_s_c_to_backpointer(k),
941                                       &last_flushed_pos));
942 }
943
944 int bch2_check_backpointers_to_extents(struct bch_fs *c)
945 {
946         struct btree_trans *trans = bch2_trans_get(c);
947         struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
948         int ret;
949
950         while (1) {
951                 ret = bch2_get_btree_in_memory_pos(trans,
952                                                    (1U << BTREE_ID_extents)|
953                                                    (1U << BTREE_ID_reflink),
954                                                    ~0,
955                                                    start, &end);
956                 if (ret)
957                         break;
958
959                 if (!bbpos_cmp(start, BBPOS_MIN) &&
960                     bbpos_cmp(end, BBPOS_MAX))
961                         bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
962                                     __func__, btree_nodes_fit_in_ram(c));
963
964                 if (bbpos_cmp(start, BBPOS_MIN) ||
965                     bbpos_cmp(end, BBPOS_MAX)) {
966                         struct printbuf buf = PRINTBUF;
967
968                         prt_str(&buf, "check_backpointers_to_extents(): ");
969                         bch2_bbpos_to_text(&buf, start);
970                         prt_str(&buf, "-");
971                         bch2_bbpos_to_text(&buf, end);
972
973                         bch_verbose(c, "%s", buf.buf);
974                         printbuf_exit(&buf);
975                 }
976
977                 ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
978                 if (ret || !bbpos_cmp(end, BBPOS_MAX))
979                         break;
980
981                 start = bbpos_successor(end);
982         }
983         bch2_trans_put(trans);
984
985         c->btree_cache.pinned_nodes_leaf_mask = 0;
986         c->btree_cache.pinned_nodes_interior_mask = 0;
987
988         bch_err_fn(c, ret);
989         return ret;
990 }