1 // SPDX-License-Identifier: GPL-2.0
5 #include "disk_groups.h"
16 #include <linux/backing-dev.h>
17 #include <linux/sort.h>
/* Holder ops passed to blkdev_get_by_path() in bch2_read_super(). */
19 static const struct blk_holder_ops bch2_sb_handle_bdev_ops = {
/* Human-readable names for the optional superblock field types (x-macro list). */
22 const char * const bch2_sb_fields[] = {
23 #define x(name, nr) #name,
/* Forward declaration: per-field-type validation dispatcher, defined near EOF. */
29 static const char *bch2_sb_field_validate(struct bch_sb *,
30 struct bch_sb_field *);
/*
 * Look up an optional superblock field by type: linear scan over the
 * variable-length field list, matching on the little-endian type tag.
 * (Return statements elided from this view; presumably returns the match
 * or NULL — callers below NULL-check the result.)
 */
32 struct bch_sb_field *bch2_sb_field_get(struct bch_sb *sb,
33 enum bch_sb_field_type type)
35 struct bch_sb_field *f;
37 /* XXX: need locking around superblock to access optional fields */
39 vstruct_for_each(sb, f)
40 if (le32_to_cpu(f->type) == type)
/*
 * Resize an optional field in place to @u64s 64-bit words (creating it at
 * the end when @f is NULL), shifting any following fields with memmove(),
 * zeroing the vacated gap, and updating the superblock's total u64s.
 * Returns the field, or NULL when u64s == 0 (field deleted).
 * The BUG_ON checks the caller already made the sb buffer large enough.
 */
45 static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb,
46 struct bch_sb_field *f,
49 unsigned old_u64s = f ? le32_to_cpu(f->u64s) : 0;
50 unsigned sb_u64s = le32_to_cpu(sb->sb->u64s) + u64s - old_u64s;
52 BUG_ON(get_order(__vstruct_bytes(struct bch_sb, sb_u64s)) >
/* new field: append at the current end of the superblock */
56 f = vstruct_last(sb->sb);
57 memset(f, 0, sizeof(u64) * u64s);
58 f->u64s = cpu_to_le32(u64s);
66 f->u64s = cpu_to_le32(u64s);
/* move the tail fields to their new position, zero the gap left behind */
72 memmove(dst, src, vstruct_end(sb->sb) - src);
75 memset(src, 0, dst - src);
78 sb->sb->u64s = cpu_to_le32(sb_u64s);
80 return u64s ? f : NULL;
/* Remove an optional field (if present) by resizing it to zero u64s. */
83 void bch2_sb_field_delete(struct bch_sb_handle *sb,
84 enum bch_sb_field_type type)
86 struct bch_sb_field *f = bch2_sb_field_get(sb->sb, type);
89 __bch2_sb_field_resize(sb, f, 0);
92 /* Superblock realloc/free: */
/*
 * Release everything a bch_sb_handle owns: the block device (if it was
 * opened), the superblock buffer pages, then zero the handle so it is
 * safe to reuse.  (Other teardown, e.g. the bio, is elided in this view.)
 */
94 void bch2_free_super(struct bch_sb_handle *sb)
98 if (!IS_ERR_OR_NULL(sb->bdev))
99 blkdev_put(sb->bdev, sb->holder);
102 free_pages((unsigned long) sb->sb, sb->page_order);
103 memset(sb, 0, sizeof(*sb));
/*
 * Ensure the superblock buffer can hold @u64s worth of field data,
 * reallocating the page buffer (and the bio used for sb I/O) as needed.
 * Once a layout is known, refuses to grow past the layout's
 * sb_max_size_bits limit.  Returns 0 on success (error returns elided
 * in this view).
 */
106 int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
108 size_t new_bytes = __vstruct_bytes(struct bch_sb, u64s);
109 unsigned order = get_order(new_bytes);
110 struct bch_sb *new_sb;
/* already big enough — nothing to do */
113 if (sb->sb && sb->page_order >= order)
116 if (sb->have_layout) {
117 u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits;
119 if (new_bytes > max_bytes) {
120 pr_err("%pg: superblock too big: want %zu but have %llu",
121 sb->bdev, new_bytes, max_bytes);
/* NOTE(review): duplicate of the early-exit check above — confirm intent */
126 if (sb->page_order >= order && sb->sb)
129 if (dynamic_fault("bcachefs:add:super_realloc"))
/* grow the bio so it can carry the larger superblock */
133 unsigned nr_bvecs = 1 << order;
135 bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
139 bio_init(bio, NULL, bio->bi_inline_vecs, nr_bvecs, 0);
/* allocate the bigger buffer, copy the old contents, free the old pages */
146 new_sb = (void *) __get_free_pages(GFP_NOFS|__GFP_ZERO, order);
151 memcpy(new_sb, sb->sb, PAGE_SIZE << sb->page_order);
153 free_pages((unsigned long) sb->sb, sb->page_order);
156 sb->page_order = order;
/*
 * Resize field @type in a superblock handle to @u64s words.  When the
 * handle is the filesystem's own (c->disk_sb), every online member
 * device's superblock is reallocated too, so all copies can hold the new
 * size.  Requires c->sb_lock in that case.  Returns the resized field
 * with its type tag set.
 */
161 struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb,
162 enum bch_sb_field_type type,
165 struct bch_sb_field *f = bch2_sb_field_get(sb->sb, type);
166 ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0;
167 ssize_t d = -old_u64s + u64s;
169 if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d))
173 struct bch_fs *c = container_of(sb, struct bch_fs, disk_sb);
177 lockdep_assert_held(&c->sb_lock);
179 /* XXX: we're not checking that offline device have enough space */
181 for_each_online_member(ca, c, i) {
/* NOTE(review): inner 'sb' shadows the function parameter */
182 struct bch_sb_handle *sb = &ca->disk_sb;
184 if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) {
185 percpu_ref_put(&ca->ref);
/* re-fetch: realloc may have moved the superblock buffer */
191 f = bch2_sb_field_get(sb->sb, type);
192 f = __bch2_sb_field_resize(sb, f, u64s);
194 f->type = cpu_to_le32(type);
198 /* Superblock validate: */
/* Compile-time check: the on-disk layout struct must be exactly one sector. */
200 static inline void __bch2_sb_layout_size_assert(void)
202 BUILD_BUG_ON(sizeof(struct bch_sb_layout) != 512);
/*
 * Sanity-check a superblock layout: magic, layout type, superblock count
 * within the bounds of sb_offset[], and that successive superblocks (each
 * up to max_sectors long) do not overlap.  Returns an error string on
 * failure (success return elided in this view).
 */
205 static const char *validate_sb_layout(struct bch_sb_layout *layout)
207 u64 offset, prev_offset, max_sectors;
210 if (!uuid_equal(&layout->magic, &BCACHE_MAGIC) &&
211 !uuid_equal(&layout->magic, &BCHFS_MAGIC))
212 return "Not a bcachefs superblock layout";
214 if (layout->layout_type != 0)
215 return "Invalid superblock layout type";
217 if (!layout->nr_superblocks)
218 return "Invalid superblock layout: no superblocks";
220 if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset))
221 return "Invalid superblock layout: too many superblocks";
/* offsets must be spaced at least one max-size superblock apart */
223 max_sectors = 1 << layout->sb_max_size_bits;
225 prev_offset = le64_to_cpu(layout->sb_offset[0]);
227 for (i = 1; i < layout->nr_superblocks; i++) {
228 offset = le64_to_cpu(layout->sb_offset[i]);
230 if (offset < prev_offset + max_sectors)
231 return "Invalid superblock layout: superblocks overlap";
232 prev_offset = offset;
/*
 * Full validation of an in-memory superblock: version range, feature bits,
 * block size, UUIDs, device count, replica/checksum/compression options,
 * btree node size, gc reserve, time precision, the sb layout, then each
 * optional field — members first, since other fields are validated
 * against member info.  Returns an error string on failure.
 */
238 const char *bch2_sb_validate(struct bch_sb_handle *disk_sb)
240 struct bch_sb *sb = disk_sb->sb;
241 struct bch_sb_field *f;
242 struct bch_sb_field_members *mi;
244 u32 version, version_min;
/* version_min only exists with new-style versioning */
247 version = le16_to_cpu(sb->version);
248 version_min = version >= bcachefs_metadata_version_new_versioning
249 ? le16_to_cpu(sb->version_min)
252 if (version >= bcachefs_metadata_version_max ||
253 version_min < bcachefs_metadata_version_min)
254 return "Unsupported superblock version";
256 if (version_min > version)
257 return "Bad minimum version";
259 if (sb->features[1] ||
260 (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR)))
261 return "Filesystem has incompatible features";
263 block_size = le16_to_cpu(sb->block_size);
265 if (!is_power_of_2(block_size) ||
266 block_size > PAGE_SECTORS)
267 return "Bad block size";
269 if (bch2_is_zero(sb->user_uuid.b, sizeof(sb->user_uuid)))
270 return "Bad user UUID";
272 if (bch2_is_zero(sb->uuid.b, sizeof(sb->uuid)))
273 return "Bad internal UUID";
275 if (!sb->nr_devices ||
276 sb->nr_devices <= sb->dev_idx ||
277 sb->nr_devices > BCH_SB_MEMBERS_MAX)
278 return "Bad number of member devices";
280 if (!BCH_SB_META_REPLICAS_WANT(sb) ||
281 BCH_SB_META_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
282 return "Invalid number of metadata replicas";
284 if (!BCH_SB_META_REPLICAS_REQ(sb) ||
285 BCH_SB_META_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX)
286 return "Invalid number of metadata replicas";
288 if (!BCH_SB_DATA_REPLICAS_WANT(sb) ||
289 BCH_SB_DATA_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
290 return "Invalid number of data replicas";
292 if (!BCH_SB_DATA_REPLICAS_REQ(sb) ||
293 BCH_SB_DATA_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX)
294 return "Invalid number of data replicas";
296 if (BCH_SB_META_CSUM_TYPE(sb) >= BCH_CSUM_OPT_NR)
297 return "Invalid metadata checksum type";
/*
 * NOTE(review): this checks the *data* checksum type but the message
 * says "metadata" — copy-paste error; the string should say "data".
 */
299 if (BCH_SB_DATA_CSUM_TYPE(sb) >= BCH_CSUM_OPT_NR)
300 return "Invalid metadata checksum type";
302 if (BCH_SB_COMPRESSION_TYPE(sb) >= BCH_COMPRESSION_OPT_NR)
303 return "Invalid compression type";
305 if (!BCH_SB_BTREE_NODE_SIZE(sb))
306 return "Btree node size not set";
308 if (!is_power_of_2(BCH_SB_BTREE_NODE_SIZE(sb)))
309 return "Btree node size not a power of two";
311 if (BCH_SB_GC_RESERVE(sb) < 5)
312 return "gc reserve percentage too small";
314 if (!sb->time_precision ||
315 le32_to_cpu(sb->time_precision) > NSEC_PER_SEC)
316 return "invalid time precision";
318 /* validate layout */
319 err = validate_sb_layout(&sb->layout);
/* every optional field must lie entirely within the superblock */
323 vstruct_for_each(sb, f) {
325 return "Invalid superblock: invalid optional field";
327 if (vstruct_next(f) > vstruct_last(sb))
328 return "Invalid superblock: invalid optional field";
331 /* members must be validated first: */
332 mi = bch2_sb_get_members(sb);
334 return "Invalid superblock: member info area missing";
336 err = bch2_sb_field_validate(sb, &mi->field);
/* now the remaining fields; members were already done above */
340 vstruct_for_each(sb, f) {
341 if (le32_to_cpu(f->type) == BCH_SB_FIELD_members)
344 err = bch2_sb_field_validate(sb, f);
/*
 * Refresh the cached native-endian copy of superblock fields (c->sb) and
 * each member device's mi from the current on-disk-format superblock.
 * Caller must hold c->sb_lock.
 */
354 static void bch2_sb_update(struct bch_fs *c)
356 struct bch_sb *src = c->disk_sb.sb;
357 struct bch_sb_field_members *mi = bch2_sb_get_members(src);
361 lockdep_assert_held(&c->sb_lock);
363 c->sb.uuid = src->uuid;
364 c->sb.user_uuid = src->user_uuid;
365 c->sb.version = le16_to_cpu(src->version);
366 c->sb.nr_devices = src->nr_devices;
367 c->sb.clean = BCH_SB_CLEAN(src);
368 c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src);
369 c->sb.encoded_extent_max= 1 << BCH_SB_ENCODED_EXTENT_MAX_BITS(src);
370 c->sb.time_base_lo = le64_to_cpu(src->time_base_lo);
371 c->sb.time_base_hi = le32_to_cpu(src->time_base_hi);
372 c->sb.time_precision = le32_to_cpu(src->time_precision);
/* only features[0]/compat[0] are cached; [1] must be zero per validate */
373 c->sb.features = le64_to_cpu(src->features[0]);
374 c->sb.compat = le64_to_cpu(src->compat[0]);
376 for_each_member_device(ca, c, i)
377 ca->mi = bch2_mi_to_cpu(mi->members + i);
380 /* doesn't copy member info */
/*
 * Copy superblock header fields (versions, uuids, label, sizes, flags,
 * feature/compat bits) and every optional field except the journal from
 * @src into @dst_handle's superblock, resizing each destination field to
 * match the source.
 */
381 static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
383 struct bch_sb_field *src_f, *dst_f;
384 struct bch_sb *dst = dst_handle->sb;
387 dst->version = src->version;
388 dst->version_min = src->version_min;
390 dst->uuid = src->uuid;
391 dst->user_uuid = src->user_uuid;
392 memcpy(dst->label, src->label, sizeof(dst->label));
394 dst->block_size = src->block_size;
395 dst->nr_devices = src->nr_devices;
397 dst->time_base_lo = src->time_base_lo;
398 dst->time_base_hi = src->time_base_hi;
399 dst->time_precision = src->time_precision;
401 memcpy(dst->flags, src->flags, sizeof(dst->flags));
402 memcpy(dst->features, src->features, sizeof(dst->features));
403 memcpy(dst->compat, src->compat, sizeof(dst->compat));
405 for (i = 0; i < BCH_SB_FIELD_NR; i++) {
/* the journal field is per-device; each device keeps its own */
406 if (i == BCH_SB_FIELD_journal)
409 src_f = bch2_sb_field_get(src, i);
410 dst_f = bch2_sb_field_get(dst, i);
411 dst_f = __bch2_sb_field_resize(dst_handle, dst_f,
412 src_f ? le32_to_cpu(src_f->u64s) : 0);
415 memcpy(dst_f, src_f, vstruct_bytes(src_f));
/*
 * Take a just-read device superblock @src as the filesystem's superblock:
 * realloc c->disk_sb (journal u64s excluded, since __copy_super skips the
 * journal field), copy everything over, then rebuild the cpu-side
 * replicas and disk-group tables.  Caller must hold c->sb_lock.
 */
419 int bch2_sb_to_fs(struct bch_fs *c, struct bch_sb *src)
421 struct bch_sb_field_journal *journal_buckets =
422 bch2_sb_get_journal(src);
423 unsigned journal_u64s = journal_buckets
424 ? le32_to_cpu(journal_buckets->field.u64s)
428 lockdep_assert_held(&c->sb_lock);
430 ret = bch2_sb_realloc(&c->disk_sb,
431 le32_to_cpu(src->u64s) - journal_u64s);
435 __copy_super(&c->disk_sb, src);
437 ret = bch2_sb_replicas_to_cpu_replicas(c);
441 ret = bch2_sb_disk_groups_to_cpu(c);
/*
 * Propagate the filesystem superblock to device @ca's copy.  The device
 * keeps its own journal field, hence its journal u64s are added to the
 * realloc size before copying the rest.
 */
449 int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca)
451 struct bch_sb *src = c->disk_sb.sb, *dst = ca->disk_sb.sb;
452 struct bch_sb_field_journal *journal_buckets =
453 bch2_sb_get_journal(dst);
454 unsigned journal_u64s = journal_buckets
455 ? le32_to_cpu(journal_buckets->field.u64s)
457 unsigned u64s = le32_to_cpu(src->u64s) + journal_u64s;
460 ret = bch2_sb_realloc(&ca->disk_sb, u64s);
464 __copy_super(&ca->disk_sb, src);
468 /* read superblock: */
/*
 * Read and verify one superblock at sector @offset: magic, version range,
 * total size within the layout limit, checksum type and checksum.  When
 * the on-disk struct is larger than the current buffer the buffer is
 * reallocated (the re-read after realloc is elided in this view).
 * Returns an error string on failure.
 */
470 static const char *read_one_super(struct bch_sb_handle *sb, u64 offset)
472 struct bch_csum csum;
475 bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
476 sb->bio->bi_iter.bi_sector = offset;
477 sb->bio->bi_iter.bi_size = PAGE_SIZE << sb->page_order;
478 bch2_bio_map(sb->bio, sb->sb);
480 if (submit_bio_wait(sb->bio))
483 if (!uuid_equal(&sb->sb->magic, &BCACHE_MAGIC) &&
484 !uuid_equal(&sb->sb->magic, &BCHFS_MAGIC))
485 return "Not a bcachefs superblock";
487 if (le16_to_cpu(sb->sb->version) < bcachefs_metadata_version_min ||
488 le16_to_cpu(sb->sb->version) >= bcachefs_metadata_version_max)
489 return "Unsupported superblock version";
491 bytes = vstruct_bytes(sb->sb);
493 if (bytes > 512 << sb->sb->layout.sb_max_size_bits)
494 return "Bad superblock: too big";
/* superblock larger than our buffer: grow it */
496 if (get_order(bytes) > sb->page_order) {
497 if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s)))
498 return "cannot allocate memory";
502 if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR)
503 return "unknown csum type";
505 /* XXX: verify MACs */
506 csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb),
507 null_nonce(), sb->sb);
509 if (bch2_crc_cmp(csum, sb->sb->csum))
510 return "bad checksum reading superblock";
/*
 * Open the block device at @path and read its superblock into @sb.
 * Tries the offset from the 'sb' option first; if that fails and no
 * explicit offset was given, reads the superblock layout at
 * BCH_SB_LAYOUT_SECTOR and tries each backup superblock in turn.
 * Falls back to a read-only open (forcing nochanges) when an exclusive
 * write open fails with -EACCES.
 */
515 int bch2_read_super(const char *path, struct bch_opts *opts,
516 struct bch_sb_handle *sb)
518 u64 offset = opt_get(*opts, sb);
519 struct bch_sb_layout layout;
524 pr_verbose_init(*opts, "");
526 memset(sb, 0, sizeof(*sb));
527 sb->mode = BLK_OPEN_READ;
/* 1-byte allocation used purely as a unique bdev holder token */
529 sb->holder = kmalloc(1, GFP_KERNEL);
533 if (!opt_get(*opts, noexcl))
534 sb->mode |= BLK_OPEN_EXCL;
536 if (!opt_get(*opts, nochanges))
537 sb->mode |= BLK_OPEN_WRITE;
539 sb->bdev = blkdev_get_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
/* retry read-only if write access was denied and read_only is allowed */
540 if (IS_ERR(sb->bdev) &&
541 PTR_ERR(sb->bdev) == -EACCES &&
542 opt_get(*opts, read_only)) {
543 sb->mode &= ~BLK_OPEN_WRITE;
545 sb->bdev = blkdev_get_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
546 if (!IS_ERR(sb->bdev))
547 opt_set(*opts, nochanges, true);
550 if (IS_ERR(sb->bdev)) {
551 ret = PTR_ERR(sb->bdev);
555 err = "cannot allocate memory";
556 ret = bch2_sb_realloc(sb, 0);
561 err = "dynamic fault";
562 if (bch2_fs_init_fault("read_super"))
566 err = read_one_super(sb, offset);
/* an explicit sb offset means no fallback to backups */
570 if (opt_defined(*opts, sb))
573 pr_err("error reading default superblock: %s", err);
576 * Error reading primary superblock - read location of backup
579 bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
580 sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR;
581 sb->bio->bi_iter.bi_size = sizeof(struct bch_sb_layout);
583 * use sb buffer to read layout, since sb buffer is page aligned but
586 bch2_bio_map(sb->bio, sb->sb);
589 if (submit_bio_wait(sb->bio))
592 memcpy(&layout, sb->sb, sizeof(layout));
593 err = validate_sb_layout(&layout);
/* try each backup superblock, skipping the one we already failed on */
597 for (i = layout.sb_offset;
598 i < layout.sb_offset + layout.nr_superblocks; i++) {
599 offset = le64_to_cpu(*i);
601 if (offset == opt_get(*opts, sb))
604 err = read_one_super(sb, offset);
613 err = "Superblock block size smaller than device block size";
615 if (le16_to_cpu(sb->sb->block_size) << 9 <
616 bdev_logical_block_size(sb->bdev))
620 sb->have_layout = true;
622 pr_verbose_init(*opts, "ret %i", ret);
626 pr_err("error reading superblock: %s", err);
630 /* write superblock: */
/*
 * Completion handler for superblock writes: record any I/O error on the
 * device, then drop the closure ref and io_ref taken by write_one_super().
 */
632 static void write_super_endio(struct bio *bio)
634 struct bch_dev *ca = bio->bi_private;
636 /* XXX: return errors directly */
638 if (bch2_dev_io_err_on(bio->bi_status, ca, "superblock write"))
639 ca->sb_write_error = 1;
641 closure_put(&ca->fs->sb_write);
642 percpu_ref_put(&ca->io_ref);
/*
 * Submit an async write of @ca's superblock to layout copy @idx:
 * stamp the offset, recompute the checksum, size the bio up to the
 * device's logical block size, and account the sectors written.
 * Completion is signalled via write_super_endio / the sb_write closure.
 */
645 static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
647 struct bch_sb *sb = ca->disk_sb.sb;
648 struct bio *bio = ca->disk_sb.bio;
650 sb->offset = sb->layout.sb_offset[idx];
/* checksum covers the superblock with the new offset already set */
652 SET_BCH_SB_CSUM_TYPE(sb, c->opts.metadata_checksum);
653 sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb),
656 bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META);
657 bio->bi_iter.bi_sector = le64_to_cpu(sb->offset);
658 bio->bi_iter.bi_size =
659 roundup((size_t) vstruct_bytes(sb),
660 bdev_logical_block_size(ca->disk_sb.bdev));
661 bio->bi_end_io = write_super_endio;
662 bio->bi_private = ca;
663 bch2_bio_map(bio, sb);
665 this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_SB],
/* refs dropped in write_super_endio() */
668 percpu_ref_get(&ca->io_ref);
669 closure_bio_submit(bio, &c->sb_write);
/*
 * Write the superblock to every online member device: bump the sequence
 * number, propagate the fs superblock to each device and re-validate it,
 * write each layout copy, then check that the set of devices actually
 * written is sufficient to mount the filesystem — fatal error otherwise.
 * Caller must hold c->sb_lock.
 */
672 void bch2_write_super(struct bch_fs *c)
674 struct closure *cl = &c->sb_write;
676 unsigned i, sb = 0, nr_wrote;
678 struct bch_devs_mask sb_written;
679 bool wrote, can_mount_without_written, can_mount_with_written;
681 lockdep_assert_held(&c->sb_lock);
683 closure_init_stack(cl);
684 memset(&sb_written, 0, sizeof(sb_written));
/* new superblock version for this write */
686 le64_add_cpu(&c->disk_sb.sb->seq, 1);
688 for_each_online_member(ca, c, i)
689 bch2_sb_from_fs(c, ca);
691 for_each_online_member(ca, c, i) {
692 err = bch2_sb_validate(&ca->disk_sb);
694 bch2_fs_inconsistent(c, "sb invalid before write: %s", err);
/* nothing is actually written in nochanges mode or after a fs error */
699 if (c->opts.nochanges ||
700 test_bit(BCH_FS_ERROR, &c->flags))
703 for_each_online_member(ca, c, i) {
704 __set_bit(ca->dev_idx, sb_written.d);
705 ca->sb_write_error = 0;
/* write superblock copy 'sb' on each device that has one */
710 for_each_online_member(ca, c, i)
711 if (sb < ca->disk_sb.sb->layout.nr_superblocks) {
712 write_one_super(c, ca, sb);
/* drop devices that reported write errors from the written set */
719 for_each_online_member(ca, c, i)
720 if (ca->sb_write_error)
721 __clear_bit(ca->dev_idx, sb_written.d);
723 nr_wrote = dev_mask_nr(&sb_written);
725 can_mount_with_written =
726 bch2_have_enough_devs(__bch2_replicas_status(c, sb_written),
727 BCH_FORCE_IF_DEGRADED);
/* invert the mask: the devices we did NOT successfully write to */
729 for (i = 0; i < ARRAY_SIZE(sb_written.d); i++)
730 sb_written.d[i] = ~sb_written.d[i];
732 can_mount_without_written =
733 bch2_have_enough_devs(__bch2_replicas_status(c, sb_written),
734 BCH_FORCE_IF_DEGRADED);
737 * If we would be able to mount _without_ the devices we successfully
738 * wrote superblocks to, we weren't able to write to enough devices:
740 * Exception: if we can mount without the successes because we haven't
741 * written anything (new filesystem), we continue if we'd be able to
742 * mount with the devices we did successfully write to:
744 bch2_fs_fatal_err_on(!nr_wrote ||
745 (can_mount_without_written &&
746 !can_mount_with_written), c,
747 "Unable to write superblock to sufficient devices");
749 /* Make new options visible after they're persistent: */
753 /* BCH_SB_FIELD_journal: */
755 static int u64_cmp(const void *_l, const void *_r)
757 u64 l = *((const u64 *) _l), r = *((const u64 *) _r);
759 return l < r ? -1 : l > r ? 1 : 0;
/*
 * Validate this device's journal buckets: copy them into a sorted scratch
 * array, then check for buckets at sector 0, buckets outside our member's
 * [first_bucket, nbuckets) range, and duplicates.  The error/cleanup path
 * freeing the scratch array is elided in this view.
 */
762 static const char *bch2_sb_validate_journal(struct bch_sb *sb,
763 struct bch_sb_field *f)
765 struct bch_sb_field_journal *journal = field_to_type(f, journal);
766 struct bch_member *m = bch2_sb_get_members(sb)->members + sb->dev_idx;
772 journal = bch2_sb_get_journal(sb);
776 nr = bch2_nr_journal_buckets(journal);
780 b = kmalloc_array(sizeof(u64), nr, GFP_KERNEL);
782 return "cannot allocate memory";
784 for (i = 0; i < nr; i++)
785 b[i] = le64_to_cpu(journal->buckets[i]);
/* sorted so range and duplicate checks only need neighbours */
787 sort(b, nr, sizeof(u64), u64_cmp, NULL);
789 err = "journal bucket at sector 0";
793 err = "journal bucket before first bucket";
794 if (m && b[0] < le16_to_cpu(m->first_bucket))
797 err = "journal bucket past end of device";
798 if (m && b[nr - 1] >= le64_to_cpu(m->nbuckets))
801 err = "duplicate journal buckets";
802 for (i = 0; i + 1 < nr; i++)
803 if (b[i] == b[i + 1])
/* Ops hooks for BCH_SB_FIELD_journal. */
812 static const struct bch_sb_field_ops bch_sb_field_ops_journal = {
813 .validate = bch2_sb_validate_journal,
816 /* BCH_SB_FIELD_members: */
/*
 * Validate the member info area: it must span nr_devices entries, and
 * each existing member needs sane bucket counts and sizes relative to the
 * superblock's block size and btree node size.
 */
818 static const char *bch2_sb_validate_members(struct bch_sb *sb,
819 struct bch_sb_field *f)
821 struct bch_sb_field_members *mi = field_to_type(f, members);
822 struct bch_member *m;
824 if ((void *) (mi->members + sb->nr_devices) >
825 vstruct_end(&mi->field))
826 return "Invalid superblock: bad member info";
828 for (m = mi->members;
829 m < mi->members + sb->nr_devices;
/* slots for removed/never-added devices are skipped */
831 if (!bch2_member_exists(m))
/* nbuckets is used as a long elsewhere, so bound it */
834 if (le64_to_cpu(m->nbuckets) > LONG_MAX)
835 return "Too many buckets";
837 if (le64_to_cpu(m->nbuckets) -
838 le16_to_cpu(m->first_bucket) < BCH_MIN_NR_NBUCKETS)
839 return "Not enough buckets";
841 if (le16_to_cpu(m->bucket_size) <
842 le16_to_cpu(sb->block_size))
843 return "bucket size smaller than block size";
845 if (le16_to_cpu(m->bucket_size) <
846 BCH_SB_BTREE_NODE_SIZE(sb))
847 return "bucket size smaller than btree node size";
/* Ops hooks for BCH_SB_FIELD_members. */
853 static const struct bch_sb_field_ops bch_sb_field_ops_members = {
854 .validate = bch2_sb_validate_members,
857 /* BCH_SB_FIELD_crypt: */
/*
 * Validate the crypt field: fixed size, and a KDF type we know about
 * (currently only type 0 is accepted).
 */
859 static const char *bch2_sb_validate_crypt(struct bch_sb *sb,
860 struct bch_sb_field *f)
862 struct bch_sb_field_crypt *crypt = field_to_type(f, crypt);
864 if (vstruct_bytes(&crypt->field) != sizeof(*crypt))
865 return "invalid field crypt: wrong size";
867 if (BCH_CRYPT_KDF_TYPE(crypt))
868 return "invalid field crypt: bad kdf type";
/* Ops hooks for BCH_SB_FIELD_crypt. */
873 static const struct bch_sb_field_ops bch_sb_field_ops_crypt = {
874 .validate = bch2_sb_validate_crypt,
877 /* BCH_SB_FIELD_clean: */
/*
 * Walk every jset entry stored in the clean field and renumber its bkeys
 * (@write is passed through to bch2_bkey_renumber()); used when crossing
 * the bkey_renumber metadata version.
 */
879 void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write)
881 struct jset_entry *entry;
883 for (entry = clean->start;
884 entry < (struct jset_entry *) vstruct_end(&clean->field);
885 entry = vstruct_next(entry))
886 bch2_bkey_renumber(BKEY_TYPE_BTREE, bkey_to_packed(entry->start), write);
/*
 * Mark the filesystem dirty: clear the CLEAN flag and the ALLOC_INFO
 * compat bit (on-disk alloc info can no longer be trusted once dirty),
 * if either was set.  (The superblock write inside the branch is elided
 * in this view.)  Takes sb_lock itself.
 */
889 static void bch2_fs_mark_dirty(struct bch_fs *c)
891 mutex_lock(&c->sb_lock);
892 if (BCH_SB_CLEAN(c->disk_sb.sb) ||
893 (c->disk_sb.sb->compat[0] & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO))) {
894 SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
895 c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
898 mutex_unlock(&c->sb_lock);
/*
 * Serialize in-memory state that normally lives only in the journal into
 * consecutive jset entries starting at @entry: every btree root (under
 * btree_root_lock), then — under mark_lock — fs usage: inode count, key
 * version, per-level persistent reserved, and per-replicas data usage.
 * (Return of the advanced entry pointer is elided in this view.)
 */
902 bch2_journal_super_entries_add_common(struct bch_fs *c,
903 struct jset_entry *entry,
906 struct btree_root *r;
909 mutex_lock(&c->btree_root_lock);
911 for (r = c->btree_roots;
912 r < c->btree_roots + BTREE_ID_NR;
915 entry->u64s = r->key.u64s;
916 entry->btree_id = r - c->btree_roots;
917 entry->level = r->level;
918 entry->type = BCH_JSET_ENTRY_btree_root;
919 bkey_copy(&entry->start[0], &r->key);
921 entry = vstruct_next(entry);
/* roots are now captured; clear the dirty flag under the lock */
923 c->btree_roots_dirty = false;
925 mutex_unlock(&c->btree_root_lock);
930 percpu_down_write(&c->mark_lock);
933 u64 nr_inodes = percpu_u64_get(&c->usage[0]->nr_inodes);
934 struct jset_entry_usage *u =
935 container_of(entry, struct jset_entry_usage, entry);
937 memset(u, 0, sizeof(*u));
/* u64s counts words after the entry header, hence the -1 */
938 u->entry.u64s = DIV_ROUND_UP(sizeof(*u), sizeof(u64)) - 1;
939 u->entry.type = BCH_JSET_ENTRY_usage;
940 u->entry.btree_id = FS_USAGE_INODES;
941 u->v = cpu_to_le64(nr_inodes);
943 entry = vstruct_next(entry);
947 struct jset_entry_usage *u =
948 container_of(entry, struct jset_entry_usage, entry);
950 memset(u, 0, sizeof(*u));
951 u->entry.u64s = DIV_ROUND_UP(sizeof(*u), sizeof(u64)) - 1;
952 u->entry.type = BCH_JSET_ENTRY_usage;
953 u->entry.btree_id = FS_USAGE_KEY_VERSION;
954 u->v = cpu_to_le64(atomic64_read(&c->key_version));
956 entry = vstruct_next(entry);
/* one usage entry per persistent-reserved replication level */
959 for (i = 0; i < BCH_REPLICAS_MAX; i++) {
960 struct jset_entry_usage *u =
961 container_of(entry, struct jset_entry_usage, entry);
962 u64 sectors = percpu_u64_get(&c->usage[0]->persistent_reserved[i]);
967 memset(u, 0, sizeof(*u));
968 u->entry.u64s = DIV_ROUND_UP(sizeof(*u), sizeof(u64)) - 1;
969 u->entry.type = BCH_JSET_ENTRY_usage;
970 u->entry.btree_id = FS_USAGE_RESERVED;
974 entry = vstruct_next(entry);
/* data usage entries carry a variable-length replicas descriptor */
977 for (i = 0; i < c->replicas.nr; i++) {
978 struct bch_replicas_entry *e =
979 cpu_replicas_entry(&c->replicas, i);
980 u64 sectors = percpu_u64_get(&c->usage[0]->replicas[i]);
981 struct jset_entry_data_usage *u =
982 container_of(entry, struct jset_entry_data_usage, entry);
984 memset(u, 0, sizeof(*u));
985 u->entry.u64s = DIV_ROUND_UP(sizeof(*u) + e->nr_devs,
987 u->entry.type = BCH_JSET_ENTRY_data_usage;
988 u->v = cpu_to_le64(sectors);
989 unsafe_memcpy(&u->r, e, replicas_entry_bytes(e),
990 "embedded variable length struct");
992 entry = vstruct_next(entry);
995 percpu_up_write(&c->mark_lock);
/*
 * Mark the filesystem clean in the superblock (or dirty, delegating to
 * bch2_fs_mark_dirty(), when @clean is false): set the CLEAN flag and the
 * ALLOC_INFO compat bit, resize the clean field to hold clock hands, the
 * journal sequence and the common journal entries, fill it in, then write
 * the superblock.  Takes sb_lock itself.
 */
1000 void bch2_fs_mark_clean(struct bch_fs *c, bool clean)
1002 struct bch_sb_field_clean *sb_clean;
1003 struct jset_entry *entry;
1007 bch2_fs_mark_dirty(c);
1011 mutex_lock(&c->sb_lock);
/* already marked clean: nothing to do */
1012 if (BCH_SB_CLEAN(c->disk_sb.sb))
1015 SET_BCH_SB_CLEAN(c->disk_sb.sb, true);
1017 c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
1019 u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved;
1021 sb_clean = bch2_sb_resize_clean(&c->disk_sb, u64s);
1023 bch_err(c, "error resizing superblock while setting filesystem clean");
1027 sb_clean->flags = 0;
1028 sb_clean->read_clock = cpu_to_le16(c->bucket_clock[READ].hand);
1029 sb_clean->write_clock = cpu_to_le16(c->bucket_clock[WRITE].hand);
1030 sb_clean->journal_seq = cpu_to_le64(journal_cur_seq(&c->journal) - 1);
1032 /* Trying to catch outstanding bug: */
1033 BUG_ON(le64_to_cpu(sb_clean->journal_seq) > S64_MAX);
1035 entry = sb_clean->start;
1036 entry = bch2_journal_super_entries_add_common(c, entry, 0);
1037 BUG_ON((void *) entry > vstruct_end(&sb_clean->field));
1040 vstruct_end(&sb_clean->field) - (void *) entry);
/* older formats store bkeys with the old numbering */
1042 if (le16_to_cpu(c->disk_sb.sb->version) <
1043 bcachefs_metadata_version_bkey_renumber)
1044 bch2_sb_clean_renumber(sb_clean, WRITE);
1046 bch2_write_super(c);
1048 mutex_unlock(&c->sb_lock);
1051 static const char *bch2_sb_validate_clean(struct bch_sb *sb,
1052 struct bch_sb_field *f)
1054 struct bch_sb_field_clean *clean = field_to_type(f, clean);
1056 if (vstruct_bytes(&clean->field) < sizeof(*clean))
1057 return "invalid field crypt: wrong size";
/* Ops hooks for BCH_SB_FIELD_clean. */
1062 static const struct bch_sb_field_ops bch_sb_field_ops_clean = {
1063 .validate = bch2_sb_validate_clean,
/* Per-field-type ops table, indexed by BCH_SB_FIELD_* via the x-macro list. */
1066 static const struct bch_sb_field_ops *bch2_sb_field_ops[] = {
1068 #define x(f, nr) [BCH_SB_FIELD_##f] = &bch_sb_field_ops_##f,
/*
 * Dispatch validation of one optional field to its type's ops.  Fields of
 * types we don't know (written by a newer version) take the other branch
 * of the ternary (elided in this view) rather than ops-table lookup.
 */
1073 static const char *bch2_sb_field_validate(struct bch_sb *sb,
1074 struct bch_sb_field *f)
1076 unsigned type = le32_to_cpu(f->type);
1078 return type < BCH_SB_FIELD_NR
1079 ? bch2_sb_field_ops[type]->validate(sb, f)
/*
 * Print an optional field to @out: its name (or raw type number for
 * unknown types) and size, then the type-specific representation when
 * the ops provide a to_text hook.
 */
1083 void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
1084 struct bch_sb_field *f)
1086 unsigned type = le32_to_cpu(f->type);
1087 const struct bch_sb_field_ops *ops = type < BCH_SB_FIELD_NR
1088 ? bch2_sb_field_ops[type] : NULL;
1091 pr_buf(out, "%s", bch2_sb_fields[type]);
1093 pr_buf(out, "(unknown field %u)", type);
1095 pr_buf(out, " (size %llu):", vstruct_bytes(f));
1097 if (ops && ops->to_text)
/* NOTE(review): could call ops->to_text directly; same pointer */
1098 bch2_sb_field_ops[type]->to_text(out, sb, f);