From 4ba99dde330b2d4b6de65f27ced60e7f0fbc21c2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 19 May 2025 10:31:44 -0400 Subject: [PATCH] bcachefs: BCH_INODE_has_case_insensitive Add a flag for tracking whether a directory has case-insensitive descendents - so that overlayfs can disallow mounting, even though the filesystem supports case insensitivity. This is a new on disk format version, with a (cheap) upgrade to ensure the flag is correctly set on existing inodes. Create, rename and fssetxattr are all plumbed to ensure the new flag is set, and we've got new fsck code that hooks into check_inode(). Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 3 +- fs/bcachefs/fsck.c | 10 +- fs/bcachefs/inode.c | 10 +- fs/bcachefs/inode.h | 2 +- fs/bcachefs/inode_format.h | 7 +- fs/bcachefs/namei.c | 166 +++++++++++++++++++++++++++++++++- fs/bcachefs/namei.h | 5 + fs/bcachefs/sb-downgrade.c | 6 +- 8 files changed, 196 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 5900ff3715c6..b4a04df5ea95 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -699,7 +699,8 @@ struct bch_sb_field_ext { x(casefolding, BCH_VERSION(1, 24)) \ x(extent_flags, BCH_VERSION(1, 25)) \ x(snapshot_deletion_v2, BCH_VERSION(1, 26)) \ - x(fast_device_removal, BCH_VERSION(1, 27)) + x(fast_device_removal, BCH_VERSION(1, 27)) \ + x(inode_has_case_insensitive, BCH_VERSION(1, 28)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 5402c40e3697..dbfa3e0b8abb 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -264,7 +264,7 @@ create_lostfound: u64 cpu = raw_smp_processor_id(); bch2_inode_init_early(c, lostfound); - bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode); + bch2_inode_init_late(c, lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode); lostfound->bi_dir = root_inode.bi_inum; 
lostfound->bi_snapshot = le32_to_cpu(st.root_snapshot); @@ -545,7 +545,7 @@ static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 sub u64 cpu = raw_smp_processor_id(); bch2_inode_init_early(c, &new_inode); - bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL); + bch2_inode_init_late(c, &new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL); new_inode.bi_subvol = subvolid; @@ -635,7 +635,7 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 struct bch_inode_unpacked new_inode; bch2_inode_init_early(c, &new_inode); - bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, i_mode|0600, 0, NULL); + bch2_inode_init_late(c, &new_inode, bch2_current_time(c), 0, 0, i_mode|0600, 0, NULL); new_inode.bi_size = i_size; new_inode.bi_inum = inum; new_inode.bi_snapshot = snapshot; @@ -1137,6 +1137,10 @@ static int check_inode(struct btree_trans *trans, goto err; } + ret = bch2_check_inode_has_case_insensitive(trans, &u, &s->ids, &do_update); + if (ret) + goto err; + if (u.bi_dir || u.bi_dir_offset) { ret = check_inode_dirent_inode(trans, &u, &do_update); if (ret) diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 13c1e9df252a..5cf70108ae2f 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -908,7 +908,8 @@ void bch2_inode_init_early(struct bch_fs *c, get_random_bytes(&inode_u->bi_hash_seed, sizeof(inode_u->bi_hash_seed)); } -void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now, +void bch2_inode_init_late(struct bch_fs *c, + struct bch_inode_unpacked *inode_u, u64 now, uid_t uid, gid_t gid, umode_t mode, dev_t rdev, struct bch_inode_unpacked *parent) { @@ -935,6 +936,9 @@ void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now, if (!S_ISDIR(mode)) inode_u->bi_casefold = 0; + + if (bch2_inode_casefold(c, inode_u)) + inode_u->bi_flags |= BCH_INODE_has_case_insensitive; } void bch2_inode_init(struct bch_fs *c, struct 
bch_inode_unpacked *inode_u, @@ -942,7 +946,7 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, struct bch_inode_unpacked *parent) { bch2_inode_init_early(c, inode_u); - bch2_inode_init_late(inode_u, bch2_current_time(c), + bch2_inode_init_late(c, inode_u, bch2_current_time(c), uid, gid, mode, rdev, parent); } @@ -1279,7 +1283,7 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum, bi->bi_casefold = v + 1; bi->bi_fields_set |= BIT(Inode_opt_casefold); - return 0; + return bch2_maybe_propagate_has_case_insensitive(trans, inum, bi); #else bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE"); return -EOPNOTSUPP; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index c31567c09b8a..77ad2d549541 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -164,7 +164,7 @@ int bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *); void bch2_inode_init_early(struct bch_fs *, struct bch_inode_unpacked *); -void bch2_inode_init_late(struct bch_inode_unpacked *, u64, +void bch2_inode_init_late(struct bch_fs *, struct bch_inode_unpacked *, u64, uid_t, gid_t, umode_t, dev_t, struct bch_inode_unpacked *); void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *, diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h index 87e193e8ed25..1f00938b1bdc 100644 --- a/fs/bcachefs/inode_format.h +++ b/fs/bcachefs/inode_format.h @@ -129,6 +129,10 @@ enum inode_opt_id { Inode_opt_nr, }; +/* + * BCH_INODE_has_case_insensitive is set if any descendent is case insensitive - + * for overlayfs + */ #define BCH_INODE_FLAGS() \ x(sync, 0) \ x(immutable, 1) \ @@ -139,7 +143,8 @@ enum inode_opt_id { x(i_sectors_dirty, 6) \ x(unlinked, 7) \ x(backptr_untrusted, 8) \ - x(has_child_snapshot, 9) + x(has_child_snapshot, 9) \ + x(has_case_insensitive, 10) /* bits 20+ reserved for packed fields below: */ diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index 
8088e810815f..bd093ce56ad9 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -11,6 +11,14 @@ #include +static inline subvol_inum parent_inum(subvol_inum inum, struct bch_inode_unpacked *inode) +{ + if (inode->bi_parent_subvol) /* parent's subvolume is recorded explicitly */ + inum.subvol = inode->bi_parent_subvol; + inum.inum = inode->bi_dir; + return inum; +} + static inline int is_subdir_for_nlink(struct bch_inode_unpacked *inode) { return S_ISDIR(inode->bi_mode) && !inode->bi_subvol; @@ -49,7 +57,7 @@ int bch2_create_trans(struct btree_trans *trans, if (!(flags & BCH_CREATE_SNAPSHOT)) { /* Normal create path - allocate a new inode: */ - bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u); + bch2_inode_init_late(c, new_inode, now, uid, gid, mode, rdev, dir_u); if (flags & BCH_CREATE_TMPFILE) new_inode->bi_flags |= BCH_INODE_unlinked; @@ -510,6 +518,13 @@ int bch2_rename_trans(struct btree_trans *trans, goto err; } + ret = bch2_maybe_propagate_has_case_insensitive(trans, src_inum, src_inode_u) ?: + (mode == BCH_RENAME_EXCHANGE + ? 
bch2_maybe_propagate_has_case_insensitive(trans, dst_inum, dst_inode_u) + : 0); + if (ret) + goto err; + if (is_subdir_for_nlink(src_inode_u)) { src_dir_u->bi_nlink--; dst_dir_u->bi_nlink++; @@ -611,8 +626,7 @@ int bch2_inum_to_path(struct btree_trans *trans, subvol_inum inum, struct printb goto disconnected; } - inum.subvol = inode.bi_parent_subvol ?: inum.subvol; - inum.inum = inode.bi_dir; + inum = parent_inum(inum, &inode); u32 snapshot; ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); @@ -847,3 +861,159 @@ fsck_err: bch_err_fn(c, ret); return ret; } + +/* + * BCH_INODE_has_case_insensitive: + * We have to track whether directories have any descendent directory that is + * casefolded - for overlayfs: + */ + +static int bch2_propagate_has_case_insensitive(struct btree_trans *trans, subvol_inum inum) +{ + struct btree_iter iter = {}; + int ret = 0; + + while (true) { + struct bch_inode_unpacked inode; + ret = bch2_inode_peek(trans, &iter, &inode, inum, + BTREE_ITER_intent|BTREE_ITER_with_updates); + if (ret) + break; + + if (inode.bi_flags & BCH_INODE_has_case_insensitive) + break; + + inode.bi_flags |= BCH_INODE_has_case_insensitive; + ret = bch2_inode_write(trans, &iter, &inode); + if (ret) + break; + + bch2_trans_iter_exit(trans, &iter); + if (subvol_inum_eq(inum, BCACHEFS_ROOT_SUBVOL_INUM)) + break; + + inum = parent_inum(inum, &inode); + } + + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +int bch2_maybe_propagate_has_case_insensitive(struct btree_trans *trans, subvol_inum inum, + struct bch_inode_unpacked *inode) +{ + if (!bch2_inode_casefold(trans->c, inode)) + return 0; + + inode->bi_flags |= BCH_INODE_has_case_insensitive; + + return bch2_propagate_has_case_insensitive(trans, parent_inum(inum, inode)); +} + +/* + * fsck: check BCH_INODE_has_case_insensitive on @inode and on its ancestors. + * + * A missing flag on @inode itself is fixed in place and reported through + * @do_update; ancestors missing the flag are rewritten here, and once any + * ancestor has been repaired the walk continues towards the root, the repairs + * are committed and a nested transaction restart is returned. + */ +int bch2_check_inode_has_case_insensitive(struct btree_trans *trans, + struct bch_inode_unpacked *inode, + snapshot_id_list *snapshot_overwrites, + bool *do_update) +{ + struct printbuf buf = PRINTBUF; + bool 
repairing_parents = false; + int ret = 0; + + if (!S_ISDIR(inode->bi_mode)) { + /* + * Old versions set bi_casefold for non dirs, but that's + * unnecessary and wasteful + */ + if (inode->bi_casefold) { + inode->bi_casefold = 0; + *do_update = true; + } + return 0; + } + + if (trans->c->sb.version < bcachefs_metadata_version_inode_has_case_insensitive) + return 0; + + if (bch2_inode_casefold(trans->c, inode) && + !(inode->bi_flags & BCH_INODE_has_case_insensitive)) { + prt_printf(&buf, "casefolded dir with has_case_insensitive not set\ninum %llu:%u ", + inode->bi_inum, inode->bi_snapshot); + + ret = bch2_inum_snapshot_to_path(trans, inode->bi_inum, inode->bi_snapshot, + snapshot_overwrites, &buf); + if (ret) + goto err; + + if (fsck_err(trans, inode_has_case_insensitive_not_set, "%s", buf.buf)) { + inode->bi_flags |= BCH_INODE_has_case_insensitive; + *do_update = true; + } + } + + if (!(inode->bi_flags & BCH_INODE_has_case_insensitive)) + goto out; + + struct bch_inode_unpacked dir = *inode; + u32 snapshot = dir.bi_snapshot; + + while (!(dir.bi_inum == BCACHEFS_ROOT_INO && + dir.bi_subvol == BCACHEFS_ROOT_SUBVOL)) { + if (dir.bi_parent_subvol) { + ret = bch2_subvolume_get_snapshot(trans, dir.bi_parent_subvol, &snapshot); + if (ret) + goto err; + + snapshot_overwrites = NULL; + } + + ret = bch2_inode_find_by_inum_snapshot(trans, dir.bi_dir, snapshot, &dir, 0); + if (ret) + goto err; + + if (!(dir.bi_flags & BCH_INODE_has_case_insensitive)) { + printbuf_reset(&buf); + prt_printf(&buf, "parent of casefolded dir with has_case_insensitive not set\n"); + + ret = bch2_inum_snapshot_to_path(trans, dir.bi_inum, dir.bi_snapshot, + snapshot_overwrites, &buf); + if (ret) + goto err; + + if (fsck_err(trans, inode_parent_has_case_insensitive_not_set, "%s", buf.buf)) { + dir.bi_flags |= BCH_INODE_has_case_insensitive; + ret = __bch2_fsck_write_inode(trans, &dir); + if (ret) + goto err; + repairing_parents = true; + } + } + + /* + * We only need to check the first parent, unless we find an + * inconsistency + */ + if 
(!repairing_parents) + break; + } +out: +err: +fsck_err: + printbuf_exit(&buf); + if (ret) + return ret; + + if (repairing_parents) { + return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: + -BCH_ERR_transaction_restart_nested; + } + + return 0; +} diff --git a/fs/bcachefs/namei.h b/fs/bcachefs/namei.h index d4d2d2d69517..ae6ebc2d0785 100644 --- a/fs/bcachefs/namei.h +++ b/fs/bcachefs/namei.h @@ -71,4 +71,9 @@ static inline int bch2_check_dirent_target(struct btree_trans *trans, return __bch2_check_dirent_target(trans, dirent_iter, d, target, in_fsck); } +int bch2_maybe_propagate_has_case_insensitive(struct btree_trans *, subvol_inum, + struct bch_inode_unpacked *); +int bch2_check_inode_has_case_insensitive(struct btree_trans *, struct bch_inode_unpacked *, + snapshot_id_list *, bool *); + #endif /* _BCACHEFS_NAMEI_H */ diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 296c6c925386..861fce1630f0 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -100,7 +100,11 @@ BCH_FSCK_ERR_ptr_to_missing_backpointer) \ x(stripe_backpointers, \ BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\ - BCH_FSCK_ERR_ptr_to_missing_backpointer) + BCH_FSCK_ERR_ptr_to_missing_backpointer) \ + x(inode_has_case_insensitive, \ + BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \ + BCH_FSCK_ERR_inode_has_case_insensitive_not_set, \ + BCH_FSCK_ERR_inode_parent_has_case_insensitive_not_set) #define DOWNGRADE_TABLE() \ x(bucket_stripe_sectors, \ -- 2.25.1