bcachefs: New varints
authorKent Overstreet <kent.overstreet@gmail.com>
Fri, 6 Nov 2020 04:39:33 +0000 (23:39 -0500)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:46 +0000 (17:08 -0400)
The previous varint implementation used by the inode code was not nearly as
fast as it could have been, partly because it tried to encode integers of
up to 96 bits (for timestamps), which meant that encoding and decoding the
length required a table lookup.

Instead, we'll just encode timestamps greater than 64 bits as two
separate varints; this will make decoding/encoding of inodes
significantly faster overall.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/Makefile
fs/bcachefs/bcachefs_format.h
fs/bcachefs/fsck.c
fs/bcachefs/inode.c
fs/bcachefs/inode.h
fs/bcachefs/io.c
fs/bcachefs/recovery.c
fs/bcachefs/super.c
fs/bcachefs/varint.c [new file with mode: 0644]
fs/bcachefs/varint.h [new file with mode: 0644]

index ffe4db45e1c90f0c3068bab5f49a4bd4cad95d40..dad2fe2530e56e62b93be4cc851f1de37355e5e5 100644 (file)
@@ -57,4 +57,5 @@ bcachefs-y            :=      \
        tests.o                 \
        trace.o                 \
        util.o                  \
+       varint.o                \
        xattr.o
index 0d79bb7764a710e37de046defdc813904c0ea4fb..f072e865e43f70bc9c5200e73840ed188f1f6d0a 100644 (file)
@@ -673,10 +673,10 @@ struct bch_inode_generation {
 } __attribute__((packed, aligned(8)));
 
 #define BCH_INODE_FIELDS()                     \
-       x(bi_atime,                     64)     \
-       x(bi_ctime,                     64)     \
-       x(bi_mtime,                     64)     \
-       x(bi_otime,                     64)     \
+       x(bi_atime,                     96)     \
+       x(bi_ctime,                     96)     \
+       x(bi_mtime,                     96)     \
+       x(bi_otime,                     96)     \
        x(bi_size,                      64)     \
        x(bi_sectors,                   64)     \
        x(bi_uid,                       32)     \
@@ -743,7 +743,8 @@ enum {
 #define BCH_INODE_UNLINKED     (1 << __BCH_INODE_UNLINKED)
 
 LE32_BITMASK(INODE_STR_HASH,   struct bch_inode, bi_flags, 20, 24);
-LE32_BITMASK(INODE_NR_FIELDS,  struct bch_inode, bi_flags, 24, 32);
+LE32_BITMASK(INODE_NR_FIELDS,  struct bch_inode, bi_flags, 24, 31);
+LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32);
 
 /* Dirents */
 
@@ -1334,13 +1335,15 @@ LE64_BITMASK(BCH_SB_ERASURE_CODE,       struct bch_sb, flags[3],  0, 16);
        x(btree_ptr_v2,                 11)     \
        x(extents_above_btree_updates,  12)     \
        x(btree_updates_journalled,     13)     \
-       x(reflink_inline_data,          14)
+       x(reflink_inline_data,          14)     \
+       x(new_varint,                   15)
 
 #define BCH_SB_FEATURES_ALL                            \
        ((1ULL << BCH_FEATURE_new_siphash)|             \
         (1ULL << BCH_FEATURE_new_extent_overwrite)|    \
         (1ULL << BCH_FEATURE_btree_ptr_v2)|            \
-        (1ULL << BCH_FEATURE_extents_above_btree_updates))
+        (1ULL << BCH_FEATURE_extents_above_btree_updates)|\
+        (1ULL << BCH_FEATURE_new_varint))\
 
 enum bch_sb_feature {
 #define x(f, n) BCH_FEATURE_##f,
index 5a6df3d1973a9dedbfab51e1a5b667a7da3751df..e3671b66c04659e1bb4f64198cf74bf63b834276 100644 (file)
@@ -537,7 +537,7 @@ retry:
 
                        bch2_trans_unlock(&trans);
 
-                       bch2_inode_pack(&p, &w.inode);
+                       bch2_inode_pack(c, &p, &w.inode);
 
                        ret = bch2_btree_insert(c, BTREE_ID_INODES,
                                                &p.inode.k_i, NULL, NULL,
@@ -808,7 +808,7 @@ create_root:
                        0, NULL);
        root_inode->bi_inum = BCACHEFS_ROOT_INO;
 
-       bch2_inode_pack(&packed, root_inode);
+       bch2_inode_pack(c, &packed, root_inode);
 
        return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
                                 NULL, NULL,
@@ -1326,7 +1326,7 @@ static int check_inode(struct btree_trans *trans,
        if (do_update) {
                struct bkey_inode_buf p;
 
-               bch2_inode_pack(&p, &u);
+               bch2_inode_pack(c, &p, &u);
 
                ret = __bch2_trans_do(trans, NULL, NULL,
                                      BTREE_INSERT_NOFAIL|
index b49c382f5452ce79cbe8be61bf9c8a84c0cf58e1..c64197d8fc84daea9e21bb71aefacba809dfe284 100644 (file)
@@ -8,6 +8,7 @@
 #include "extents.h"
 #include "inode.h"
 #include "str_hash.h"
+#include "varint.h"
 
 #include <linux/random.h>
 
@@ -89,22 +90,17 @@ static int inode_decode_field(const u8 *in, const u8 *end,
        return bytes;
 }
 
-void bch2_inode_pack(struct bkey_inode_buf *packed,
-                    const struct bch_inode_unpacked *inode)
+static noinline void bch2_inode_pack_v1(struct bkey_inode_buf *packed,
+                                       const struct bch_inode_unpacked *inode)
 {
-       u8 *out = packed->inode.v.fields;
+       struct bkey_i_inode *k = &packed->inode;
+       u8 *out = k->v.fields;
        u8 *end = (void *) &packed[1];
        u8 *last_nonzero_field = out;
        unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
        unsigned bytes;
 
-       bkey_inode_init(&packed->inode.k_i);
-       packed->inode.k.p.offset        = inode->bi_inum;
-       packed->inode.v.bi_hash_seed    = inode->bi_hash_seed;
-       packed->inode.v.bi_flags        = cpu_to_le32(inode->bi_flags);
-       packed->inode.v.bi_mode         = cpu_to_le16(inode->bi_mode);
-
-#define x(_name, _bits)                                        \
+#define x(_name, _bits)                                                        \
        out += inode_encode_field(out, end, 0, inode->_name);           \
        nr_fields++;                                                    \
                                                                        \
@@ -123,7 +119,69 @@ void bch2_inode_pack(struct bkey_inode_buf *packed,
        set_bkey_val_bytes(&packed->inode.k, bytes);
        memset_u64s_tail(&packed->inode.v, 0, bytes);
 
-       SET_INODE_NR_FIELDS(&packed->inode.v, nr_fields);
+       SET_INODE_NR_FIELDS(&k->v, nr_fields);
+}
+
+/*
+ * Pack the BCH_INODE_FIELDS() of @inode into @packed->inode.v.fields using
+ * bch2_varint_encode().
+ *
+ * Fields are emitted in declaration order. A nonzero field is written as one
+ * varint; a zero field is written as a single 0 byte (which is also what the
+ * varint encoding of 0 looks like). Fields declared wider than 64 bits get an
+ * extra 0 byte appended, i.e. an always-zero second varint.
+ *
+ * Trailing zero fields are not stored: after all fields have been emitted the
+ * output is rewound to the end of the last nonzero field, the value size is
+ * set from that position and INODE_NR_FIELDS records how many fields remain.
+ *
+ * NOTE(review): the BUG_ON(out > end) check only runs after every field has
+ * been written, so it detects an overrun of @packed after the fact rather
+ * than preventing it.
+ */
+static void bch2_inode_pack_v2(struct bkey_inode_buf *packed,
+                              const struct bch_inode_unpacked *inode)
+{
+       struct bkey_i_inode *k = &packed->inode;
+       u8 *out = k->v.fields;
+       u8 *end = (void *) &packed[1];
+       u8 *last_nonzero_field = out;
+       unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
+       unsigned bytes;
+       int ret;
+
+       /* Expanded once per inode field by BCH_INODE_FIELDS() below */
+#define x(_name, _bits)                                                        \
+       nr_fields++;                                                    \
+                                                                       \
+       if (inode->_name) {                                             \
+               ret = bch2_varint_encode(out, inode->_name);            \
+               out += ret;                                             \
+                                                                       \
+               if (_bits > 64)                                         \
+                       *out++ = 0;                                     \
+                                                                       \
+               last_nonzero_field = out;                               \
+               last_nonzero_fieldnr = nr_fields;                       \
+       } else {                                                        \
+               *out++ = 0;                                             \
+                                                                       \
+               if (_bits > 64)                                         \
+                       *out++ = 0;                                     \
+       }
+
+       BCH_INODE_FIELDS()
+#undef  x
+       BUG_ON(out > end);
+
+       /* Drop trailing zero fields: keep only up to the last nonzero one */
+       out = last_nonzero_field;
+       nr_fields = last_nonzero_fieldnr;
+
+       bytes = out - (u8 *) &packed->inode.v;
+       set_bkey_val_bytes(&packed->inode.k, bytes);
+       memset_u64s_tail(&packed->inode.v, 0, bytes);
+
+       SET_INODE_NR_FIELDS(&k->v, nr_fields);
+}
+
+void bch2_inode_pack(struct bch_fs *c,
+                    struct bkey_inode_buf *packed,
+                    const struct bch_inode_unpacked *inode)
+{
+       bkey_inode_init(&packed->inode.k_i);
+       packed->inode.k.p.offset        = inode->bi_inum;
+       packed->inode.v.bi_hash_seed    = inode->bi_hash_seed;
+       packed->inode.v.bi_flags        = cpu_to_le32(inode->bi_flags);
+       packed->inode.v.bi_mode         = cpu_to_le16(inode->bi_mode);
+
+       if (c->sb.features & (1ULL << BCH_FEATURE_new_varint)) {
+               SET_INODE_NEW_VARINT(&packed->inode.v, true);
+               bch2_inode_pack_v2(packed, inode);
+       } else {
+               bch2_inode_pack_v1(packed, inode);
+       }
 
        if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
                struct bch_inode_unpacked unpacked;
@@ -135,26 +193,23 @@ void bch2_inode_pack(struct bkey_inode_buf *packed,
                BUG_ON(unpacked.bi_hash_seed    != inode->bi_hash_seed);
                BUG_ON(unpacked.bi_mode         != inode->bi_mode);
 
-#define x(_name, _bits)        BUG_ON(unpacked._name != inode->_name);
+#define x(_name, _bits)        if (unpacked._name != inode->_name)             \
+                       panic("unpacked %llu should be %llu",           \
+                             (u64) unpacked._name, (u64) inode->_name);
                BCH_INODE_FIELDS()
 #undef  x
        }
 }
 
-int bch2_inode_unpack(struct bkey_s_c_inode inode,
-                     struct bch_inode_unpacked *unpacked)
+static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
+                               struct bch_inode_unpacked *unpacked)
 {
        const u8 *in = inode.v->fields;
-       const u8 *end = (void *) inode.v + bkey_val_bytes(inode.k);
+       const u8 *end = bkey_val_end(inode);
        u64 field[2];
        unsigned fieldnr = 0, field_bits;
        int ret;
 
-       unpacked->bi_inum       = inode.k->p.offset;
-       unpacked->bi_hash_seed  = inode.v->bi_hash_seed;
-       unpacked->bi_flags      = le32_to_cpu(inode.v->bi_flags);
-       unpacked->bi_mode       = le16_to_cpu(inode.v->bi_mode);
-
 #define x(_name, _bits)                                        \
        if (fieldnr++ == INODE_NR_FIELDS(inode.v)) {                    \
                unsigned offset = offsetof(struct bch_inode_unpacked, _name);\
@@ -177,6 +232,62 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode,
 #undef  x
 
        /* XXX: signal if there were more fields than expected? */
+       return 0;
+}
+
+/*
+ * Decode the varint-encoded fields of @inode into @unpacked.
+ *
+ * Only the first INODE_NR_FIELDS(inode.v) fields are read from the key; every
+ * later field in BCH_INODE_FIELDS() is set to 0. Fields declared wider than
+ * 64 bits are read as two varints, the second one landing in v[1].
+ *
+ * Returns 0 on success. Returns the negative value from bch2_varint_decode()
+ * if a varint is rejected, or -1 if a decoded value does not fit the
+ * destination: either the second varint (v[1]) is nonzero, or assigning v[0]
+ * to the field changed its value.
+ */
+static int bch2_inode_unpack_v2(struct bkey_s_c_inode inode,
+                               struct bch_inode_unpacked *unpacked)
+{
+       const u8 *in = inode.v->fields;
+       const u8 *end = bkey_val_end(inode);
+       unsigned fieldnr = 0;
+       int ret;
+       u64 v[2];
+
+       /* Expanded once per inode field by BCH_INODE_FIELDS() below */
+#define x(_name, _bits)                                                        \
+       if (fieldnr < INODE_NR_FIELDS(inode.v)) {                       \
+               ret = bch2_varint_decode(in, end, &v[0]);               \
+               if (ret < 0)                                            \
+                       return ret;                                     \
+               in += ret;                                              \
+                                                                       \
+               if (_bits > 64) {                                       \
+                       ret = bch2_varint_decode(in, end, &v[1]);       \
+                       if (ret < 0)                                    \
+                               return ret;                             \
+                       in += ret;                                      \
+               } else {                                                \
+                       v[1] = 0;                                       \
+               }                                                       \
+       } else {                                                        \
+               v[0] = v[1] = 0;                                        \
+       }                                                               \
+                                                                       \
+       unpacked->_name = v[0];                                         \
+       if (v[1] || v[0] != unpacked->_name)                            \
+               return -1;                                              \
+       fieldnr++;
+
+       BCH_INODE_FIELDS()
+#undef  x
+
+       /* XXX: signal if there were more fields than expected? */
+       return 0;
+}
+
+/*
+ * Unpack the inode key @inode into @unpacked.
+ *
+ * The fixed fields (inode number, hash seed, flags, mode) are copied first;
+ * the variable length fields are then decoded by the v2 decoder when the
+ * INODE_NEW_VARINT flag is set on the key, and by the v1 decoder otherwise.
+ *
+ * Returns the result of the selected decoder.
+ */
+int bch2_inode_unpack(struct bkey_s_c_inode inode,
+                     struct bch_inode_unpacked *unpacked)
+{
+       unpacked->bi_inum       = inode.k->p.offset;
+       unpacked->bi_hash_seed  = inode.v->bi_hash_seed;
+       unpacked->bi_flags      = le32_to_cpu(inode.v->bi_flags);
+       unpacked->bi_mode       = le16_to_cpu(inode.v->bi_mode);
+
+       if (INODE_NEW_VARINT(inode.v)) {
+               return bch2_inode_unpack_v2(inode, unpacked);
+       } else {
+               return bch2_inode_unpack_v1(inode, unpacked);
+       }
 
+       /* NOTE(review): not reached, both branches above return */
        return 0;
 }
@@ -223,7 +334,7 @@ int bch2_inode_write(struct btree_trans *trans,
        if (IS_ERR(inode_p))
                return PTR_ERR(inode_p);
 
-       bch2_inode_pack(inode_p, inode);
+       bch2_inode_pack(trans->c, inode_p, inode);
        bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
        return 0;
 }
@@ -426,10 +537,7 @@ found_slot:
        inode_u->bi_inum        = k.k->p.offset;
        inode_u->bi_generation  = bkey_generation(k);
 
-       bch2_inode_pack(inode_p, inode_u);
-       bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
-       bch2_trans_iter_put(trans, iter);
-       return 0;
+       return bch2_inode_write(trans, iter, inode_u);
 }
 
 int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
@@ -553,32 +661,3 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
        return bch2_trans_do(c, NULL, NULL, 0,
                bch2_inode_find_by_inum_trans(&trans, inode_nr, inode));
 }
-
-#ifdef CONFIG_BCACHEFS_DEBUG
-void bch2_inode_pack_test(void)
-{
-       struct bch_inode_unpacked *u, test_inodes[] = {
-               {
-                       .bi_atime       = U64_MAX,
-                       .bi_ctime       = U64_MAX,
-                       .bi_mtime       = U64_MAX,
-                       .bi_otime       = U64_MAX,
-                       .bi_size        = U64_MAX,
-                       .bi_sectors     = U64_MAX,
-                       .bi_uid         = U32_MAX,
-                       .bi_gid         = U32_MAX,
-                       .bi_nlink       = U32_MAX,
-                       .bi_generation  = U32_MAX,
-                       .bi_dev         = U32_MAX,
-               },
-       };
-
-       for (u = test_inodes;
-            u < test_inodes + ARRAY_SIZE(test_inodes);
-            u++) {
-               struct bkey_inode_buf p;
-
-               bch2_inode_pack(&p, u);
-       }
-}
-#endif
index 5743be2307f3608793c23b081c4e30f77fd1049f..ef7e885dce0c87e671248f30f9debbaaf13b2dc9 100644 (file)
@@ -24,6 +24,14 @@ void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *,
        .val_to_text    = bch2_inode_generation_to_text,        \
 }
 
+/*
+ * u96 is currently a plain u64 alias; the packed 64 + 32 bit struct layout
+ * below is compiled out with #if 0.
+ *
+ * NOTE(review): presumably the type used for BCH_INODE_FIELDS() entries
+ * declared as 96 bits wide - confirm against struct bch_inode_unpacked.
+ */
+#if 0
+typedef struct {
+       u64                     lo;
+       u32                     hi;
+} __packed __aligned(4) u96;
+#endif
+typedef u64 u96;
+
 struct bch_inode_unpacked {
        u64                     bi_inum;
        __le64                  bi_hash_seed;
@@ -43,7 +51,8 @@ struct bkey_inode_buf {
 #undef  x
 } __attribute__((packed, aligned(8)));
 
-void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *);
+void bch2_inode_pack(struct bch_fs *, struct bkey_inode_buf *,
+                    const struct bch_inode_unpacked *);
 int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *);
 
 struct btree_iter *bch2_inode_peek(struct btree_trans *,
@@ -166,10 +175,4 @@ static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi,
        }
 }
 
-#ifdef CONFIG_BCACHEFS_DEBUG
-void bch2_inode_pack_test(void);
-#else
-static inline void bch2_inode_pack_test(void) {}
-#endif
-
 #endif /* _BCACHEFS_INODE_H */
index 6df99ac013a1f008e69286350ba569db43f813ce..62a9a0b32d5b1df3d8953a910c601a22beaca4d7 100644 (file)
@@ -310,7 +310,7 @@ int bch2_extent_update(struct btree_trans *trans,
                inode_u.bi_sectors += delta;
 
                if (delta || new_i_size) {
-                       bch2_inode_pack(&inode_p, &inode_u);
+                       bch2_inode_pack(trans->c, &inode_p, &inode_u);
                        bch2_trans_update(trans, inode_iter,
                                          &inode_p.inode.k_i, 0);
                }
index 32fed6b81a526a6f83549bc05cf56ab5907fb7a9..1745cfac6b26aef400984100b5d82193dec1ad88 100644 (file)
@@ -1320,7 +1320,7 @@ int bch2_fs_initialize(struct bch_fs *c)
        bch2_inode_init(c, &root_inode, 0, 0,
                        S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
        root_inode.bi_inum = BCACHEFS_ROOT_INO;
-       bch2_inode_pack(&packed_inode, &root_inode);
+       bch2_inode_pack(c, &packed_inode, &root_inode);
 
        err = "error creating root directory";
        ret = bch2_btree_insert(c, BTREE_ID_INODES,
index e55fcbcbd37fb0223a337ff32c0a78ddeeabe6e9..61b7e750037c756cb983d04ae0329bbaece020a8 100644 (file)
@@ -2027,7 +2027,6 @@ static void bcachefs_exit(void)
 static int __init bcachefs_init(void)
 {
        bch2_bkey_pack_test();
-       bch2_inode_pack_test();
 
        if (!(bcachefs_kset = kset_create_and_add("bcachefs", NULL, fs_kobj)) ||
            bch2_chardev_init() ||
diff --git a/fs/bcachefs/varint.c b/fs/bcachefs/varint.c
new file mode 100644 (file)
index 0000000..0f3d06a
--- /dev/null
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bitops.h>
+#include <linux/math.h>
+#include <linux/string.h>
+#include <asm/unaligned.h>
+
+#include "varint.h"
+
+/**
+ * bch2_varint_encode - encode a u64 as a variable length integer
+ * @out:       destination buffer
+ * @v:         value to encode
+ *
+ * Values needing up to 56 bits are stored little endian in 1-8 bytes: the
+ * value is shifted left by the encoded length and the low bits of the first
+ * byte hold (length - 1) one bits followed by a zero bit. Larger values are
+ * stored as a 0xff marker byte followed by the full 64 bit little endian
+ * value, 9 bytes in total.
+ *
+ * Exactly the returned number of bytes is written to @out; nothing past the
+ * encoded value is touched, so @out needs no padding.
+ *
+ * Returns the number of bytes written (1-9).
+ */
+int bch2_varint_encode(u8 *out, u64 v)
+{
+       /* v|1 so that 0 still counts as one significant bit */
+       unsigned bits = fls64(v|1);
+       unsigned bytes = DIV_ROUND_UP(bits, 7);
+       __le64 v_le;
+
+       if (likely(bytes < 9)) {
+               v <<= bytes;
+               /* unsigned constant: left shifting a negative int is undefined */
+               v |= ~(~0U << (bytes - 1));
+               v_le = cpu_to_le64(v);
+               /* copy only the encoded bytes instead of a full 8 byte store */
+               memcpy(out, &v_le, bytes);
+       } else {
+               *out++ = 255;
+               bytes = 9;
+               put_unaligned_le64(v, out);
+       }
+
+       return bytes;
+}
+
+/**
+ * bch2_varint_decode - decode a variable length integer
+ * @in:        start of the encoded value
+ * @end:       end of the buffer holding the encoded value
+ * @out:       on success, the decoded value
+ *
+ * The encoded length is taken from the first byte: the position of its lowest
+ * zero bit plus one, or 9 bytes when the first byte is 0xff.
+ *
+ * No byte at or beyond @end is read: the first byte is only inspected when
+ * @in < @end, and the payload is only loaded once the whole encoded value is
+ * known to lie inside the buffer.
+ *
+ * Returns the number of bytes consumed (1-9), or -1 if the buffer is empty or
+ * the encoded value would extend past @end.
+ */
+int bch2_varint_decode(const u8 *in, const u8 *end, u64 *out)
+{
+       unsigned bytes;
+       u64 v;
+
+       /* nothing to read, not even the length byte */
+       if (unlikely(in >= end))
+               return -1;
+
+       bytes = ffz(*in & 255) + 1;
+
+       if (unlikely(in + bytes > end))
+               return -1;
+
+       if (likely(bytes < 9)) {
+               __le64 v_le = 0;
+
+               /*
+                * Load only the encoded bytes; the upper bytes stay zero, so
+                * shifting out the length prefix leaves exactly the
+                * 7 * bytes value bits and no mask is needed.
+                */
+               memcpy(&v_le, in, bytes);
+               v = le64_to_cpu(v_le);
+               v >>= bytes;
+       } else {
+               /* skip the 0xff marker, the value follows as a full le64 */
+               v = get_unaligned_le64(++in);
+       }
+
+       *out = v;
+       return bytes;
+}
diff --git a/fs/bcachefs/varint.h b/fs/bcachefs/varint.h
new file mode 100644 (file)
index 0000000..8daf813
--- /dev/null
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_VARINT_H
+#define _BCACHEFS_VARINT_H
+
+/*
+ * Encode a u64 as a variable length integer into the given buffer; returns
+ * the length of the encoding in bytes (1-9).
+ */
+int bch2_varint_encode(u8 *, u64);
+/*
+ * Decode a variable length integer from the buffer delimited by the first two
+ * arguments (start, end) into the third; returns the number of bytes consumed,
+ * or -1 if the encoded value would extend past the end pointer.
+ */
+int bch2_varint_decode(const u8 *, const u8 *, u64 *);
+
+#endif /* _BCACHEFS_VARINT_H */