Btrfs: Add lowest key information to back refs for extent tree blocks as well.
author Chris Mason <chris.mason@oracle.com>
Tue, 11 Dec 2007 17:42:00 +0000 (12:42 -0500)
committer Chris Mason <chris.mason@oracle.com>
Thu, 25 Sep 2008 15:03:58 +0000 (11:03 -0400)
Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/file.c
fs/btrfs/inode.c

index 0ac21e3aac87c6850419516d9c6ad64c4b8737b4..60a30da6af00e2dbf09408bdbddbe6b674f2da00 100644 (file)
@@ -210,7 +210,7 @@ static int btree_writepages(struct address_space *mapping,
 {
        struct extent_map_tree *tree;
        tree = &BTRFS_I(mapping->host)->extent_tree;
-       if (0 && wbc->sync_mode == WB_SYNC_NONE) {
+       if (wbc->sync_mode == WB_SYNC_NONE) {
                u64 num_dirty;
                u64 start = 0;
                unsigned long thresh = 96 * 1024 * 1024;
index 32991f73e9dbf39953164a2793764068c6dd9d5d..187be40124749f53ba7b89afa896a957d0f650eb 100644 (file)
@@ -457,6 +457,94 @@ out:
        return ret;
 }
 
+/*
+ * Back reference rules.  Back refs have three main goals:
+ *
+ * 1) differentiate between all holders of references to an extent so that
+ *    when a reference is dropped we can make sure it was a valid reference
+ *    before freeing the extent.
+ *
+ * 2) Provide enough information to quickly find the holders of an extent
+ *    if we notice a given block is corrupted or bad.
+ *
+ * 3) Make it easy to migrate blocks for FS shrinking or storage pool
+ *    maintenance.  This is actually the same as #2, but with a slightly
+ *    different use case.
+ *
+ * File extents can be referenced by:
+ *
+ * - multiple snapshots, subvolumes, or different generations in one subvol
+ * - different files inside a single subvolume (in theory, not implemented yet)
+ * - different offsets inside a file (bookend extents in file.c)
+ *
+ * The extent ref structure has fields for:
+ *
+ * - Objectid of the subvolume root
+ * - Generation number of the tree holding the reference
+ * - objectid of the file holding the reference
+ * - offset in the file corresponding to the key holding the reference
+ *
+ * When a file extent is allocated the fields are filled in:
+ *     (root_key.objectid, trans->transid, inode objectid, offset in file)
+ *
+ * When a leaf is cow'd new references are added for every file extent found
+ * in the leaf.  It looks the same as the create case, but trans->transid
+ * will be different when the block is cow'd.
+ *
+ *     (root_key.objectid, trans->transid, inode objectid, offset in file)
+ *
+ * When a file extent is removed either during snapshot deletion or file
+ * truncation, the corresponding back reference is found
+ * by searching for:
+ *
+ *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
+ *      inode objectid, offset in file)
+ *
+ * Btree extents can be referenced by:
+ *
+ * - Different subvolumes
+ * - Different generations of the same subvolume
+ *
+ * Storing sufficient information for a full reverse mapping of a btree
+ * block would require storing the lowest key of the block in the backref,
+ * and it would require updating that lowest key either before write out or
+ * every time it changed.  Instead, the objectid of the lowest key is stored
+ * along with the level of the tree block.  This provides a hint
+ * about where in the btree the block can be found.  Searches through the
+ * btree only need to look for a pointer to that block, so they stop one
+ * level higher than the level recorded in the backref.
+ *
+ * Some btrees do not do reference counting on their extents.  These
+ * include the extent tree and the tree of tree roots.  Backrefs for these
+ * trees always have a generation of zero.
+ *
+ * When a tree block is created, back references are inserted:
+ *
+ * (root->root_key.objectid, trans->transid or zero, lowest_key_objectid, level)
+ *
+ * When a tree block is cow'd in a reference counted root,
+ * new back references are added for all the blocks it points to.
+ * These are of the form (trans->transid will have increased since creation):
+ *
+ * (root->root_key.objectid, trans->transid, lowest_key_objectid, level)
+ *
+ * Because the lowest_key_objectid and the level are just hints
+ * they are not used when backrefs are deleted.  When a backref is deleted:
+ *
+ * if backref was for a tree root:
+ *     root_objectid = root->root_key.objectid
+ * else
+ *     root_objectid = btrfs_header_owner(parent)
+ *
+ * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
+ *
+ * Back Reference Key hashing:
+ *
+ * Back references have four fields, each 64 bits long.  Unfortunately,
+ * these are hashed into a single 64 bit number and placed into the key offset.
+ * The key objectid corresponds to the first byte in the extent, and the
+ * key type is set to BTRFS_EXTENT_REF_KEY.
+ */
 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 struct btrfs_path *path, u64 bytenr,
@@ -939,10 +1027,13 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct
        u64 start;
        u64 end;
        struct btrfs_fs_info *info = extent_root->fs_info;
+       struct extent_buffer *eb;
        struct btrfs_path *path;
        struct btrfs_key ins;
+       struct btrfs_disk_key first;
        struct btrfs_extent_item extent_item;
        int ret;
+       int level;
        int err = 0;
 
        btrfs_set_stack_extent_refs(&extent_item, 1);
@@ -961,10 +1052,19 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct
                                        &extent_item, sizeof(extent_item));
                clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
                                  GFP_NOFS);
+               eb = read_tree_block(extent_root, ins.objectid, ins.offset);
+               level = btrfs_header_level(eb);
+               if (level == 0) {
+                       btrfs_item_key(eb, &first, 0);
+               } else {
+                       btrfs_node_key(eb, &first, 0);
+               }
                err = btrfs_insert_extent_backref(trans, extent_root, path,
                                          start, extent_root->root_key.objectid,
-                                         0, 0, 0);
+                                         0, btrfs_disk_key_objectid(&first),
+                                         level);
                BUG_ON(err);
+               free_extent_buffer(eb);
        }
        btrfs_free_path(path);
        return 0;
index 5b1f90f06e034639b18d36b8f77461a704ff714e..1cc4d285951ca6ee4883519a40930b71b386b83a 100644 (file)
@@ -545,13 +545,10 @@ next_slot:
                        u64 disk_num_bytes = 0;
                        u64 extent_num_bytes = 0;
                        u64 root_gen;
+                       u64 root_owner;
 
-                       if (leaf != root->node) {
-                               root_gen =
-                                       btrfs_header_generation(path->nodes[1]);
-                       } else {
-                               root_gen = btrfs_header_generation(leaf);
-                       }
+                       root_gen = btrfs_header_generation(leaf);
+                       root_owner = btrfs_header_owner(leaf);
                        if (found_extent) {
                                disk_bytenr =
                                      btrfs_file_extent_disk_bytenr(leaf,
@@ -575,7 +572,7 @@ next_slot:
                                ret = btrfs_free_extent(trans, root,
                                                disk_bytenr,
                                                disk_num_bytes,
-                                               root->root_key.objectid,
+                                               root_owner,
                                                root_gen, inode->i_ino,
                                                key.offset, 0);
                        }
index bb70db0c9df440b06c952edee8db326d9599851f..03fea037667e4d61924240a9f853cb5e4c7a22ab 100644 (file)
@@ -563,6 +563,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
        u64 extent_num_bytes = 0;
        u64 item_end = 0;
        u64 root_gen = 0;
+       u64 root_owner = 0;
        int found_extent;
        int del_item;
        int extent_type = -1;
@@ -673,15 +674,8 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
                                        found_extent = 1;
                                        inode->i_blocks -= num_dec;
                                }
-                               if (leaf == root->node) {
-                                       root_gen =
-                                               btrfs_header_generation(leaf);
-                               } else {
-                                       struct extent_buffer *parent;
-                                       parent = path->nodes[1];
-                                       root_gen =
-                                               btrfs_header_generation(parent);
-                               }
+                               root_gen = btrfs_header_generation(leaf);
+                               root_owner = btrfs_header_owner(leaf);
                        }
                } else if (extent_type == BTRFS_FILE_EXTENT_INLINE &&
                           !del_item) {
@@ -703,7 +697,7 @@ delete:
                if (found_extent) {
                        ret = btrfs_free_extent(trans, root, extent_start,
                                                extent_num_bytes,
-                                               root->root_key.objectid,
+                                               root_owner,
                                                root_gen, inode->i_ino,
                                                found_key.offset, 0);
                        BUG_ON(ret);