Btrfs: write barriers on commit, balance level before split
authorChris Mason <chris.mason@oracle.com>
Fri, 20 Apr 2007 17:16:02 +0000 (13:16 -0400)
committerDavid Woodhouse <dwmw2@hera.kernel.org>
Fri, 20 Apr 2007 17:16:02 +0000 (13:16 -0400)
Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c

index 4efcd1bd63e5fb58c6bd60db5f3072a1f55dbee3..744fd728e5d9c1de42745bb7e5ad50b0b32b9463 100644 (file)
@@ -151,6 +151,11 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path,
        for (i = 0; nritems > 1 && i < nritems - 2; i++) {
                struct btrfs_key cpukey;
                btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key);
+if (comp_keys(&node->ptrs[i].key, &cpukey) >= 0) {
+       struct btrfs_key bad;
+       btrfs_disk_key_to_cpu(&bad, &node->ptrs[i].key);
+printk("check_node level %d i is %d bad comp %Lu %u %Lu, %Lu %u %Lu\n",level, i, bad.objectid, bad.flags, bad.offset, cpukey.objectid, cpukey.flags, cpukey.offset);
+}
                BUG_ON(comp_keys(&node->ptrs[i].key, &cpukey) >= 0);
        }
        return 0;
@@ -448,6 +453,111 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
        return ret;
 }
 
+/* returns zero if the push worked, non-zero otherwise */
+static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root,
+                               struct btrfs_path *path, int level)
+{
+       struct buffer_head *right_buf;
+       struct buffer_head *mid_buf;
+       struct buffer_head *left_buf;
+       struct buffer_head *parent_buf = NULL;
+       struct btrfs_node *right = NULL;
+       struct btrfs_node *mid;
+       struct btrfs_node *left = NULL;
+       struct btrfs_node *parent = NULL;
+       int ret = 0;
+       int wret;
+       int pslot;
+       int orig_slot = path->slots[level];
+       u64 orig_ptr;
+
+       if (level == 0)
+               return 1;
+
+       mid_buf = path->nodes[level];
+       mid = btrfs_buffer_node(mid_buf);
+       orig_ptr = btrfs_node_blockptr(mid, orig_slot);
+
+       if (level < BTRFS_MAX_LEVEL - 1)
+               parent_buf = path->nodes[level + 1];
+       pslot = path->slots[level + 1];
+
+       if (!parent_buf)
+               return 1;
+       parent = btrfs_buffer_node(parent_buf);
+
+       left_buf = read_node_slot(root, parent_buf, pslot - 1);
+
+       /* first, try to make some room in the middle buffer */
+       if (left_buf) {
+               u32 left_nr;
+               btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1,
+                               &left_buf);
+               left = btrfs_buffer_node(left_buf);
+               left_nr = btrfs_header_nritems(&left->header);
+               wret = push_node_left(trans, root, left_buf, mid_buf);
+               if (wret < 0)
+                       ret = wret;
+               if (wret == 0) {
+                       orig_slot += left_nr;
+                       btrfs_memcpy(root, parent,
+                                    &parent->ptrs[pslot].key,
+                                    &mid->ptrs[0].key,
+                                    sizeof(struct btrfs_disk_key));
+                       btrfs_mark_buffer_dirty(parent_buf);
+                       if (btrfs_header_nritems(&left->header) > orig_slot) {
+                               path->nodes[level] = left_buf;
+                               path->slots[level + 1] -= 1;
+                               path->slots[level] = orig_slot;
+                               btrfs_block_release(root, mid_buf);
+                       } else {
+                               orig_slot -=
+                                       btrfs_header_nritems(&left->header);
+                               path->slots[level] = orig_slot;
+                               btrfs_block_release(root, left_buf);
+                       }
+                       check_node(root, path, level);
+                       return 0;
+               }
+               btrfs_block_release(root, left_buf);
+       }
+       right_buf = read_node_slot(root, parent_buf, pslot + 1);
+
+       /*
+        * then try to empty the right most buffer into the middle
+        */
+       if (right_buf) {
+               btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1,
+                               &right_buf);
+               right = btrfs_buffer_node(right_buf);
+               wret = balance_node_right(trans, root, right_buf, mid_buf);
+               if (wret < 0)
+                       ret = wret;
+               if (wret == 0) {
+                       btrfs_memcpy(root, parent,
+                                    &parent->ptrs[pslot + 1].key,
+                                    &right->ptrs[0].key,
+                                    sizeof(struct btrfs_disk_key));
+                       btrfs_mark_buffer_dirty(parent_buf);
+                       if (btrfs_header_nritems(&mid->header) <= orig_slot) {
+                               path->nodes[level] = right_buf;
+                               path->slots[level + 1] += 1;
+                               path->slots[level] = orig_slot -
+                                       btrfs_header_nritems(&mid->header);
+                               btrfs_block_release(root, mid_buf);
+                       } else {
+                               btrfs_block_release(root, right_buf);
+                       }
+                       check_node(root, path, level);
+                       return 0;
+               }
+               btrfs_block_release(root, right_buf);
+       }
+       check_node(root, path, level);
+       return 1;
+}
+
 /*
  * look for key in the tree.  path is filled in with nodes along the way
  * if key is found, we return zero and you can find the item in the leaf
@@ -774,7 +884,16 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
                ret = insert_new_root(trans, root, path, level + 1);
                if (ret)
                        return ret;
+       } else {
+               ret = push_nodes_for_insert(trans, root, path, level);
+               t = path->nodes[level];
+               c = btrfs_buffer_node(t);
+               if (!ret &&
+                   btrfs_header_nritems(&c->header) <
+                   BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
+                       return 0;
        }
+
        c_nritems = btrfs_header_nritems(&c->header);
        split_buffer = btrfs_alloc_free_block(trans, root);
        split = btrfs_buffer_node(split_buffer);
index 047a84762d93035cc47db73dc10874473b4c11ec..681b23933d9bfaeb21ff68da19034bb61b6270b1 100644 (file)
@@ -259,6 +259,7 @@ struct btrfs_fs_info {
        struct list_head trans_list;
        struct crypto_hash *hash_tfm;
        spinlock_t hash_lock;
+       int do_barriers;
        struct kobject kobj;
 };
 
index b666c3571aeed561e60475ab9701e84fce16e271..13046295bf7a0dd7557a8670dd8854fd490caf1f 100644 (file)
@@ -540,6 +540,7 @@ struct btrfs_root *open_ctree(struct super_block *sb)
        fs_info->btree_inode->i_nlink = 1;
        fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size;
        fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
+       fs_info->do_barriers = 1;
        BTRFS_I(fs_info->btree_inode)->root = tree_root;
        memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
               sizeof(struct btrfs_key));
@@ -622,6 +623,7 @@ struct btrfs_root *open_ctree(struct super_block *sb)
 int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
                      *root)
 {
+       int ret;
        struct buffer_head *bh = root->fs_info->sb_buffer;
 
        btrfs_set_super_root(root->fs_info->disk_super,
@@ -632,7 +634,15 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
        csum_tree_block(root, bh, 0);
        bh->b_end_io = end_buffer_write_sync;
        get_bh(bh);
-       submit_bh(WRITE, bh);
+       if (root->fs_info->do_barriers)
+               ret = submit_bh(WRITE_BARRIER, bh);
+       else
+               ret = submit_bh(WRITE, bh);
+       if (ret == -EOPNOTSUPP) {
+               set_buffer_uptodate(bh);
+               root->fs_info->do_barriers = 0;
+               ret = submit_bh(WRITE, bh);
+       }
        wait_on_buffer(bh);
        if (!buffer_uptodate(bh)) {
                WARN_ON(1);