Btrfs: Seed device support
[linux-2.6-block.git] / fs / btrfs / ctree.c
index 9caeb377de6327c7d5d466af994d2f9e9dea6301..dd1c03aea2dfe3835f346296d592586748be69ac 100644 (file)
@@ -185,6 +185,10 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
        btrfs_set_header_owner(cow, new_root_objectid);
        btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
 
+       write_extent_buffer(cow, root->fs_info->fsid,
+                           (unsigned long)btrfs_header_fsid(cow),
+                           BTRFS_FSID_SIZE);
+
        WARN_ON(btrfs_header_generation(buf) > trans->transid);
        ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL);
        kfree(new_root);
@@ -274,6 +278,10 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
        btrfs_set_header_owner(cow, root->root_key.objectid);
        btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
 
+       write_extent_buffer(cow, root->fs_info->fsid,
+                           (unsigned long)btrfs_header_fsid(cow),
+                           BTRFS_FSID_SIZE);
+
        WARN_ON(btrfs_header_generation(buf) > trans->transid);
        if (btrfs_header_generation(buf) != trans->transid) {
                u32 nr_extents;
@@ -287,7 +295,7 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
                /*
                 * There are only two places that can drop reference to
                 * tree blocks owned by living reloc trees, one is here,
-                * the other place is btrfs_merge_path. In both places,
+                * the other place is btrfs_drop_subtree. In both places,
                 * we check reference count while tree block is locked.
                 * Furthermore, if reference count is one, it won't get
                 * increased by someone else.
@@ -312,9 +320,6 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
        }
 
        if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
-               ret = btrfs_add_reloc_mapping(root, buf->start,
-                                             buf->len, cow->start);
-               BUG_ON(ret);
                ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
                WARN_ON(ret);
        }
@@ -434,6 +439,25 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
        return 0;
 }
 
+/*
+ * same as comp_keys only with two btrfs_key's
+ */
+static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
+{
+       if (k1->objectid > k2->objectid)
+               return 1;
+       if (k1->objectid < k2->objectid)
+               return -1;
+       if (k1->type > k2->type)
+               return 1;
+       if (k1->type < k2->type)
+               return -1;
+       if (k1->offset > k2->offset)
+               return 1;
+       if (k1->offset < k2->offset)
+               return -1;
+       return 0;
+}
 
 /*
  * this is used by the defrag code to go through all the
@@ -1288,16 +1312,16 @@ static noinline void reada_for_search(struct btrfs_root *root,
                }
                search = btrfs_node_blockptr(node, nr);
                if ((search >= lowest_read && search <= highest_read) ||
-                   (search < lowest_read && lowest_read - search <= 32768) ||
-                   (search > highest_read && search - highest_read <= 32768)) {
+                   (search < lowest_read && lowest_read - search <= 16384) ||
+                   (search > highest_read && search - highest_read <= 16384)) {
                        readahead_tree_block(root, search, blocksize,
                                     btrfs_node_ptr_generation(node, nr));
                        nread += blocksize;
                }
                nscan++;
-               if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
+               if (path->reada < 2 && (nread > (64 * 1024) || nscan > 32))
                        break;
-               if(nread > (1024 * 1024) || nscan > 128)
+               if(nread > (256 * 1024) || nscan > 128)
                        break;
 
                if (search < lowest_read)
@@ -1390,8 +1414,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
        lowest_level = p->lowest_level;
        WARN_ON(lowest_level && ins_len > 0);
        WARN_ON(p->nodes[0] != NULL);
-       WARN_ON(cow && root == root->fs_info->extent_root &&
-               !mutex_is_locked(&root->fs_info->alloc_mutex));
+
        if (ins_len < 0)
                lowest_unlock = 2;
 
@@ -1627,61 +1650,57 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans,
                btrfs_node_key_to_cpu(eb, &key, slot);
                key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
 
+               if (generation == trans->transid) {
+                       eb = read_tree_block(root, bytenr, blocksize,
+                                            generation);
+                       btrfs_tree_lock(eb);
+               }
+
                /*
                 * if node keys match and node pointer hasn't been modified
                 * in the running transaction, we can merge the path. for
-                * reloc trees, the node pointer check is skipped, this is
-                * because the reloc trees are fully controlled by the space
-                * balance code, no one else can modify them.
+                * blocks owened by reloc trees, the node pointer check is
+                * skipped, this is because these blocks are fully controlled
+                * by the space balance code, no one else can modify them.
                 */
                if (!nodes[level - 1] || !key_match ||
                    (generation == trans->transid &&
-                    root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)) {
-next_level:
-                       if (level == 1 || level == lowest_level + 1)
+                    btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) {
+                       if (level == 1 || level == lowest_level + 1) {
+                               if (generation == trans->transid) {
+                                       btrfs_tree_unlock(eb);
+                                       free_extent_buffer(eb);
+                               }
                                break;
+                       }
 
-                       eb = read_tree_block(root, bytenr, blocksize,
-                                            generation);
-                       btrfs_tree_lock(eb);
+                       if (generation != trans->transid) {
+                               eb = read_tree_block(root, bytenr, blocksize,
+                                               generation);
+                               btrfs_tree_lock(eb);
+                       }
 
                        ret = btrfs_cow_block(trans, root, eb, parent, slot,
                                              &eb, 0);
                        BUG_ON(ret);
 
+                       if (root->root_key.objectid ==
+                           BTRFS_TREE_RELOC_OBJECTID) {
+                               if (!nodes[level - 1]) {
+                                       nodes[level - 1] = eb->start;
+                                       memcpy(&node_keys[level - 1], &key,
+                                              sizeof(node_keys[0]));
+                               } else {
+                                       WARN_ON(1);
+                               }
+                       }
+
                        btrfs_tree_unlock(parent);
                        free_extent_buffer(parent);
                        parent = eb;
                        continue;
                }
 
-               if (generation == trans->transid) {
-                       u32 refs;
-                       BUG_ON(btrfs_header_owner(eb) !=
-                              BTRFS_TREE_RELOC_OBJECTID);
-                       /*
-                        * lock the block to keep __btrfs_cow_block from
-                        * changing the reference count.
-                        */
-                       eb = read_tree_block(root, bytenr, blocksize,
-                                            generation);
-                       btrfs_tree_lock(eb);
-
-                       ret = btrfs_lookup_extent_ref(trans, root, bytenr,
-                                                     blocksize, &refs);
-                       BUG_ON(ret);
-                       /*
-                        * if replace block whose reference count is one,
-                        * we have to "drop the subtree". so skip it for
-                        * simplicity
-                        */
-                       if (refs == 1) {
-                               btrfs_tree_unlock(eb);
-                               free_extent_buffer(eb);
-                               goto next_level;
-                       }
-               }
-
                btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
                btrfs_set_node_ptr_generation(parent, slot, trans->transid);
                btrfs_mark_buffer_dirty(parent);
@@ -1693,16 +1712,24 @@ next_level:
                                        btrfs_header_generation(parent),
                                        level - 1);
                BUG_ON(ret);
-               ret = btrfs_free_extent(trans, root, bytenr,
-                                       blocksize, parent->start,
-                                       btrfs_header_owner(parent),
-                                       btrfs_header_generation(parent),
-                                       level - 1, 1);
-               BUG_ON(ret);
 
+               /*
+                * If the block was created in the running transaction,
+                * it's possible this is the last reference to it, so we
+                * should drop the subtree.
+                */
                if (generation == trans->transid) {
+                       ret = btrfs_drop_subtree(trans, root, eb, parent);
+                       BUG_ON(ret);
                        btrfs_tree_unlock(eb);
                        free_extent_buffer(eb);
+               } else {
+                       ret = btrfs_free_extent(trans, root, bytenr,
+                                       blocksize, parent->start,
+                                       btrfs_header_owner(parent),
+                                       btrfs_header_generation(parent),
+                                       level - 1, 1);
+                       BUG_ON(ret);
                }
                break;
        }
@@ -2999,6 +3026,157 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
        return ret;
 }
 
+/*
+ * Given a key and some data, insert items into the tree.
+ * This does all the path init required, making room in the tree if needed.
+ * Returns the number of keys that were inserted.
+ */
+int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root,
+                           struct btrfs_path *path,
+                           struct btrfs_key *cpu_key, u32 *data_size,
+                           int nr)
+{
+       struct extent_buffer *leaf;
+       struct btrfs_item *item;
+       int ret = 0;
+       int slot;
+       int slot_orig;
+       int i;
+       u32 nritems;
+       u32 total_data = 0;
+       u32 total_size = 0;
+       unsigned int data_end;
+       struct btrfs_disk_key disk_key;
+       struct btrfs_key found_key;
+
+       found_key.objectid = 0;
+       nr = min_t(int, nr, BTRFS_NODEPTRS_PER_BLOCK(root));
+
+       for (i = 0; i < nr; i++)
+               total_data += data_size[i];
+
+       total_data = min_t(u32, total_data, BTRFS_LEAF_DATA_SIZE(root));
+       total_size = total_data + (nr * sizeof(struct btrfs_item));
+       ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
+       if (ret == 0)
+               return -EEXIST;
+       if (ret < 0)
+               goto out;
+
+       slot_orig = path->slots[0];
+       leaf = path->nodes[0];
+
+       nritems = btrfs_header_nritems(leaf);
+       data_end = leaf_data_end(root, leaf);
+
+       if (btrfs_leaf_free_space(root, leaf) < total_size) {
+               for (i = nr; i >= 0; i--) {
+                       total_data -= data_size[i];
+                       total_size -= data_size[i] + sizeof(struct btrfs_item);
+                       if (total_size < btrfs_leaf_free_space(root, leaf))
+                               break;
+               }
+               nr = i;
+       }
+
+       slot = path->slots[0];
+       BUG_ON(slot < 0);
+
+       if (slot != nritems) {
+               unsigned int old_data = btrfs_item_end_nr(leaf, slot);
+
+               item = btrfs_item_nr(leaf, slot);
+               btrfs_item_key_to_cpu(leaf, &found_key, slot);
+
+               /* figure out how many keys we can insert in here */
+               total_data = data_size[0];
+               for (i = 1; i < nr; i++) {
+                       if (comp_cpu_keys(&found_key, cpu_key + i) <= 0)
+                               break;
+                       total_data += data_size[i];
+               }
+               nr = i;
+
+               if (old_data < data_end) {
+                       btrfs_print_leaf(root, leaf);
+                       printk("slot %d old_data %d data_end %d\n",
+                              slot, old_data, data_end);
+                       BUG_ON(1);
+               }
+               /*
+                * item0..itemN ... dataN.offset..dataN.size .. data0.size
+                */
+               /* first correct the data pointers */
+               WARN_ON(leaf->map_token);
+               for (i = slot; i < nritems; i++) {
+                       u32 ioff;
+
+                       item = btrfs_item_nr(leaf, i);
+                       if (!leaf->map_token) {
+                               map_extent_buffer(leaf, (unsigned long)item,
+                                       sizeof(struct btrfs_item),
+                                       &leaf->map_token, &leaf->kaddr,
+                                       &leaf->map_start, &leaf->map_len,
+                                       KM_USER1);
+                       }
+
+                       ioff = btrfs_item_offset(leaf, item);
+                       btrfs_set_item_offset(leaf, item, ioff - total_data);
+               }
+               if (leaf->map_token) {
+                       unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+                       leaf->map_token = NULL;
+               }
+
+               /* shift the items */
+               memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
+                             btrfs_item_nr_offset(slot),
+                             (nritems - slot) * sizeof(struct btrfs_item));
+
+               /* shift the data */
+               memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+                             data_end - total_data, btrfs_leaf_data(leaf) +
+                             data_end, old_data - data_end);
+               data_end = old_data;
+       } else {
+               /*
+                * this sucks but it has to be done, if we are inserting at
+                * the end of the leaf only insert 1 of the items, since we
+                * have no way of knowing whats on the next leaf and we'd have
+                * to drop our current locks to figure it out
+                */
+               nr = 1;
+       }
+
+       /* setup the item for the new data */
+       for (i = 0; i < nr; i++) {
+               btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
+               btrfs_set_item_key(leaf, &disk_key, slot + i);
+               item = btrfs_item_nr(leaf, slot + i);
+               btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
+               data_end -= data_size[i];
+               btrfs_set_item_size(leaf, item, data_size[i]);
+       }
+       btrfs_set_header_nritems(leaf, nritems + nr);
+       btrfs_mark_buffer_dirty(leaf);
+
+       ret = 0;
+       if (slot == 0) {
+               btrfs_cpu_key_to_disk(&disk_key, cpu_key);
+               ret = fixup_low_keys(trans, root, path, &disk_key, 1);
+       }
+
+       if (btrfs_leaf_free_space(root, leaf) < 0) {
+               btrfs_print_leaf(root, leaf);
+               BUG();
+       }
+out:
+       if (!ret)
+               ret = nr;
+       return ret;
+}
+
 /*
  * Given a key and some data, insert items into the tree.
  * This does all the path init required, making room in the tree if needed.