Btrfs: Avoid recursive chunk allocations
[linux-2.6-block.git] / fs / btrfs / extent-tree.c
index e49147e767df9bc11a491ade6e23fa02bac2f173..a589912fdd51aaa2d6ea727352196d02fbf2b53f 100644 (file)
@@ -35,10 +35,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct
                                 btrfs_root *extent_root);
 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
                               btrfs_root *extent_root);
-int btrfs_make_block_group(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *root, u64 bytes_used,
-                          u64 type, u64 chunk_tree, u64 chunk_objectid,
-                          u64 size);
 
 
 static int cache_block_group(struct btrfs_root *root,
@@ -191,6 +187,7 @@ static int noinline find_search_start(struct btrfs_root *root,
 
        if (!cache)
                goto out;
+
        total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
        free_space_cache = &root->fs_info->free_space_cache;
 
@@ -200,7 +197,7 @@ again:
                goto out;
 
        last = max(search_start, cache->key.objectid);
-       if (!block_group_bits(cache, data)) {
+       if (!block_group_bits(cache, data) || cache->ro) {
                goto new_group;
        }
 
@@ -225,6 +222,8 @@ again:
                        continue;
                }
                spin_unlock_irq(&free_space_cache->lock);
+               if (cache->ro)
+                       goto new_group;
                if (start + num > cache->key.objectid + cache->key.offset)
                        goto new_group;
                if (start + num  > total_fs_bytes)
@@ -310,20 +309,20 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
        int bit;
        int ret;
        int full_search = 0;
-       int factor = 8;
+       int factor = 10;
 
        block_group_cache = &info->block_group_cache;
        total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
 
        if (!owner)
-               factor = 8;
+               factor = 10;
 
        bit = block_group_state_bits(data);
 
        if (search_start && search_start < total_fs_bytes) {
                struct btrfs_block_group_cache *shint;
                shint = btrfs_lookup_block_group(info, search_start);
-               if (shint && block_group_bits(shint, data)) {
+               if (shint && block_group_bits(shint, data) && !shint->ro) {
                        used = btrfs_block_group_used(&shint->item);
                        if (used + shint->pinned <
                            div_factor(shint->key.offset, factor)) {
@@ -331,7 +330,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
                        }
                }
        }
-       if (hint && block_group_bits(hint, data) &&
+       if (hint && !hint->ro && block_group_bits(hint, data) &&
            hint->key.objectid < total_fs_bytes) {
                used = btrfs_block_group_used(&hint->item);
                if (used + hint->pinned <
@@ -368,7 +367,7 @@ again:
                if (cache->key.objectid > total_fs_bytes)
                        break;
 
-               if (block_group_bits(cache, data)) {
+               if (!cache->ro && block_group_bits(cache, data)) {
                        if (full_search)
                                free_check = cache->key.offset;
                        else
@@ -644,7 +643,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
-       path->reada = 0;
+       path->reada = 1;
        key.objectid = bytenr;
        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
        key.offset = num_bytes;
@@ -664,7 +663,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 
        btrfs_release_path(root->fs_info->extent_root, path);
 
-       path->reada = 0;
+       path->reada = 1;
        ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
                                          path, bytenr, root_objectid,
                                          ref_generation, owner, owner_offset);
@@ -696,7 +695,7 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans,
 
        WARN_ON(num_bytes < root->sectorsize);
        path = btrfs_alloc_path();
-       path->reada = 0;
+       path->reada = 1;
        key.objectid = bytenr;
        key.offset = num_bytes;
        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
@@ -980,7 +979,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                ret = get_state_private(block_group_cache, start, &ptr);
                if (ret)
                        break;
-
                cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
                err = write_one_cache_group(trans, root,
                                            path, cache);
@@ -1025,6 +1023,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        if (found) {
                found->total_bytes += total_bytes;
                found->bytes_used += bytes_used;
+               found->full = 0;
                WARN_ON(found->total_bytes < found->bytes_used);
                *space_info = found;
                return 0;
@@ -1047,6 +1046,7 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 {
        u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
                                   BTRFS_BLOCK_GROUP_RAID1 |
+                                  BTRFS_BLOCK_GROUP_RAID10 |
                                   BTRFS_BLOCK_GROUP_DUP);
        if (extra_flags) {
                if (flags & BTRFS_BLOCK_GROUP_DATA)
@@ -1094,8 +1094,7 @@ printk("space info full %Lu\n", flags);
        BUG_ON(ret);
 
        ret = btrfs_make_block_group(trans, extent_root, 0, flags,
-                    extent_root->fs_info->chunk_root->root_key.objectid,
-                    start, num_bytes);
+                    BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
        BUG_ON(ret);
 
        return 0;
@@ -1333,7 +1332,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
        if (!path)
                return -ENOMEM;
 
-       path->reada = 0;
+       path->reada = 1;
        ret = lookup_extent_backref(trans, extent_root, path,
                                    bytenr, root_objectid,
                                    ref_generation,
@@ -1705,7 +1704,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
        u64 super_used;
        u64 root_used;
        u64 search_start = 0;
-       u64 new_hint;
        u64 alloc_profile;
        u32 sizes[2];
        struct btrfs_fs_info *info = root->fs_info;
@@ -1743,15 +1741,12 @@ again:
                BUG_ON(ret);
        }
 
-       new_hint = max(hint_byte, root->fs_info->alloc_start);
-       if (new_hint < btrfs_super_total_bytes(&info->super_copy))
-               hint_byte = new_hint;
-
        WARN_ON(num_bytes < root->sectorsize);
        ret = find_free_extent(trans, root, num_bytes, empty_size,
                               search_start, search_end, hint_byte, ins,
                               trans->alloc_exclude_start,
                               trans->alloc_exclude_nr, data);
+
        if (ret == -ENOSPC && num_bytes > min_alloc_size) {
                num_bytes = num_bytes >> 1;
                num_bytes = max(num_bytes, min_alloc_size);
@@ -1892,7 +1887,6 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
        }
        btrfs_set_header_generation(buf, trans->transid);
        clean_tree_block(trans, root, buf);
-       wait_on_tree_block_writeback(root, buf);
        btrfs_set_buffer_uptodate(buf);
 
        if (PageDirty(buf->first_page)) {
@@ -2478,15 +2472,16 @@ out:
        return ret;
 }
 
-int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
+int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
 {
        struct btrfs_trans_handle *trans;
        struct btrfs_root *tree_root = root->fs_info->tree_root;
        struct btrfs_path *path;
        u64 cur_byte;
        u64 total_found;
+       u64 shrink_last_byte;
+       struct btrfs_block_group_cache *shrink_block_group;
        struct btrfs_fs_info *info = root->fs_info;
-       struct extent_io_tree *block_group_cache;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct extent_buffer *leaf;
@@ -2494,17 +2489,29 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
        int ret;
        int progress = 0;
 
-       btrfs_set_super_total_bytes(&info->super_copy, new_size);
-       clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
-                          GFP_NOFS);
-       block_group_cache = &info->block_group_cache;
+       shrink_block_group = btrfs_lookup_block_group(root->fs_info,
+                                                     shrink_start);
+       BUG_ON(!shrink_block_group);
+
+       shrink_last_byte = shrink_start + shrink_block_group->key.offset;
+
+       shrink_block_group->space_info->total_bytes -=
+               shrink_block_group->key.offset;
+printk("shrink_extent_tree %Lu -> %Lu type %Lu\n", shrink_start, shrink_last_byte, shrink_block_group->flags);
        path = btrfs_alloc_path();
        root = root->fs_info->extent_root;
        path->reada = 2;
 
 again:
+       trans = btrfs_start_transaction(root, 1);
+       do_chunk_alloc(trans, root->fs_info->extent_root,
+                       btrfs_block_group_used(&shrink_block_group->item) +
+                       2 * 1024 * 1024, shrink_block_group->flags);
+       btrfs_end_transaction(trans, root);
+       shrink_block_group->ro = 1;
+
        total_found = 0;
-       key.objectid = new_size;
+       key.objectid = shrink_start;
        key.offset = 0;
        key.type = 0;
        cur_byte = key.objectid;
@@ -2516,10 +2523,12 @@ again:
        ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
        if (ret < 0)
                goto out;
+
        if (ret == 0) {
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-               if (found_key.objectid + found_key.offset > new_size) {
+               if (found_key.objectid + found_key.offset > shrink_start &&
+                   found_key.objectid < shrink_last_byte) {
                        cur_byte = found_key.objectid;
                        key.objectid = cur_byte;
                }
@@ -2548,6 +2557,9 @@ next:
 
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 
+               if (found_key.objectid >= shrink_last_byte)
+                       break;
+
                if (progress && need_resched()) {
                        memcpy(&key, &found_key, sizeof(key));
                        mutex_unlock(&root->fs_info->fs_mutex);
@@ -2588,68 +2600,31 @@ next:
                goto again;
        }
 
+       /*
+        * we've freed all the extents, now remove the block
+        * group item from the tree
+        */
        trans = btrfs_start_transaction(root, 1);
-       key.objectid = new_size;
-       key.offset = 0;
-       key.type = 0;
-       while(1) {
-               u64 ptr;
-
-               ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
-               if (ret < 0)
-                       goto out;
+       memcpy(&key, &shrink_block_group->key, sizeof(key));
 
-               leaf = path->nodes[0];
-               nritems = btrfs_header_nritems(leaf);
-bg_next:
-               if (path->slots[0] >= nritems) {
-                       ret = btrfs_next_leaf(root, path);
-                       if (ret < 0)
-                               break;
-                       if (ret == 1) {
-                               ret = 0;
-                               break;
-                       }
-                       leaf = path->nodes[0];
-                       btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-
-                       /*
-                        * btrfs_next_leaf doesn't cow buffers, we have to
-                        * do the search again
-                        */
-                       memcpy(&key, &found_key, sizeof(key));
-                       btrfs_release_path(root, path);
-                       goto resched_check;
-               }
+       ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+       if (ret > 0)
+               ret = -EIO;
+       if (ret < 0)
+               goto out;
 
-               btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-               if (btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) {
-                       printk("shrinker found key %Lu %u %Lu\n",
-                               found_key.objectid, found_key.type,
-                               found_key.offset);
-                       path->slots[0]++;
-                       goto bg_next;
-               }
-               ret = get_state_private(&info->block_group_cache,
-                                       found_key.objectid, &ptr);
-               if (!ret)
-                       kfree((void *)(unsigned long)ptr);
+       leaf = path->nodes[0];
+       nritems = btrfs_header_nritems(leaf);
+       btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+       kfree(shrink_block_group);
 
-               clear_extent_bits(&info->block_group_cache, found_key.objectid,
-                                 found_key.objectid + found_key.offset - 1,
-                                 (unsigned int)-1, GFP_NOFS);
+       clear_extent_bits(&info->block_group_cache, found_key.objectid,
+                         found_key.objectid + found_key.offset - 1,
+                         (unsigned int)-1, GFP_NOFS);
 
-               key.objectid = found_key.objectid + 1;
-               btrfs_del_item(trans, root, path);
-               btrfs_release_path(root, path);
-resched_check:
-               if (need_resched()) {
-                       mutex_unlock(&root->fs_info->fs_mutex);
-                       cond_resched();
-                       mutex_lock(&root->fs_info->fs_mutex);
-               }
-       }
-       clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
+       btrfs_del_item(trans, root, path);
+       clear_extent_dirty(&info->free_space_cache,
+                          shrink_start, shrink_last_byte - 1,
                           GFP_NOFS);
        btrfs_commit_transaction(trans, root);
 out:
@@ -2657,13 +2632,6 @@ out:
        return ret;
 }
 
-int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *root, u64 new_size)
-{
-       btrfs_set_super_total_bytes(&root->fs_info->super_copy, new_size);
-       return 0;
-}
-
 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
                           struct btrfs_key *key)
 {
@@ -2731,7 +2699,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-               cache = kmalloc(sizeof(*cache), GFP_NOFS);
+               cache = kzalloc(sizeof(*cache), GFP_NOFS);
                if (!cache) {
                        ret = -ENOMEM;
                        break;
@@ -2741,8 +2709,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                                   btrfs_item_ptr_offset(leaf, path->slots[0]),
                                   sizeof(cache->item));
                memcpy(&cache->key, &found_key, sizeof(found_key));
-               cache->cached = 0;
-               cache->pinned = 0;
 
                key.objectid = found_key.objectid + found_key.offset;
                btrfs_release_path(root, path);
@@ -2782,7 +2748,7 @@ error:
 
 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, u64 bytes_used,
-                          u64 type, u64 chunk_tree, u64 chunk_objectid,
+                          u64 type, u64 chunk_objectid, u64 chunk_offset,
                           u64 size)
 {
        int ret;
@@ -2794,16 +2760,14 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        extent_root = root->fs_info->extent_root;
        block_group_cache = &root->fs_info->block_group_cache;
 
-       cache = kmalloc(sizeof(*cache), GFP_NOFS);
+       cache = kzalloc(sizeof(*cache), GFP_NOFS);
        BUG_ON(!cache);
-       cache->key.objectid = chunk_objectid;
+       cache->key.objectid = chunk_offset;
        cache->key.offset = size;
-       cache->cached = 0;
-       cache->pinned = 0;
+
        btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
        memset(&cache->item, 0, sizeof(cache->item));
        btrfs_set_block_group_used(&cache->item, bytes_used);
-       btrfs_set_block_group_chunk_tree(&cache->item, chunk_tree);
        btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
        cache->flags = type;
        btrfs_set_block_group_flags(&cache->item, type);
@@ -2813,12 +2777,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        BUG_ON(ret);
 
        bit = block_group_state_bits(type);
-       set_extent_bits(block_group_cache, chunk_objectid,
-                       chunk_objectid + size - 1,
+       set_extent_bits(block_group_cache, chunk_offset,
+                       chunk_offset + size - 1,
                        bit | EXTENT_LOCKED, GFP_NOFS);
-       set_state_private(block_group_cache, chunk_objectid,
-                         (unsigned long)cache);
 
+       set_state_private(block_group_cache, chunk_offset,
+                         (unsigned long)cache);
        ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
                                sizeof(cache->item));
        BUG_ON(ret);