Btrfs: Tune readahead during defrag to avoid reading too much at once
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2c569b4d59d44a8eb9b2e85603bf8a68b8821a04..c4fe37791d3f6346d00831fb05378fc358c04c76 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -63,7 +63,7 @@ static int cache_block_group(struct btrfs_root *root,
        int ret;
        struct btrfs_key key;
        struct extent_buffer *leaf;
-       struct extent_map_tree *free_space_cache;
+       struct extent_io_tree *free_space_cache;
        int slot;
        u64 last = 0;
        u64 hole_size;
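
Most of the one-line hunks in this diff are mechanical fallout from splitting the old extent_map code in two: extent mapping keeps the extent_map_tree type, while byte-range state tracking moves to the new extent_io_tree. Every cache in this file that only tracks range state (free space, block groups, pinned and pending-delete extents) is retyped accordingly. A minimal sketch of the split, with made-up members standing in for the real definitions:

/* Illustrative declarations only -- not the real btrfs headers. */
struct extent_map_tree { void *map_root; };	/* logical->physical mappings */
struct extent_io_tree { void *state_root; };	/* range state: locked, dirty,
						 * delalloc, csum, pinned, ... */

/* An inode of this era carries one of each; only the state half is renamed
 * in the hunks below (lock/delalloc state becomes io_tree, while extent
 * mappings stay in an extent_map_tree). */
struct inode_trees {
	struct extent_map_tree extent_tree;
	struct extent_io_tree io_tree;
};
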
@@ -158,7 +158,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
                                                         btrfs_fs_info *info,
                                                         u64 bytenr)
 {
-       struct extent_map_tree *block_group_cache;
+       struct extent_io_tree *block_group_cache;
        struct btrfs_block_group_cache *block_group = NULL;
        u64 ptr;
        u64 start;
@@ -281,7 +281,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
                                                 int data, int owner)
 {
        struct btrfs_block_group_cache *cache;
-       struct extent_map_tree *block_group_cache;
+       struct extent_io_tree *block_group_cache;
        struct btrfs_block_group_cache *found_group = NULL;
        struct btrfs_fs_info *info = root->fs_info;
        u64 used;
@@ -951,7 +951,7 @@ fail:
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root)
 {
-       struct extent_map_tree *block_group_cache;
+       struct extent_io_tree *block_group_cache;
        struct btrfs_block_group_cache *cache;
        int ret;
        int err = 0;
@@ -1107,12 +1107,12 @@ static int update_pinned_extents(struct btrfs_root *root,
        return 0;
 }
 
-int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
+int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
 {
        u64 last = 0;
        u64 start;
        u64 end;
-       struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
+       struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
        int ret;
 
        while(1) {
@@ -1128,12 +1128,12 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
 
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root,
-                              struct extent_map_tree *unpin)
+                              struct extent_io_tree *unpin)
 {
        u64 start;
        u64 end;
        int ret;
-       struct extent_map_tree *free_space_cache;
+       struct extent_io_tree *free_space_cache;
        free_space_cache = &root->fs_info->free_space_cache;
 
        while(1) {
@@ -1329,8 +1329,8 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct
        int err = 0;
        u64 start;
        u64 end;
-       struct extent_map_tree *pending_del;
-       struct extent_map_tree *pinned_extents;
+       struct extent_io_tree *pending_del;
+       struct extent_io_tree *pinned_extents;
 
        pending_del = &extent_root->fs_info->pending_del;
        pinned_extents = &extent_root->fs_info->pinned_extents;
@@ -1431,6 +1431,19 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
                data = BTRFS_BLOCK_GROUP_MIXED;
        }
 
+       /* for SSD, cluster allocations together as much as possible */
+       if (btrfs_test_opt(root, SSD)) {
+               if (!data) {
+                       if (root->fs_info->last_alloc)
+                               hint_byte = root->fs_info->last_alloc;
+                       else {
+                               hint_byte = hint_byte &
+                                       ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
+                               empty_size += 16 * 1024 * 1024;
+                       }
+               }
+       }
+
        search_end = min(search_end,
                         btrfs_super_total_bytes(&info->super_copy));
        if (hint_byte) {
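
First of the -o ssd hunks: metadata allocations (the !data case) are deliberately clustered. When fs_info->last_alloc remembers a previous allocation the search starts right behind it; otherwise the caller's hint is rounded down to a block group boundary and empty_size is padded by 16MB, so the chosen spot has room for the allocations that follow. A self-contained userspace sketch of just that decision; the 256MB value for BTRFS_BLOCK_GROUP_SIZE is an assumption about this era's headers:

#include <stdint.h>
#include <stdio.h>

/* Assumed value, for illustration only. */
#define BLOCK_GROUP_SIZE (256ULL * 1024 * 1024)

/* Sketch of the -o ssd metadata hint in find_free_extent(): prefer the end
 * of the last allocation; failing that, align the hint and ask for 16MB of
 * slack so new allocations can cluster. */
static uint64_t ssd_hint(uint64_t last_alloc, uint64_t hint_byte,
			 uint64_t *empty_size)
{
	if (last_alloc)
		return last_alloc;
	*empty_size += 16 * 1024 * 1024;
	return hint_byte & ~(BLOCK_GROUP_SIZE - 1);
}

int main(void)
{
	uint64_t slack = 0;

	/* no remembered allocation: hint falls back to the 256MB boundary */
	printf("%llu\n", (unsigned long long)ssd_hint(0, 300ULL << 20, &slack));
	/* a remembered allocation wins outright and adds no slack */
	printf("%llu\n", (unsigned long long)ssd_hint(1ULL << 30, 0, &slack));
	return 0;
}
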
@@ -1456,6 +1469,19 @@ check_failed:
        }
        search_start = find_search_start(root, &block_group, search_start,
                                         total_needed, data);
+
+       if (!data && btrfs_test_opt(root, SSD) && info->last_alloc &&
+           search_start != info->last_alloc) {
+               info->last_alloc = 0;
+               if (!empty_size) {
+                       empty_size += 16 * 1024 * 1024;
+                       total_needed += empty_size;
+               }
+               search_start = find_search_start(root, &block_group,
+                                                search_start, total_needed,
+                                                data);
+       }
+
        search_start = stripe_align(root, search_start);
        cached_start = search_start;
        btrfs_init_path(path);
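
The hunk above handles the hint going stale: if find_search_start() comes back with something other than last_alloc, the old cluster is full or unusable, so the hint is dropped and the search is redone once with 16MB of slack to open a new cluster. The hunk that follows records the end of each successful metadata allocation in info->last_alloc (control reaches the error: label with ret == 0 on success, hence the !ret test), and the __btrfs_alloc_free_block hunk after that stops tagging new btree blocks for defrag under -o ssd, where fragmentation carries no seek penalty.
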
@@ -1610,6 +1636,8 @@ enospc:
 error:
        btrfs_release_path(root, path);
        btrfs_free_path(path);
+       if (btrfs_test_opt(root, SSD) && !ret && !data)
+               info->last_alloc = ins->objectid + ins->offset;
        return ret;
 }
 /*
@@ -1774,11 +1802,12 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
 
        set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
                         buf->start + buf->len - 1, GFP_NOFS);
-       set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
+       set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->io_tree,
                        buf->start, buf->start + buf->len - 1,
                        EXTENT_CSUM, GFP_NOFS);
        buf->flags |= EXTENT_CSUM;
-       btrfs_set_buffer_defrag(buf);
+       if (!btrfs_test_opt(root, SSD))
+               btrfs_set_buffer_defrag(buf);
        trans->blocks_used++;
        return buf;
 }
@@ -1827,27 +1856,44 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
 }
 
 static void noinline reada_walk_down(struct btrfs_root *root,
-                                    struct extent_buffer *node)
+                                    struct extent_buffer *node,
+                                    int slot)
 {
-       int i;
-       u32 nritems;
        u64 bytenr;
-       int ret;
+       u64 last = 0;
+       u32 nritems;
        u32 refs;
-       int level;
        u32 blocksize;
+       int ret;
+       int i;
+       int level;
+       int skipped = 0;
 
        nritems = btrfs_header_nritems(node);
        level = btrfs_header_level(node);
-       for (i = 0; i < nritems; i++) {
+       if (level)
+               return;
+
+       for (i = slot; i < nritems && skipped < 32; i++) {
                bytenr = btrfs_node_blockptr(node, i);
-               blocksize = btrfs_level_size(root, level - 1);
-               ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs);
-               BUG_ON(ret);
-               if (refs != 1)
+               if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
+                            (last > bytenr && last - bytenr > 32 * 1024))) {
+                       skipped++;
                        continue;
+               }
+               blocksize = btrfs_level_size(root, level - 1);
+               if (i != slot) {
+                       ret = lookup_extent_ref(NULL, root, bytenr,
+                                               blocksize, &refs);
+                       BUG_ON(ret);
+                       if (refs != 1) {
+                               skipped++;
+                               continue;
+                       }
+               }
                mutex_unlock(&root->fs_info->fs_mutex);
                ret = readahead_tree_block(root, bytenr, blocksize);
+               last = bytenr + blocksize;
                cond_resched();
                mutex_lock(&root->fs_info->fs_mutex);
                if (ret)
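
The heart of the commit: reada_walk_down() used to prefetch every child of a node, with a reference count lookup for each, which on a fragmented tree queued large amounts of scattered I/O in one go. The rewrite starts at the slot actually being visited, remembers where the last readahead ended, and gives up after 32 skips, where a skip is a child more than 32KB away from that point or (for slots other than the starting one) a block shared with another snapshot. A self-contained userspace sketch of the distance policy, with made-up block pointers and an assumed 4KB blocksize:

#include <stdint.h>
#include <stdio.h>

/* Readahead stays near the last block read instead of chasing every pointer. */
static int near_last(uint64_t last, uint64_t bytenr)
{
	if (!last)
		return 1;	/* nothing read yet: always prefetch */
	return bytenr > last ? bytenr - last <= 32 * 1024
			     : last - bytenr <= 32 * 1024;
}

int main(void)
{
	/* made-up child block pointers of one btree node */
	uint64_t ptrs[] = { 4096, 8192, 12288, 10ULL << 20, 16384 };
	uint64_t last = 0;
	unsigned int i, skipped = 0;

	for (i = 0; i < sizeof(ptrs) / sizeof(ptrs[0]) && skipped < 32; i++) {
		if (!near_last(last, ptrs[i])) {
			skipped++;	/* too far away to be worth seeding I/O */
			continue;
		}
		printf("readahead block at %llu\n",
		       (unsigned long long)ptrs[i]);
		last = ptrs[i] + 4096;	/* assumed 4KB blocksize */
	}
	return 0;
}

The two hunks below complete the change: the unconditional prefetch at slot 0 of every node disappears from walk_down_tree(), and readahead now runs only when the child block is about to be read from disk anyway (missing from cache or not uptodate), so walking already-cached blocks triggers no readahead at all.
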
@@ -1890,9 +1936,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
                WARN_ON(*level >= BTRFS_MAX_LEVEL);
                cur = path->nodes[*level];
 
-               if (*level > 0 && path->slots[*level] == 0)
-                       reada_walk_down(root, cur);
-
                if (btrfs_header_level(cur) != *level)
                        WARN_ON(1);
 
@@ -1922,6 +1965,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
                next = btrfs_find_tree_block(root, bytenr, blocksize);
                if (!next || !btrfs_buffer_uptodate(next)) {
                        free_extent_buffer(next);
+                       reada_walk_down(root, cur, path->slots[*level]);
                        mutex_unlock(&root->fs_info->fs_mutex);
                        next = read_tree_block(root, bytenr, blocksize);
                        mutex_lock(&root->fs_info->fs_mutex);
@@ -2137,7 +2181,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
        unsigned long i;
        struct page *page;
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct file_ra_state *ra;
 
        ra = kzalloc(sizeof(*ra), GFP_NOFS);
@@ -2166,15 +2210,14 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
                page_start = (u64)page->index << PAGE_CACHE_SHIFT;
                page_end = page_start + PAGE_CACHE_SIZE - 1;
 
-               lock_extent(em_tree, page_start, page_end, GFP_NOFS);
+               lock_extent(io_tree, page_start, page_end, GFP_NOFS);
 
                delalloc_start = page_start;
-               existing_delalloc =
-                       count_range_bits(&BTRFS_I(inode)->extent_tree,
-                                        &delalloc_start, page_end,
-                                        PAGE_CACHE_SIZE, EXTENT_DELALLOC);
+               existing_delalloc = count_range_bits(io_tree,
+                                            &delalloc_start, page_end,
+                                            PAGE_CACHE_SIZE, EXTENT_DELALLOC);
 
-               set_extent_delalloc(em_tree, page_start,
+               set_extent_delalloc(io_tree, page_start,
                                    page_end, GFP_NOFS);
 
                spin_lock(&root->fs_info->delalloc_lock);
@@ -2182,7 +2225,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
                                                 existing_delalloc;
                spin_unlock(&root->fs_info->delalloc_lock);
 
-               unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
+               unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
                set_page_dirty(page);
                unlock_page(page);
                page_cache_release(page);
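
Besides the em_tree -> io_tree rename, relocate_inode_pages() shows a delalloc accounting idiom worth calling out: before a page's range is marked EXTENT_DELALLOC, count_range_bits() measures how much of it already carried the bit, and only the difference lands in fs_info->delalloc_bytes under delalloc_lock. A toy, self-contained version of that bookkeeping, with per-byte flags standing in for the extent_io_tree:

#include <stdio.h>
#include <string.h>

#define RANGE 16
static unsigned char delalloc[RANGE];	/* stand-in for EXTENT_DELALLOC bits */
static long delalloc_bytes;		/* the running total being kept exact */

static void mark_delalloc(int start, int end)	/* inclusive byte range */
{
	int existing = 0, i;

	for (i = start; i <= end; i++)	/* count_range_bits() stand-in */
		existing += delalloc[i];
	memset(delalloc + start, 1, end - start + 1);
	/* credit only the bytes that were not already delalloc */
	delalloc_bytes += (end - start + 1) - existing;
}

int main(void)
{
	mark_delalloc(0, 7);
	mark_delalloc(4, 11);	/* overlapping bytes must not count twice */
	printf("delalloc_bytes = %ld\n", delalloc_bytes);	/* prints 12 */
	return 0;
}
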
@@ -2350,7 +2393,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
        u64 cur_byte;
        u64 total_found;
        struct btrfs_fs_info *info = root->fs_info;
-       struct extent_map_tree *block_group_cache;
+       struct extent_io_tree *block_group_cache;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct extent_buffer *leaf;
@@ -2532,7 +2575,7 @@ int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
        struct btrfs_block_group_cache *cache;
        struct btrfs_block_group_item *item;
        struct btrfs_fs_info *info = root->fs_info;
-       struct extent_map_tree *block_group_cache;
+       struct extent_io_tree *block_group_cache;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        int ret;
@@ -2616,7 +2659,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
        int bit;
        struct btrfs_block_group_cache *cache;
        struct btrfs_fs_info *info = root->fs_info;
-       struct extent_map_tree *block_group_cache;
+       struct extent_io_tree *block_group_cache;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct extent_buffer *leaf;