Btrfs: Further reduce the concurrency penalty of defrag and drop_snapshot
[linux-block.git] / fs / btrfs / tree-defrag.c
CommitLineData
6702ed49
CM
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include "ctree.h"
21#include "disk-io.h"
22#include "print-tree.h"
23#include "transaction.h"
24
25static void reada_defrag(struct btrfs_root *root,
26 struct btrfs_node *node)
27{
28 int i;
29 u32 nritems;
30 u64 blocknr;
31 int ret;
32
33 nritems = btrfs_header_nritems(&node->header);
34 for (i = 0; i < nritems; i++) {
35 blocknr = btrfs_node_blockptr(node, i);
36 ret = readahead_tree_block(root, blocknr);
37 if (ret)
38 break;
39 }
40}
41
42static int defrag_walk_down(struct btrfs_trans_handle *trans,
43 struct btrfs_root *root,
44 struct btrfs_path *path, int *level,
45 int cache_only)
46{
47 struct buffer_head *next;
48 struct buffer_head *cur;
49 u64 blocknr;
50 int ret = 0;
51
52 WARN_ON(*level < 0);
53 WARN_ON(*level >= BTRFS_MAX_LEVEL);
54
55 while(*level > 0) {
56 WARN_ON(*level < 0);
57 WARN_ON(*level >= BTRFS_MAX_LEVEL);
58 cur = path->nodes[*level];
59
60 if (!cache_only && *level > 1 && path->slots[*level] == 0)
61 reada_defrag(root, btrfs_buffer_node(cur));
62
63 if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
64 WARN_ON(1);
65
66 if (path->slots[*level] >=
67 btrfs_header_nritems(btrfs_buffer_header(cur)))
68 break;
69
70 if (*level == 1) {
71 ret = btrfs_realloc_node(trans, root,
72 path->nodes[*level],
73 cache_only);
74 break;
75 }
76 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
77 path->slots[*level]);
78
79 if (cache_only) {
80 next = btrfs_find_tree_block(root, blocknr);
81 if (!next || !buffer_uptodate(next) ||
82 buffer_locked(next)) {
83 brelse(next);
84 path->slots[*level]++;
85 continue;
86 }
87 } else {
88 next = read_tree_block(root, blocknr);
89 }
90 ret = btrfs_cow_block(trans, root, next, path->nodes[*level],
91 path->slots[*level], &next);
92 BUG_ON(ret);
93 ret = btrfs_realloc_node(trans, root, next, cache_only);
94 BUG_ON(ret);
95 WARN_ON(*level <= 0);
96 if (path->nodes[*level-1])
97 btrfs_block_release(root, path->nodes[*level-1]);
98 path->nodes[*level-1] = next;
99 *level = btrfs_header_level(btrfs_buffer_header(next));
100 path->slots[*level] = 0;
101 }
102 WARN_ON(*level < 0);
103 WARN_ON(*level >= BTRFS_MAX_LEVEL);
104 btrfs_block_release(root, path->nodes[*level]);
105 path->nodes[*level] = NULL;
106 *level += 1;
107 WARN_ON(ret);
108 return 0;
109}
110
111static int defrag_walk_up(struct btrfs_trans_handle *trans,
112 struct btrfs_root *root,
113 struct btrfs_path *path, int *level,
114 int cache_only)
115{
116 int i;
117 int slot;
118 struct btrfs_node *node;
119
120 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
121 slot = path->slots[i];
122 if (slot < btrfs_header_nritems(
123 btrfs_buffer_header(path->nodes[i])) - 1) {
124 path->slots[i]++;
125 *level = i;
126 node = btrfs_buffer_node(path->nodes[i]);
127 WARN_ON(i == 0);
128 btrfs_disk_key_to_cpu(&root->defrag_progress,
129 &node->ptrs[path->slots[i]].key);
130 root->defrag_level = i;
131 return 0;
132 } else {
133 btrfs_block_release(root, path->nodes[*level]);
134 path->nodes[*level] = NULL;
135 *level = i + 1;
136 }
137 }
138 return 1;
139}
140
141int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
142 struct btrfs_root *root, int cache_only)
143{
144 struct btrfs_path *path = NULL;
145 struct buffer_head *tmp;
146 int ret = 0;
147 int wret;
148 int level;
149 int orig_level;
150 int i;
6702ed49
CM
151
152 if (root->ref_cows == 0) {
153 goto out;
154 }
155 path = btrfs_alloc_path();
156 if (!path)
157 return -ENOMEM;
158
159 level = btrfs_header_level(btrfs_buffer_header(root->node));
160 orig_level = level;
161 if (level == 0) {
162 goto out;
163 }
164 if (root->defrag_progress.objectid == 0) {
165 get_bh(root->node);
166 ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp);
167 BUG_ON(ret);
168 ret = btrfs_realloc_node(trans, root, root->node, cache_only);
169 BUG_ON(ret);
170 path->nodes[level] = root->node;
171 path->slots[level] = 0;
172 } else {
173 level = root->defrag_level;
174 path->lowest_level = level;
175 wret = btrfs_search_slot(trans, root, &root->defrag_progress,
176 path, 0, 1);
177
178 if (wret < 0) {
179 ret = wret;
180 goto out;
181 }
182 while(level > 0 && !path->nodes[level])
183 level--;
184 if (!path->nodes[level]) {
185 ret = 0;
186 goto out;
187 }
188 }
189
190 while(1) {
191 wret = defrag_walk_down(trans, root, path, &level, cache_only);
192 if (wret > 0)
193 break;
194 if (wret < 0)
195 ret = wret;
196
197 wret = defrag_walk_up(trans, root, path, &level, cache_only);
198 if (wret > 0)
199 break;
200 if (wret < 0)
201 ret = wret;
409eb95d
CM
202 ret = -EAGAIN;
203 break;
6702ed49
CM
204 }
205 for (i = 0; i <= orig_level; i++) {
206 if (path->nodes[i]) {
207 btrfs_block_release(root, path->nodes[i]);
208 path->nodes[i] = 0;
209 }
210 }
211out:
212 if (path)
213 btrfs_free_path(path);
214 if (ret != -EAGAIN) {
215 memset(&root->defrag_progress, 0,
216 sizeof(root->defrag_progress));
217 }
218 return ret;
219}