Btrfs: Allocator fix variety pack
[linux-2.6-block.git] / fs / btrfs / tree-defrag.c
CommitLineData
6702ed49
CM
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include "ctree.h"
21#include "disk-io.h"
22#include "print-tree.h"
23#include "transaction.h"
24
25static void reada_defrag(struct btrfs_root *root,
5f39d397 26 struct extent_buffer *node)
6702ed49
CM
27{
28 int i;
29 u32 nritems;
db94535d 30 u64 bytenr;
ca7a79ad 31 u64 gen;
db94535d 32 u32 blocksize;
6702ed49
CM
33 int ret;
34
db94535d 35 blocksize = btrfs_level_size(root, btrfs_header_level(node) - 1);
5f39d397 36 nritems = btrfs_header_nritems(node);
6702ed49 37 for (i = 0; i < nritems; i++) {
db94535d 38 bytenr = btrfs_node_blockptr(node, i);
ca7a79ad
CM
39 gen = btrfs_node_ptr_generation(node, i);
40 ret = readahead_tree_block(root, bytenr, blocksize, gen);
6702ed49
CM
41 if (ret)
42 break;
43 }
44}
45
46static int defrag_walk_down(struct btrfs_trans_handle *trans,
47 struct btrfs_root *root,
48 struct btrfs_path *path, int *level,
e9d0b13b 49 int cache_only, u64 *last_ret)
6702ed49 50{
5f39d397
CM
51 struct extent_buffer *next;
52 struct extent_buffer *cur;
db94535d 53 u64 bytenr;
1259ab75 54 u64 ptr_gen;
6702ed49 55 int ret = 0;
e9d0b13b 56 int is_extent = 0;
6702ed49
CM
57
58 WARN_ON(*level < 0);
59 WARN_ON(*level >= BTRFS_MAX_LEVEL);
60
e9d0b13b
CM
61 if (root->fs_info->extent_root == root)
62 is_extent = 1;
63
f84a8b36
CM
64 if (*level == 1 && cache_only && path->nodes[1] &&
65 !btrfs_buffer_defrag(path->nodes[1])) {
66 goto out;
67 }
6702ed49
CM
68 while(*level > 0) {
69 WARN_ON(*level < 0);
70 WARN_ON(*level >= BTRFS_MAX_LEVEL);
71 cur = path->nodes[*level];
72
73 if (!cache_only && *level > 1 && path->slots[*level] == 0)
5f39d397 74 reada_defrag(root, cur);
6702ed49 75
5f39d397 76 if (btrfs_header_level(cur) != *level)
6702ed49
CM
77 WARN_ON(1);
78
79 if (path->slots[*level] >=
5f39d397 80 btrfs_header_nritems(cur))
6702ed49
CM
81 break;
82
83 if (*level == 1) {
7bb86316
CM
84 WARN_ON(btrfs_header_generation(path->nodes[*level]) !=
85 trans->transid);
6702ed49
CM
86 ret = btrfs_realloc_node(trans, root,
87 path->nodes[*level],
a6b6e75e
CM
88 path->slots[*level],
89 cache_only, last_ret,
90 &root->defrag_progress);
e9d0b13b
CM
91 if (is_extent)
92 btrfs_extent_post_op(trans, root);
93
6702ed49
CM
94 break;
95 }
db94535d 96 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1259ab75 97 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
6702ed49
CM
98
99 if (cache_only) {
db94535d
CM
100 next = btrfs_find_tree_block(root, bytenr,
101 btrfs_level_size(root, *level - 1));
1259ab75 102 if (!next || !btrfs_buffer_uptodate(next, ptr_gen) ||
cf786e79 103 !btrfs_buffer_defrag(next)) {
5f39d397 104 free_extent_buffer(next);
6702ed49
CM
105 path->slots[*level]++;
106 continue;
107 }
108 } else {
db94535d 109 next = read_tree_block(root, bytenr,
ca7a79ad 110 btrfs_level_size(root, *level - 1),
1259ab75 111 ptr_gen);
6702ed49
CM
112 }
113 ret = btrfs_cow_block(trans, root, next, path->nodes[*level],
114 path->slots[*level], &next);
115 BUG_ON(ret);
e9d0b13b
CM
116 if (is_extent)
117 btrfs_extent_post_op(trans, root);
118
6702ed49
CM
119 WARN_ON(*level <= 0);
120 if (path->nodes[*level-1])
5f39d397 121 free_extent_buffer(path->nodes[*level-1]);
6702ed49 122 path->nodes[*level-1] = next;
5f39d397 123 *level = btrfs_header_level(next);
6702ed49
CM
124 path->slots[*level] = 0;
125 }
126 WARN_ON(*level < 0);
127 WARN_ON(*level >= BTRFS_MAX_LEVEL);
6b80053d
CM
128
129 btrfs_clear_buffer_defrag(path->nodes[*level]);
f84a8b36 130out:
5f39d397 131 free_extent_buffer(path->nodes[*level]);
6702ed49
CM
132 path->nodes[*level] = NULL;
133 *level += 1;
a6b6e75e
CM
134 WARN_ON(ret && ret != -EAGAIN);
135 return ret;
6702ed49
CM
136}
137
138static int defrag_walk_up(struct btrfs_trans_handle *trans,
139 struct btrfs_root *root,
140 struct btrfs_path *path, int *level,
141 int cache_only)
142{
143 int i;
144 int slot;
5f39d397 145 struct extent_buffer *node;
6702ed49
CM
146
147 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
148 slot = path->slots[i];
5f39d397 149 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
6702ed49
CM
150 path->slots[i]++;
151 *level = i;
5f39d397 152 node = path->nodes[i];
6702ed49 153 WARN_ON(i == 0);
5f39d397
CM
154 btrfs_node_key_to_cpu(node, &root->defrag_progress,
155 path->slots[i]);
6702ed49
CM
156 root->defrag_level = i;
157 return 0;
158 } else {
6b80053d 159 btrfs_clear_buffer_defrag(path->nodes[*level]);
5f39d397 160 free_extent_buffer(path->nodes[*level]);
6702ed49
CM
161 path->nodes[*level] = NULL;
162 *level = i + 1;
163 }
164 }
165 return 1;
166}
167
168int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
169 struct btrfs_root *root, int cache_only)
170{
171 struct btrfs_path *path = NULL;
5f39d397 172 struct extent_buffer *tmp;
6702ed49
CM
173 int ret = 0;
174 int wret;
175 int level;
176 int orig_level;
177 int i;
e9d0b13b
CM
178 int is_extent = 0;
179 u64 last_ret = 0;
180
181 if (root->fs_info->extent_root == root)
182 is_extent = 1;
6702ed49 183
e9d0b13b 184 if (root->ref_cows == 0 && !is_extent)
6702ed49 185 goto out;
5f39d397 186
9afbb0b7
CM
187 if (btrfs_test_opt(root, SSD))
188 goto out;
189
6702ed49
CM
190 path = btrfs_alloc_path();
191 if (!path)
192 return -ENOMEM;
193
5f39d397 194 level = btrfs_header_level(root->node);
6702ed49 195 orig_level = level;
0f1ebbd1 196
6702ed49
CM
197 if (level == 0) {
198 goto out;
199 }
200 if (root->defrag_progress.objectid == 0) {
0ef3e66b
CM
201 u32 nritems;
202
203 nritems = btrfs_header_nritems(root->node);
204 root->defrag_max.objectid = 0;
205 /* from above we know this is not a leaf */
206 btrfs_node_key_to_cpu(root->node, &root->defrag_max,
207 nritems - 1);
5f39d397 208 extent_buffer_get(root->node);
6702ed49
CM
209 ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp);
210 BUG_ON(ret);
6702ed49
CM
211 path->nodes[level] = root->node;
212 path->slots[level] = 0;
e9d0b13b
CM
213 if (is_extent)
214 btrfs_extent_post_op(trans, root);
6702ed49
CM
215 } else {
216 level = root->defrag_level;
217 path->lowest_level = level;
218 wret = btrfs_search_slot(trans, root, &root->defrag_progress,
219 path, 0, 1);
220
e9d0b13b
CM
221 if (is_extent)
222 btrfs_extent_post_op(trans, root);
5f39d397 223
6702ed49
CM
224 if (wret < 0) {
225 ret = wret;
226 goto out;
227 }
5f39d397 228
6702ed49
CM
229 while(level > 0 && !path->nodes[level])
230 level--;
5f39d397 231
6702ed49
CM
232 if (!path->nodes[level]) {
233 ret = 0;
234 goto out;
235 }
236 }
237
238 while(1) {
e9d0b13b
CM
239 wret = defrag_walk_down(trans, root, path, &level, cache_only,
240 &last_ret);
6702ed49
CM
241 if (wret > 0)
242 break;
243 if (wret < 0)
244 ret = wret;
245
246 wret = defrag_walk_up(trans, root, path, &level, cache_only);
247 if (wret > 0)
248 break;
249 if (wret < 0)
250 ret = wret;
081e9573
CM
251 else
252 ret = -EAGAIN;
409eb95d 253 break;
6702ed49
CM
254 }
255 for (i = 0; i <= orig_level; i++) {
256 if (path->nodes[i]) {
5f39d397 257 free_extent_buffer(path->nodes[i]);
0f82731f 258 path->nodes[i] = NULL;
6702ed49
CM
259 }
260 }
261out:
262 if (path)
263 btrfs_free_path(path);
0ef3e66b
CM
264 if (ret == -EAGAIN) {
265 if (root->defrag_max.objectid > root->defrag_progress.objectid)
266 goto done;
267 if (root->defrag_max.type > root->defrag_progress.type)
268 goto done;
269 if (root->defrag_max.offset > root->defrag_progress.offset)
270 goto done;
271 ret = 0;
272 }
273done:
6702ed49
CM
274 if (ret != -EAGAIN) {
275 memset(&root->defrag_progress, 0,
276 sizeof(root->defrag_progress));
277 }
278 return ret;
279}