Btrfs: early metadata/data split
[linux-2.6-block.git] / fs / btrfs / extent-tree.c
CommitLineData
2e635a27 1#include <linux/module.h>
fec577fb
CM
2#include "ctree.h"
3#include "disk-io.h"
4#include "print-tree.h"
e089f05c 5#include "transaction.h"
fec577fb 6
e089f05c
CM
7static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
8 *orig_root, u64 num_blocks, u64 search_start, u64
be08c1b9 9 search_end, struct btrfs_key *ins, int data);
e089f05c
CM
10static int finish_current_insert(struct btrfs_trans_handle *trans, struct
11 btrfs_root *extent_root);
e20d96d6
CM
12static int del_pending_extents(struct btrfs_trans_handle *trans, struct
13 btrfs_root *extent_root);
fec577fb 14
31f3c99b
CM
15struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
16 struct btrfs_block_group_cache
17 *hint, int data)
cd1bc465
CM
18{
19 struct btrfs_block_group_cache *cache[8];
31f3c99b 20 struct btrfs_block_group_cache *found_group = NULL;
cd1bc465
CM
21 struct btrfs_fs_info *info = root->fs_info;
22 u64 used;
31f3c99b
CM
23 u64 last = 0;
24 u64 hint_last;
cd1bc465
CM
25 int i;
26 int ret;
31f3c99b 27 int full_search = 0;
be08c1b9 28 if (!data && hint) {
31f3c99b
CM
29 used = btrfs_block_group_used(&hint->item);
30 if (used < (hint->key.offset * 2) / 3) {
31 return hint;
32 }
33 radix_tree_tag_clear(&info->block_group_radix,
34 hint->key.objectid + hint->key.offset - 1,
35 BTRFS_BLOCK_GROUP_AVAIL);
36 last = hint->key.objectid + hint->key.offset;
37 hint_last = last;
38 } else {
39 hint_last = 0;
40 last = 0;
41 }
cd1bc465
CM
42 while(1) {
43 ret = radix_tree_gang_lookup_tag(&info->block_group_radix,
44 (void **)cache,
45 last, ARRAY_SIZE(cache),
31f3c99b 46 BTRFS_BLOCK_GROUP_AVAIL);
cd1bc465
CM
47 if (!ret)
48 break;
49 for (i = 0; i < ret; i++) {
be08c1b9
CM
50 last = cache[i]->key.objectid +
51 cache[i]->key.offset;
52 if (!full_search && !data &&
53 (cache[i]->key.objectid & cache[i]->key.offset))
54 continue;
55 if (!full_search && data &&
56 (cache[i]->key.objectid & cache[i]->key.offset) == 0)
57 continue;
cd1bc465 58 used = btrfs_block_group_used(&cache[i]->item);
31f3c99b 59 if (used < (cache[i]->key.offset * 2) / 3) {
cd1bc465 60 info->block_group_cache = cache[i];
31f3c99b
CM
61 found_group = cache[i];
62 goto found;
cd1bc465 63 }
31f3c99b
CM
64 radix_tree_tag_clear(&info->block_group_radix,
65 cache[i]->key.objectid +
66 cache[i]->key.offset - 1,
67 BTRFS_BLOCK_GROUP_AVAIL);
cd1bc465
CM
68 }
69 }
31f3c99b
CM
70 last = hint_last;
71again:
cd1bc465
CM
72 while(1) {
73 ret = radix_tree_gang_lookup(&info->block_group_radix,
74 (void **)cache,
75 last, ARRAY_SIZE(cache));
76 if (!ret)
77 break;
78 for (i = 0; i < ret; i++) {
be08c1b9
CM
79 last = cache[i]->key.objectid +
80 cache[i]->key.offset;
81 if (!full_search && !data &&
82 (cache[i]->key.objectid & cache[i]->key.offset))
83 continue;
84 if (!full_search && data &&
85 (cache[i]->key.objectid & cache[i]->key.offset) == 0)
86 continue;
cd1bc465 87 used = btrfs_block_group_used(&cache[i]->item);
31f3c99b 88 if (used < cache[i]->key.offset) {
cd1bc465 89 info->block_group_cache = cache[i];
31f3c99b
CM
90 found_group = cache[i];
91 goto found;
cd1bc465 92 }
31f3c99b
CM
93 radix_tree_tag_clear(&info->block_group_radix,
94 cache[i]->key.objectid +
95 cache[i]->key.offset - 1,
96 BTRFS_BLOCK_GROUP_AVAIL);
cd1bc465
CM
97 }
98 }
99 info->block_group_cache = NULL;
31f3c99b
CM
100 if (!full_search) {
101 last = 0;
102 full_search = 1;
103 goto again;
104 }
105found:
106 if (!found_group) {
107 ret = radix_tree_gang_lookup(&info->block_group_radix,
108 (void **)&found_group, 0, 1);
109 BUG_ON(ret != 1);
110 }
111 return found_group;
cd1bc465
CM
112}
113
b18c6685
CM
114int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
115 struct btrfs_root *root,
116 u64 blocknr, u64 num_blocks)
02217ed2 117{
5caf2a00 118 struct btrfs_path *path;
02217ed2 119 int ret;
e2fa7227 120 struct btrfs_key key;
234b63a0
CM
121 struct btrfs_leaf *l;
122 struct btrfs_extent_item *item;
e2fa7227 123 struct btrfs_key ins;
cf27e1ee 124 u32 refs;
037e6390 125
9f5fae2f 126 find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1,
be08c1b9 127 &ins, 0);
5caf2a00
CM
128 path = btrfs_alloc_path();
129 BUG_ON(!path);
130 btrfs_init_path(path);
02217ed2
CM
131 key.objectid = blocknr;
132 key.flags = 0;
62e2749e 133 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
6407bf6d 134 key.offset = num_blocks;
5caf2a00 135 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
9f5fae2f 136 0, 1);
a429e513
CM
137 if (ret != 0) {
138printk("can't find block %Lu %Lu\n", blocknr, num_blocks);
a28ec197 139 BUG();
a429e513 140 }
02217ed2 141 BUG_ON(ret != 0);
5caf2a00
CM
142 l = btrfs_buffer_leaf(path->nodes[0]);
143 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
cf27e1ee
CM
144 refs = btrfs_extent_refs(item);
145 btrfs_set_extent_refs(item, refs + 1);
5caf2a00 146 btrfs_mark_buffer_dirty(path->nodes[0]);
a28ec197 147
5caf2a00
CM
148 btrfs_release_path(root->fs_info->extent_root, path);
149 btrfs_free_path(path);
9f5fae2f 150 finish_current_insert(trans, root->fs_info->extent_root);
e20d96d6 151 del_pending_extents(trans, root->fs_info->extent_root);
02217ed2
CM
152 return 0;
153}
154
b18c6685
CM
155static int lookup_extent_ref(struct btrfs_trans_handle *trans,
156 struct btrfs_root *root, u64 blocknr,
157 u64 num_blocks, u32 *refs)
a28ec197 158{
5caf2a00 159 struct btrfs_path *path;
a28ec197 160 int ret;
e2fa7227 161 struct btrfs_key key;
234b63a0
CM
162 struct btrfs_leaf *l;
163 struct btrfs_extent_item *item;
5caf2a00
CM
164
165 path = btrfs_alloc_path();
166 btrfs_init_path(path);
a28ec197 167 key.objectid = blocknr;
6407bf6d 168 key.offset = num_blocks;
62e2749e
CM
169 key.flags = 0;
170 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
5caf2a00 171 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
9f5fae2f 172 0, 0);
a28ec197
CM
173 if (ret != 0)
174 BUG();
5caf2a00
CM
175 l = btrfs_buffer_leaf(path->nodes[0]);
176 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
cf27e1ee 177 *refs = btrfs_extent_refs(item);
5caf2a00
CM
178 btrfs_release_path(root->fs_info->extent_root, path);
179 btrfs_free_path(path);
a28ec197
CM
180 return 0;
181}
182
c5739bba
CM
183int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
184 struct btrfs_root *root)
185{
b18c6685 186 return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1);
c5739bba
CM
187}
188
e089f05c 189int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
e20d96d6 190 struct buffer_head *buf)
02217ed2
CM
191{
192 u64 blocknr;
e20d96d6 193 struct btrfs_node *buf_node;
6407bf6d
CM
194 struct btrfs_leaf *buf_leaf;
195 struct btrfs_disk_key *key;
196 struct btrfs_file_extent_item *fi;
02217ed2 197 int i;
6407bf6d
CM
198 int leaf;
199 int ret;
a28ec197 200
3768f368 201 if (!root->ref_cows)
a28ec197 202 return 0;
e20d96d6 203 buf_node = btrfs_buffer_node(buf);
6407bf6d
CM
204 leaf = btrfs_is_leaf(buf_node);
205 buf_leaf = btrfs_buffer_leaf(buf);
e20d96d6 206 for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
6407bf6d
CM
207 if (leaf) {
208 key = &buf_leaf->items[i].key;
209 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
210 continue;
211 fi = btrfs_item_ptr(buf_leaf, i,
212 struct btrfs_file_extent_item);
236454df
CM
213 if (btrfs_file_extent_type(fi) ==
214 BTRFS_FILE_EXTENT_INLINE)
215 continue;
b18c6685 216 ret = btrfs_inc_extent_ref(trans, root,
6407bf6d
CM
217 btrfs_file_extent_disk_blocknr(fi),
218 btrfs_file_extent_disk_num_blocks(fi));
219 BUG_ON(ret);
220 } else {
221 blocknr = btrfs_node_blockptr(buf_node, i);
b18c6685 222 ret = btrfs_inc_extent_ref(trans, root, blocknr, 1);
6407bf6d
CM
223 BUG_ON(ret);
224 }
02217ed2
CM
225 }
226 return 0;
227}
228
9078a3e1
CM
229static int write_one_cache_group(struct btrfs_trans_handle *trans,
230 struct btrfs_root *root,
231 struct btrfs_path *path,
232 struct btrfs_block_group_cache *cache)
233{
234 int ret;
235 int pending_ret;
236 struct btrfs_root *extent_root = root->fs_info->extent_root;
237 struct btrfs_block_group_item *bi;
238 struct btrfs_key ins;
239
be08c1b9 240 find_free_extent(trans, extent_root, 0, 0, (u64)-1, &ins, 0);
9078a3e1
CM
241 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
242 BUG_ON(ret);
243 bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
244 struct btrfs_block_group_item);
245 memcpy(bi, &cache->item, sizeof(*bi));
246 mark_buffer_dirty(path->nodes[0]);
247 btrfs_release_path(extent_root, path);
248
249 finish_current_insert(trans, extent_root);
250 pending_ret = del_pending_extents(trans, extent_root);
251 if (ret)
252 return ret;
253 if (pending_ret)
254 return pending_ret;
255 return 0;
256
257}
258
259int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
260 struct btrfs_root *root)
261{
262 struct btrfs_block_group_cache *cache[8];
263 int ret;
264 int err = 0;
265 int werr = 0;
266 struct radix_tree_root *radix = &root->fs_info->block_group_radix;
267 int i;
268 struct btrfs_path *path;
269
270 path = btrfs_alloc_path();
271 if (!path)
272 return -ENOMEM;
273
274 while(1) {
275 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
276 0, ARRAY_SIZE(cache),
277 BTRFS_BLOCK_GROUP_DIRTY);
278 if (!ret)
279 break;
280 for (i = 0; i < ret; i++) {
281 radix_tree_tag_clear(radix, cache[i]->key.objectid +
282 cache[i]->key.offset - 1,
283 BTRFS_BLOCK_GROUP_DIRTY);
284 err = write_one_cache_group(trans, root,
285 path, cache[i]);
286 if (err)
287 werr = err;
31f3c99b 288 cache[i]->last_alloc = cache[i]->first_free;
9078a3e1
CM
289 }
290 }
291 btrfs_free_path(path);
292 return werr;
293}
294
295static int update_block_group(struct btrfs_trans_handle *trans,
296 struct btrfs_root *root,
297 u64 blocknr, u64 num, int alloc)
298{
299 struct btrfs_block_group_cache *cache;
300 struct btrfs_fs_info *info = root->fs_info;
301 u64 total = num;
302 u64 old_val;
303 u64 block_in_group;
304 int ret;
305 while(total) {
306 ret = radix_tree_gang_lookup(&info->block_group_radix,
307 (void **)&cache, blocknr, 1);
cd1bc465
CM
308 if (!ret) {
309 printk(KERN_CRIT "blocknr %Lu lookup failed\n",
310 blocknr);
9078a3e1 311 return -1;
cd1bc465 312 }
9078a3e1
CM
313 block_in_group = blocknr - cache->key.objectid;
314 WARN_ON(block_in_group > cache->key.offset);
315 radix_tree_tag_set(&info->block_group_radix,
316 cache->key.objectid + cache->key.offset - 1,
317 BTRFS_BLOCK_GROUP_DIRTY);
318
319 old_val = btrfs_block_group_used(&cache->item);
320 num = min(total, cache->key.offset - block_in_group);
321 total -= num;
322 blocknr += num;
cd1bc465 323 if (alloc) {
9078a3e1 324 old_val += num;
cd1bc465
CM
325 if (blocknr > cache->last_alloc)
326 cache->last_alloc = blocknr;
327 } else {
9078a3e1 328 old_val -= num;
cd1bc465
CM
329 if (blocknr < cache->first_free)
330 cache->first_free = blocknr;
331 }
9078a3e1
CM
332 btrfs_set_block_group_used(&cache->item, old_val);
333 }
334 return 0;
335}
336
be08c1b9
CM
/*
 * Ask the page cache to invalidate the single page at 'index' in 'mapping'.
 * Returns the result of invalidate_mapping_pages() converted to int.
 */
static int try_remove_page(struct address_space *mapping, unsigned long index)
{
	return invalidate_mapping_pages(mapping, index, index);
}
343
e089f05c
CM
344int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
345 btrfs_root *root)
a28ec197 346{
8ef97622 347 unsigned long gang[8];
be08c1b9 348 struct inode *btree_inode = root->fs_info->btree_inode;
88fd146c 349 u64 first = 0;
a28ec197
CM
350 int ret;
351 int i;
8ef97622 352 struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
a28ec197
CM
353
354 while(1) {
8ef97622
CM
355 ret = find_first_radix_bit(pinned_radix, gang,
356 ARRAY_SIZE(gang));
a28ec197
CM
357 if (!ret)
358 break;
88fd146c 359 if (!first)
8ef97622 360 first = gang[0];
0579da42 361 for (i = 0; i < ret; i++) {
8ef97622 362 clear_radix_bit(pinned_radix, gang[i]);
be08c1b9
CM
363 try_remove_page(btree_inode->i_mapping,
364 gang[i] << (PAGE_CACHE_SHIFT -
365 btree_inode->i_blkbits));
0579da42 366 }
a28ec197
CM
367 }
368 return 0;
369}
370
e089f05c
CM
371static int finish_current_insert(struct btrfs_trans_handle *trans, struct
372 btrfs_root *extent_root)
037e6390 373{
e2fa7227 374 struct btrfs_key ins;
234b63a0 375 struct btrfs_extent_item extent_item;
037e6390
CM
376 int i;
377 int ret;
1261ec42
CM
378 u64 super_blocks_used;
379 struct btrfs_fs_info *info = extent_root->fs_info;
037e6390 380
cf27e1ee 381 btrfs_set_extent_refs(&extent_item, 1);
037e6390
CM
382 ins.offset = 1;
383 ins.flags = 0;
62e2749e 384 btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
5d0c3e60 385 btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid);
037e6390 386
f2458e1d
CM
387 for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) {
388 ins.objectid = extent_root->fs_info->extent_tree_insert[i];
1261ec42
CM
389 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
390 btrfs_set_super_blocks_used(info->disk_super,
391 super_blocks_used + 1);
e089f05c
CM
392 ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
393 sizeof(extent_item));
037e6390
CM
394 BUG_ON(ret);
395 }
f2458e1d
CM
396 extent_root->fs_info->extent_tree_insert_nr = 0;
397 extent_root->fs_info->extent_tree_prealloc_nr = 0;
037e6390
CM
398 return 0;
399}
400
8ef97622 401static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
e20d96d6
CM
402{
403 int err;
78fae27e 404 struct btrfs_header *header;
8ef97622
CM
405 struct buffer_head *bh;
406
f4b9aa8d 407 if (!pending) {
d98237b3 408 bh = btrfs_find_tree_block(root, blocknr);
2c90e5d6
CM
409 if (bh) {
410 if (buffer_uptodate(bh)) {
411 u64 transid =
412 root->fs_info->running_transaction->transid;
413 header = btrfs_buffer_header(bh);
414 if (btrfs_header_generation(header) ==
415 transid) {
416 btrfs_block_release(root, bh);
417 return 0;
418 }
f4b9aa8d 419 }
d6025579 420 btrfs_block_release(root, bh);
8ef97622 421 }
8ef97622 422 err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
f4b9aa8d
CM
423 } else {
424 err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
425 }
8ef97622 426 BUG_ON(err);
e20d96d6
CM
427 return 0;
428}
429
fec577fb 430/*
a28ec197 431 * remove an extent from the root, returns 0 on success
fec577fb 432 */
e089f05c 433static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
78fae27e 434 *root, u64 blocknr, u64 num_blocks, int pin)
a28ec197 435{
5caf2a00 436 struct btrfs_path *path;
e2fa7227 437 struct btrfs_key key;
1261ec42
CM
438 struct btrfs_fs_info *info = root->fs_info;
439 struct btrfs_root *extent_root = info->extent_root;
a28ec197 440 int ret;
234b63a0 441 struct btrfs_extent_item *ei;
e2fa7227 442 struct btrfs_key ins;
cf27e1ee 443 u32 refs;
037e6390 444
a28ec197
CM
445 key.objectid = blocknr;
446 key.flags = 0;
62e2749e 447 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
a28ec197
CM
448 key.offset = num_blocks;
449
be08c1b9 450 find_free_extent(trans, root, 0, 0, (u64)-1, &ins, 0);
5caf2a00
CM
451 path = btrfs_alloc_path();
452 BUG_ON(!path);
453 btrfs_init_path(path);
5f26f772 454
5caf2a00 455 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
a28ec197 456 if (ret) {
2e635a27 457 printk("failed to find %Lu\n", key.objectid);
234b63a0 458 btrfs_print_tree(extent_root, extent_root->node);
2e635a27 459 printk("failed to find %Lu\n", key.objectid);
a28ec197
CM
460 BUG();
461 }
5caf2a00 462 ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
123abc88 463 struct btrfs_extent_item);
a28ec197 464 BUG_ON(ei->refs == 0);
cf27e1ee
CM
465 refs = btrfs_extent_refs(ei) - 1;
466 btrfs_set_extent_refs(ei, refs);
5caf2a00 467 btrfs_mark_buffer_dirty(path->nodes[0]);
cf27e1ee 468 if (refs == 0) {
1261ec42 469 u64 super_blocks_used;
78fae27e
CM
470
471 if (pin) {
8ef97622 472 ret = pin_down_block(root, blocknr, 0);
78fae27e
CM
473 BUG_ON(ret);
474 }
475
1261ec42
CM
476 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
477 btrfs_set_super_blocks_used(info->disk_super,
478 super_blocks_used - num_blocks);
5caf2a00 479 ret = btrfs_del_item(trans, extent_root, path);
a28ec197
CM
480 if (ret)
481 BUG();
9078a3e1
CM
482 ret = update_block_group(trans, root, blocknr, num_blocks, 0);
483 BUG_ON(ret);
a28ec197 484 }
5caf2a00
CM
485 btrfs_release_path(extent_root, path);
486 btrfs_free_path(path);
e089f05c 487 finish_current_insert(trans, extent_root);
a28ec197
CM
488 return ret;
489}
490
a28ec197
CM
491/*
492 * find all the blocks marked as pending in the radix tree and remove
493 * them from the extent map
494 */
e089f05c
CM
495static int del_pending_extents(struct btrfs_trans_handle *trans, struct
496 btrfs_root *extent_root)
a28ec197
CM
497{
498 int ret;
e20d96d6
CM
499 int wret;
500 int err = 0;
8ef97622 501 unsigned long gang[4];
a28ec197 502 int i;
8ef97622
CM
503 struct radix_tree_root *pending_radix;
504 struct radix_tree_root *pinned_radix;
505
506 pending_radix = &extent_root->fs_info->pending_del_radix;
507 pinned_radix = &extent_root->fs_info->pinned_radix;
a28ec197
CM
508
509 while(1) {
8ef97622
CM
510 ret = find_first_radix_bit(pending_radix, gang,
511 ARRAY_SIZE(gang));
a28ec197
CM
512 if (!ret)
513 break;
514 for (i = 0; i < ret; i++) {
8ef97622
CM
515 wret = set_radix_bit(pinned_radix, gang[i]);
516 BUG_ON(wret);
517 wret = clear_radix_bit(pending_radix, gang[i]);
518 BUG_ON(wret);
d5719762 519 wret = __free_extent(trans, extent_root,
8ef97622 520 gang[i], 1, 0);
e20d96d6
CM
521 if (wret)
522 err = wret;
fec577fb
CM
523 }
524 }
e20d96d6 525 return err;
fec577fb
CM
526}
527
528/*
529 * remove an extent from the root, returns 0 on success
530 */
e089f05c
CM
531int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
532 *root, u64 blocknr, u64 num_blocks, int pin)
fec577fb 533{
9f5fae2f 534 struct btrfs_root *extent_root = root->fs_info->extent_root;
fec577fb
CM
535 int pending_ret;
536 int ret;
a28ec197 537
fec577fb 538 if (root == extent_root) {
8ef97622 539 pin_down_block(root, blocknr, 1);
fec577fb
CM
540 return 0;
541 }
78fae27e 542 ret = __free_extent(trans, root, blocknr, num_blocks, pin);
e20d96d6 543 pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
fec577fb
CM
544 return ret ? ret : pending_ret;
545}
546
547/*
548 * walks the btree of allocated extents and find a hole of a given size.
549 * The key ins is changed to record the hole:
550 * ins->objectid == block start
62e2749e 551 * ins->flags = BTRFS_EXTENT_ITEM_KEY
fec577fb
CM
552 * ins->offset == number of blocks
553 * Any available blocks before search_start are skipped.
554 */
e089f05c
CM
555static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
556 *orig_root, u64 num_blocks, u64 search_start, u64
be08c1b9 557 search_end, struct btrfs_key *ins, int data)
fec577fb 558{
5caf2a00 559 struct btrfs_path *path;
e2fa7227 560 struct btrfs_key key;
fec577fb
CM
561 int ret;
562 u64 hole_size = 0;
563 int slot = 0;
e20d96d6 564 u64 last_block = 0;
037e6390 565 u64 test_block;
fec577fb 566 int start_found;
234b63a0 567 struct btrfs_leaf *l;
9f5fae2f 568 struct btrfs_root * root = orig_root->fs_info->extent_root;
f2458e1d 569 struct btrfs_fs_info *info = root->fs_info;
0579da42 570 int total_needed = num_blocks;
f2458e1d
CM
571 int total_found = 0;
572 int fill_prealloc = 0;
e20d96d6 573 int level;
be08c1b9 574 struct btrfs_block_group_cache *block_group;
fec577fb 575
b1a4d965
CM
576 path = btrfs_alloc_path();
577 ins->flags = 0;
578 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
579
e20d96d6 580 level = btrfs_header_level(btrfs_buffer_header(root->node));
f2458e1d
CM
581 if (num_blocks == 0) {
582 fill_prealloc = 1;
583 num_blocks = 1;
308535a0 584 total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3;
f2458e1d 585 }
be08c1b9
CM
586 block_group = btrfs_find_block_group(root, trans->block_group, data);
587 if (block_group->last_alloc > search_start)
588 search_start = block_group->last_alloc;
fec577fb 589check_failed:
5caf2a00 590 btrfs_init_path(path);
fec577fb
CM
591 ins->objectid = search_start;
592 ins->offset = 0;
fec577fb 593 start_found = 0;
5caf2a00 594 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
0f70abe2
CM
595 if (ret < 0)
596 goto error;
aa5d6bed 597
5caf2a00
CM
598 if (path->slots[0] > 0)
599 path->slots[0]--;
0579da42 600
fec577fb 601 while (1) {
5caf2a00
CM
602 l = btrfs_buffer_leaf(path->nodes[0]);
603 slot = path->slots[0];
7518a238 604 if (slot >= btrfs_header_nritems(&l->header)) {
f2458e1d
CM
605 if (fill_prealloc) {
606 info->extent_tree_prealloc_nr = 0;
607 total_found = 0;
608 }
5caf2a00 609 ret = btrfs_next_leaf(root, path);
fec577fb
CM
610 if (ret == 0)
611 continue;
0f70abe2
CM
612 if (ret < 0)
613 goto error;
fec577fb
CM
614 if (!start_found) {
615 ins->objectid = search_start;
f2458e1d 616 ins->offset = (u64)-1 - search_start;
fec577fb
CM
617 start_found = 1;
618 goto check_pending;
619 }
620 ins->objectid = last_block > search_start ?
621 last_block : search_start;
f2458e1d 622 ins->offset = (u64)-1 - ins->objectid;
fec577fb
CM
623 goto check_pending;
624 }
e2fa7227 625 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
9078a3e1
CM
626 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
627 goto next;
e2fa7227 628 if (key.objectid >= search_start) {
fec577fb 629 if (start_found) {
0579da42
CM
630 if (last_block < search_start)
631 last_block = search_start;
e2fa7227 632 hole_size = key.objectid - last_block;
28b8bb9e 633 if (hole_size >= num_blocks) {
fec577fb 634 ins->objectid = last_block;
037e6390 635 ins->offset = hole_size;
fec577fb
CM
636 goto check_pending;
637 }
0579da42 638 }
fec577fb 639 }
0579da42 640 start_found = 1;
e2fa7227 641 last_block = key.objectid + key.offset;
9078a3e1 642next:
5caf2a00 643 path->slots[0]++;
fec577fb
CM
644 }
645 // FIXME -ENOSPC
646check_pending:
647 /* we have to make sure we didn't find an extent that has already
648 * been allocated by the map tree or the original allocation
649 */
5caf2a00 650 btrfs_release_path(root, path);
fec577fb 651 BUG_ON(ins->objectid < search_start);
06a2f9fa
CM
652 if (ins->objectid >= btrfs_super_total_blocks(info->disk_super)) {
653 if (search_start == 0)
654 return -ENOSPC;
655 search_start = 0;
656 goto check_failed;
657 }
037e6390 658 for (test_block = ins->objectid;
f2458e1d
CM
659 test_block < ins->objectid + num_blocks; test_block++) {
660 if (test_radix_bit(&info->pinned_radix, test_block)) {
037e6390 661 search_start = test_block + 1;
fec577fb
CM
662 goto check_failed;
663 }
664 }
f2458e1d
CM
665 if (!fill_prealloc && info->extent_tree_insert_nr) {
666 u64 last =
667 info->extent_tree_insert[info->extent_tree_insert_nr - 1];
668 if (ins->objectid + num_blocks >
669 info->extent_tree_insert[0] &&
670 ins->objectid <= last) {
671 search_start = last + 1;
672 WARN_ON(1);
673 goto check_failed;
674 }
675 }
676 if (!fill_prealloc && info->extent_tree_prealloc_nr) {
677 u64 first =
678 info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1];
679 if (ins->objectid + num_blocks > first &&
680 ins->objectid <= info->extent_tree_prealloc[0]) {
681 search_start = info->extent_tree_prealloc[0] + 1;
682 WARN_ON(1);
683 goto check_failed;
684 }
685 }
686 if (fill_prealloc) {
687 int nr;
688 test_block = ins->objectid;
689 while(test_block < ins->objectid + ins->offset &&
690 total_found < total_needed) {
691 nr = total_needed - total_found - 1;
692 BUG_ON(nr < 0);
cd1bc465 693 info->extent_tree_prealloc[nr] = test_block;
f2458e1d
CM
694 total_found++;
695 test_block++;
696 }
697 if (total_found < total_needed) {
698 search_start = test_block;
699 goto check_failed;
700 }
cd1bc465
CM
701 info->extent_tree_prealloc_nr = total_found;
702 }
be08c1b9
CM
703 ret = radix_tree_gang_lookup(&info->block_group_radix,
704 (void **)&block_group,
705 ins->objectid, 1);
706 if (ret) {
707 block_group->last_alloc = ins->objectid;
708 if (!data)
709 trans->block_group = block_group;
f2458e1d 710 }
037e6390 711 ins->offset = num_blocks;
5caf2a00 712 btrfs_free_path(path);
fec577fb 713 return 0;
0f70abe2 714error:
5caf2a00
CM
715 btrfs_release_path(root, path);
716 btrfs_free_path(path);
0f70abe2 717 return ret;
fec577fb 718}
fec577fb
CM
719/*
720 * finds a free extent and does all the dirty work required for allocation
721 * returns the key for the extent through ins, and a tree buffer for
722 * the first block of the extent through buf.
723 *
724 * returns 0 if everything worked, non-zero otherwise.
725 */
4d775673
CM
726int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
727 struct btrfs_root *root, u64 owner,
c62a1920 728 u64 num_blocks, u64 search_start,
be08c1b9 729 u64 search_end, struct btrfs_key *ins, int data)
fec577fb
CM
730{
731 int ret;
732 int pending_ret;
1261ec42
CM
733 u64 super_blocks_used;
734 struct btrfs_fs_info *info = root->fs_info;
735 struct btrfs_root *extent_root = info->extent_root;
234b63a0 736 struct btrfs_extent_item extent_item;
f2458e1d 737 struct btrfs_key prealloc_key;
037e6390 738
cf27e1ee 739 btrfs_set_extent_refs(&extent_item, 1);
4d775673 740 btrfs_set_extent_owner(&extent_item, owner);
fec577fb 741
037e6390 742 if (root == extent_root) {
f2458e1d
CM
743 int nr;
744 BUG_ON(info->extent_tree_prealloc_nr == 0);
037e6390 745 BUG_ON(num_blocks != 1);
037e6390 746 ins->offset = 1;
f2458e1d
CM
747 info->extent_tree_prealloc_nr--;
748 nr = info->extent_tree_prealloc_nr;
749 ins->objectid = info->extent_tree_prealloc[nr];
750 info->extent_tree_insert[info->extent_tree_insert_nr++] =
751 ins->objectid;
9078a3e1
CM
752 ret = update_block_group(trans, root,
753 ins->objectid, ins->offset, 1);
754 BUG_ON(ret);
fec577fb
CM
755 return 0;
756 }
f2458e1d 757 /* do the real allocation */
e089f05c 758 ret = find_free_extent(trans, root, num_blocks, search_start,
be08c1b9 759 search_end, ins, data);
037e6390
CM
760 if (ret)
761 return ret;
fec577fb 762
f2458e1d
CM
763 /* then do prealloc for the extent tree */
764 ret = find_free_extent(trans, root, 0, ins->objectid + ins->offset,
be08c1b9 765 search_end, &prealloc_key, 0);
f2458e1d
CM
766 if (ret)
767 return ret;
768
1261ec42
CM
769 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
770 btrfs_set_super_blocks_used(info->disk_super, super_blocks_used +
771 num_blocks);
e089f05c
CM
772 ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
773 sizeof(extent_item));
037e6390 774
e089f05c 775 finish_current_insert(trans, extent_root);
e20d96d6 776 pending_ret = del_pending_extents(trans, extent_root);
037e6390
CM
777 if (ret)
778 return ret;
779 if (pending_ret)
780 return pending_ret;
9078a3e1 781 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
037e6390 782 return 0;
fec577fb
CM
783}
784
785/*
786 * helper function to allocate a block for a given tree
787 * returns the tree buffer or NULL.
788 */
e20d96d6 789struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
31f3c99b 790 struct btrfs_root *root, u64 hint)
fec577fb 791{
e2fa7227 792 struct btrfs_key ins;
fec577fb 793 int ret;
e20d96d6 794 struct buffer_head *buf;
fec577fb 795
4d775673 796 ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
be08c1b9 797 1, 0, (unsigned long)-1, &ins, 0);
fec577fb
CM
798 if (ret) {
799 BUG();
800 return NULL;
801 }
9078a3e1 802 BUG_ON(ret);
d98237b3 803 buf = btrfs_find_create_tree_block(root, ins.objectid);
df2ce34c 804 set_buffer_uptodate(buf);
090d1875 805 set_buffer_checked(buf);
7c4452b9 806 set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
fec577fb
CM
807 return buf;
808}
a28ec197 809
6407bf6d
CM
810static int drop_leaf_ref(struct btrfs_trans_handle *trans,
811 struct btrfs_root *root, struct buffer_head *cur)
812{
813 struct btrfs_disk_key *key;
814 struct btrfs_leaf *leaf;
815 struct btrfs_file_extent_item *fi;
816 int i;
817 int nritems;
818 int ret;
819
820 BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
821 leaf = btrfs_buffer_leaf(cur);
822 nritems = btrfs_header_nritems(&leaf->header);
823 for (i = 0; i < nritems; i++) {
824 key = &leaf->items[i].key;
825 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
826 continue;
827 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
236454df
CM
828 if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE)
829 continue;
6407bf6d
CM
830 /*
831 * FIXME make sure to insert a trans record that
832 * repeats the snapshot del on crash
833 */
834 ret = btrfs_free_extent(trans, root,
835 btrfs_file_extent_disk_blocknr(fi),
836 btrfs_file_extent_disk_num_blocks(fi),
837 0);
838 BUG_ON(ret);
839 }
840 return 0;
841}
842
9aca1d51
CM
843/*
844 * helper function for drop_snapshot, this walks down the tree dropping ref
845 * counts as it goes.
846 */
e089f05c
CM
847static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
848 *root, struct btrfs_path *path, int *level)
20524f02 849{
e20d96d6
CM
850 struct buffer_head *next;
851 struct buffer_head *cur;
20524f02
CM
852 u64 blocknr;
853 int ret;
854 u32 refs;
855
5caf2a00
CM
856 WARN_ON(*level < 0);
857 WARN_ON(*level >= BTRFS_MAX_LEVEL);
b18c6685 858 ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]),
6407bf6d 859 1, &refs);
20524f02
CM
860 BUG_ON(ret);
861 if (refs > 1)
862 goto out;
9aca1d51
CM
863 /*
864 * walk down to the last node level and free all the leaves
865 */
6407bf6d 866 while(*level >= 0) {
5caf2a00
CM
867 WARN_ON(*level < 0);
868 WARN_ON(*level >= BTRFS_MAX_LEVEL);
20524f02 869 cur = path->nodes[*level];
2c90e5d6
CM
870 if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
871 WARN_ON(1);
7518a238 872 if (path->slots[*level] >=
e20d96d6 873 btrfs_header_nritems(btrfs_buffer_header(cur)))
20524f02 874 break;
6407bf6d
CM
875 if (*level == 0) {
876 ret = drop_leaf_ref(trans, root, cur);
877 BUG_ON(ret);
878 break;
879 }
e20d96d6
CM
880 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
881 path->slots[*level]);
b18c6685 882 ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
6407bf6d
CM
883 BUG_ON(ret);
884 if (refs != 1) {
20524f02 885 path->slots[*level]++;
e089f05c 886 ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
20524f02
CM
887 BUG_ON(ret);
888 continue;
889 }
20524f02 890 next = read_tree_block(root, blocknr);
5caf2a00 891 WARN_ON(*level <= 0);
83e15a28 892 if (path->nodes[*level-1])
234b63a0 893 btrfs_block_release(root, path->nodes[*level-1]);
20524f02 894 path->nodes[*level-1] = next;
e20d96d6 895 *level = btrfs_header_level(btrfs_buffer_header(next));
20524f02
CM
896 path->slots[*level] = 0;
897 }
898out:
5caf2a00
CM
899 WARN_ON(*level < 0);
900 WARN_ON(*level >= BTRFS_MAX_LEVEL);
6407bf6d 901 ret = btrfs_free_extent(trans, root,
7eccb903 902 bh_blocknr(path->nodes[*level]), 1, 1);
234b63a0 903 btrfs_block_release(root, path->nodes[*level]);
20524f02
CM
904 path->nodes[*level] = NULL;
905 *level += 1;
906 BUG_ON(ret);
907 return 0;
908}
909
9aca1d51
CM
910/*
911 * helper for dropping snapshots. This walks back up the tree in the path
912 * to find the first node higher up where we haven't yet gone through
913 * all the slots
914 */
e089f05c
CM
915static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
916 *root, struct btrfs_path *path, int *level)
20524f02
CM
917{
918 int i;
919 int slot;
920 int ret;
234b63a0 921 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
20524f02 922 slot = path->slots[i];
e20d96d6
CM
923 if (slot < btrfs_header_nritems(
924 btrfs_buffer_header(path->nodes[i])) - 1) {
20524f02
CM
925 path->slots[i]++;
926 *level = i;
927 return 0;
928 } else {
e089f05c 929 ret = btrfs_free_extent(trans, root,
7eccb903 930 bh_blocknr(path->nodes[*level]),
e089f05c 931 1, 1);
6407bf6d 932 BUG_ON(ret);
234b63a0 933 btrfs_block_release(root, path->nodes[*level]);
83e15a28 934 path->nodes[*level] = NULL;
20524f02 935 *level = i + 1;
20524f02
CM
936 }
937 }
938 return 1;
939}
940
9aca1d51
CM
941/*
942 * drop the reference count on the tree rooted at 'snap'. This traverses
943 * the tree freeing any blocks that have a ref count of zero after being
944 * decremented.
945 */
e089f05c 946int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
e20d96d6 947 *root, struct buffer_head *snap)
20524f02 948{
3768f368 949 int ret = 0;
9aca1d51 950 int wret;
20524f02 951 int level;
5caf2a00 952 struct btrfs_path *path;
20524f02
CM
953 int i;
954 int orig_level;
955
5caf2a00
CM
956 path = btrfs_alloc_path();
957 BUG_ON(!path);
958 btrfs_init_path(path);
20524f02 959
e20d96d6 960 level = btrfs_header_level(btrfs_buffer_header(snap));
20524f02 961 orig_level = level;
5caf2a00
CM
962 path->nodes[level] = snap;
963 path->slots[level] = 0;
20524f02 964 while(1) {
5caf2a00 965 wret = walk_down_tree(trans, root, path, &level);
9aca1d51 966 if (wret > 0)
20524f02 967 break;
9aca1d51
CM
968 if (wret < 0)
969 ret = wret;
970
5caf2a00 971 wret = walk_up_tree(trans, root, path, &level);
9aca1d51 972 if (wret > 0)
20524f02 973 break;
9aca1d51
CM
974 if (wret < 0)
975 ret = wret;
35b7e476 976 btrfs_btree_balance_dirty(root);
20524f02 977 }
83e15a28 978 for (i = 0; i <= orig_level; i++) {
5caf2a00
CM
979 if (path->nodes[i]) {
980 btrfs_block_release(root, path->nodes[i]);
83e15a28 981 }
20524f02 982 }
5caf2a00 983 btrfs_free_path(path);
9aca1d51 984 return ret;
20524f02 985}
9078a3e1
CM
986
987int btrfs_free_block_groups(struct btrfs_fs_info *info)
988{
989 int ret;
990 struct btrfs_block_group_cache *cache[8];
991 int i;
992
993 while(1) {
994 ret = radix_tree_gang_lookup(&info->block_group_radix,
995 (void **)cache, 0,
996 ARRAY_SIZE(cache));
997 if (!ret)
998 break;
999 for (i = 0; i < ret; i++) {
1000 radix_tree_delete(&info->block_group_radix,
1001 cache[i]->key.objectid +
1002 cache[i]->key.offset - 1);
1003 kfree(cache[i]);
1004 }
1005 }
1006 return 0;
1007}
1008
1009int btrfs_read_block_groups(struct btrfs_root *root)
1010{
1011 struct btrfs_path *path;
1012 int ret;
1013 int err = 0;
1014 struct btrfs_block_group_item *bi;
1015 struct btrfs_block_group_cache *cache;
1016 struct btrfs_key key;
1017 struct btrfs_key found_key;
1018 struct btrfs_leaf *leaf;
1019 u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize;
31f3c99b 1020 u64 used;
9078a3e1
CM
1021
1022 root = root->fs_info->extent_root;
1023 key.objectid = 0;
1024 key.offset = group_size_blocks;
1025 key.flags = 0;
1026 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1027
1028 path = btrfs_alloc_path();
1029 if (!path)
1030 return -ENOMEM;
1031
1032 while(1) {
1033 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
1034 &key, path, 0, 0);
1035 if (ret != 0) {
1036 err = ret;
1037 break;
1038 }
1039 leaf = btrfs_buffer_leaf(path->nodes[0]);
1040 btrfs_disk_key_to_cpu(&found_key,
1041 &leaf->items[path->slots[0]].key);
1042 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1043 if (!cache) {
1044 err = -1;
1045 break;
1046 }
1047 bi = btrfs_item_ptr(leaf, path->slots[0],
1048 struct btrfs_block_group_item);
1049 memcpy(&cache->item, bi, sizeof(*bi));
1050 memcpy(&cache->key, &found_key, sizeof(found_key));
31f3c99b
CM
1051 cache->last_alloc = cache->key.objectid;
1052 cache->first_free = cache->key.objectid;
9078a3e1
CM
1053 key.objectid = found_key.objectid + found_key.offset;
1054 btrfs_release_path(root, path);
1055 ret = radix_tree_insert(&root->fs_info->block_group_radix,
1056 found_key.objectid +
1057 found_key.offset - 1,
1058 (void *)cache);
1059 BUG_ON(ret);
31f3c99b
CM
1060 used = btrfs_block_group_used(bi);
1061 if (used < (key.offset * 2) / 3) {
1062 radix_tree_tag_set(&root->fs_info->block_group_radix,
1063 found_key.objectid +
1064 found_key.offset - 1,
1065 BTRFS_BLOCK_GROUP_AVAIL);
1066 }
9078a3e1
CM
1067 if (key.objectid >=
1068 btrfs_super_total_blocks(root->fs_info->disk_super))
1069 break;
1070 }
1071
1072 btrfs_free_path(path);
1073 return 0;
1074}