Btrfs: merge leaves before split
[linux-2.6-block.git] / fs / btrfs / extent-tree.c
CommitLineData
fec577fb
CM
1#include <stdio.h>
2#include <stdlib.h>
3#include "kerncompat.h"
4#include "radix-tree.h"
5#include "ctree.h"
6#include "disk-io.h"
7#include "print-tree.h"
8
234b63a0 9static int find_free_extent(struct btrfs_root *orig_root, u64 num_blocks,
e2fa7227
CM
10 u64 search_start, u64 search_end,
11 struct btrfs_key *ins);
234b63a0
CM
12static int finish_current_insert(struct btrfs_root *extent_root);
13static int run_pending(struct btrfs_root *extent_root);
037e6390 14
fec577fb
CM
15/*
16 * pending extents are blocks that we're trying to allocate in the extent
17 * map while trying to grow the map because of other allocations. To avoid
18 * recursing, they are tagged in the radix tree and cleaned up after
19 * other allocations are done. The pending tag is also used in the same
20 * manner for deletes.
21 */
037e6390 22#define CTREE_EXTENT_PENDING_DEL 0
fec577fb 23
234b63a0 24static int inc_block_ref(struct btrfs_root *root, u64 blocknr)
02217ed2 25{
234b63a0 26 struct btrfs_path path;
02217ed2 27 int ret;
e2fa7227 28 struct btrfs_key key;
234b63a0
CM
29 struct btrfs_leaf *l;
30 struct btrfs_extent_item *item;
e2fa7227 31 struct btrfs_key ins;
cf27e1ee 32 u32 refs;
037e6390
CM
33
34 find_free_extent(root->extent_root, 0, 0, (u64)-1, &ins);
234b63a0 35 btrfs_init_path(&path);
02217ed2
CM
36 key.objectid = blocknr;
37 key.flags = 0;
38 key.offset = 1;
234b63a0 39 ret = btrfs_search_slot(root->extent_root, &key, &path, 0, 1);
a28ec197
CM
40 if (ret != 0)
41 BUG();
02217ed2
CM
42 BUG_ON(ret != 0);
43 l = &path.nodes[0]->leaf;
234b63a0
CM
44 item = (struct btrfs_extent_item *)(l->data +
45 btrfs_item_offset(l->items +
46 path.slots[0]));
cf27e1ee
CM
47 refs = btrfs_extent_refs(item);
48 btrfs_set_extent_refs(item, refs + 1);
a28ec197 49
02217ed2 50 BUG_ON(list_empty(&path.nodes[0]->dirty));
234b63a0 51 btrfs_release_path(root->extent_root, &path);
037e6390
CM
52 finish_current_insert(root->extent_root);
53 run_pending(root->extent_root);
02217ed2
CM
54 return 0;
55}
56
234b63a0 57static int lookup_block_ref(struct btrfs_root *root, u64 blocknr, u32 *refs)
a28ec197 58{
234b63a0 59 struct btrfs_path path;
a28ec197 60 int ret;
e2fa7227 61 struct btrfs_key key;
234b63a0
CM
62 struct btrfs_leaf *l;
63 struct btrfs_extent_item *item;
64 btrfs_init_path(&path);
a28ec197
CM
65 key.objectid = blocknr;
66 key.flags = 0;
67 key.offset = 1;
234b63a0 68 ret = btrfs_search_slot(root->extent_root, &key, &path, 0, 0);
a28ec197
CM
69 if (ret != 0)
70 BUG();
71 l = &path.nodes[0]->leaf;
234b63a0 72 item = (struct btrfs_extent_item *)(l->data +
0783fcfc
CM
73 btrfs_item_offset(l->items +
74 path.slots[0]));
cf27e1ee 75 *refs = btrfs_extent_refs(item);
234b63a0 76 btrfs_release_path(root->extent_root, &path);
a28ec197
CM
77 return 0;
78}
79
234b63a0 80int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf)
02217ed2
CM
81{
82 u64 blocknr;
83 int i;
a28ec197
CM
84
85 if (root == root->extent_root)
86 return 0;
7518a238 87 if (btrfs_is_leaf(&buf->node))
a28ec197
CM
88 return 0;
89
7518a238 90 for (i = 0; i < btrfs_header_nritems(&buf->node.header); i++) {
1d4f8a0c 91 blocknr = btrfs_node_blockptr(&buf->node, i);
02217ed2
CM
92 inc_block_ref(root, blocknr);
93 }
94 return 0;
95}
96
234b63a0 97int btrfs_finish_extent_commit(struct btrfs_root *root)
a28ec197 98{
234b63a0 99 struct btrfs_root *extent_root = root->extent_root;
a28ec197
CM
100 unsigned long gang[8];
101 int ret;
102 int i;
103
104 while(1) {
105 ret = radix_tree_gang_lookup(&extent_root->pinned_radix,
106 (void **)gang, 0,
107 ARRAY_SIZE(gang));
108 if (!ret)
109 break;
0579da42 110 for (i = 0; i < ret; i++) {
a28ec197 111 radix_tree_delete(&extent_root->pinned_radix, gang[i]);
0579da42 112 }
a28ec197 113 }
0579da42
CM
114 extent_root->last_insert.objectid = 0;
115 extent_root->last_insert.offset = 0;
a28ec197
CM
116 return 0;
117}
118
234b63a0 119static int finish_current_insert(struct btrfs_root *extent_root)
037e6390 120{
e2fa7227 121 struct btrfs_key ins;
234b63a0 122 struct btrfs_extent_item extent_item;
037e6390
CM
123 int i;
124 int ret;
125
cf27e1ee
CM
126 btrfs_set_extent_refs(&extent_item, 1);
127 btrfs_set_extent_owner(&extent_item,
128 btrfs_header_parentid(&extent_root->node->node.header));
037e6390
CM
129 ins.offset = 1;
130 ins.flags = 0;
131
132 for (i = 0; i < extent_root->current_insert.flags; i++) {
133 ins.objectid = extent_root->current_insert.objectid + i;
234b63a0 134 ret = btrfs_insert_item(extent_root, &ins, &extent_item,
037e6390
CM
135 sizeof(extent_item));
136 BUG_ON(ret);
137 }
138 extent_root->current_insert.offset = 0;
139 return 0;
140}
141
fec577fb 142/*
a28ec197 143 * remove an extent from the root, returns 0 on success
fec577fb 144 */
234b63a0 145static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks)
a28ec197 146{
234b63a0 147 struct btrfs_path path;
e2fa7227 148 struct btrfs_key key;
234b63a0 149 struct btrfs_root *extent_root = root->extent_root;
a28ec197 150 int ret;
0783fcfc 151 struct btrfs_item *item;
234b63a0 152 struct btrfs_extent_item *ei;
e2fa7227 153 struct btrfs_key ins;
cf27e1ee 154 u32 refs;
037e6390 155
a28ec197
CM
156 key.objectid = blocknr;
157 key.flags = 0;
158 key.offset = num_blocks;
159
037e6390 160 find_free_extent(root, 0, 0, (u64)-1, &ins);
234b63a0
CM
161 btrfs_init_path(&path);
162 ret = btrfs_search_slot(extent_root, &key, &path, -1, 1);
a28ec197
CM
163 if (ret) {
164 printf("failed to find %Lu\n", key.objectid);
234b63a0 165 btrfs_print_tree(extent_root, extent_root->node);
a28ec197
CM
166 printf("failed to find %Lu\n", key.objectid);
167 BUG();
168 }
169 item = path.nodes[0]->leaf.items + path.slots[0];
234b63a0 170 ei = (struct btrfs_extent_item *)(path.nodes[0]->leaf.data +
0783fcfc 171 btrfs_item_offset(item));
a28ec197 172 BUG_ON(ei->refs == 0);
cf27e1ee
CM
173 refs = btrfs_extent_refs(ei) - 1;
174 btrfs_set_extent_refs(ei, refs);
175 if (refs == 0) {
a28ec197
CM
176 if (root == extent_root) {
177 int err;
178 radix_tree_preload(GFP_KERNEL);
179 err = radix_tree_insert(&extent_root->pinned_radix,
180 blocknr, (void *)blocknr);
181 BUG_ON(err);
182 radix_tree_preload_end();
183 }
234b63a0 184 ret = btrfs_del_item(extent_root, &path);
0579da42
CM
185 if (root != extent_root &&
186 extent_root->last_insert.objectid < blocknr)
187 extent_root->last_insert.objectid = blocknr;
a28ec197
CM
188 if (ret)
189 BUG();
190 }
234b63a0 191 btrfs_release_path(extent_root, &path);
037e6390 192 finish_current_insert(extent_root);
a28ec197
CM
193 return ret;
194}
195
a28ec197
CM
196/*
197 * find all the blocks marked as pending in the radix tree and remove
198 * them from the extent map
199 */
234b63a0 200static int del_pending_extents(struct btrfs_root *extent_root)
a28ec197
CM
201{
202 int ret;
234b63a0 203 struct btrfs_buffer *gang[4];
a28ec197
CM
204 int i;
205
206 while(1) {
207 ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix,
208 (void **)gang, 0,
209 ARRAY_SIZE(gang),
210 CTREE_EXTENT_PENDING_DEL);
211 if (!ret)
212 break;
213 for (i = 0; i < ret; i++) {
214 ret = __free_extent(extent_root, gang[i]->blocknr, 1);
fec577fb
CM
215 radix_tree_tag_clear(&extent_root->cache_radix,
216 gang[i]->blocknr,
a28ec197 217 CTREE_EXTENT_PENDING_DEL);
234b63a0 218 btrfs_block_release(extent_root, gang[i]);
fec577fb
CM
219 }
220 }
221 return 0;
222}
223
234b63a0 224static int run_pending(struct btrfs_root *extent_root)
a28ec197
CM
225{
226 while(radix_tree_tagged(&extent_root->cache_radix,
037e6390 227 CTREE_EXTENT_PENDING_DEL))
a28ec197 228 del_pending_extents(extent_root);
a28ec197
CM
229 return 0;
230}
231
232
fec577fb
CM
233/*
234 * remove an extent from the root, returns 0 on success
235 */
234b63a0 236int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks)
fec577fb 237{
e2fa7227 238 struct btrfs_key key;
234b63a0
CM
239 struct btrfs_root *extent_root = root->extent_root;
240 struct btrfs_buffer *t;
fec577fb
CM
241 int pending_ret;
242 int ret;
a28ec197 243
fec577fb 244 if (root == extent_root) {
a28ec197 245 t = find_tree_block(root, blocknr);
037e6390 246 radix_tree_tag_set(&root->cache_radix, blocknr,
a28ec197 247 CTREE_EXTENT_PENDING_DEL);
fec577fb
CM
248 return 0;
249 }
a28ec197
CM
250 key.objectid = blocknr;
251 key.flags = 0;
252 key.offset = num_blocks;
253 ret = __free_extent(root, blocknr, num_blocks);
254 pending_ret = run_pending(root->extent_root);
fec577fb
CM
255 return ret ? ret : pending_ret;
256}
257
258/*
259 * walks the btree of allocated extents and find a hole of a given size.
260 * The key ins is changed to record the hole:
261 * ins->objectid == block start
262 * ins->flags = 0
263 * ins->offset == number of blocks
264 * Any available blocks before search_start are skipped.
265 */
234b63a0 266static int find_free_extent(struct btrfs_root *orig_root, u64 num_blocks,
e2fa7227
CM
267 u64 search_start, u64 search_end,
268 struct btrfs_key *ins)
fec577fb 269{
234b63a0 270 struct btrfs_path path;
e2fa7227 271 struct btrfs_key key;
fec577fb
CM
272 int ret;
273 u64 hole_size = 0;
274 int slot = 0;
275 u64 last_block;
037e6390 276 u64 test_block;
fec577fb 277 int start_found;
234b63a0
CM
278 struct btrfs_leaf *l;
279 struct btrfs_root * root = orig_root->extent_root;
0579da42 280 int total_needed = num_blocks;
fec577fb 281
7518a238 282 total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3;
0579da42
CM
283 if (root->last_insert.objectid > search_start)
284 search_start = root->last_insert.objectid;
fec577fb 285check_failed:
234b63a0 286 btrfs_init_path(&path);
fec577fb
CM
287 ins->objectid = search_start;
288 ins->offset = 0;
289 ins->flags = 0;
290 start_found = 0;
234b63a0 291 ret = btrfs_search_slot(root, ins, &path, 0, 0);
0f70abe2
CM
292 if (ret < 0)
293 goto error;
aa5d6bed 294
0579da42
CM
295 if (path.slots[0] > 0)
296 path.slots[0]--;
297
fec577fb
CM
298 while (1) {
299 l = &path.nodes[0]->leaf;
300 slot = path.slots[0];
7518a238 301 if (slot >= btrfs_header_nritems(&l->header)) {
234b63a0 302 ret = btrfs_next_leaf(root, &path);
fec577fb
CM
303 if (ret == 0)
304 continue;
0f70abe2
CM
305 if (ret < 0)
306 goto error;
fec577fb
CM
307 if (!start_found) {
308 ins->objectid = search_start;
037e6390 309 ins->offset = (u64)-1;
fec577fb
CM
310 start_found = 1;
311 goto check_pending;
312 }
313 ins->objectid = last_block > search_start ?
314 last_block : search_start;
037e6390 315 ins->offset = (u64)-1;
fec577fb
CM
316 goto check_pending;
317 }
e2fa7227
CM
318 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
319 if (key.objectid >= search_start) {
fec577fb 320 if (start_found) {
0579da42
CM
321 if (last_block < search_start)
322 last_block = search_start;
e2fa7227 323 hole_size = key.objectid - last_block;
037e6390 324 if (hole_size > total_needed) {
fec577fb 325 ins->objectid = last_block;
037e6390 326 ins->offset = hole_size;
fec577fb
CM
327 goto check_pending;
328 }
0579da42 329 }
fec577fb 330 }
0579da42 331 start_found = 1;
e2fa7227 332 last_block = key.objectid + key.offset;
fec577fb
CM
333 path.slots[0]++;
334 }
335 // FIXME -ENOSPC
336check_pending:
337 /* we have to make sure we didn't find an extent that has already
338 * been allocated by the map tree or the original allocation
339 */
234b63a0 340 btrfs_release_path(root, &path);
fec577fb 341 BUG_ON(ins->objectid < search_start);
037e6390
CM
342 for (test_block = ins->objectid;
343 test_block < ins->objectid + total_needed; test_block++) {
344 if (radix_tree_lookup(&root->pinned_radix, test_block)) {
345 search_start = test_block + 1;
fec577fb
CM
346 goto check_failed;
347 }
348 }
037e6390 349 BUG_ON(root->current_insert.offset);
0579da42 350 root->current_insert.offset = total_needed - num_blocks;
037e6390
CM
351 root->current_insert.objectid = ins->objectid + num_blocks;
352 root->current_insert.flags = 0;
0579da42 353 root->last_insert.objectid = ins->objectid;
037e6390 354 ins->offset = num_blocks;
fec577fb 355 return 0;
0f70abe2 356error:
234b63a0 357 btrfs_release_path(root, &path);
0f70abe2 358 return ret;
fec577fb
CM
359}
360
fec577fb
CM
361/*
362 * finds a free extent and does all the dirty work required for allocation
363 * returns the key for the extent through ins, and a tree buffer for
364 * the first block of the extent through buf.
365 *
366 * returns 0 if everything worked, non-zero otherwise.
367 */
9aca1d51
CM
368static int alloc_extent(struct btrfs_root *root, u64 num_blocks,
369 u64 search_start, u64 search_end, u64 owner,
370 struct btrfs_key *ins)
fec577fb
CM
371{
372 int ret;
373 int pending_ret;
234b63a0
CM
374 struct btrfs_root *extent_root = root->extent_root;
375 struct btrfs_extent_item extent_item;
037e6390 376
cf27e1ee
CM
377 btrfs_set_extent_refs(&extent_item, 1);
378 btrfs_set_extent_owner(&extent_item, owner);
fec577fb 379
037e6390
CM
380 if (root == extent_root) {
381 BUG_ON(extent_root->current_insert.offset == 0);
382 BUG_ON(num_blocks != 1);
383 BUG_ON(extent_root->current_insert.flags ==
384 extent_root->current_insert.offset);
385 ins->offset = 1;
386 ins->objectid = extent_root->current_insert.objectid +
387 extent_root->current_insert.flags++;
fec577fb
CM
388 return 0;
389 }
037e6390
CM
390 ret = find_free_extent(root, num_blocks, search_start,
391 search_end, ins);
392 if (ret)
393 return ret;
fec577fb 394
234b63a0 395 ret = btrfs_insert_item(extent_root, ins, &extent_item,
037e6390
CM
396 sizeof(extent_item));
397
398 finish_current_insert(extent_root);
399 pending_ret = run_pending(extent_root);
400 if (ret)
401 return ret;
402 if (pending_ret)
403 return pending_ret;
404 return 0;
fec577fb
CM
405}
406
407/*
408 * helper function to allocate a block for a given tree
409 * returns the tree buffer or NULL.
410 */
234b63a0 411struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root)
fec577fb 412{
e2fa7227 413 struct btrfs_key ins;
fec577fb 414 int ret;
234b63a0 415 struct btrfs_buffer *buf;
fec577fb
CM
416
417 ret = alloc_extent(root, 1, 0, (unsigned long)-1,
7518a238 418 btrfs_header_parentid(&root->node->node.header),
037e6390 419 &ins);
fec577fb
CM
420 if (ret) {
421 BUG();
422 return NULL;
423 }
037e6390
CM
424 buf = find_tree_block(root, ins.objectid);
425 dirty_tree_block(root, buf);
fec577fb
CM
426 return buf;
427}
a28ec197 428
9aca1d51
CM
429/*
430 * helper function for drop_snapshot, this walks down the tree dropping ref
431 * counts as it goes.
432 */
433static int walk_down_tree(struct btrfs_root *root,
434 struct btrfs_path *path, int *level)
20524f02 435{
234b63a0
CM
436 struct btrfs_buffer *next;
437 struct btrfs_buffer *cur;
20524f02
CM
438 u64 blocknr;
439 int ret;
440 u32 refs;
441
442 ret = lookup_block_ref(root, path->nodes[*level]->blocknr, &refs);
443 BUG_ON(ret);
444 if (refs > 1)
445 goto out;
9aca1d51
CM
446 /*
447 * walk down to the last node level and free all the leaves
448 */
20524f02
CM
449 while(*level > 0) {
450 cur = path->nodes[*level];
7518a238
CM
451 if (path->slots[*level] >=
452 btrfs_header_nritems(&cur->node.header))
20524f02 453 break;
1d4f8a0c 454 blocknr = btrfs_node_blockptr(&cur->node, path->slots[*level]);
20524f02
CM
455 ret = lookup_block_ref(root, blocknr, &refs);
456 if (refs != 1 || *level == 1) {
457 path->slots[*level]++;
234b63a0 458 ret = btrfs_free_extent(root, blocknr, 1);
20524f02
CM
459 BUG_ON(ret);
460 continue;
461 }
462 BUG_ON(ret);
463 next = read_tree_block(root, blocknr);
83e15a28 464 if (path->nodes[*level-1])
234b63a0 465 btrfs_block_release(root, path->nodes[*level-1]);
20524f02 466 path->nodes[*level-1] = next;
7518a238 467 *level = btrfs_header_level(&next->node.header);
20524f02
CM
468 path->slots[*level] = 0;
469 }
470out:
234b63a0
CM
471 ret = btrfs_free_extent(root, path->nodes[*level]->blocknr, 1);
472 btrfs_block_release(root, path->nodes[*level]);
20524f02
CM
473 path->nodes[*level] = NULL;
474 *level += 1;
475 BUG_ON(ret);
476 return 0;
477}
478
9aca1d51
CM
479/*
480 * helper for dropping snapshots. This walks back up the tree in the path
481 * to find the first node higher up where we haven't yet gone through
482 * all the slots
483 */
484static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
485 int *level)
20524f02
CM
486{
487 int i;
488 int slot;
489 int ret;
234b63a0 490 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
20524f02 491 slot = path->slots[i];
7518a238
CM
492 if (slot <
493 btrfs_header_nritems(&path->nodes[i]->node.header)- 1) {
20524f02
CM
494 path->slots[i]++;
495 *level = i;
496 return 0;
497 } else {
234b63a0 498 ret = btrfs_free_extent(root,
20524f02 499 path->nodes[*level]->blocknr, 1);
234b63a0 500 btrfs_block_release(root, path->nodes[*level]);
83e15a28 501 path->nodes[*level] = NULL;
20524f02
CM
502 *level = i + 1;
503 BUG_ON(ret);
504 }
505 }
506 return 1;
507}
508
9aca1d51
CM
509/*
510 * drop the reference count on the tree rooted at 'snap'. This traverses
511 * the tree freeing any blocks that have a ref count of zero after being
512 * decremented.
513 */
234b63a0 514int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap)
20524f02 515{
9aca1d51
CM
516 int ret = 0;;
517 int wret;
20524f02 518 int level;
234b63a0 519 struct btrfs_path path;
20524f02
CM
520 int i;
521 int orig_level;
522
234b63a0 523 btrfs_init_path(&path);
20524f02 524
7518a238 525 level = btrfs_header_level(&snap->node.header);
20524f02
CM
526 orig_level = level;
527 path.nodes[level] = snap;
528 path.slots[level] = 0;
529 while(1) {
9aca1d51
CM
530 wret = walk_down_tree(root, &path, &level);
531 if (wret > 0)
20524f02 532 break;
9aca1d51
CM
533 if (wret < 0)
534 ret = wret;
535
536 wret = walk_up_tree(root, &path, &level);
537 if (wret > 0)
20524f02 538 break;
9aca1d51
CM
539 if (wret < 0)
540 ret = wret;
20524f02 541 }
83e15a28
CM
542 for (i = 0; i <= orig_level; i++) {
543 if (path.nodes[i]) {
234b63a0 544 btrfs_block_release(root, path.nodes[i]);
83e15a28 545 }
20524f02 546 }
9aca1d51 547 return ret;
20524f02 548}