Btrfs: Use a chunk of the key flags to record the item type.
[linux-2.6-block.git] / fs / btrfs / extent-tree.c
CommitLineData
fec577fb
CM
1#include <stdio.h>
2#include <stdlib.h>
3#include "kerncompat.h"
4#include "radix-tree.h"
5#include "ctree.h"
6#include "disk-io.h"
7#include "print-tree.h"
8
234b63a0 9static int find_free_extent(struct btrfs_root *orig_root, u64 num_blocks,
e2fa7227
CM
10 u64 search_start, u64 search_end,
11 struct btrfs_key *ins);
234b63a0
CM
12static int finish_current_insert(struct btrfs_root *extent_root);
13static int run_pending(struct btrfs_root *extent_root);
037e6390 14
fec577fb
CM
15/*
16 * pending extents are blocks that we're trying to allocate in the extent
17 * map while trying to grow the map because of other allocations. To avoid
18 * recursing, they are tagged in the radix tree and cleaned up after
19 * other allocations are done. The pending tag is also used in the same
20 * manner for deletes.
21 */
037e6390 22#define CTREE_EXTENT_PENDING_DEL 0
fec577fb 23
234b63a0 24static int inc_block_ref(struct btrfs_root *root, u64 blocknr)
02217ed2 25{
234b63a0 26 struct btrfs_path path;
02217ed2 27 int ret;
e2fa7227 28 struct btrfs_key key;
234b63a0
CM
29 struct btrfs_leaf *l;
30 struct btrfs_extent_item *item;
e2fa7227 31 struct btrfs_key ins;
cf27e1ee 32 u32 refs;
037e6390
CM
33
34 find_free_extent(root->extent_root, 0, 0, (u64)-1, &ins);
234b63a0 35 btrfs_init_path(&path);
02217ed2
CM
36 key.objectid = blocknr;
37 key.flags = 0;
62e2749e 38 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
02217ed2 39 key.offset = 1;
234b63a0 40 ret = btrfs_search_slot(root->extent_root, &key, &path, 0, 1);
a28ec197
CM
41 if (ret != 0)
42 BUG();
02217ed2
CM
43 BUG_ON(ret != 0);
44 l = &path.nodes[0]->leaf;
4beb1b8b 45 item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item);
cf27e1ee
CM
46 refs = btrfs_extent_refs(item);
47 btrfs_set_extent_refs(item, refs + 1);
a28ec197 48
02217ed2 49 BUG_ON(list_empty(&path.nodes[0]->dirty));
234b63a0 50 btrfs_release_path(root->extent_root, &path);
037e6390
CM
51 finish_current_insert(root->extent_root);
52 run_pending(root->extent_root);
02217ed2
CM
53 return 0;
54}
55
234b63a0 56static int lookup_block_ref(struct btrfs_root *root, u64 blocknr, u32 *refs)
a28ec197 57{
234b63a0 58 struct btrfs_path path;
a28ec197 59 int ret;
e2fa7227 60 struct btrfs_key key;
234b63a0
CM
61 struct btrfs_leaf *l;
62 struct btrfs_extent_item *item;
63 btrfs_init_path(&path);
a28ec197 64 key.objectid = blocknr;
a28ec197 65 key.offset = 1;
62e2749e
CM
66 key.flags = 0;
67 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
234b63a0 68 ret = btrfs_search_slot(root->extent_root, &key, &path, 0, 0);
a28ec197
CM
69 if (ret != 0)
70 BUG();
71 l = &path.nodes[0]->leaf;
4beb1b8b 72 item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item);
cf27e1ee 73 *refs = btrfs_extent_refs(item);
234b63a0 74 btrfs_release_path(root->extent_root, &path);
a28ec197
CM
75 return 0;
76}
77
234b63a0 78int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf)
02217ed2
CM
79{
80 u64 blocknr;
81 int i;
a28ec197 82
3768f368 83 if (!root->ref_cows)
a28ec197 84 return 0;
7518a238 85 if (btrfs_is_leaf(&buf->node))
a28ec197
CM
86 return 0;
87
7518a238 88 for (i = 0; i < btrfs_header_nritems(&buf->node.header); i++) {
1d4f8a0c 89 blocknr = btrfs_node_blockptr(&buf->node, i);
02217ed2
CM
90 inc_block_ref(root, blocknr);
91 }
92 return 0;
93}
94
234b63a0 95int btrfs_finish_extent_commit(struct btrfs_root *root)
a28ec197 96{
a28ec197
CM
97 unsigned long gang[8];
98 int ret;
99 int i;
100
101 while(1) {
3768f368 102 ret = radix_tree_gang_lookup(&root->pinned_radix,
a28ec197
CM
103 (void **)gang, 0,
104 ARRAY_SIZE(gang));
105 if (!ret)
106 break;
0579da42 107 for (i = 0; i < ret; i++) {
3768f368 108 radix_tree_delete(&root->pinned_radix, gang[i]);
0579da42 109 }
a28ec197 110 }
3768f368
CM
111 root->last_insert.objectid = 0;
112 root->last_insert.offset = 0;
a28ec197
CM
113 return 0;
114}
115
234b63a0 116static int finish_current_insert(struct btrfs_root *extent_root)
037e6390 117{
e2fa7227 118 struct btrfs_key ins;
234b63a0 119 struct btrfs_extent_item extent_item;
037e6390
CM
120 int i;
121 int ret;
122
cf27e1ee
CM
123 btrfs_set_extent_refs(&extent_item, 1);
124 btrfs_set_extent_owner(&extent_item,
125 btrfs_header_parentid(&extent_root->node->node.header));
037e6390
CM
126 ins.offset = 1;
127 ins.flags = 0;
62e2749e 128 btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
037e6390
CM
129
130 for (i = 0; i < extent_root->current_insert.flags; i++) {
131 ins.objectid = extent_root->current_insert.objectid + i;
234b63a0 132 ret = btrfs_insert_item(extent_root, &ins, &extent_item,
037e6390
CM
133 sizeof(extent_item));
134 BUG_ON(ret);
135 }
136 extent_root->current_insert.offset = 0;
137 return 0;
138}
139
fec577fb 140/*
a28ec197 141 * remove an extent from the root, returns 0 on success
fec577fb 142 */
234b63a0 143static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks)
a28ec197 144{
234b63a0 145 struct btrfs_path path;
e2fa7227 146 struct btrfs_key key;
234b63a0 147 struct btrfs_root *extent_root = root->extent_root;
a28ec197 148 int ret;
234b63a0 149 struct btrfs_extent_item *ei;
e2fa7227 150 struct btrfs_key ins;
cf27e1ee 151 u32 refs;
037e6390 152
a28ec197
CM
153 key.objectid = blocknr;
154 key.flags = 0;
62e2749e 155 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
a28ec197
CM
156 key.offset = num_blocks;
157
037e6390 158 find_free_extent(root, 0, 0, (u64)-1, &ins);
234b63a0
CM
159 btrfs_init_path(&path);
160 ret = btrfs_search_slot(extent_root, &key, &path, -1, 1);
a28ec197
CM
161 if (ret) {
162 printf("failed to find %Lu\n", key.objectid);
234b63a0 163 btrfs_print_tree(extent_root, extent_root->node);
a28ec197
CM
164 printf("failed to find %Lu\n", key.objectid);
165 BUG();
166 }
123abc88
CM
167 ei = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0],
168 struct btrfs_extent_item);
a28ec197 169 BUG_ON(ei->refs == 0);
cf27e1ee
CM
170 refs = btrfs_extent_refs(ei) - 1;
171 btrfs_set_extent_refs(ei, refs);
172 if (refs == 0) {
3768f368 173 if (!root->ref_cows) {
a28ec197
CM
174 int err;
175 radix_tree_preload(GFP_KERNEL);
176 err = radix_tree_insert(&extent_root->pinned_radix,
177 blocknr, (void *)blocknr);
178 BUG_ON(err);
179 radix_tree_preload_end();
180 }
234b63a0 181 ret = btrfs_del_item(extent_root, &path);
0579da42 182 if (root != extent_root &&
71087494 183 extent_root->last_insert.objectid > blocknr)
0579da42 184 extent_root->last_insert.objectid = blocknr;
a28ec197
CM
185 if (ret)
186 BUG();
187 }
234b63a0 188 btrfs_release_path(extent_root, &path);
037e6390 189 finish_current_insert(extent_root);
a28ec197
CM
190 return ret;
191}
192
a28ec197
CM
193/*
194 * find all the blocks marked as pending in the radix tree and remove
195 * them from the extent map
196 */
234b63a0 197static int del_pending_extents(struct btrfs_root *extent_root)
a28ec197
CM
198{
199 int ret;
234b63a0 200 struct btrfs_buffer *gang[4];
a28ec197
CM
201 int i;
202
203 while(1) {
204 ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix,
205 (void **)gang, 0,
206 ARRAY_SIZE(gang),
207 CTREE_EXTENT_PENDING_DEL);
208 if (!ret)
209 break;
210 for (i = 0; i < ret; i++) {
211 ret = __free_extent(extent_root, gang[i]->blocknr, 1);
fec577fb
CM
212 radix_tree_tag_clear(&extent_root->cache_radix,
213 gang[i]->blocknr,
a28ec197 214 CTREE_EXTENT_PENDING_DEL);
234b63a0 215 btrfs_block_release(extent_root, gang[i]);
fec577fb
CM
216 }
217 }
218 return 0;
219}
220
234b63a0 221static int run_pending(struct btrfs_root *extent_root)
a28ec197
CM
222{
223 while(radix_tree_tagged(&extent_root->cache_radix,
037e6390 224 CTREE_EXTENT_PENDING_DEL))
a28ec197 225 del_pending_extents(extent_root);
a28ec197
CM
226 return 0;
227}
228
229
fec577fb
CM
230/*
231 * remove an extent from the root, returns 0 on success
232 */
234b63a0 233int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks)
fec577fb 234{
234b63a0
CM
235 struct btrfs_root *extent_root = root->extent_root;
236 struct btrfs_buffer *t;
fec577fb
CM
237 int pending_ret;
238 int ret;
a28ec197 239
fec577fb 240 if (root == extent_root) {
a28ec197 241 t = find_tree_block(root, blocknr);
037e6390 242 radix_tree_tag_set(&root->cache_radix, blocknr,
a28ec197 243 CTREE_EXTENT_PENDING_DEL);
fec577fb
CM
244 return 0;
245 }
a28ec197
CM
246 ret = __free_extent(root, blocknr, num_blocks);
247 pending_ret = run_pending(root->extent_root);
fec577fb
CM
248 return ret ? ret : pending_ret;
249}
250
251/*
252 * walks the btree of allocated extents and find a hole of a given size.
253 * The key ins is changed to record the hole:
254 * ins->objectid == block start
62e2749e 255 * ins->flags = BTRFS_EXTENT_ITEM_KEY
fec577fb
CM
256 * ins->offset == number of blocks
257 * Any available blocks before search_start are skipped.
258 */
234b63a0 259static int find_free_extent(struct btrfs_root *orig_root, u64 num_blocks,
e2fa7227
CM
260 u64 search_start, u64 search_end,
261 struct btrfs_key *ins)
fec577fb 262{
234b63a0 263 struct btrfs_path path;
e2fa7227 264 struct btrfs_key key;
fec577fb
CM
265 int ret;
266 u64 hole_size = 0;
267 int slot = 0;
268 u64 last_block;
037e6390 269 u64 test_block;
fec577fb 270 int start_found;
234b63a0
CM
271 struct btrfs_leaf *l;
272 struct btrfs_root * root = orig_root->extent_root;
0579da42 273 int total_needed = num_blocks;
fec577fb 274
7518a238 275 total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3;
0579da42
CM
276 if (root->last_insert.objectid > search_start)
277 search_start = root->last_insert.objectid;
62e2749e
CM
278
279 ins->flags = 0;
280 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
281
fec577fb 282check_failed:
234b63a0 283 btrfs_init_path(&path);
fec577fb
CM
284 ins->objectid = search_start;
285 ins->offset = 0;
fec577fb 286 start_found = 0;
234b63a0 287 ret = btrfs_search_slot(root, ins, &path, 0, 0);
0f70abe2
CM
288 if (ret < 0)
289 goto error;
aa5d6bed 290
0579da42
CM
291 if (path.slots[0] > 0)
292 path.slots[0]--;
293
fec577fb
CM
294 while (1) {
295 l = &path.nodes[0]->leaf;
296 slot = path.slots[0];
7518a238 297 if (slot >= btrfs_header_nritems(&l->header)) {
234b63a0 298 ret = btrfs_next_leaf(root, &path);
fec577fb
CM
299 if (ret == 0)
300 continue;
0f70abe2
CM
301 if (ret < 0)
302 goto error;
fec577fb
CM
303 if (!start_found) {
304 ins->objectid = search_start;
037e6390 305 ins->offset = (u64)-1;
fec577fb
CM
306 start_found = 1;
307 goto check_pending;
308 }
309 ins->objectid = last_block > search_start ?
310 last_block : search_start;
037e6390 311 ins->offset = (u64)-1;
fec577fb
CM
312 goto check_pending;
313 }
e2fa7227
CM
314 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
315 if (key.objectid >= search_start) {
fec577fb 316 if (start_found) {
0579da42
CM
317 if (last_block < search_start)
318 last_block = search_start;
e2fa7227 319 hole_size = key.objectid - last_block;
037e6390 320 if (hole_size > total_needed) {
fec577fb 321 ins->objectid = last_block;
037e6390 322 ins->offset = hole_size;
fec577fb
CM
323 goto check_pending;
324 }
0579da42 325 }
fec577fb 326 }
0579da42 327 start_found = 1;
e2fa7227 328 last_block = key.objectid + key.offset;
fec577fb
CM
329 path.slots[0]++;
330 }
331 // FIXME -ENOSPC
332check_pending:
333 /* we have to make sure we didn't find an extent that has already
334 * been allocated by the map tree or the original allocation
335 */
234b63a0 336 btrfs_release_path(root, &path);
fec577fb 337 BUG_ON(ins->objectid < search_start);
037e6390
CM
338 for (test_block = ins->objectid;
339 test_block < ins->objectid + total_needed; test_block++) {
340 if (radix_tree_lookup(&root->pinned_radix, test_block)) {
341 search_start = test_block + 1;
fec577fb
CM
342 goto check_failed;
343 }
344 }
037e6390 345 BUG_ON(root->current_insert.offset);
0579da42 346 root->current_insert.offset = total_needed - num_blocks;
037e6390
CM
347 root->current_insert.objectid = ins->objectid + num_blocks;
348 root->current_insert.flags = 0;
0579da42 349 root->last_insert.objectid = ins->objectid;
037e6390 350 ins->offset = num_blocks;
fec577fb 351 return 0;
0f70abe2 352error:
234b63a0 353 btrfs_release_path(root, &path);
0f70abe2 354 return ret;
fec577fb
CM
355}
356
fec577fb
CM
357/*
358 * finds a free extent and does all the dirty work required for allocation
359 * returns the key for the extent through ins, and a tree buffer for
360 * the first block of the extent through buf.
361 *
362 * returns 0 if everything worked, non-zero otherwise.
363 */
9aca1d51
CM
364static int alloc_extent(struct btrfs_root *root, u64 num_blocks,
365 u64 search_start, u64 search_end, u64 owner,
366 struct btrfs_key *ins)
fec577fb
CM
367{
368 int ret;
369 int pending_ret;
234b63a0
CM
370 struct btrfs_root *extent_root = root->extent_root;
371 struct btrfs_extent_item extent_item;
037e6390 372
cf27e1ee
CM
373 btrfs_set_extent_refs(&extent_item, 1);
374 btrfs_set_extent_owner(&extent_item, owner);
fec577fb 375
037e6390
CM
376 if (root == extent_root) {
377 BUG_ON(extent_root->current_insert.offset == 0);
378 BUG_ON(num_blocks != 1);
379 BUG_ON(extent_root->current_insert.flags ==
380 extent_root->current_insert.offset);
381 ins->offset = 1;
382 ins->objectid = extent_root->current_insert.objectid +
383 extent_root->current_insert.flags++;
fec577fb
CM
384 return 0;
385 }
037e6390
CM
386 ret = find_free_extent(root, num_blocks, search_start,
387 search_end, ins);
388 if (ret)
389 return ret;
fec577fb 390
234b63a0 391 ret = btrfs_insert_item(extent_root, ins, &extent_item,
037e6390
CM
392 sizeof(extent_item));
393
394 finish_current_insert(extent_root);
395 pending_ret = run_pending(extent_root);
396 if (ret)
397 return ret;
398 if (pending_ret)
399 return pending_ret;
400 return 0;
fec577fb
CM
401}
402
403/*
404 * helper function to allocate a block for a given tree
405 * returns the tree buffer or NULL.
406 */
234b63a0 407struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root)
fec577fb 408{
e2fa7227 409 struct btrfs_key ins;
fec577fb 410 int ret;
234b63a0 411 struct btrfs_buffer *buf;
fec577fb
CM
412
413 ret = alloc_extent(root, 1, 0, (unsigned long)-1,
7518a238 414 btrfs_header_parentid(&root->node->node.header),
037e6390 415 &ins);
fec577fb
CM
416 if (ret) {
417 BUG();
418 return NULL;
419 }
037e6390
CM
420 buf = find_tree_block(root, ins.objectid);
421 dirty_tree_block(root, buf);
fec577fb
CM
422 return buf;
423}
a28ec197 424
9aca1d51
CM
425/*
426 * helper function for drop_snapshot, this walks down the tree dropping ref
427 * counts as it goes.
428 */
429static int walk_down_tree(struct btrfs_root *root,
430 struct btrfs_path *path, int *level)
20524f02 431{
234b63a0
CM
432 struct btrfs_buffer *next;
433 struct btrfs_buffer *cur;
20524f02
CM
434 u64 blocknr;
435 int ret;
436 u32 refs;
437
438 ret = lookup_block_ref(root, path->nodes[*level]->blocknr, &refs);
439 BUG_ON(ret);
440 if (refs > 1)
441 goto out;
9aca1d51
CM
442 /*
443 * walk down to the last node level and free all the leaves
444 */
20524f02
CM
445 while(*level > 0) {
446 cur = path->nodes[*level];
7518a238
CM
447 if (path->slots[*level] >=
448 btrfs_header_nritems(&cur->node.header))
20524f02 449 break;
1d4f8a0c 450 blocknr = btrfs_node_blockptr(&cur->node, path->slots[*level]);
20524f02
CM
451 ret = lookup_block_ref(root, blocknr, &refs);
452 if (refs != 1 || *level == 1) {
453 path->slots[*level]++;
234b63a0 454 ret = btrfs_free_extent(root, blocknr, 1);
20524f02
CM
455 BUG_ON(ret);
456 continue;
457 }
458 BUG_ON(ret);
459 next = read_tree_block(root, blocknr);
83e15a28 460 if (path->nodes[*level-1])
234b63a0 461 btrfs_block_release(root, path->nodes[*level-1]);
20524f02 462 path->nodes[*level-1] = next;
7518a238 463 *level = btrfs_header_level(&next->node.header);
20524f02
CM
464 path->slots[*level] = 0;
465 }
466out:
234b63a0
CM
467 ret = btrfs_free_extent(root, path->nodes[*level]->blocknr, 1);
468 btrfs_block_release(root, path->nodes[*level]);
20524f02
CM
469 path->nodes[*level] = NULL;
470 *level += 1;
471 BUG_ON(ret);
472 return 0;
473}
474
9aca1d51
CM
475/*
476 * helper for dropping snapshots. This walks back up the tree in the path
477 * to find the first node higher up where we haven't yet gone through
478 * all the slots
479 */
480static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
481 int *level)
20524f02
CM
482{
483 int i;
484 int slot;
485 int ret;
234b63a0 486 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
20524f02 487 slot = path->slots[i];
7518a238
CM
488 if (slot <
489 btrfs_header_nritems(&path->nodes[i]->node.header)- 1) {
20524f02
CM
490 path->slots[i]++;
491 *level = i;
492 return 0;
493 } else {
234b63a0 494 ret = btrfs_free_extent(root,
20524f02 495 path->nodes[*level]->blocknr, 1);
234b63a0 496 btrfs_block_release(root, path->nodes[*level]);
83e15a28 497 path->nodes[*level] = NULL;
20524f02
CM
498 *level = i + 1;
499 BUG_ON(ret);
500 }
501 }
502 return 1;
503}
504
9aca1d51
CM
505/*
506 * drop the reference count on the tree rooted at 'snap'. This traverses
507 * the tree freeing any blocks that have a ref count of zero after being
508 * decremented.
509 */
234b63a0 510int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap)
20524f02 511{
3768f368 512 int ret = 0;
9aca1d51 513 int wret;
20524f02 514 int level;
234b63a0 515 struct btrfs_path path;
20524f02
CM
516 int i;
517 int orig_level;
518
234b63a0 519 btrfs_init_path(&path);
20524f02 520
7518a238 521 level = btrfs_header_level(&snap->node.header);
20524f02
CM
522 orig_level = level;
523 path.nodes[level] = snap;
524 path.slots[level] = 0;
525 while(1) {
9aca1d51
CM
526 wret = walk_down_tree(root, &path, &level);
527 if (wret > 0)
20524f02 528 break;
9aca1d51
CM
529 if (wret < 0)
530 ret = wret;
531
532 wret = walk_up_tree(root, &path, &level);
533 if (wret > 0)
20524f02 534 break;
9aca1d51
CM
535 if (wret < 0)
536 ret = wret;
20524f02 537 }
83e15a28
CM
538 for (i = 0; i <= orig_level; i++) {
539 if (path.nodes[i]) {
234b63a0 540 btrfs_block_release(root, path.nodes[i]);
83e15a28 541 }
20524f02 542 }
9aca1d51 543 return ret;
20524f02 544}