Btrfs: add transaction.h to the Makefile
[linux-2.6-block.git] / fs / btrfs / extent-tree.c
CommitLineData
fec577fb
CM
1#include <stdio.h>
2#include <stdlib.h>
3#include "kerncompat.h"
4#include "radix-tree.h"
5#include "ctree.h"
6#include "disk-io.h"
7#include "print-tree.h"
e089f05c 8#include "transaction.h"
fec577fb 9
e089f05c
CM
10static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
11 *orig_root, u64 num_blocks, u64 search_start, u64
12 search_end, struct btrfs_key *ins);
13static int finish_current_insert(struct btrfs_trans_handle *trans, struct
14 btrfs_root *extent_root);
15static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root
16 *extent_root);
037e6390 17
fec577fb
CM
/*
 * Pending extents are blocks we are trying to allocate in the extent map
 * while the map itself is being grown because of other allocations.  To
 * avoid recursing, such blocks are tagged in the radix tree and cleaned up
 * once the other allocations are done.  Deletes use the pending tag in the
 * same way: a block freed from the extent tree is only tagged at first and
 * its extent item is removed later by del_pending_extents().
 */
#define CTREE_EXTENT_PENDING_DEL 0
fec577fb 26
e089f05c
CM
27static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root
28 *root, u64 blocknr)
02217ed2 29{
234b63a0 30 struct btrfs_path path;
02217ed2 31 int ret;
e2fa7227 32 struct btrfs_key key;
234b63a0
CM
33 struct btrfs_leaf *l;
34 struct btrfs_extent_item *item;
e2fa7227 35 struct btrfs_key ins;
cf27e1ee 36 u32 refs;
037e6390 37
e089f05c 38 find_free_extent(trans, root->extent_root, 0, 0, (u64)-1, &ins);
234b63a0 39 btrfs_init_path(&path);
02217ed2
CM
40 key.objectid = blocknr;
41 key.flags = 0;
62e2749e 42 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
02217ed2 43 key.offset = 1;
e089f05c 44 ret = btrfs_search_slot(trans, root->extent_root, &key, &path, 0, 1);
a28ec197
CM
45 if (ret != 0)
46 BUG();
02217ed2
CM
47 BUG_ON(ret != 0);
48 l = &path.nodes[0]->leaf;
4beb1b8b 49 item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item);
cf27e1ee
CM
50 refs = btrfs_extent_refs(item);
51 btrfs_set_extent_refs(item, refs + 1);
a28ec197 52
02217ed2 53 BUG_ON(list_empty(&path.nodes[0]->dirty));
234b63a0 54 btrfs_release_path(root->extent_root, &path);
e089f05c
CM
55 finish_current_insert(trans, root->extent_root);
56 run_pending(trans, root->extent_root);
02217ed2
CM
57 return 0;
58}
59
e089f05c
CM
60static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root
61 *root, u64 blocknr, u32 *refs)
a28ec197 62{
234b63a0 63 struct btrfs_path path;
a28ec197 64 int ret;
e2fa7227 65 struct btrfs_key key;
234b63a0
CM
66 struct btrfs_leaf *l;
67 struct btrfs_extent_item *item;
68 btrfs_init_path(&path);
a28ec197 69 key.objectid = blocknr;
a28ec197 70 key.offset = 1;
62e2749e
CM
71 key.flags = 0;
72 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
e089f05c 73 ret = btrfs_search_slot(trans, root->extent_root, &key, &path, 0, 0);
a28ec197
CM
74 if (ret != 0)
75 BUG();
76 l = &path.nodes[0]->leaf;
4beb1b8b 77 item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item);
cf27e1ee 78 *refs = btrfs_extent_refs(item);
234b63a0 79 btrfs_release_path(root->extent_root, &path);
a28ec197
CM
80 return 0;
81}
82
e089f05c
CM
83int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
84 struct btrfs_buffer *buf)
02217ed2
CM
85{
86 u64 blocknr;
87 int i;
a28ec197 88
3768f368 89 if (!root->ref_cows)
a28ec197 90 return 0;
7518a238 91 if (btrfs_is_leaf(&buf->node))
a28ec197
CM
92 return 0;
93
7518a238 94 for (i = 0; i < btrfs_header_nritems(&buf->node.header); i++) {
1d4f8a0c 95 blocknr = btrfs_node_blockptr(&buf->node, i);
e089f05c 96 inc_block_ref(trans, root, blocknr);
02217ed2
CM
97 }
98 return 0;
99}
100
e089f05c
CM
101int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
102 btrfs_root *root)
a28ec197 103{
a28ec197 104 unsigned long gang[8];
88fd146c 105 u64 first = 0;
a28ec197
CM
106 int ret;
107 int i;
108
109 while(1) {
3768f368 110 ret = radix_tree_gang_lookup(&root->pinned_radix,
a28ec197
CM
111 (void **)gang, 0,
112 ARRAY_SIZE(gang));
113 if (!ret)
114 break;
88fd146c
CM
115 if (!first)
116 first = gang[0];
0579da42 117 for (i = 0; i < ret; i++) {
3768f368 118 radix_tree_delete(&root->pinned_radix, gang[i]);
0579da42 119 }
a28ec197 120 }
88fd146c 121 root->last_insert.objectid = first;
3768f368 122 root->last_insert.offset = 0;
a28ec197
CM
123 return 0;
124}
125
e089f05c
CM
126static int finish_current_insert(struct btrfs_trans_handle *trans, struct
127 btrfs_root *extent_root)
037e6390 128{
e2fa7227 129 struct btrfs_key ins;
234b63a0 130 struct btrfs_extent_item extent_item;
037e6390
CM
131 int i;
132 int ret;
133
cf27e1ee
CM
134 btrfs_set_extent_refs(&extent_item, 1);
135 btrfs_set_extent_owner(&extent_item,
136 btrfs_header_parentid(&extent_root->node->node.header));
037e6390
CM
137 ins.offset = 1;
138 ins.flags = 0;
62e2749e 139 btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
037e6390
CM
140
141 for (i = 0; i < extent_root->current_insert.flags; i++) {
142 ins.objectid = extent_root->current_insert.objectid + i;
e089f05c
CM
143 ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
144 sizeof(extent_item));
037e6390
CM
145 BUG_ON(ret);
146 }
147 extent_root->current_insert.offset = 0;
148 return 0;
149}
150
fec577fb 151/*
a28ec197 152 * remove an extent from the root, returns 0 on success
fec577fb 153 */
e089f05c
CM
154static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
155 *root, u64 blocknr, u64 num_blocks, int pin)
a28ec197 156{
234b63a0 157 struct btrfs_path path;
e2fa7227 158 struct btrfs_key key;
234b63a0 159 struct btrfs_root *extent_root = root->extent_root;
a28ec197 160 int ret;
234b63a0 161 struct btrfs_extent_item *ei;
e2fa7227 162 struct btrfs_key ins;
cf27e1ee 163 u32 refs;
037e6390 164
88fd146c 165 BUG_ON(pin && num_blocks != 1);
a28ec197
CM
166 key.objectid = blocknr;
167 key.flags = 0;
62e2749e 168 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
a28ec197
CM
169 key.offset = num_blocks;
170
e089f05c 171 find_free_extent(trans, root, 0, 0, (u64)-1, &ins);
234b63a0 172 btrfs_init_path(&path);
e089f05c 173 ret = btrfs_search_slot(trans, extent_root, &key, &path, -1, 1);
a28ec197
CM
174 if (ret) {
175 printf("failed to find %Lu\n", key.objectid);
234b63a0 176 btrfs_print_tree(extent_root, extent_root->node);
a28ec197
CM
177 printf("failed to find %Lu\n", key.objectid);
178 BUG();
179 }
123abc88
CM
180 ei = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0],
181 struct btrfs_extent_item);
a28ec197 182 BUG_ON(ei->refs == 0);
cf27e1ee
CM
183 refs = btrfs_extent_refs(ei) - 1;
184 btrfs_set_extent_refs(ei, refs);
185 if (refs == 0) {
88fd146c 186 if (pin) {
a28ec197
CM
187 int err;
188 radix_tree_preload(GFP_KERNEL);
189 err = radix_tree_insert(&extent_root->pinned_radix,
190 blocknr, (void *)blocknr);
191 BUG_ON(err);
192 radix_tree_preload_end();
193 }
e089f05c 194 ret = btrfs_del_item(trans, extent_root, &path);
88fd146c 195 if (!pin && extent_root->last_insert.objectid > blocknr)
0579da42 196 extent_root->last_insert.objectid = blocknr;
a28ec197
CM
197 if (ret)
198 BUG();
199 }
234b63a0 200 btrfs_release_path(extent_root, &path);
e089f05c 201 finish_current_insert(trans, extent_root);
a28ec197
CM
202 return ret;
203}
204
a28ec197
CM
205/*
206 * find all the blocks marked as pending in the radix tree and remove
207 * them from the extent map
208 */
e089f05c
CM
209static int del_pending_extents(struct btrfs_trans_handle *trans, struct
210 btrfs_root *extent_root)
a28ec197
CM
211{
212 int ret;
234b63a0 213 struct btrfs_buffer *gang[4];
a28ec197
CM
214 int i;
215
216 while(1) {
217 ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix,
218 (void **)gang, 0,
219 ARRAY_SIZE(gang),
220 CTREE_EXTENT_PENDING_DEL);
221 if (!ret)
222 break;
223 for (i = 0; i < ret; i++) {
e089f05c 224 ret = __free_extent(trans, extent_root,
88fd146c 225 gang[i]->blocknr, 1, 1);
fec577fb
CM
226 radix_tree_tag_clear(&extent_root->cache_radix,
227 gang[i]->blocknr,
a28ec197 228 CTREE_EXTENT_PENDING_DEL);
234b63a0 229 btrfs_block_release(extent_root, gang[i]);
fec577fb
CM
230 }
231 }
232 return 0;
233}
234
e089f05c
CM
235static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root
236 *extent_root)
a28ec197
CM
237{
238 while(radix_tree_tagged(&extent_root->cache_radix,
037e6390 239 CTREE_EXTENT_PENDING_DEL))
e089f05c 240 del_pending_extents(trans, extent_root);
a28ec197
CM
241 return 0;
242}
243
244
fec577fb
CM
245/*
246 * remove an extent from the root, returns 0 on success
247 */
e089f05c
CM
248int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
249 *root, u64 blocknr, u64 num_blocks, int pin)
fec577fb 250{
234b63a0
CM
251 struct btrfs_root *extent_root = root->extent_root;
252 struct btrfs_buffer *t;
fec577fb
CM
253 int pending_ret;
254 int ret;
a28ec197 255
fec577fb 256 if (root == extent_root) {
a28ec197 257 t = find_tree_block(root, blocknr);
037e6390 258 radix_tree_tag_set(&root->cache_radix, blocknr,
a28ec197 259 CTREE_EXTENT_PENDING_DEL);
fec577fb
CM
260 return 0;
261 }
e089f05c
CM
262 ret = __free_extent(trans, root, blocknr, num_blocks, pin);
263 pending_ret = run_pending(trans, root->extent_root);
fec577fb
CM
264 return ret ? ret : pending_ret;
265}
266
267/*
268 * walks the btree of allocated extents and find a hole of a given size.
269 * The key ins is changed to record the hole:
270 * ins->objectid == block start
62e2749e 271 * ins->flags = BTRFS_EXTENT_ITEM_KEY
fec577fb
CM
272 * ins->offset == number of blocks
273 * Any available blocks before search_start are skipped.
274 */
e089f05c
CM
275static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
276 *orig_root, u64 num_blocks, u64 search_start, u64
277 search_end, struct btrfs_key *ins)
fec577fb 278{
234b63a0 279 struct btrfs_path path;
e2fa7227 280 struct btrfs_key key;
fec577fb
CM
281 int ret;
282 u64 hole_size = 0;
283 int slot = 0;
284 u64 last_block;
037e6390 285 u64 test_block;
fec577fb 286 int start_found;
234b63a0
CM
287 struct btrfs_leaf *l;
288 struct btrfs_root * root = orig_root->extent_root;
0579da42 289 int total_needed = num_blocks;
fec577fb 290
7518a238 291 total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3;
0579da42
CM
292 if (root->last_insert.objectid > search_start)
293 search_start = root->last_insert.objectid;
62e2749e
CM
294
295 ins->flags = 0;
296 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
297
fec577fb 298check_failed:
234b63a0 299 btrfs_init_path(&path);
fec577fb
CM
300 ins->objectid = search_start;
301 ins->offset = 0;
fec577fb 302 start_found = 0;
e089f05c 303 ret = btrfs_search_slot(trans, root, ins, &path, 0, 0);
0f70abe2
CM
304 if (ret < 0)
305 goto error;
aa5d6bed 306
0579da42
CM
307 if (path.slots[0] > 0)
308 path.slots[0]--;
309
fec577fb
CM
310 while (1) {
311 l = &path.nodes[0]->leaf;
312 slot = path.slots[0];
7518a238 313 if (slot >= btrfs_header_nritems(&l->header)) {
234b63a0 314 ret = btrfs_next_leaf(root, &path);
fec577fb
CM
315 if (ret == 0)
316 continue;
0f70abe2
CM
317 if (ret < 0)
318 goto error;
fec577fb
CM
319 if (!start_found) {
320 ins->objectid = search_start;
037e6390 321 ins->offset = (u64)-1;
fec577fb
CM
322 start_found = 1;
323 goto check_pending;
324 }
325 ins->objectid = last_block > search_start ?
326 last_block : search_start;
037e6390 327 ins->offset = (u64)-1;
fec577fb
CM
328 goto check_pending;
329 }
e2fa7227
CM
330 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
331 if (key.objectid >= search_start) {
fec577fb 332 if (start_found) {
0579da42
CM
333 if (last_block < search_start)
334 last_block = search_start;
e2fa7227 335 hole_size = key.objectid - last_block;
037e6390 336 if (hole_size > total_needed) {
fec577fb 337 ins->objectid = last_block;
037e6390 338 ins->offset = hole_size;
fec577fb
CM
339 goto check_pending;
340 }
0579da42 341 }
fec577fb 342 }
0579da42 343 start_found = 1;
e2fa7227 344 last_block = key.objectid + key.offset;
fec577fb
CM
345 path.slots[0]++;
346 }
347 // FIXME -ENOSPC
348check_pending:
349 /* we have to make sure we didn't find an extent that has already
350 * been allocated by the map tree or the original allocation
351 */
234b63a0 352 btrfs_release_path(root, &path);
fec577fb 353 BUG_ON(ins->objectid < search_start);
037e6390
CM
354 for (test_block = ins->objectid;
355 test_block < ins->objectid + total_needed; test_block++) {
356 if (radix_tree_lookup(&root->pinned_radix, test_block)) {
357 search_start = test_block + 1;
fec577fb
CM
358 goto check_failed;
359 }
360 }
037e6390 361 BUG_ON(root->current_insert.offset);
0579da42 362 root->current_insert.offset = total_needed - num_blocks;
037e6390
CM
363 root->current_insert.objectid = ins->objectid + num_blocks;
364 root->current_insert.flags = 0;
0579da42 365 root->last_insert.objectid = ins->objectid;
037e6390 366 ins->offset = num_blocks;
fec577fb 367 return 0;
0f70abe2 368error:
234b63a0 369 btrfs_release_path(root, &path);
0f70abe2 370 return ret;
fec577fb
CM
371}
372
fec577fb
CM
373/*
374 * finds a free extent and does all the dirty work required for allocation
375 * returns the key for the extent through ins, and a tree buffer for
376 * the first block of the extent through buf.
377 *
378 * returns 0 if everything worked, non-zero otherwise.
379 */
e089f05c
CM
380static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root
381 *root, u64 num_blocks, u64 search_start, u64
382 search_end, u64 owner, struct btrfs_key *ins)
fec577fb
CM
383{
384 int ret;
385 int pending_ret;
234b63a0
CM
386 struct btrfs_root *extent_root = root->extent_root;
387 struct btrfs_extent_item extent_item;
037e6390 388
cf27e1ee
CM
389 btrfs_set_extent_refs(&extent_item, 1);
390 btrfs_set_extent_owner(&extent_item, owner);
fec577fb 391
037e6390
CM
392 if (root == extent_root) {
393 BUG_ON(extent_root->current_insert.offset == 0);
394 BUG_ON(num_blocks != 1);
395 BUG_ON(extent_root->current_insert.flags ==
396 extent_root->current_insert.offset);
397 ins->offset = 1;
398 ins->objectid = extent_root->current_insert.objectid +
399 extent_root->current_insert.flags++;
fec577fb
CM
400 return 0;
401 }
e089f05c 402 ret = find_free_extent(trans, root, num_blocks, search_start,
037e6390
CM
403 search_end, ins);
404 if (ret)
405 return ret;
fec577fb 406
e089f05c
CM
407 ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
408 sizeof(extent_item));
037e6390 409
e089f05c
CM
410 finish_current_insert(trans, extent_root);
411 pending_ret = run_pending(trans, extent_root);
037e6390
CM
412 if (ret)
413 return ret;
414 if (pending_ret)
415 return pending_ret;
416 return 0;
fec577fb
CM
417}
418
419/*
420 * helper function to allocate a block for a given tree
421 * returns the tree buffer or NULL.
422 */
e089f05c
CM
423struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
424 struct btrfs_root *root)
fec577fb 425{
e2fa7227 426 struct btrfs_key ins;
fec577fb 427 int ret;
234b63a0 428 struct btrfs_buffer *buf;
fec577fb 429
e089f05c 430 ret = alloc_extent(trans, root, 1, 0, (unsigned long)-1,
7518a238 431 btrfs_header_parentid(&root->node->node.header),
037e6390 432 &ins);
fec577fb
CM
433 if (ret) {
434 BUG();
435 return NULL;
436 }
037e6390 437 buf = find_tree_block(root, ins.objectid);
e089f05c 438 dirty_tree_block(trans, root, buf);
fec577fb
CM
439 return buf;
440}
a28ec197 441
9aca1d51
CM
442/*
443 * helper function for drop_snapshot, this walks down the tree dropping ref
444 * counts as it goes.
445 */
e089f05c
CM
446static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
447 *root, struct btrfs_path *path, int *level)
20524f02 448{
234b63a0
CM
449 struct btrfs_buffer *next;
450 struct btrfs_buffer *cur;
20524f02
CM
451 u64 blocknr;
452 int ret;
453 u32 refs;
454
e089f05c
CM
455 ret = lookup_block_ref(trans, root, path->nodes[*level]->blocknr,
456 &refs);
20524f02
CM
457 BUG_ON(ret);
458 if (refs > 1)
459 goto out;
9aca1d51
CM
460 /*
461 * walk down to the last node level and free all the leaves
462 */
20524f02
CM
463 while(*level > 0) {
464 cur = path->nodes[*level];
7518a238
CM
465 if (path->slots[*level] >=
466 btrfs_header_nritems(&cur->node.header))
20524f02 467 break;
1d4f8a0c 468 blocknr = btrfs_node_blockptr(&cur->node, path->slots[*level]);
e089f05c 469 ret = lookup_block_ref(trans, root, blocknr, &refs);
20524f02
CM
470 if (refs != 1 || *level == 1) {
471 path->slots[*level]++;
e089f05c 472 ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
20524f02
CM
473 BUG_ON(ret);
474 continue;
475 }
476 BUG_ON(ret);
477 next = read_tree_block(root, blocknr);
83e15a28 478 if (path->nodes[*level-1])
234b63a0 479 btrfs_block_release(root, path->nodes[*level-1]);
20524f02 480 path->nodes[*level-1] = next;
7518a238 481 *level = btrfs_header_level(&next->node.header);
20524f02
CM
482 path->slots[*level] = 0;
483 }
484out:
e089f05c
CM
485 ret = btrfs_free_extent(trans, root, path->nodes[*level]->blocknr, 1,
486 1);
234b63a0 487 btrfs_block_release(root, path->nodes[*level]);
20524f02
CM
488 path->nodes[*level] = NULL;
489 *level += 1;
490 BUG_ON(ret);
491 return 0;
492}
493
9aca1d51
CM
494/*
495 * helper for dropping snapshots. This walks back up the tree in the path
496 * to find the first node higher up where we haven't yet gone through
497 * all the slots
498 */
e089f05c
CM
499static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
500 *root, struct btrfs_path *path, int *level)
20524f02
CM
501{
502 int i;
503 int slot;
504 int ret;
234b63a0 505 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
20524f02 506 slot = path->slots[i];
7518a238
CM
507 if (slot <
508 btrfs_header_nritems(&path->nodes[i]->node.header)- 1) {
20524f02
CM
509 path->slots[i]++;
510 *level = i;
511 return 0;
512 } else {
e089f05c
CM
513 ret = btrfs_free_extent(trans, root,
514 path->nodes[*level]->blocknr,
515 1, 1);
234b63a0 516 btrfs_block_release(root, path->nodes[*level]);
83e15a28 517 path->nodes[*level] = NULL;
20524f02
CM
518 *level = i + 1;
519 BUG_ON(ret);
520 }
521 }
522 return 1;
523}
524
9aca1d51
CM
525/*
526 * drop the reference count on the tree rooted at 'snap'. This traverses
527 * the tree freeing any blocks that have a ref count of zero after being
528 * decremented.
529 */
e089f05c
CM
530int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
531 *root, struct btrfs_buffer *snap)
20524f02 532{
3768f368 533 int ret = 0;
9aca1d51 534 int wret;
20524f02 535 int level;
234b63a0 536 struct btrfs_path path;
20524f02
CM
537 int i;
538 int orig_level;
539
234b63a0 540 btrfs_init_path(&path);
20524f02 541
7518a238 542 level = btrfs_header_level(&snap->node.header);
20524f02
CM
543 orig_level = level;
544 path.nodes[level] = snap;
545 path.slots[level] = 0;
546 while(1) {
e089f05c 547 wret = walk_down_tree(trans, root, &path, &level);
9aca1d51 548 if (wret > 0)
20524f02 549 break;
9aca1d51
CM
550 if (wret < 0)
551 ret = wret;
552
e089f05c 553 wret = walk_up_tree(trans, root, &path, &level);
9aca1d51 554 if (wret > 0)
20524f02 555 break;
9aca1d51
CM
556 if (wret < 0)
557 ret = wret;
20524f02 558 }
83e15a28
CM
559 for (i = 0; i <= orig_level; i++) {
560 if (path.nodes[i]) {
234b63a0 561 btrfs_block_release(root, path.nodes[i]);
83e15a28 562 }
20524f02 563 }
9aca1d51 564 return ret;
20524f02 565}