Btrfs: allocator optimizations, truncate readahead
[linux-block.git] / fs / btrfs / extent-tree.c
1 #include <linux/module.h>
2 #include "ctree.h"
3 #include "disk-io.h"
4 #include "print-tree.h"
5 #include "transaction.h"
6
7 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
8                             *orig_root, u64 num_blocks, u64 search_start, u64
9                             search_end, struct btrfs_key *ins, int data);
10 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
11                                  btrfs_root *extent_root);
12 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
13                                btrfs_root *extent_root);
14
15 static void reada_extent_leaves(struct btrfs_root *root,
16                                 struct btrfs_path *path, u64 limit)
17 {
18         struct btrfs_node *node;
19         int i;
20         int nritems;
21         u64 item_objectid;
22         u64 blocknr;
23         int slot;
24         int ret;
25
26         if (!path->nodes[1])
27                 return;
28         node = btrfs_buffer_node(path->nodes[1]);
29         slot = path->slots[1] + 1;
30         nritems = btrfs_header_nritems(&node->header);
31         for (i = slot; i < nritems && i < slot + 8; i++) {
32                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
33                 if (item_objectid > limit)
34                         break;
35                 blocknr = btrfs_node_blockptr(node, i);
36                 ret = readahead_tree_block(root, blocknr);
37                 if (ret)
38                         break;
39         }
40 }
41
42 static int cache_block_group(struct btrfs_root *root,
43                              struct btrfs_block_group_cache *block_group)
44 {
45         struct btrfs_path *path;
46         int ret;
47         struct btrfs_key key;
48         struct btrfs_leaf *leaf;
49         struct radix_tree_root *extent_radix;
50         int slot;
51         u64 i;
52         u64 last = 0;
53         u64 hole_size;
54         u64 limit;
55         int found = 0;
56
57         root = root->fs_info->extent_root;
58         extent_radix = &root->fs_info->extent_map_radix;
59
60         if (block_group->cached)
61                 return 0;
62         if (block_group->data)
63                 return 0;
64         path = btrfs_alloc_path();
65         if (!path)
66                 return -ENOMEM;
67 printk("cache block group %Lu\n", block_group->key.objectid);
68         key.objectid = block_group->key.objectid;
69         key.flags = 0;
70         key.offset = 0;
71         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
72         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
73         if (ret < 0)
74                 return ret;
75         if (ret && path->slots[0] > 0)
76                 path->slots[0]--;
77         limit = block_group->key.objectid + block_group->key.offset;
78         reada_extent_leaves(root, path, limit);
79         while(1) {
80                 leaf = btrfs_buffer_leaf(path->nodes[0]);
81                 slot = path->slots[0];
82                 if (slot >= btrfs_header_nritems(&leaf->header)) {
83                         reada_extent_leaves(root, path, limit);
84                         ret = btrfs_next_leaf(root, path);
85                         if (ret == 0) {
86                                 continue;
87                         } else {
88                                 if (found) {
89                                         hole_size = block_group->key.objectid +
90                                                 block_group->key.offset - last;
91                                 } else {
92                                         last = block_group->key.objectid;
93                                         hole_size = block_group->key.offset;
94                                 }
95                                 for (i = 0; i < hole_size; i++) {
96                                         set_radix_bit(extent_radix,
97                                                       last + i);
98                                 }
99                                 break;
100                         }
101                 }
102                 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
103                 if (key.objectid >= block_group->key.objectid +
104                     block_group->key.offset) {
105                         if (found) {
106                                 hole_size = block_group->key.objectid +
107                                         block_group->key.offset - last;
108                         } else {
109                                 last = block_group->key.objectid;
110                                 hole_size = block_group->key.offset;
111                         }
112                         for (i = 0; i < hole_size; i++) {
113                                 set_radix_bit(extent_radix, last + i);
114                         }
115                         break;
116                 }
117                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
118                         if (!found) {
119                                 last = key.objectid + key.offset;
120                                 found = 1;
121                         } else {
122                                 hole_size = key.objectid - last;
123                                 for (i = 0; i < hole_size; i++) {
124                                         set_radix_bit(extent_radix, last + i);
125                                 }
126                                 last = key.objectid + key.offset;
127                         }
128                 }
129                 path->slots[0]++;
130         }
131
132         block_group->cached = 1;
133         btrfs_free_path(path);
134         return 0;
135 }
136
137 static struct btrfs_block_group_cache *lookup_block_group(struct
138                                                           btrfs_fs_info *info,
139                                                           u64 blocknr)
140 {
141         struct btrfs_block_group_cache *block_group;
142         int ret;
143
144         ret = radix_tree_gang_lookup(&info->block_group_radix,
145                                      (void **)&block_group,
146                                      blocknr, 1);
147         if (ret) {
148                 if (block_group->key.objectid <= blocknr && blocknr <=
149                     block_group->key.objectid + block_group->key.offset)
150                         return block_group;
151         }
152         ret = radix_tree_gang_lookup(&info->block_group_data_radix,
153                                      (void **)&block_group,
154                                      blocknr, 1);
155         if (ret) {
156                 if (block_group->key.objectid <= blocknr && blocknr <=
157                     block_group->key.objectid + block_group->key.offset)
158                         return block_group;
159         }
160         WARN_ON(1);
161         printk("lookup_block_group fails for blocknr %Lu\n", blocknr);
162         printk("last ret was %d\n", ret);
163         if (ret) {
164                 printk("last block group was %Lu %Lu\n", block_group->key.objectid, block_group->key.offset);
165         }
166         return NULL;
167 }
168
169 static u64 leaf_range(struct btrfs_root *root)
170 {
171         u64 size = BTRFS_LEAF_DATA_SIZE(root);
172         size = size / (sizeof(struct btrfs_extent_item) +
173                        sizeof(struct btrfs_item));
174         return size;
175 }
176
177 static u64 find_search_start(struct btrfs_root *root,
178                              struct btrfs_block_group_cache **cache_ret,
179                              u64 search_start, int num)
180 {
181         unsigned long gang[8];
182         int ret;
183         struct btrfs_block_group_cache *cache = *cache_ret;
184         u64 last = max(search_start, cache->key.objectid);
185
186         if (cache->data)
187                 goto out;
188         if (num > 1) {
189                 last = max(last, cache->last_prealloc);
190         }
191 again:
192         cache_block_group(root, cache);
193         while(1) {
194                 ret = find_first_radix_bit(&root->fs_info->extent_map_radix,
195                                            gang, last, ARRAY_SIZE(gang));
196                 if (!ret)
197                         goto out;
198                 last = gang[ret-1] + 1;
199                 if (num > 1) {
200                         if (ret != ARRAY_SIZE(gang)) {
201                                 goto new_group;
202                         }
203                         if (gang[ret-1] - gang[0] > leaf_range(root)) {
204                                 continue;
205                         }
206                 }
207                 if (gang[0] >= cache->key.objectid + cache->key.offset) {
208                         goto new_group;
209                 }
210                 return gang[0];
211         }
212 out:
213         return max(cache->last_alloc, search_start);
214
215 new_group:
216         cache = lookup_block_group(root->fs_info, last + cache->key.offset - 1);
217         if (!cache) {
218                 return max((*cache_ret)->last_alloc, search_start);
219         }
220         cache = btrfs_find_block_group(root, cache,
221                                        last + cache->key.offset - 1, 0, 0);
222         *cache_ret = cache;
223         goto again;
224 }
225
226 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
227                                                  struct btrfs_block_group_cache
228                                                  *hint, u64 search_start,
229                                                  int data, int owner)
230 {
231         struct btrfs_block_group_cache *cache[8];
232         struct btrfs_block_group_cache *found_group = NULL;
233         struct btrfs_fs_info *info = root->fs_info;
234         struct radix_tree_root *radix;
235         u64 used;
236         u64 last = 0;
237         u64 hint_last;
238         int i;
239         int ret;
240         int full_search = 0;
241         int factor = 8;
242
243         if (!owner)
244                 factor = 5;
245
246         if (data)
247                 radix = &info->block_group_data_radix;
248         else
249                 radix = &info->block_group_radix;
250
251         if (search_start) {
252                 struct btrfs_block_group_cache *shint;
253                 shint = lookup_block_group(info, search_start);
254                 if (shint->data == data) {
255                         used = btrfs_block_group_used(&shint->item);
256                         if (used + shint->pinned <
257                             (shint->key.offset * factor) / 10) {
258                                 return shint;
259                         }
260                 }
261         }
262         if (hint && hint->data == data) {
263                 used = btrfs_block_group_used(&hint->item);
264                 if (used + hint->pinned < (hint->key.offset * factor) / 10) {
265                         return hint;
266                 }
267                 if (used >= (hint->key.offset * 8) / 10) {
268                         radix_tree_tag_clear(radix,
269                                              hint->key.objectid +
270                                              hint->key.offset - 1,
271                                              BTRFS_BLOCK_GROUP_AVAIL);
272                 }
273                 last = hint->key.offset * 3;
274                 if (hint->key.objectid >= last)
275                         last = max(search_start + hint->key.offset - 1,
276                                    hint->key.objectid - last);
277                 else
278                         last = hint->key.objectid + hint->key.offset;
279                 hint_last = last;
280         } else {
281                 if (hint)
282                         hint_last = max(hint->key.objectid, search_start);
283                 else
284                         hint_last = search_start;
285
286                 last = hint_last;
287         }
288         while(1) {
289                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
290                                                  last, ARRAY_SIZE(cache),
291                                                  BTRFS_BLOCK_GROUP_AVAIL);
292                 if (!ret)
293                         break;
294                 for (i = 0; i < ret; i++) {
295                         last = cache[i]->key.objectid +
296                                 cache[i]->key.offset;
297                         used = btrfs_block_group_used(&cache[i]->item);
298                         if (used + cache[i]->pinned <
299                             (cache[i]->key.offset * factor) / 10) {
300                                 found_group = cache[i];
301                                 goto found;
302                         }
303                         if (used >= (cache[i]->key.offset * 8) / 10) {
304                                 radix_tree_tag_clear(radix,
305                                                      cache[i]->key.objectid +
306                                                      cache[i]->key.offset - 1,
307                                                      BTRFS_BLOCK_GROUP_AVAIL);
308                         }
309                 }
310                 cond_resched();
311         }
312         last = hint_last;
313 again:
314         while(1) {
315                 ret = radix_tree_gang_lookup(radix, (void **)cache,
316                                              last, ARRAY_SIZE(cache));
317                 if (!ret)
318                         break;
319                 for (i = 0; i < ret; i++) {
320                         last = cache[i]->key.objectid +
321                                 cache[i]->key.offset;
322                         used = btrfs_block_group_used(&cache[i]->item);
323                         if (used + cache[i]->pinned < cache[i]->key.offset) {
324                                 found_group = cache[i];
325                                 goto found;
326                         }
327                         if (used >= cache[i]->key.offset) {
328                                 radix_tree_tag_clear(radix,
329                                                      cache[i]->key.objectid +
330                                                      cache[i]->key.offset - 1,
331                                                      BTRFS_BLOCK_GROUP_AVAIL);
332                         }
333                 }
334                 cond_resched();
335         }
336         if (!full_search) {
337 printk("find block group doing full search data %d start %Lu\n", data, search_start);
338                 last = search_start;
339                 full_search = 1;
340                 goto again;
341         }
342         if (!found_group) {
343 printk("find block group bailing to zero data %d\n", data);
344                 ret = radix_tree_gang_lookup(radix,
345                                              (void **)&found_group, 0, 1);
346                 BUG_ON(ret != 1);
347         }
348 found:
349         return found_group;
350 }
351
352 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
353                                 struct btrfs_root *root,
354                                 u64 blocknr, u64 num_blocks)
355 {
356         struct btrfs_path *path;
357         int ret;
358         struct btrfs_key key;
359         struct btrfs_leaf *l;
360         struct btrfs_extent_item *item;
361         struct btrfs_key ins;
362         u32 refs;
363
364         find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1,
365                          &ins, 0);
366         path = btrfs_alloc_path();
367         BUG_ON(!path);
368         btrfs_init_path(path);
369         key.objectid = blocknr;
370         key.flags = 0;
371         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
372         key.offset = num_blocks;
373         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
374                                 0, 1);
375         if (ret != 0) {
376 printk("can't find block %Lu %Lu\n", blocknr, num_blocks);
377                 BUG();
378         }
379         BUG_ON(ret != 0);
380         l = btrfs_buffer_leaf(path->nodes[0]);
381         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
382         refs = btrfs_extent_refs(item);
383         btrfs_set_extent_refs(item, refs + 1);
384         btrfs_mark_buffer_dirty(path->nodes[0]);
385
386         btrfs_release_path(root->fs_info->extent_root, path);
387         btrfs_free_path(path);
388         finish_current_insert(trans, root->fs_info->extent_root);
389         del_pending_extents(trans, root->fs_info->extent_root);
390         return 0;
391 }
392
393 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
394                              struct btrfs_root *root, u64 blocknr,
395                              u64 num_blocks, u32 *refs)
396 {
397         struct btrfs_path *path;
398         int ret;
399         struct btrfs_key key;
400         struct btrfs_leaf *l;
401         struct btrfs_extent_item *item;
402
403         path = btrfs_alloc_path();
404         btrfs_init_path(path);
405         key.objectid = blocknr;
406         key.offset = num_blocks;
407         key.flags = 0;
408         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
409         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
410                                 0, 0);
411         if (ret != 0)
412                 BUG();
413         l = btrfs_buffer_leaf(path->nodes[0]);
414         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
415         *refs = btrfs_extent_refs(item);
416         btrfs_release_path(root->fs_info->extent_root, path);
417         btrfs_free_path(path);
418         return 0;
419 }
420
421 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
422                        struct btrfs_root *root)
423 {
424         return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1);
425 }
426
427 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
428                   struct buffer_head *buf)
429 {
430         u64 blocknr;
431         struct btrfs_node *buf_node;
432         struct btrfs_leaf *buf_leaf;
433         struct btrfs_disk_key *key;
434         struct btrfs_file_extent_item *fi;
435         int i;
436         int leaf;
437         int ret;
438
439         if (!root->ref_cows)
440                 return 0;
441         buf_node = btrfs_buffer_node(buf);
442         leaf = btrfs_is_leaf(buf_node);
443         buf_leaf = btrfs_buffer_leaf(buf);
444         for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
445                 if (leaf) {
446                         key = &buf_leaf->items[i].key;
447                         if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
448                                 continue;
449                         fi = btrfs_item_ptr(buf_leaf, i,
450                                             struct btrfs_file_extent_item);
451                         if (btrfs_file_extent_type(fi) ==
452                             BTRFS_FILE_EXTENT_INLINE)
453                                 continue;
454                         ret = btrfs_inc_extent_ref(trans, root,
455                                     btrfs_file_extent_disk_blocknr(fi),
456                                     btrfs_file_extent_disk_num_blocks(fi));
457                         BUG_ON(ret);
458                 } else {
459                         blocknr = btrfs_node_blockptr(buf_node, i);
460                         ret = btrfs_inc_extent_ref(trans, root, blocknr, 1);
461                         BUG_ON(ret);
462                 }
463         }
464         return 0;
465 }
466
467 static int write_one_cache_group(struct btrfs_trans_handle *trans,
468                                  struct btrfs_root *root,
469                                  struct btrfs_path *path,
470                                  struct btrfs_block_group_cache *cache)
471 {
472         int ret;
473         int pending_ret;
474         struct btrfs_root *extent_root = root->fs_info->extent_root;
475         struct btrfs_block_group_item *bi;
476         struct btrfs_key ins;
477
478         find_free_extent(trans, extent_root, 0, 0, (u64)-1, &ins, 0);
479         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
480         BUG_ON(ret);
481         bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
482                             struct btrfs_block_group_item);
483         memcpy(bi, &cache->item, sizeof(*bi));
484         mark_buffer_dirty(path->nodes[0]);
485         btrfs_release_path(extent_root, path);
486
487         finish_current_insert(trans, extent_root);
488         pending_ret = del_pending_extents(trans, extent_root);
489         if (ret)
490                 return ret;
491         if (pending_ret)
492                 return pending_ret;
493         if (cache->data)
494                 cache->last_alloc = cache->first_free;
495         return 0;
496
497 }
498
499 static int write_dirty_block_radix(struct btrfs_trans_handle *trans,
500                                    struct btrfs_root *root,
501                                    struct radix_tree_root *radix)
502 {
503         struct btrfs_block_group_cache *cache[8];
504         int ret;
505         int err = 0;
506         int werr = 0;
507         int i;
508         struct btrfs_path *path;
509
510         path = btrfs_alloc_path();
511         if (!path)
512                 return -ENOMEM;
513
514         while(1) {
515                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
516                                                  0, ARRAY_SIZE(cache),
517                                                  BTRFS_BLOCK_GROUP_DIRTY);
518                 if (!ret)
519                         break;
520                 for (i = 0; i < ret; i++) {
521                         radix_tree_tag_clear(radix, cache[i]->key.objectid +
522                                              cache[i]->key.offset - 1,
523                                              BTRFS_BLOCK_GROUP_DIRTY);
524                         err = write_one_cache_group(trans, root,
525                                                     path, cache[i]);
526                         if (err)
527                                 werr = err;
528                 }
529         }
530         btrfs_free_path(path);
531         return werr;
532 }
533
534 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
535                                    struct btrfs_root *root)
536 {
537         int ret;
538         int ret2;
539         ret = write_dirty_block_radix(trans, root,
540                                       &root->fs_info->block_group_radix);
541         ret2 = write_dirty_block_radix(trans, root,
542                                       &root->fs_info->block_group_data_radix);
543         if (ret)
544                 return ret;
545         if (ret2)
546                 return ret2;
547         return 0;
548 }
549
550 static int update_block_group(struct btrfs_trans_handle *trans,
551                               struct btrfs_root *root,
552                               u64 blocknr, u64 num, int alloc, int mark_free)
553 {
554         struct btrfs_block_group_cache *cache;
555         struct btrfs_fs_info *info = root->fs_info;
556         u64 total = num;
557         u64 old_val;
558         u64 block_in_group;
559         u64 i;
560
561         while(total) {
562                 cache = lookup_block_group(info, blocknr);
563                 if (!cache) {
564                         printk(KERN_CRIT "blocknr %Lu lookup failed\n",
565                                blocknr);
566                         return -1;
567                 }
568                 block_in_group = blocknr - cache->key.objectid;
569                 WARN_ON(block_in_group > cache->key.offset);
570                 radix_tree_tag_set(cache->radix, cache->key.objectid +
571                                    cache->key.offset - 1,
572                                    BTRFS_BLOCK_GROUP_DIRTY);
573
574                 old_val = btrfs_block_group_used(&cache->item);
575                 num = min(total, cache->key.offset - block_in_group);
576                 if (alloc) {
577                         old_val += num;
578                         if (blocknr > cache->last_alloc)
579                                 cache->last_alloc = blocknr;
580                         if (!cache->data) {
581                                 for (i = 0; i < num; i++) {
582                                         clear_radix_bit(&info->extent_map_radix,
583                                                         blocknr + i);
584                                 }
585                         }
586                 } else {
587                         old_val -= num;
588                         if (blocknr < cache->first_free)
589                                 cache->first_free = blocknr;
590                         if (!cache->data && mark_free) {
591                                 for (i = 0; i < num; i++) {
592                                         set_radix_bit(&info->extent_map_radix,
593                                                       blocknr + i);
594                                 }
595                         }
596                         if (old_val < (cache->key.offset * 5) / 10 &&
597                             old_val + num >= (cache->key.offset * 5) / 10) {
598 printk("group %Lu now available\n", cache->key.objectid);
599                                 radix_tree_tag_set(cache->radix,
600                                                    cache->key.objectid +
601                                                    cache->key.offset - 1,
602                                                    BTRFS_BLOCK_GROUP_AVAIL);
603                         }
604                 }
605                 btrfs_set_block_group_used(&cache->item, old_val);
606                 total -= num;
607                 blocknr += num;
608         }
609         return 0;
610 }
611
612 static int try_remove_page(struct address_space *mapping, unsigned long index)
613 {
614         int ret;
615         ret = invalidate_mapping_pages(mapping, index, index);
616         return ret;
617 }
618
619 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
620                                btrfs_root *root)
621 {
622         unsigned long gang[8];
623         struct inode *btree_inode = root->fs_info->btree_inode;
624         struct btrfs_block_group_cache *block_group;
625         u64 first = 0;
626         int ret;
627         int i;
628         struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
629         struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix;
630
631         while(1) {
632                 ret = find_first_radix_bit(pinned_radix, gang, 0,
633                                            ARRAY_SIZE(gang));
634                 if (!ret)
635                         break;
636                 if (!first)
637                         first = gang[0];
638                 for (i = 0; i < ret; i++) {
639                         clear_radix_bit(pinned_radix, gang[i]);
640                         block_group = lookup_block_group(root->fs_info,
641                                                          gang[i]);
642                         if (block_group) {
643                                 WARN_ON(block_group->pinned == 0);
644                                 block_group->pinned--;
645                                 if (gang[i] < block_group->last_alloc)
646                                         block_group->last_alloc = gang[i];
647                                 if (gang[i] < block_group->last_prealloc)
648                                         block_group->last_prealloc = gang[i];
649                                 if (!block_group->data)
650                                         set_radix_bit(extent_radix, gang[i]);
651                         }
652                         try_remove_page(btree_inode->i_mapping,
653                                         gang[i] << (PAGE_CACHE_SHIFT -
654                                                     btree_inode->i_blkbits));
655                 }
656         }
657         return 0;
658 }
659
660 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
661                                  btrfs_root *extent_root)
662 {
663         struct btrfs_key ins;
664         struct btrfs_extent_item extent_item;
665         int i;
666         int ret;
667         u64 super_blocks_used;
668         struct btrfs_fs_info *info = extent_root->fs_info;
669
670         btrfs_set_extent_refs(&extent_item, 1);
671         ins.offset = 1;
672         ins.flags = 0;
673         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
674         btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid);
675
676         for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) {
677                 ins.objectid = extent_root->fs_info->extent_tree_insert[i];
678                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
679                 btrfs_set_super_blocks_used(info->disk_super,
680                                             super_blocks_used + 1);
681                 ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
682                                         sizeof(extent_item));
683                 BUG_ON(ret);
684         }
685         extent_root->fs_info->extent_tree_insert_nr = 0;
686         extent_root->fs_info->extent_tree_prealloc_nr = 0;
687         return 0;
688 }
689
690 static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
691 {
692         int err;
693         struct btrfs_header *header;
694         struct buffer_head *bh;
695
696         if (!pending) {
697                 bh = btrfs_find_tree_block(root, blocknr);
698                 if (bh) {
699                         if (buffer_uptodate(bh)) {
700                                 u64 transid =
701                                     root->fs_info->running_transaction->transid;
702                                 header = btrfs_buffer_header(bh);
703                                 if (btrfs_header_generation(header) ==
704                                     transid) {
705                                         btrfs_block_release(root, bh);
706                                         return 0;
707                                 }
708                         }
709                         btrfs_block_release(root, bh);
710                 }
711                 err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
712                 if (!err) {
713                         struct btrfs_block_group_cache *cache;
714                         cache = lookup_block_group(root->fs_info, blocknr);
715                         if (cache)
716                                 cache->pinned++;
717                 }
718         } else {
719                 err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
720         }
721         BUG_ON(err < 0);
722         return 0;
723 }
724
725 /*
726  * remove an extent from the root, returns 0 on success
727  */
728 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
729                          *root, u64 blocknr, u64 num_blocks, int pin,
730                          int mark_free)
731 {
732         struct btrfs_path *path;
733         struct btrfs_key key;
734         struct btrfs_fs_info *info = root->fs_info;
735         struct btrfs_root *extent_root = info->extent_root;
736         int ret;
737         struct btrfs_extent_item *ei;
738         struct btrfs_key ins;
739         u32 refs;
740
741         key.objectid = blocknr;
742         key.flags = 0;
743         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
744         key.offset = num_blocks;
745
746         find_free_extent(trans, root, 0, 0, (u64)-1, &ins, 0);
747         path = btrfs_alloc_path();
748         BUG_ON(!path);
749         btrfs_init_path(path);
750
751         ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
752         if (ret) {
753                 printk("failed to find %Lu\n", key.objectid);
754                 btrfs_print_tree(extent_root, extent_root->node);
755                 printk("failed to find %Lu\n", key.objectid);
756                 BUG();
757         }
758         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
759                             struct btrfs_extent_item);
760         BUG_ON(ei->refs == 0);
761         refs = btrfs_extent_refs(ei) - 1;
762         btrfs_set_extent_refs(ei, refs);
763         btrfs_mark_buffer_dirty(path->nodes[0]);
764         if (refs == 0) {
765                 u64 super_blocks_used;
766
767                 if (pin) {
768                         ret = pin_down_block(root, blocknr, 0);
769                         BUG_ON(ret);
770                 }
771
772                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
773                 btrfs_set_super_blocks_used(info->disk_super,
774                                             super_blocks_used - num_blocks);
775                 ret = btrfs_del_item(trans, extent_root, path);
776                 if (ret)
777                         BUG();
778                 ret = update_block_group(trans, root, blocknr, num_blocks, 0,
779                                          mark_free);
780                 BUG_ON(ret);
781         }
782         btrfs_free_path(path);
783         finish_current_insert(trans, extent_root);
784         return ret;
785 }
786
787 /*
788  * find all the blocks marked as pending in the radix tree and remove
789  * them from the extent map
790  */
791 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
792                                btrfs_root *extent_root)
793 {
794         int ret;
795         int wret;
796         int err = 0;
797         unsigned long gang[4];
798         int i;
799         struct radix_tree_root *pending_radix;
800         struct radix_tree_root *pinned_radix;
801         struct btrfs_block_group_cache *cache;
802
803         pending_radix = &extent_root->fs_info->pending_del_radix;
804         pinned_radix = &extent_root->fs_info->pinned_radix;
805
806         while(1) {
807                 ret = find_first_radix_bit(pending_radix, gang, 0,
808                                            ARRAY_SIZE(gang));
809                 if (!ret)
810                         break;
811                 for (i = 0; i < ret; i++) {
812                         wret = set_radix_bit(pinned_radix, gang[i]);
813                         if (wret == 0) {
814                                 cache = lookup_block_group(extent_root->fs_info,
815                                                            gang[i]);
816                                 if (cache)
817                                         cache->pinned++;
818                         }
819                         if (wret < 0) {
820                                 printk(KERN_CRIT "set_radix_bit, err %d\n",
821                                        wret);
822                                 BUG_ON(wret < 0);
823                         }
824                         wret = clear_radix_bit(pending_radix, gang[i]);
825                         BUG_ON(wret);
826                         wret = __free_extent(trans, extent_root,
827                                              gang[i], 1, 0, 0);
828                         if (wret)
829                                 err = wret;
830                 }
831         }
832         return err;
833 }
834
835 /*
836  * remove an extent from the root, returns 0 on success
837  */
838 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
839                       *root, u64 blocknr, u64 num_blocks, int pin)
840 {
841         struct btrfs_root *extent_root = root->fs_info->extent_root;
842         int pending_ret;
843         int ret;
844
845         if (root == extent_root) {
846                 pin_down_block(root, blocknr, 1);
847                 return 0;
848         }
849         ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0);
850         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
851         return ret ? ret : pending_ret;
852 }
853
854 /*
855  * walks the btree of allocated extents and find a hole of a given size.
856  * The key ins is changed to record the hole:
857  * ins->objectid == block start
858  * ins->flags = BTRFS_EXTENT_ITEM_KEY
859  * ins->offset == number of blocks
860  * Any available blocks before search_start are skipped.
861  */
862 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
863                             *orig_root, u64 num_blocks, u64 search_start, u64
864                             search_end, struct btrfs_key *ins, int data)
865 {
866         struct btrfs_path *path;
867         struct btrfs_key key;
868         int ret;
869         u64 hole_size = 0;
870         int slot = 0;
871         u64 last_block = 0;
872         u64 test_block;
873         u64 orig_search_start = search_start;
874         int start_found;
875         struct btrfs_leaf *l;
876         struct btrfs_root * root = orig_root->fs_info->extent_root;
877         struct btrfs_fs_info *info = root->fs_info;
878         int total_needed = num_blocks;
879         int total_found = 0;
880         int fill_prealloc = 0;
881         int level;
882         struct btrfs_block_group_cache *block_group;
883         int full_scan = 0;
884         u64 limit;
885
886         path = btrfs_alloc_path();
887         ins->flags = 0;
888         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
889
890         level = btrfs_header_level(btrfs_buffer_header(root->node));
891         if (num_blocks == 0) {
892                 fill_prealloc = 1;
893                 num_blocks = 1;
894                 total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3;
895         }
896         if (search_end == (u64)-1)
897                 search_end = btrfs_super_total_blocks(info->disk_super);
898         if (search_start) {
899                 block_group = lookup_block_group(info, search_start);
900                 block_group = btrfs_find_block_group(root, block_group,
901                                                      search_start, data, 1);
902         } else {
903                 block_group = btrfs_find_block_group(root,
904                                                      trans->block_group, 0,
905                                                      data, 1);
906         }
907
908 check_failed:
909         if (!full_scan && block_group->data != data)
910                 WARN_ON(1);
911
912         if (!data)
913                 search_start = find_search_start(root, &block_group,
914                                                  search_start, total_needed);
915         else
916                 search_start = max(block_group->last_alloc, search_start);
917
918         btrfs_init_path(path);
919         ins->objectid = search_start;
920         ins->offset = 0;
921         start_found = 0;
922
923         ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
924         if (ret < 0)
925                 goto error;
926
927         if (path->slots[0] > 0) {
928                 path->slots[0]--;
929         }
930
931         l = btrfs_buffer_leaf(path->nodes[0]);
932         btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key);
933         /*
934          * a rare case, go back one key if we hit a block group item
935          * instead of an extent item
936          */
937         if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
938             key.objectid + key.offset >= search_start) {
939                 ins->objectid = key.objectid;
940                 ins->offset = key.offset - 1;
941                 btrfs_release_path(root, path);
942                 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
943                 if (ret < 0)
944                         goto error;
945
946                 if (path->slots[0] > 0) {
947                         path->slots[0]--;
948                 }
949         }
950
951         while (1) {
952                 l = btrfs_buffer_leaf(path->nodes[0]);
953                 slot = path->slots[0];
954                 if (slot >= btrfs_header_nritems(&l->header)) {
955                         if (fill_prealloc) {
956                                 info->extent_tree_prealloc_nr = 0;
957                                 total_found = 0;
958                         }
959                         if (start_found)
960                                 limit = last_block +
961                                         block_group->key.offset / 2;
962                         else
963                                 limit = search_start +
964                                         block_group->key.offset / 2;
965                         ret = btrfs_next_leaf(root, path);
966                         if (ret == 0)
967                                 continue;
968                         if (ret < 0)
969                                 goto error;
970                         if (!start_found) {
971                                 ins->objectid = search_start;
972                                 ins->offset = search_end - search_start;
973                                 start_found = 1;
974                                 goto check_pending;
975                         }
976                         ins->objectid = last_block > search_start ?
977                                         last_block : search_start;
978                         ins->offset = search_end - ins->objectid;
979                         goto check_pending;
980                 }
981
982                 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
983                 if (key.objectid >= search_start && key.objectid > last_block &&
984                     start_found) {
985                         if (last_block < search_start)
986                                 last_block = search_start;
987                         hole_size = key.objectid - last_block;
988                         if (hole_size >= num_blocks) {
989                                 ins->objectid = last_block;
990                                 ins->offset = hole_size;
991                                 goto check_pending;
992                         }
993                 }
994
995                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
996                         goto next;
997
998                 start_found = 1;
999                 last_block = key.objectid + key.offset;
1000                 if (last_block >= block_group->key.objectid +
1001                     block_group->key.offset) {
1002                         btrfs_release_path(root, path);
1003                         search_start = block_group->key.objectid +
1004                                 block_group->key.offset * 2;
1005                         goto new_group;
1006                 }
1007 next:
1008                 path->slots[0]++;
1009                 cond_resched();
1010         }
1011         // FIXME -ENOSPC
1012 check_pending:
1013         /* we have to make sure we didn't find an extent that has already
1014          * been allocated by the map tree or the original allocation
1015          */
1016         btrfs_release_path(root, path);
1017         BUG_ON(ins->objectid < search_start);
1018
1019         if (ins->objectid + num_blocks >= search_end) {
1020                 if (full_scan)
1021                         return -ENOSPC;
1022                 search_start = orig_search_start;
1023                 full_scan = 1;
1024                 goto new_group;
1025         }
1026         for (test_block = ins->objectid;
1027              test_block < ins->objectid + num_blocks; test_block++) {
1028                 if (test_radix_bit(&info->pinned_radix, test_block)) {
1029                         search_start = test_block + 1;
1030                         goto new_group;
1031                 }
1032         }
1033         if (!fill_prealloc && info->extent_tree_insert_nr) {
1034                 u64 last =
1035                   info->extent_tree_insert[info->extent_tree_insert_nr - 1];
1036                 if (ins->objectid + num_blocks >
1037                     info->extent_tree_insert[0] &&
1038                     ins->objectid <= last) {
1039                         search_start = last + 1;
1040                         WARN_ON(!full_scan);
1041                         goto new_group;
1042                 }
1043         }
1044         if (!fill_prealloc && info->extent_tree_prealloc_nr) {
1045                 u64 first =
1046                   info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1];
1047                 if (ins->objectid + num_blocks > first &&
1048                     ins->objectid <= info->extent_tree_prealloc[0]) {
1049                         search_start = info->extent_tree_prealloc[0] + 1;
1050                         WARN_ON(!full_scan);
1051                         goto new_group;
1052                 }
1053         }
1054         if (fill_prealloc) {
1055                 int nr;
1056                 test_block = ins->objectid;
1057                 if (test_block - info->extent_tree_prealloc[total_needed - 1] >=
1058                     leaf_range(root)) {
1059                         total_found = 0;
1060                         info->extent_tree_prealloc_nr = total_found;
1061                 }
1062                 while(test_block < ins->objectid + ins->offset &&
1063                       total_found < total_needed) {
1064                         nr = total_needed - total_found - 1;
1065                         BUG_ON(nr < 0);
1066                         info->extent_tree_prealloc[nr] = test_block;
1067                         total_found++;
1068                         test_block++;
1069                 }
1070                 if (total_found < total_needed) {
1071                         search_start = test_block;
1072                         goto new_group;
1073                 }
1074                 info->extent_tree_prealloc_nr = total_found;
1075         }
1076         if (!data) {
1077                 block_group = lookup_block_group(info, ins->objectid);
1078                 if (block_group) {
1079                         if (fill_prealloc)
1080                                 block_group->last_prealloc =
1081                                      info->extent_tree_prealloc[total_needed-1];
1082                         else
1083                                 trans->block_group = block_group;
1084                 }
1085         }
1086         ins->offset = num_blocks;
1087         btrfs_free_path(path);
1088         return 0;
1089
1090 new_group:
1091         if (search_start + num_blocks >= search_end) {
1092                 search_start = orig_search_start;
1093 printk("doing full scan!\n");
1094                 full_scan = 1;
1095         }
1096         block_group = lookup_block_group(info, search_start);
1097         if (!full_scan)
1098                 block_group = btrfs_find_block_group(root, block_group,
1099                                                      search_start, data, 0);
1100         cond_resched();
1101         goto check_failed;
1102
1103 error:
1104         btrfs_release_path(root, path);
1105         btrfs_free_path(path);
1106         return ret;
1107 }
1108 /*
1109  * finds a free extent and does all the dirty work required for allocation
1110  * returns the key for the extent through ins, and a tree buffer for
1111  * the first block of the extent through buf.
1112  *
1113  * returns 0 if everything worked, non-zero otherwise.
1114  */
1115 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1116                        struct btrfs_root *root, u64 owner,
1117                        u64 num_blocks, u64 search_start,
1118                        u64 search_end, struct btrfs_key *ins, int data)
1119 {
1120         int ret;
1121         int pending_ret;
1122         u64 super_blocks_used;
1123         struct btrfs_fs_info *info = root->fs_info;
1124         struct btrfs_root *extent_root = info->extent_root;
1125         struct btrfs_extent_item extent_item;
1126         struct btrfs_key prealloc_key;
1127
1128         btrfs_set_extent_refs(&extent_item, 1);
1129         btrfs_set_extent_owner(&extent_item, owner);
1130
1131         if (root == extent_root) {
1132                 int nr;
1133                 BUG_ON(info->extent_tree_prealloc_nr == 0);
1134                 BUG_ON(num_blocks != 1);
1135                 ins->offset = 1;
1136                 info->extent_tree_prealloc_nr--;
1137                 nr = info->extent_tree_prealloc_nr;
1138                 ins->objectid = info->extent_tree_prealloc[nr];
1139                 info->extent_tree_insert[info->extent_tree_insert_nr++] =
1140                         ins->objectid;
1141                 ret = update_block_group(trans, root,
1142                                          ins->objectid, ins->offset, 1, 0);
1143                 BUG_ON(ret);
1144                 return 0;
1145         }
1146
1147         /*
1148          * if we're doing a data allocation, preallocate room in the
1149          * extent tree first.  This way the extent tree blocks end up
1150          * in the correct block group.
1151          */
1152         if (data) {
1153                 ret = find_free_extent(trans, root, 0, 0,
1154                                        search_end, &prealloc_key, 0);
1155                 if (ret) {
1156                         return ret;
1157                 }
1158                 if (prealloc_key.objectid + prealloc_key.offset >= search_end) {
1159                         int nr = info->extent_tree_prealloc_nr;
1160                         search_end = info->extent_tree_prealloc[nr - 1] - 1;
1161                 } else {
1162                         search_start = info->extent_tree_prealloc[0] + 1;
1163                 }
1164         }
1165         /* do the real allocation */
1166         ret = find_free_extent(trans, root, num_blocks, search_start,
1167                                search_end, ins, data);
1168         if (ret) {
1169                 return ret;
1170         }
1171
1172         /*
1173          * if we're doing a metadata allocation, preallocate space in the
1174          * extent tree second.  This way, we don't create a tiny hole
1175          * in the allocation map between any unused preallocation blocks
1176          * and the metadata block we're actually allocating.  On disk,
1177          * it'll go:
1178          * [block we've allocated], [used prealloc 1], [ unused prealloc ]
1179          * The unused prealloc will get reused the next time around.
1180          */
1181         if (!data) {
1182                 if (ins->objectid + ins->offset >= search_end)
1183                         search_end = ins->objectid - 1;
1184                 else
1185                         search_start = ins->objectid + ins->offset;
1186
1187                 ret = find_free_extent(trans, root, 0, search_start,
1188                                        search_end, &prealloc_key, 0);
1189                 if (ret) {
1190                         return ret;
1191                 }
1192         }
1193
1194         super_blocks_used = btrfs_super_blocks_used(info->disk_super);
1195         btrfs_set_super_blocks_used(info->disk_super, super_blocks_used +
1196                                     num_blocks);
1197         ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1198                                 sizeof(extent_item));
1199
1200         finish_current_insert(trans, extent_root);
1201         pending_ret = del_pending_extents(trans, extent_root);
1202         if (ret) {
1203                 return ret;
1204         }
1205         if (pending_ret) {
1206                 return pending_ret;
1207         }
1208         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
1209         return 0;
1210 }
1211
1212 /*
1213  * helper function to allocate a block for a given tree
1214  * returns the tree buffer or NULL.
1215  */
1216 struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1217                                            struct btrfs_root *root, u64 hint)
1218 {
1219         struct btrfs_key ins;
1220         int ret;
1221         struct buffer_head *buf;
1222
1223         ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1224                                  1, hint, (unsigned long)-1, &ins, 0);
1225         if (ret) {
1226                 BUG();
1227                 return NULL;
1228         }
1229         BUG_ON(ret);
1230         buf = btrfs_find_create_tree_block(root, ins.objectid);
1231         set_buffer_uptodate(buf);
1232         set_buffer_checked(buf);
1233         set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
1234         return buf;
1235 }
1236
1237 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1238                          struct btrfs_root *root, struct buffer_head *cur)
1239 {
1240         struct btrfs_disk_key *key;
1241         struct btrfs_leaf *leaf;
1242         struct btrfs_file_extent_item *fi;
1243         int i;
1244         int nritems;
1245         int ret;
1246
1247         BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
1248         leaf = btrfs_buffer_leaf(cur);
1249         nritems = btrfs_header_nritems(&leaf->header);
1250         for (i = 0; i < nritems; i++) {
1251                 key = &leaf->items[i].key;
1252                 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
1253                         continue;
1254                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1255                 if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE)
1256                         continue;
1257                 /*
1258                  * FIXME make sure to insert a trans record that
1259                  * repeats the snapshot del on crash
1260                  */
1261                 ret = btrfs_free_extent(trans, root,
1262                                         btrfs_file_extent_disk_blocknr(fi),
1263                                         btrfs_file_extent_disk_num_blocks(fi),
1264                                         0);
1265                 BUG_ON(ret);
1266         }
1267         return 0;
1268 }
1269
1270 /*
1271  * helper function for drop_snapshot, this walks down the tree dropping ref
1272  * counts as it goes.
1273  */
1274 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1275                           *root, struct btrfs_path *path, int *level)
1276 {
1277         struct buffer_head *next;
1278         struct buffer_head *cur;
1279         u64 blocknr;
1280         int ret;
1281         u32 refs;
1282
1283         WARN_ON(*level < 0);
1284         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1285         ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]),
1286                                1, &refs);
1287         BUG_ON(ret);
1288         if (refs > 1)
1289                 goto out;
1290         /*
1291          * walk down to the last node level and free all the leaves
1292          */
1293         while(*level >= 0) {
1294                 WARN_ON(*level < 0);
1295                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1296                 cur = path->nodes[*level];
1297                 if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
1298                         WARN_ON(1);
1299                 if (path->slots[*level] >=
1300                     btrfs_header_nritems(btrfs_buffer_header(cur)))
1301                         break;
1302                 if (*level == 0) {
1303                         ret = drop_leaf_ref(trans, root, cur);
1304                         BUG_ON(ret);
1305                         break;
1306                 }
1307                 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
1308                                               path->slots[*level]);
1309                 ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
1310                 BUG_ON(ret);
1311                 if (refs != 1) {
1312                         path->slots[*level]++;
1313                         ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
1314                         BUG_ON(ret);
1315                         continue;
1316                 }
1317                 next = read_tree_block(root, blocknr);
1318                 WARN_ON(*level <= 0);
1319                 if (path->nodes[*level-1])
1320                         btrfs_block_release(root, path->nodes[*level-1]);
1321                 path->nodes[*level-1] = next;
1322                 *level = btrfs_header_level(btrfs_buffer_header(next));
1323                 path->slots[*level] = 0;
1324         }
1325 out:
1326         WARN_ON(*level < 0);
1327         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1328         ret = btrfs_free_extent(trans, root,
1329                                 bh_blocknr(path->nodes[*level]), 1, 1);
1330         btrfs_block_release(root, path->nodes[*level]);
1331         path->nodes[*level] = NULL;
1332         *level += 1;
1333         BUG_ON(ret);
1334         return 0;
1335 }
1336
1337 /*
1338  * helper for dropping snapshots.  This walks back up the tree in the path
1339  * to find the first node higher up where we haven't yet gone through
1340  * all the slots
1341  */
1342 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1343                         *root, struct btrfs_path *path, int *level)
1344 {
1345         int i;
1346         int slot;
1347         int ret;
1348         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1349                 slot = path->slots[i];
1350                 if (slot < btrfs_header_nritems(
1351                     btrfs_buffer_header(path->nodes[i])) - 1) {
1352                         path->slots[i]++;
1353                         *level = i;
1354                         return 0;
1355                 } else {
1356                         ret = btrfs_free_extent(trans, root,
1357                                                 bh_blocknr(path->nodes[*level]),
1358                                                 1, 1);
1359                         BUG_ON(ret);
1360                         btrfs_block_release(root, path->nodes[*level]);
1361                         path->nodes[*level] = NULL;
1362                         *level = i + 1;
1363                 }
1364         }
1365         return 1;
1366 }
1367
1368 /*
1369  * drop the reference count on the tree rooted at 'snap'.  This traverses
1370  * the tree freeing any blocks that have a ref count of zero after being
1371  * decremented.
1372  */
1373 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1374                         *root, struct buffer_head *snap)
1375 {
1376         int ret = 0;
1377         int wret;
1378         int level;
1379         struct btrfs_path *path;
1380         int i;
1381         int orig_level;
1382
1383         path = btrfs_alloc_path();
1384         BUG_ON(!path);
1385         btrfs_init_path(path);
1386
1387         level = btrfs_header_level(btrfs_buffer_header(snap));
1388         orig_level = level;
1389         path->nodes[level] = snap;
1390         path->slots[level] = 0;
1391         while(1) {
1392                 wret = walk_down_tree(trans, root, path, &level);
1393                 if (wret > 0)
1394                         break;
1395                 if (wret < 0)
1396                         ret = wret;
1397
1398                 wret = walk_up_tree(trans, root, path, &level);
1399                 if (wret > 0)
1400                         break;
1401                 if (wret < 0)
1402                         ret = wret;
1403                 btrfs_btree_balance_dirty(root);
1404         }
1405         for (i = 0; i <= orig_level; i++) {
1406                 if (path->nodes[i]) {
1407                         btrfs_block_release(root, path->nodes[i]);
1408                 }
1409         }
1410         btrfs_free_path(path);
1411         return ret;
1412 }
1413
1414 static int free_block_group_radix(struct radix_tree_root *radix)
1415 {
1416         int ret;
1417         struct btrfs_block_group_cache *cache[8];
1418         int i;
1419
1420         while(1) {
1421                 ret = radix_tree_gang_lookup(radix, (void **)cache, 0,
1422                                              ARRAY_SIZE(cache));
1423                 if (!ret)
1424                         break;
1425                 for (i = 0; i < ret; i++) {
1426                         radix_tree_delete(radix, cache[i]->key.objectid +
1427                                           cache[i]->key.offset - 1);
1428                         kfree(cache[i]);
1429                 }
1430         }
1431         return 0;
1432 }
1433
1434 int btrfs_free_block_groups(struct btrfs_fs_info *info)
1435 {
1436         int ret;
1437         int ret2;
1438         unsigned long gang[16];
1439         int i;
1440
1441         ret = free_block_group_radix(&info->block_group_radix);
1442         ret2 = free_block_group_radix(&info->block_group_data_radix);
1443         if (ret)
1444                 return ret;
1445         if (ret2)
1446                 return ret2;
1447
1448         while(1) {
1449                 ret = find_first_radix_bit(&info->extent_map_radix,
1450                                            gang, 0, ARRAY_SIZE(gang));
1451                 if (!ret)
1452                         break;
1453                 for (i = 0; i < ret; i++) {
1454                         clear_radix_bit(&info->extent_map_radix, gang[i]);
1455                 }
1456         }
1457         return 0;
1458 }
1459
1460 int btrfs_read_block_groups(struct btrfs_root *root)
1461 {
1462         struct btrfs_path *path;
1463         int ret;
1464         int err = 0;
1465         struct btrfs_block_group_item *bi;
1466         struct btrfs_block_group_cache *cache;
1467         struct btrfs_fs_info *info = root->fs_info;
1468         struct radix_tree_root *radix;
1469         struct btrfs_key key;
1470         struct btrfs_key found_key;
1471         struct btrfs_leaf *leaf;
1472         u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize;
1473         u64 used;
1474         u64 nr = 0;
1475
1476         root = info->extent_root;
1477         key.objectid = 0;
1478         key.offset = group_size_blocks;
1479         key.flags = 0;
1480         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1481
1482         path = btrfs_alloc_path();
1483         if (!path)
1484                 return -ENOMEM;
1485
1486         while(1) {
1487                 ret = btrfs_search_slot(NULL, info->extent_root,
1488                                         &key, path, 0, 0);
1489                 if (ret != 0) {
1490                         err = ret;
1491                         break;
1492                 }
1493                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1494                 btrfs_disk_key_to_cpu(&found_key,
1495                                       &leaf->items[path->slots[0]].key);
1496                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1497                 if (!cache) {
1498                         err = -1;
1499                         break;
1500                 }
1501
1502                 if (nr % 3)
1503                         radix = &info->block_group_data_radix;
1504                 else
1505                         radix = &info->block_group_radix;
1506
1507                 bi = btrfs_item_ptr(leaf, path->slots[0],
1508                                     struct btrfs_block_group_item);
1509                 memcpy(&cache->item, bi, sizeof(*bi));
1510                 memcpy(&cache->key, &found_key, sizeof(found_key));
1511                 cache->last_alloc = cache->key.objectid;
1512                 cache->first_free = cache->key.objectid;
1513                 cache->last_prealloc = cache->key.objectid;
1514                 cache->pinned = 0;
1515                 cache->cached = 0;
1516
1517                 if (nr % 3)
1518                         cache->data = 1;
1519                 else
1520                         cache->data = 0;
1521                 cache->radix = radix;
1522
1523                 key.objectid = found_key.objectid + found_key.offset;
1524                 btrfs_release_path(root, path);
1525                 ret = radix_tree_insert(radix, found_key.objectid +
1526                                         found_key.offset - 1,
1527                                         (void *)cache);
1528                 BUG_ON(ret);
1529                 used = btrfs_block_group_used(bi);
1530                 if (used < (key.offset * 8) / 10) {
1531                         radix_tree_tag_set(radix, found_key.objectid +
1532                                            found_key.offset - 1,
1533                                            BTRFS_BLOCK_GROUP_AVAIL);
1534                 }
1535                 if (key.objectid >=
1536                     btrfs_super_total_blocks(info->disk_super))
1537                         break;
1538                 nr++;
1539         }
1540
1541         btrfs_free_path(path);
1542         return 0;
1543 }