Btrfs: Allow large data extents in a single file to span into metadata block groups
[linux-2.6-block.git] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "print-tree.h"
23 #include "transaction.h"
24
25 #define BLOCK_GROUP_DATA EXTENT_WRITEBACK
26 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
27 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
28
29 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
30                                  btrfs_root *extent_root);
31 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
32                                btrfs_root *extent_root);
33
34 static int cache_block_group(struct btrfs_root *root,
35                              struct btrfs_block_group_cache *block_group)
36 {
37         struct btrfs_path *path;
38         int ret;
39         struct btrfs_key key;
40         struct extent_buffer *leaf;
41         struct extent_map_tree *free_space_cache;
42         int slot;
43         u64 last = 0;
44         u64 hole_size;
45         u64 first_free;
46         int found = 0;
47
48         root = root->fs_info->extent_root;
49         free_space_cache = &root->fs_info->free_space_cache;
50
51         if (block_group->cached)
52                 return 0;
53
54         path = btrfs_alloc_path();
55         if (!path)
56                 return -ENOMEM;
57
58         path->reada = 2;
59         first_free = block_group->key.objectid;
60         key.objectid = block_group->key.objectid;
61         key.offset = 0;
62
63         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
64         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
65
66         if (ret < 0)
67                 return ret;
68
69         if (ret && path->slots[0] > 0)
70                 path->slots[0]--;
71
72         while(1) {
73                 leaf = path->nodes[0];
74                 slot = path->slots[0];
75                 if (slot >= btrfs_header_nritems(leaf)) {
76                         ret = btrfs_next_leaf(root, path);
77                         if (ret < 0)
78                                 goto err;
79                         if (ret == 0) {
80                                 continue;
81                         } else {
82                                 break;
83                         }
84                 }
85
86                 btrfs_item_key_to_cpu(leaf, &key, slot);
87                 if (key.objectid < block_group->key.objectid) {
88                         if (key.objectid + key.offset > first_free)
89                                 first_free = key.objectid + key.offset;
90                         goto next;
91                 }
92
93                 if (key.objectid >= block_group->key.objectid +
94                     block_group->key.offset) {
95                         break;
96                 }
97
98                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
99                         if (!found) {
100                                 last = first_free;
101                                 found = 1;
102                         }
103                         if (key.objectid > last) {
104                                 hole_size = key.objectid - last;
105                                 set_extent_dirty(free_space_cache, last,
106                                                  last + hole_size - 1,
107                                                  GFP_NOFS);
108                         }
109                         last = key.objectid + key.offset;
110                 }
111 next:
112                 path->slots[0]++;
113         }
114
115         if (!found)
116                 last = first_free;
117         if (block_group->key.objectid +
118             block_group->key.offset > last) {
119                 hole_size = block_group->key.objectid +
120                         block_group->key.offset - last;
121                 set_extent_dirty(free_space_cache, last,
122                                  last + hole_size - 1, GFP_NOFS);
123         }
124         block_group->cached = 1;
125 err:
126         btrfs_free_path(path);
127         return 0;
128 }
129
130 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
131                                                          btrfs_fs_info *info,
132                                                          u64 bytenr)
133 {
134         struct extent_map_tree *block_group_cache;
135         struct btrfs_block_group_cache *block_group = NULL;
136         u64 ptr;
137         u64 start;
138         u64 end;
139         int ret;
140
141         block_group_cache = &info->block_group_cache;
142         ret = find_first_extent_bit(block_group_cache,
143                                     bytenr, &start, &end,
144                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
145         if (ret) {
146                 return NULL;
147         }
148         ret = get_state_private(block_group_cache, start, &ptr);
149         if (ret)
150                 return NULL;
151
152         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
153
154
155         if (block_group->key.objectid <= bytenr && bytenr <=
156             block_group->key.objectid + block_group->key.offset)
157                 return block_group;
158
159         return NULL;
160 }
161
162 static u64 find_search_start(struct btrfs_root *root,
163                              struct btrfs_block_group_cache **cache_ret,
164                              u64 search_start, int num, int data)
165 {
166         int ret;
167         struct btrfs_block_group_cache *cache = *cache_ret;
168         u64 last;
169         u64 start = 0;
170         u64 end = 0;
171         u64 cache_miss = 0;
172         int wrapped = 0;
173
174 again:
175         ret = cache_block_group(root, cache);
176         if (ret)
177                 goto out;
178
179         last = max(search_start, cache->key.objectid);
180
181         while(1) {
182                 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
183                                             last, &start, &end, EXTENT_DIRTY);
184                 if (ret) {
185                         if (!cache_miss)
186                                 cache_miss = last;
187                         goto new_group;
188                 }
189
190                 start = max(last, start);
191                 last = end + 1;
192                 if (last - start < num) {
193                         if (last == cache->key.objectid + cache->key.offset)
194                                 cache_miss = start;
195                         continue;
196                 }
197                 if (data != BTRFS_BLOCK_GROUP_MIXED &&
198                     start + num >= cache->key.objectid + cache->key.offset)
199                         goto new_group;
200                 return start;
201         }
202 out:
203         return search_start;
204
205 new_group:
206         last = cache->key.objectid + cache->key.offset;
207 wrapped:
208         cache = btrfs_lookup_block_group(root->fs_info, last);
209         if (!cache) {
210                 if (!wrapped) {
211                         wrapped = 1;
212                         last = search_start;
213                         data = BTRFS_BLOCK_GROUP_MIXED;
214                         goto wrapped;
215                 }
216                 return search_start;
217         }
218         if (cache_miss && !cache->cached) {
219                 cache_block_group(root, cache);
220                 last = cache_miss;
221
222                 cache = btrfs_lookup_block_group(root->fs_info, last);
223         }
224         cache = btrfs_find_block_group(root, cache, last, data, 0);
225         *cache_ret = cache;
226         cache_miss = 0;
227         goto again;
228 }
229
230 static u64 div_factor(u64 num, int factor)
231 {
232         if (factor == 10)
233                 return num;
234         num *= factor;
235         do_div(num, 10);
236         return num;
237 }
238
239 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
240                                                  struct btrfs_block_group_cache
241                                                  *hint, u64 search_start,
242                                                  int data, int owner)
243 {
244         struct btrfs_block_group_cache *cache;
245         struct extent_map_tree *block_group_cache;
246         struct btrfs_block_group_cache *found_group = NULL;
247         struct btrfs_fs_info *info = root->fs_info;
248         u64 used;
249         u64 last = 0;
250         u64 hint_last;
251         u64 start;
252         u64 end;
253         u64 free_check;
254         u64 ptr;
255         int bit;
256         int ret;
257         int full_search = 0;
258         int factor = 8;
259         int data_swap = 0;
260
261         block_group_cache = &info->block_group_cache;
262
263         if (!owner)
264                 factor = 8;
265
266         if (data == BTRFS_BLOCK_GROUP_MIXED) {
267                 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
268                 factor = 10;
269         } else if (data)
270                 bit = BLOCK_GROUP_DATA;
271         else
272                 bit = BLOCK_GROUP_METADATA;
273
274         if (search_start) {
275                 struct btrfs_block_group_cache *shint;
276                 shint = btrfs_lookup_block_group(info, search_start);
277                 if (shint && (shint->data == data ||
278                               shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
279                         used = btrfs_block_group_used(&shint->item);
280                         if (used < div_factor(shint->key.offset, factor)) {
281                                 return shint;
282                         }
283                 }
284         }
285         if (hint && (hint->data == data ||
286                      hint->data == BTRFS_BLOCK_GROUP_MIXED)) {
287                 used = btrfs_block_group_used(&hint->item);
288                 if (used < div_factor(hint->key.offset, factor)) {
289                         return hint;
290                 }
291                 last = hint->key.objectid + hint->key.offset;
292                 hint_last = last;
293         } else {
294                 if (hint)
295                         hint_last = max(hint->key.objectid, search_start);
296                 else
297                         hint_last = search_start;
298
299                 last = hint_last;
300         }
301 again:
302         while(1) {
303                 ret = find_first_extent_bit(block_group_cache, last,
304                                             &start, &end, bit);
305                 if (ret)
306                         break;
307
308                 ret = get_state_private(block_group_cache, start, &ptr);
309                 if (ret)
310                         break;
311
312                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
313                 last = cache->key.objectid + cache->key.offset;
314                 used = btrfs_block_group_used(&cache->item);
315
316                 if (full_search)
317                         free_check = cache->key.offset;
318                 else
319                         free_check = div_factor(cache->key.offset, factor);
320
321                 if (used < free_check) {
322                         found_group = cache;
323                         goto found;
324                 }
325                 cond_resched();
326         }
327         if (!full_search) {
328                 last = search_start;
329                 full_search = 1;
330                 goto again;
331         }
332         if (!data_swap) {
333                 data_swap = 1;
334                 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
335                 last = search_start;
336                 goto again;
337         }
338 found:
339         return found_group;
340 }
341
342 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
343                                 struct btrfs_root *root,
344                                 u64 bytenr, u64 num_bytes)
345 {
346         struct btrfs_path *path;
347         int ret;
348         struct btrfs_key key;
349         struct extent_buffer *l;
350         struct btrfs_extent_item *item;
351         u32 refs;
352
353         WARN_ON(num_bytes < root->sectorsize);
354         path = btrfs_alloc_path();
355         if (!path)
356                 return -ENOMEM;
357
358         key.objectid = bytenr;
359         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
360         key.offset = num_bytes;
361         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
362                                 0, 1);
363         if (ret < 0)
364                 return ret;
365         if (ret != 0) {
366                 BUG();
367         }
368         BUG_ON(ret != 0);
369         l = path->nodes[0];
370         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
371         refs = btrfs_extent_refs(l, item);
372         btrfs_set_extent_refs(l, item, refs + 1);
373         btrfs_mark_buffer_dirty(path->nodes[0]);
374
375         btrfs_release_path(root->fs_info->extent_root, path);
376         btrfs_free_path(path);
377         finish_current_insert(trans, root->fs_info->extent_root);
378         del_pending_extents(trans, root->fs_info->extent_root);
379         return 0;
380 }
381
382 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
383                          struct btrfs_root *root)
384 {
385         finish_current_insert(trans, root->fs_info->extent_root);
386         del_pending_extents(trans, root->fs_info->extent_root);
387         return 0;
388 }
389
390 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
391                              struct btrfs_root *root, u64 bytenr,
392                              u64 num_bytes, u32 *refs)
393 {
394         struct btrfs_path *path;
395         int ret;
396         struct btrfs_key key;
397         struct extent_buffer *l;
398         struct btrfs_extent_item *item;
399
400         WARN_ON(num_bytes < root->sectorsize);
401         path = btrfs_alloc_path();
402         key.objectid = bytenr;
403         key.offset = num_bytes;
404         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
405         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
406                                 0, 0);
407         if (ret < 0)
408                 goto out;
409         if (ret != 0) {
410                 btrfs_print_leaf(root, path->nodes[0]);
411                 printk("failed to find block number %Lu\n", bytenr);
412                 BUG();
413         }
414         l = path->nodes[0];
415         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
416         *refs = btrfs_extent_refs(l, item);
417 out:
418         btrfs_free_path(path);
419         return 0;
420 }
421
422 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
423                        struct btrfs_root *root)
424 {
425         return btrfs_inc_extent_ref(trans, root, root->node->start,
426                                     root->node->len);
427 }
428
429 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
430                   struct extent_buffer *buf)
431 {
432         u64 bytenr;
433         u32 nritems;
434         struct btrfs_key key;
435         struct btrfs_file_extent_item *fi;
436         int i;
437         int level;
438         int ret;
439         int faili;
440         int err;
441
442         if (!root->ref_cows)
443                 return 0;
444
445         level = btrfs_header_level(buf);
446         nritems = btrfs_header_nritems(buf);
447         for (i = 0; i < nritems; i++) {
448                 if (level == 0) {
449                         u64 disk_bytenr;
450                         btrfs_item_key_to_cpu(buf, &key, i);
451                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
452                                 continue;
453                         fi = btrfs_item_ptr(buf, i,
454                                             struct btrfs_file_extent_item);
455                         if (btrfs_file_extent_type(buf, fi) ==
456                             BTRFS_FILE_EXTENT_INLINE)
457                                 continue;
458                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
459                         if (disk_bytenr == 0)
460                                 continue;
461                         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
462                                     btrfs_file_extent_disk_num_bytes(buf, fi));
463                         if (ret) {
464                                 faili = i;
465                                 goto fail;
466                         }
467                 } else {
468                         bytenr = btrfs_node_blockptr(buf, i);
469                         ret = btrfs_inc_extent_ref(trans, root, bytenr,
470                                            btrfs_level_size(root, level - 1));
471                         if (ret) {
472                                 faili = i;
473                                 goto fail;
474                         }
475                 }
476         }
477         return 0;
478 fail:
479         WARN_ON(1);
480         for (i =0; i < faili; i++) {
481                 if (level == 0) {
482                         u64 disk_bytenr;
483                         btrfs_item_key_to_cpu(buf, &key, i);
484                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
485                                 continue;
486                         fi = btrfs_item_ptr(buf, i,
487                                             struct btrfs_file_extent_item);
488                         if (btrfs_file_extent_type(buf, fi) ==
489                             BTRFS_FILE_EXTENT_INLINE)
490                                 continue;
491                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
492                         if (disk_bytenr == 0)
493                                 continue;
494                         err = btrfs_free_extent(trans, root, disk_bytenr,
495                                     btrfs_file_extent_disk_num_bytes(buf,
496                                                                       fi), 0);
497                         BUG_ON(err);
498                 } else {
499                         bytenr = btrfs_node_blockptr(buf, i);
500                         err = btrfs_free_extent(trans, root, bytenr,
501                                         btrfs_level_size(root, level - 1), 0);
502                         BUG_ON(err);
503                 }
504         }
505         return ret;
506 }
507
508 static int write_one_cache_group(struct btrfs_trans_handle *trans,
509                                  struct btrfs_root *root,
510                                  struct btrfs_path *path,
511                                  struct btrfs_block_group_cache *cache)
512 {
513         int ret;
514         int pending_ret;
515         struct btrfs_root *extent_root = root->fs_info->extent_root;
516         unsigned long bi;
517         struct extent_buffer *leaf;
518
519         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
520         if (ret < 0)
521                 goto fail;
522         BUG_ON(ret);
523
524         leaf = path->nodes[0];
525         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
526         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
527         btrfs_mark_buffer_dirty(leaf);
528         btrfs_release_path(extent_root, path);
529 fail:
530         finish_current_insert(trans, extent_root);
531         pending_ret = del_pending_extents(trans, extent_root);
532         if (ret)
533                 return ret;
534         if (pending_ret)
535                 return pending_ret;
536         return 0;
537
538 }
539
540 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
541                                    struct btrfs_root *root)
542 {
543         struct extent_map_tree *block_group_cache;
544         struct btrfs_block_group_cache *cache;
545         int ret;
546         int err = 0;
547         int werr = 0;
548         struct btrfs_path *path;
549         u64 last = 0;
550         u64 start;
551         u64 end;
552         u64 ptr;
553
554         block_group_cache = &root->fs_info->block_group_cache;
555         path = btrfs_alloc_path();
556         if (!path)
557                 return -ENOMEM;
558
559         while(1) {
560                 ret = find_first_extent_bit(block_group_cache, last,
561                                             &start, &end, BLOCK_GROUP_DIRTY);
562                 if (ret)
563                         break;
564
565                 last = end + 1;
566                 ret = get_state_private(block_group_cache, start, &ptr);
567                 if (ret)
568                         break;
569
570                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
571                 err = write_one_cache_group(trans, root,
572                                             path, cache);
573                 /*
574                  * if we fail to write the cache group, we want
575                  * to keep it marked dirty in hopes that a later
576                  * write will work
577                  */
578                 if (err) {
579                         werr = err;
580                         continue;
581                 }
582                 clear_extent_bits(block_group_cache, start, end,
583                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
584         }
585         btrfs_free_path(path);
586         return werr;
587 }
588
589 static int update_block_group(struct btrfs_trans_handle *trans,
590                               struct btrfs_root *root,
591                               u64 bytenr, u64 num_bytes, int alloc,
592                               int mark_free, int data)
593 {
594         struct btrfs_block_group_cache *cache;
595         struct btrfs_fs_info *info = root->fs_info;
596         u64 total = num_bytes;
597         u64 old_val;
598         u64 byte_in_group;
599         u64 start;
600         u64 end;
601
602         while(total) {
603                 cache = btrfs_lookup_block_group(info, bytenr);
604                 if (!cache) {
605                         return -1;
606                 }
607                 byte_in_group = bytenr - cache->key.objectid;
608                 WARN_ON(byte_in_group > cache->key.offset);
609                 start = cache->key.objectid;
610                 end = start + cache->key.offset - 1;
611                 set_extent_bits(&info->block_group_cache, start, end,
612                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
613
614                 old_val = btrfs_block_group_used(&cache->item);
615                 num_bytes = min(total, cache->key.offset - byte_in_group);
616                 if (alloc) {
617                         if (cache->data != data &&
618                             old_val < (cache->key.offset >> 1)) {
619                                 int bit_to_clear;
620                                 int bit_to_set;
621                                 cache->data = data;
622                                 if (data) {
623                                         bit_to_clear = BLOCK_GROUP_METADATA;
624                                         bit_to_set = BLOCK_GROUP_DATA;
625                                         cache->item.flags &=
626                                                 ~BTRFS_BLOCK_GROUP_MIXED;
627                                         cache->item.flags |=
628                                                 BTRFS_BLOCK_GROUP_DATA;
629                                 } else {
630                                         bit_to_clear = BLOCK_GROUP_DATA;
631                                         bit_to_set = BLOCK_GROUP_METADATA;
632                                         cache->item.flags &=
633                                                 ~BTRFS_BLOCK_GROUP_MIXED;
634                                         cache->item.flags &=
635                                                 ~BTRFS_BLOCK_GROUP_DATA;
636                                 }
637                                 clear_extent_bits(&info->block_group_cache,
638                                                   start, end, bit_to_clear,
639                                                   GFP_NOFS);
640                                 set_extent_bits(&info->block_group_cache,
641                                                 start, end, bit_to_set,
642                                                 GFP_NOFS);
643                         } else if (cache->data != data &&
644                                    cache->data != BTRFS_BLOCK_GROUP_MIXED) {
645                                 cache->data = BTRFS_BLOCK_GROUP_MIXED;
646                                 set_extent_bits(&info->block_group_cache,
647                                                 start, end,
648                                                 BLOCK_GROUP_DATA |
649                                                 BLOCK_GROUP_METADATA,
650                                                 GFP_NOFS);
651                         }
652                         old_val += num_bytes;
653                 } else {
654                         old_val -= num_bytes;
655                         if (mark_free) {
656                                 set_extent_dirty(&info->free_space_cache,
657                                                  bytenr, bytenr + num_bytes - 1,
658                                                  GFP_NOFS);
659                         }
660                 }
661                 btrfs_set_block_group_used(&cache->item, old_val);
662                 total -= num_bytes;
663                 bytenr += num_bytes;
664         }
665         return 0;
666 }
667
668 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
669 {
670         u64 last = 0;
671         u64 start;
672         u64 end;
673         struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
674         int ret;
675
676         while(1) {
677                 ret = find_first_extent_bit(pinned_extents, last,
678                                             &start, &end, EXTENT_DIRTY);
679                 if (ret)
680                         break;
681                 set_extent_dirty(copy, start, end, GFP_NOFS);
682                 last = end + 1;
683         }
684         return 0;
685 }
686
687 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
688                                struct btrfs_root *root,
689                                struct extent_map_tree *unpin)
690 {
691         u64 start;
692         u64 end;
693         int ret;
694         struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
695         struct extent_map_tree *free_space_cache;
696
697         free_space_cache = &root->fs_info->free_space_cache;
698
699         while(1) {
700                 ret = find_first_extent_bit(unpin, 0, &start, &end,
701                                             EXTENT_DIRTY);
702                 if (ret)
703                         break;
704
705                 clear_extent_dirty(pinned_extents, start, end,
706                                    GFP_NOFS);
707                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
708                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
709         }
710         return 0;
711 }
712
713 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
714                                  btrfs_root *extent_root)
715 {
716         struct btrfs_key ins;
717         struct btrfs_extent_item extent_item;
718         int ret;
719         int err = 0;
720         u64 start;
721         u64 end;
722         struct btrfs_fs_info *info = extent_root->fs_info;
723
724         btrfs_set_stack_extent_refs(&extent_item, 1);
725         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
726         btrfs_set_stack_extent_owner(&extent_item,
727                                      extent_root->root_key.objectid);
728
729         while(1) {
730                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
731                                             &end, EXTENT_LOCKED);
732                 if (ret)
733                         break;
734
735                 ins.objectid = start;
736                 ins.offset = end + 1 - start;
737                 err = btrfs_insert_item(trans, extent_root, &ins,
738                                         &extent_item, sizeof(extent_item));
739                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
740                                   GFP_NOFS);
741         }
742         return 0;
743 }
744
745 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
746                           int pending)
747 {
748         int err = 0;
749         struct extent_buffer *buf;
750
751         if (!pending) {
752                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
753                 if (buf) {
754                         if (btrfs_buffer_uptodate(buf)) {
755                                 u64 transid =
756                                     root->fs_info->running_transaction->transid;
757                                 if (btrfs_header_generation(buf) == transid) {
758                                         free_extent_buffer(buf);
759                                         return 1;
760                                 }
761                         }
762                         free_extent_buffer(buf);
763                 }
764                 set_extent_dirty(&root->fs_info->pinned_extents,
765                                  bytenr, bytenr + num_bytes - 1, GFP_NOFS);
766         } else {
767                 set_extent_bits(&root->fs_info->pending_del,
768                                 bytenr, bytenr + num_bytes - 1,
769                                 EXTENT_LOCKED, GFP_NOFS);
770         }
771         BUG_ON(err < 0);
772         return 0;
773 }
774
775 /*
776  * remove an extent from the root, returns 0 on success
777  */
778 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
779                          *root, u64 bytenr, u64 num_bytes, int pin,
780                          int mark_free)
781 {
782         struct btrfs_path *path;
783         struct btrfs_key key;
784         struct btrfs_fs_info *info = root->fs_info;
785         struct btrfs_root *extent_root = info->extent_root;
786         struct extent_buffer *leaf;
787         int ret;
788         struct btrfs_extent_item *ei;
789         u32 refs;
790
791         key.objectid = bytenr;
792         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
793         key.offset = num_bytes;
794
795         path = btrfs_alloc_path();
796         if (!path)
797                 return -ENOMEM;
798
799         ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
800         if (ret < 0)
801                 return ret;
802         BUG_ON(ret);
803
804         leaf = path->nodes[0];
805         ei = btrfs_item_ptr(leaf, path->slots[0],
806                             struct btrfs_extent_item);
807         refs = btrfs_extent_refs(leaf, ei);
808         BUG_ON(refs == 0);
809         refs -= 1;
810         btrfs_set_extent_refs(leaf, ei, refs);
811         btrfs_mark_buffer_dirty(leaf);
812
813         if (refs == 0) {
814                 u64 super_used;
815                 u64 root_used;
816
817                 if (pin) {
818                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
819                         if (ret > 0)
820                                 mark_free = 1;
821                         BUG_ON(ret < 0);
822                 }
823
824                 /* block accounting for super block */
825                 super_used = btrfs_super_bytes_used(&info->super_copy);
826                 btrfs_set_super_bytes_used(&info->super_copy,
827                                            super_used - num_bytes);
828
829                 /* block accounting for root item */
830                 root_used = btrfs_root_used(&root->root_item);
831                 btrfs_set_root_used(&root->root_item,
832                                            root_used - num_bytes);
833
834                 ret = btrfs_del_item(trans, extent_root, path);
835                 if (ret) {
836                         return ret;
837                 }
838                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
839                                          mark_free, 0);
840                 BUG_ON(ret);
841         }
842         btrfs_free_path(path);
843         finish_current_insert(trans, extent_root);
844         return ret;
845 }
846
847 /*
848  * find all the blocks marked as pending in the radix tree and remove
849  * them from the extent map
850  */
851 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
852                                btrfs_root *extent_root)
853 {
854         int ret;
855         int err = 0;
856         u64 start;
857         u64 end;
858         struct extent_map_tree *pending_del;
859         struct extent_map_tree *pinned_extents;
860
861         pending_del = &extent_root->fs_info->pending_del;
862         pinned_extents = &extent_root->fs_info->pinned_extents;
863
864         while(1) {
865                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
866                                             EXTENT_LOCKED);
867                 if (ret)
868                         break;
869
870                 set_extent_dirty(pinned_extents, start, end, GFP_NOFS);
871                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
872                                   GFP_NOFS);
873                 ret = __free_extent(trans, extent_root,
874                                      start, end + 1 - start, 0, 0);
875                 if (ret)
876                         err = ret;
877         }
878         return err;
879 }
880
881 /*
882  * remove an extent from the root, returns 0 on success
883  */
884 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
885                       *root, u64 bytenr, u64 num_bytes, int pin)
886 {
887         struct btrfs_root *extent_root = root->fs_info->extent_root;
888         int pending_ret;
889         int ret;
890
891         WARN_ON(num_bytes < root->sectorsize);
892         if (root == extent_root) {
893                 pin_down_bytes(root, bytenr, num_bytes, 1);
894                 return 0;
895         }
896         ret = __free_extent(trans, root, bytenr, num_bytes, pin, pin == 0);
897         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
898         return ret ? ret : pending_ret;
899 }
900
901 /*
902  * walks the btree of allocated extents and find a hole of a given size.
903  * The key ins is changed to record the hole:
904  * ins->objectid == block start
905  * ins->flags = BTRFS_EXTENT_ITEM_KEY
906  * ins->offset == number of blocks
907  * Any available blocks before search_start are skipped.
908  */
909 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
910                             *orig_root, u64 num_bytes, u64 empty_size,
911                             u64 search_start, u64 search_end, u64 hint_byte,
912                             struct btrfs_key *ins, u64 exclude_start,
913                             u64 exclude_nr, int data)
914 {
915         struct btrfs_path *path;
916         struct btrfs_key key;
917         int ret;
918         u64 hole_size = 0;
919         int slot = 0;
920         u64 last_byte = 0;
921         u64 orig_search_start = search_start;
922         int start_found;
923         struct extent_buffer *l;
924         struct btrfs_root * root = orig_root->fs_info->extent_root;
925         struct btrfs_fs_info *info = root->fs_info;
926         u64 total_needed = num_bytes;
927         int level;
928         struct btrfs_block_group_cache *block_group;
929         int full_scan = 0;
930         int wrapped = 0;
931         u64 cached_start;
932
933         WARN_ON(num_bytes < root->sectorsize);
934         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
935
936         level = btrfs_header_level(root->node);
937
938         if (num_bytes >= 96 * 1024 * 1024 && hint_byte) {
939                 data = BTRFS_BLOCK_GROUP_MIXED;
940         }
941
942         if (search_end == (u64)-1)
943                 search_end = btrfs_super_total_bytes(&info->super_copy);
944         if (hint_byte) {
945                 block_group = btrfs_lookup_block_group(info, hint_byte);
946                 block_group = btrfs_find_block_group(root, block_group,
947                                                      hint_byte, data, 1);
948         } else {
949                 block_group = btrfs_find_block_group(root,
950                                                      trans->block_group, 0,
951                                                      data, 1);
952         }
953
954         total_needed += empty_size;
955         path = btrfs_alloc_path();
956
957 check_failed:
958         search_start = find_search_start(root, &block_group,
959                                          search_start, total_needed, data);
960         cached_start = search_start;
961
962         btrfs_init_path(path);
963         ins->objectid = search_start;
964         ins->offset = 0;
965         start_found = 0;
966         path->reada = 2;
967
968         ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
969         if (ret < 0)
970                 goto error;
971
972         if (path->slots[0] > 0) {
973                 path->slots[0]--;
974         }
975
976         l = path->nodes[0];
977         btrfs_item_key_to_cpu(l, &key, path->slots[0]);
978
979         /*
980          * a rare case, go back one key if we hit a block group item
981          * instead of an extent item
982          */
983         if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
984             key.objectid + key.offset >= search_start) {
985                 ins->objectid = key.objectid;
986                 ins->offset = key.offset - 1;
987                 btrfs_release_path(root, path);
988                 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
989                 if (ret < 0)
990                         goto error;
991
992                 if (path->slots[0] > 0) {
993                         path->slots[0]--;
994                 }
995         }
996
997         while (1) {
998                 l = path->nodes[0];
999                 slot = path->slots[0];
1000                 if (slot >= btrfs_header_nritems(l)) {
1001                         ret = btrfs_next_leaf(root, path);
1002                         if (ret == 0)
1003                                 continue;
1004                         if (ret < 0)
1005                                 goto error;
1006
1007                         search_start = max(search_start,
1008                                            block_group->key.objectid);
1009                         if (!start_found) {
1010                                 ins->objectid = search_start;
1011                                 ins->offset = search_end - search_start;
1012                                 start_found = 1;
1013                                 goto check_pending;
1014                         }
1015                         ins->objectid = last_byte > search_start ?
1016                                         last_byte : search_start;
1017                         ins->offset = search_end - ins->objectid;
1018                         BUG_ON(ins->objectid >= search_end);
1019                         goto check_pending;
1020                 }
1021                 btrfs_item_key_to_cpu(l, &key, slot);
1022
1023                 if (key.objectid >= search_start && key.objectid > last_byte &&
1024                     start_found) {
1025                         if (last_byte < search_start)
1026                                 last_byte = search_start;
1027                         hole_size = key.objectid - last_byte;
1028                         if (hole_size >= num_bytes) {
1029                                 ins->objectid = last_byte;
1030                                 ins->offset = hole_size;
1031                                 goto check_pending;
1032                         }
1033                 }
1034                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
1035                         if (!start_found) {
1036                                 last_byte = key.objectid;
1037                                 start_found = 1;
1038                         }
1039                         goto next;
1040                 }
1041
1042
1043                 start_found = 1;
1044                 last_byte = key.objectid + key.offset;
1045
1046                 if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
1047                     last_byte >= block_group->key.objectid +
1048                     block_group->key.offset) {
1049                         btrfs_release_path(root, path);
1050                         search_start = block_group->key.objectid +
1051                                 block_group->key.offset;
1052                         goto new_group;
1053                 }
1054 next:
1055                 path->slots[0]++;
1056                 cond_resched();
1057         }
1058 check_pending:
1059         /* we have to make sure we didn't find an extent that has already
1060          * been allocated by the map tree or the original allocation
1061          */
1062         btrfs_release_path(root, path);
1063         BUG_ON(ins->objectid < search_start);
1064
1065         if (ins->objectid + num_bytes >= search_end)
1066                 goto enospc;
1067
1068         if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
1069             ins->objectid + num_bytes >= block_group->
1070             key.objectid + block_group->key.offset) {
1071                 search_start = block_group->key.objectid +
1072                         block_group->key.offset;
1073                 goto new_group;
1074         }
1075         if (test_range_bit(&info->extent_ins, ins->objectid,
1076                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1077                 search_start = ins->objectid + num_bytes;
1078                 goto new_group;
1079         }
1080         if (test_range_bit(&info->pinned_extents, ins->objectid,
1081                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1082                 search_start = ins->objectid + num_bytes;
1083                 goto new_group;
1084         }
1085         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1086             ins->objectid < exclude_start + exclude_nr)) {
1087                 search_start = exclude_start + exclude_nr;
1088                 goto new_group;
1089         }
1090         if (!data) {
1091                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1092                 if (block_group)
1093                         trans->block_group = block_group;
1094         }
1095         ins->offset = num_bytes;
1096         btrfs_free_path(path);
1097         return 0;
1098
1099 new_group:
1100         if (search_start + num_bytes >= search_end) {
1101 enospc:
1102                 search_start = orig_search_start;
1103                 if (full_scan) {
1104                         ret = -ENOSPC;
1105                         goto error;
1106                 }
1107                 if (wrapped) {
1108                         if (!full_scan)
1109                                 total_needed -= empty_size;
1110                         full_scan = 1;
1111                 } else
1112                         wrapped = 1;
1113         }
1114         block_group = btrfs_lookup_block_group(info, search_start);
1115         cond_resched();
1116         if (!full_scan)
1117                 block_group = btrfs_find_block_group(root, block_group,
1118                                                      search_start, data, 0);
1119         goto check_failed;
1120
1121 error:
1122         btrfs_release_path(root, path);
1123         btrfs_free_path(path);
1124         return ret;
1125 }
1126 /*
1127  * finds a free extent and does all the dirty work required for allocation
1128  * returns the key for the extent through ins, and a tree buffer for
1129  * the first block of the extent through buf.
1130  *
1131  * returns 0 if everything worked, non-zero otherwise.
1132  */
1133 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1134                        struct btrfs_root *root, u64 owner,
1135                        u64 num_bytes, u64 empty_size, u64 hint_byte,
1136                        u64 search_end, struct btrfs_key *ins, int data)
1137 {
1138         int ret;
1139         int pending_ret;
1140         u64 super_used, root_used;
1141         u64 search_start = 0;
1142         struct btrfs_fs_info *info = root->fs_info;
1143         struct btrfs_root *extent_root = info->extent_root;
1144         struct btrfs_extent_item extent_item;
1145
1146         btrfs_set_stack_extent_refs(&extent_item, 1);
1147         btrfs_set_stack_extent_owner(&extent_item, owner);
1148
1149         WARN_ON(num_bytes < root->sectorsize);
1150         ret = find_free_extent(trans, root, num_bytes, empty_size,
1151                                search_start, search_end, hint_byte, ins,
1152                                trans->alloc_exclude_start,
1153                                trans->alloc_exclude_nr, data);
1154         BUG_ON(ret);
1155         if (ret)
1156                 return ret;
1157
1158         /* block accounting for super block */
1159         super_used = btrfs_super_bytes_used(&info->super_copy);
1160         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1161
1162         /* block accounting for root item */
1163         root_used = btrfs_root_used(&root->root_item);
1164         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1165
1166         clear_extent_dirty(&root->fs_info->free_space_cache,
1167                            ins->objectid, ins->objectid + ins->offset - 1,
1168                            GFP_NOFS);
1169
1170         if (root == extent_root) {
1171                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1172                                 ins->objectid + ins->offset - 1,
1173                                 EXTENT_LOCKED, GFP_NOFS);
1174                 WARN_ON(data == 1);
1175                 goto update_block;
1176         }
1177
1178         WARN_ON(trans->alloc_exclude_nr);
1179         trans->alloc_exclude_start = ins->objectid;
1180         trans->alloc_exclude_nr = ins->offset;
1181         ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1182                                 sizeof(extent_item));
1183
1184         trans->alloc_exclude_start = 0;
1185         trans->alloc_exclude_nr = 0;
1186
1187         BUG_ON(ret);
1188         finish_current_insert(trans, extent_root);
1189         pending_ret = del_pending_extents(trans, extent_root);
1190
1191         if (ret) {
1192                 return ret;
1193         }
1194         if (pending_ret) {
1195                 return pending_ret;
1196         }
1197
1198 update_block:
1199         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1200                                  data);
1201         BUG_ON(ret);
1202         return 0;
1203 }
1204
1205 /*
1206  * helper function to allocate a block for a given tree
1207  * returns the tree buffer or NULL.
1208  */
1209 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1210                                              struct btrfs_root *root,
1211                                              u32 blocksize, u64 hint,
1212                                              u64 empty_size)
1213 {
1214         struct btrfs_key ins;
1215         int ret;
1216         struct extent_buffer *buf;
1217
1218         ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1219                                  blocksize, empty_size, hint,
1220                                  (u64)-1, &ins, 0);
1221         if (ret) {
1222                 BUG_ON(ret > 0);
1223                 return ERR_PTR(ret);
1224         }
1225         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1226         if (!buf) {
1227                 btrfs_free_extent(trans, root, ins.objectid, blocksize, 0);
1228                 return ERR_PTR(-ENOMEM);
1229         }
1230         btrfs_set_buffer_uptodate(buf);
1231         set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
1232                          buf->start + buf->len - 1, GFP_NOFS);
1233         set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
1234                         buf->start, buf->start + buf->len - 1,
1235                         EXTENT_CSUM, GFP_NOFS);
1236         buf->flags |= EXTENT_CSUM;
1237         btrfs_set_buffer_defrag(buf);
1238         trans->blocks_used++;
1239         return buf;
1240 }
1241
1242 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1243                          struct btrfs_root *root, struct extent_buffer *leaf)
1244 {
1245         struct btrfs_key key;
1246         struct btrfs_file_extent_item *fi;
1247         int i;
1248         int nritems;
1249         int ret;
1250
1251         BUG_ON(!btrfs_is_leaf(leaf));
1252         nritems = btrfs_header_nritems(leaf);
1253         for (i = 0; i < nritems; i++) {
1254                 u64 disk_bytenr;
1255
1256                 btrfs_item_key_to_cpu(leaf, &key, i);
1257                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1258                         continue;
1259                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1260                 if (btrfs_file_extent_type(leaf, fi) ==
1261                     BTRFS_FILE_EXTENT_INLINE)
1262                         continue;
1263                 /*
1264                  * FIXME make sure to insert a trans record that
1265                  * repeats the snapshot del on crash
1266                  */
1267                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1268                 if (disk_bytenr == 0)
1269                         continue;
1270                 ret = btrfs_free_extent(trans, root, disk_bytenr,
1271                                 btrfs_file_extent_disk_num_bytes(leaf, fi), 0);
1272                 BUG_ON(ret);
1273         }
1274         return 0;
1275 }
1276
1277 static void reada_walk_down(struct btrfs_root *root,
1278                             struct extent_buffer *node)
1279 {
1280         int i;
1281         u32 nritems;
1282         u64 bytenr;
1283         int ret;
1284         u32 refs;
1285         int level;
1286         u32 blocksize;
1287
1288         nritems = btrfs_header_nritems(node);
1289         level = btrfs_header_level(node);
1290         for (i = 0; i < nritems; i++) {
1291                 bytenr = btrfs_node_blockptr(node, i);
1292                 blocksize = btrfs_level_size(root, level - 1);
1293                 ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs);
1294                 BUG_ON(ret);
1295                 if (refs != 1)
1296                         continue;
1297                 mutex_unlock(&root->fs_info->fs_mutex);
1298                 ret = readahead_tree_block(root, bytenr, blocksize);
1299                 cond_resched();
1300                 mutex_lock(&root->fs_info->fs_mutex);
1301                 if (ret)
1302                         break;
1303         }
1304 }
1305
1306 /*
1307  * helper function for drop_snapshot, this walks down the tree dropping ref
1308  * counts as it goes.
1309  */
1310 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1311                           *root, struct btrfs_path *path, int *level)
1312 {
1313         struct extent_buffer *next;
1314         struct extent_buffer *cur;
1315         u64 bytenr;
1316         u32 blocksize;
1317         int ret;
1318         u32 refs;
1319
1320         WARN_ON(*level < 0);
1321         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1322         ret = lookup_extent_ref(trans, root,
1323                                 path->nodes[*level]->start,
1324                                 path->nodes[*level]->len, &refs);
1325         BUG_ON(ret);
1326         if (refs > 1)
1327                 goto out;
1328
1329         /*
1330          * walk down to the last node level and free all the leaves
1331          */
1332         while(*level >= 0) {
1333                 WARN_ON(*level < 0);
1334                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1335                 cur = path->nodes[*level];
1336
1337                 if (*level > 0 && path->slots[*level] == 0)
1338                         reada_walk_down(root, cur);
1339
1340                 if (btrfs_header_level(cur) != *level)
1341                         WARN_ON(1);
1342
1343                 if (path->slots[*level] >=
1344                     btrfs_header_nritems(cur))
1345                         break;
1346                 if (*level == 0) {
1347                         ret = drop_leaf_ref(trans, root, cur);
1348                         BUG_ON(ret);
1349                         break;
1350                 }
1351                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1352                 blocksize = btrfs_level_size(root, *level - 1);
1353                 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
1354                 BUG_ON(ret);
1355                 if (refs != 1) {
1356                         path->slots[*level]++;
1357                         ret = btrfs_free_extent(trans, root, bytenr,
1358                                                 blocksize, 1);
1359                         BUG_ON(ret);
1360                         continue;
1361                 }
1362                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1363                 if (!next || !btrfs_buffer_uptodate(next)) {
1364                         free_extent_buffer(next);
1365                         mutex_unlock(&root->fs_info->fs_mutex);
1366                         next = read_tree_block(root, bytenr, blocksize);
1367                         mutex_lock(&root->fs_info->fs_mutex);
1368
1369                         /* we dropped the lock, check one more time */
1370                         ret = lookup_extent_ref(trans, root, bytenr,
1371                                                 blocksize, &refs);
1372                         BUG_ON(ret);
1373                         if (refs != 1) {
1374                                 path->slots[*level]++;
1375                                 free_extent_buffer(next);
1376                                 ret = btrfs_free_extent(trans, root,
1377                                                         bytenr, blocksize, 1);
1378                                 BUG_ON(ret);
1379                                 continue;
1380                         }
1381                 }
1382                 WARN_ON(*level <= 0);
1383                 if (path->nodes[*level-1])
1384                         free_extent_buffer(path->nodes[*level-1]);
1385                 path->nodes[*level-1] = next;
1386                 *level = btrfs_header_level(next);
1387                 path->slots[*level] = 0;
1388         }
1389 out:
1390         WARN_ON(*level < 0);
1391         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1392         ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
1393                                 path->nodes[*level]->len, 1);
1394         free_extent_buffer(path->nodes[*level]);
1395         path->nodes[*level] = NULL;
1396         *level += 1;
1397         BUG_ON(ret);
1398         return 0;
1399 }
1400
1401 /*
1402  * helper for dropping snapshots.  This walks back up the tree in the path
1403  * to find the first node higher up where we haven't yet gone through
1404  * all the slots
1405  */
1406 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1407                         *root, struct btrfs_path *path, int *level)
1408 {
1409         int i;
1410         int slot;
1411         int ret;
1412         struct btrfs_root_item *root_item = &root->root_item;
1413
1414         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1415                 slot = path->slots[i];
1416                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
1417                         struct extent_buffer *node;
1418                         struct btrfs_disk_key disk_key;
1419                         node = path->nodes[i];
1420                         path->slots[i]++;
1421                         *level = i;
1422                         WARN_ON(*level == 0);
1423                         btrfs_node_key(node, &disk_key, path->slots[i]);
1424                         memcpy(&root_item->drop_progress,
1425                                &disk_key, sizeof(disk_key));
1426                         root_item->drop_level = i;
1427                         return 0;
1428                 } else {
1429                         ret = btrfs_free_extent(trans, root,
1430                                                 path->nodes[*level]->start,
1431                                                 path->nodes[*level]->len, 1);
1432                         BUG_ON(ret);
1433                         free_extent_buffer(path->nodes[*level]);
1434                         path->nodes[*level] = NULL;
1435                         *level = i + 1;
1436                 }
1437         }
1438         return 1;
1439 }
1440
1441 /*
1442  * drop the reference count on the tree rooted at 'snap'.  This traverses
1443  * the tree freeing any blocks that have a ref count of zero after being
1444  * decremented.
1445  */
1446 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1447                         *root)
1448 {
1449         int ret = 0;
1450         int wret;
1451         int level;
1452         struct btrfs_path *path;
1453         int i;
1454         int orig_level;
1455         struct btrfs_root_item *root_item = &root->root_item;
1456
1457         path = btrfs_alloc_path();
1458         BUG_ON(!path);
1459
1460         level = btrfs_header_level(root->node);
1461         orig_level = level;
1462         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1463                 path->nodes[level] = root->node;
1464                 extent_buffer_get(root->node);
1465                 path->slots[level] = 0;
1466         } else {
1467                 struct btrfs_key key;
1468                 struct btrfs_disk_key found_key;
1469                 struct extent_buffer *node;
1470
1471                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1472                 level = root_item->drop_level;
1473                 path->lowest_level = level;
1474                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1475                 if (wret < 0) {
1476                         ret = wret;
1477                         goto out;
1478                 }
1479                 node = path->nodes[level];
1480                 btrfs_node_key(node, &found_key, path->slots[level]);
1481                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
1482                                sizeof(found_key)));
1483         }
1484         while(1) {
1485                 wret = walk_down_tree(trans, root, path, &level);
1486                 if (wret > 0)
1487                         break;
1488                 if (wret < 0)
1489                         ret = wret;
1490
1491                 wret = walk_up_tree(trans, root, path, &level);
1492                 if (wret > 0)
1493                         break;
1494                 if (wret < 0)
1495                         ret = wret;
1496                 ret = -EAGAIN;
1497                 break;
1498         }
1499         for (i = 0; i <= orig_level; i++) {
1500                 if (path->nodes[i]) {
1501                         free_extent_buffer(path->nodes[i]);
1502                         path->nodes[i] = NULL;
1503                 }
1504         }
1505 out:
1506         btrfs_free_path(path);
1507         return ret;
1508 }
1509
1510 int btrfs_free_block_groups(struct btrfs_fs_info *info)
1511 {
1512         u64 start;
1513         u64 end;
1514         u64 ptr;
1515         int ret;
1516         while(1) {
1517                 ret = find_first_extent_bit(&info->block_group_cache, 0,
1518                                             &start, &end, (unsigned int)-1);
1519                 if (ret)
1520                         break;
1521                 ret = get_state_private(&info->block_group_cache, start, &ptr);
1522                 if (!ret)
1523                         kfree((void *)(unsigned long)ptr);
1524                 clear_extent_bits(&info->block_group_cache, start,
1525                                   end, (unsigned int)-1, GFP_NOFS);
1526         }
1527         while(1) {
1528                 ret = find_first_extent_bit(&info->free_space_cache, 0,
1529                                             &start, &end, EXTENT_DIRTY);
1530                 if (ret)
1531                         break;
1532                 clear_extent_dirty(&info->free_space_cache, start,
1533                                    end, GFP_NOFS);
1534         }
1535         return 0;
1536 }
1537
1538 int btrfs_read_block_groups(struct btrfs_root *root)
1539 {
1540         struct btrfs_path *path;
1541         int ret;
1542         int err = 0;
1543         int bit;
1544         struct btrfs_block_group_cache *cache;
1545         struct btrfs_fs_info *info = root->fs_info;
1546         struct extent_map_tree *block_group_cache;
1547         struct btrfs_key key;
1548         struct btrfs_key found_key;
1549         struct extent_buffer *leaf;
1550
1551         block_group_cache = &info->block_group_cache;
1552
1553         root = info->extent_root;
1554         key.objectid = 0;
1555         key.offset = BTRFS_BLOCK_GROUP_SIZE;
1556         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1557
1558         path = btrfs_alloc_path();
1559         if (!path)
1560                 return -ENOMEM;
1561
1562         while(1) {
1563                 ret = btrfs_search_slot(NULL, info->extent_root,
1564                                         &key, path, 0, 0);
1565                 if (ret != 0) {
1566                         err = ret;
1567                         break;
1568                 }
1569                 leaf = path->nodes[0];
1570                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1571                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1572                 if (!cache) {
1573                         err = -1;
1574                         break;
1575                 }
1576
1577                 read_extent_buffer(leaf, &cache->item,
1578                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
1579                                    sizeof(cache->item));
1580                 memcpy(&cache->key, &found_key, sizeof(found_key));
1581                 cache->cached = 0;
1582
1583                 key.objectid = found_key.objectid + found_key.offset;
1584                 btrfs_release_path(root, path);
1585
1586                 if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) {
1587                         bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
1588                         cache->data = BTRFS_BLOCK_GROUP_MIXED;
1589                 } else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
1590                         bit = BLOCK_GROUP_DATA;
1591                         cache->data = BTRFS_BLOCK_GROUP_DATA;
1592                 } else {
1593                         bit = BLOCK_GROUP_METADATA;
1594                         cache->data = 0;
1595                 }
1596
1597                 /* use EXTENT_LOCKED to prevent merging */
1598                 set_extent_bits(block_group_cache, found_key.objectid,
1599                                 found_key.objectid + found_key.offset - 1,
1600                                 bit | EXTENT_LOCKED, GFP_NOFS);
1601                 set_state_private(block_group_cache, found_key.objectid,
1602                                   (unsigned long)cache);
1603
1604                 if (key.objectid >=
1605                     btrfs_super_total_bytes(&info->super_copy))
1606                         break;
1607         }
1608
1609         btrfs_free_path(path);
1610         return 0;
1611 }