Btrfs: Allow tree blocks larger than the page size
[linux-2.6-block.git] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "print-tree.h"
23 #include "transaction.h"
24
25 #define BLOCK_GROUP_DATA EXTENT_WRITEBACK
26 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
27 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
28
29 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
30                                  btrfs_root *extent_root);
31 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
32                                btrfs_root *extent_root);
33
34 static int cache_block_group(struct btrfs_root *root,
35                              struct btrfs_block_group_cache *block_group)
36 {
37         struct btrfs_path *path;
38         int ret;
39         struct btrfs_key key;
40         struct extent_buffer *leaf;
41         struct extent_map_tree *free_space_cache;
42         int slot;
43         u64 last = 0;
44         u64 hole_size;
45         u64 first_free;
46         int found = 0;
47
48         root = root->fs_info->extent_root;
49         free_space_cache = &root->fs_info->free_space_cache;
50
51         if (block_group->cached)
52                 return 0;
53
54         path = btrfs_alloc_path();
55         if (!path)
56                 return -ENOMEM;
57
58         path->reada = 2;
59         first_free = block_group->key.objectid;
60         key.objectid = block_group->key.objectid;
61         key.offset = 0;
62
63         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
64         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
65
66         if (ret < 0)
67                 return ret;
68
69         if (ret && path->slots[0] > 0)
70                 path->slots[0]--;
71
72         while(1) {
73                 leaf = path->nodes[0];
74                 slot = path->slots[0];
75                 if (slot >= btrfs_header_nritems(leaf)) {
76                         ret = btrfs_next_leaf(root, path);
77                         if (ret < 0)
78                                 goto err;
79                         if (ret == 0) {
80                                 continue;
81                         } else {
82                                 break;
83                         }
84                 }
85
86                 btrfs_item_key_to_cpu(leaf, &key, slot);
87                 if (key.objectid < block_group->key.objectid) {
88                         if (key.objectid + key.offset > first_free)
89                                 first_free = key.objectid + key.offset;
90                         goto next;
91                 }
92
93                 if (key.objectid >= block_group->key.objectid +
94                     block_group->key.offset) {
95                         break;
96                 }
97
98                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
99                         if (!found) {
100                                 last = first_free;
101                                 found = 1;
102                         }
103                         if (key.objectid > last) {
104                                 hole_size = key.objectid - last;
105                                 set_extent_dirty(free_space_cache, last,
106                                                  last + hole_size - 1,
107                                                  GFP_NOFS);
108                         }
109                         last = key.objectid + key.offset;
110                 }
111 next:
112                 path->slots[0]++;
113         }
114
115         if (!found)
116                 last = first_free;
117         if (block_group->key.objectid +
118             block_group->key.offset > last) {
119                 hole_size = block_group->key.objectid +
120                         block_group->key.offset - last;
121                 set_extent_dirty(free_space_cache, last,
122                                  last + hole_size - 1, GFP_NOFS);
123         }
124         block_group->cached = 1;
125 err:
126         btrfs_free_path(path);
127         return 0;
128 }
129
130 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
131                                                          btrfs_fs_info *info,
132                                                          u64 bytenr)
133 {
134         struct extent_map_tree *block_group_cache;
135         struct btrfs_block_group_cache *block_group = NULL;
136         u64 ptr;
137         u64 start;
138         u64 end;
139         int ret;
140
141         block_group_cache = &info->block_group_cache;
142         ret = find_first_extent_bit(block_group_cache,
143                                     bytenr, &start, &end,
144                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
145         if (ret) {
146                 return NULL;
147         }
148         ret = get_state_private(block_group_cache, start, &ptr);
149         if (ret)
150                 return NULL;
151
152         block_group = (struct btrfs_block_group_cache *)ptr;
153
154
155         if (block_group->key.objectid <= bytenr && bytenr <=
156             block_group->key.objectid + block_group->key.offset)
157                 return block_group;
158
159         return NULL;
160 }
161
162 static u64 find_search_start(struct btrfs_root *root,
163                              struct btrfs_block_group_cache **cache_ret,
164                              u64 search_start, int num, int data)
165 {
166         int ret;
167         struct btrfs_block_group_cache *cache = *cache_ret;
168         u64 last = max(search_start, cache->key.objectid);
169         u64 start = 0;
170         u64 end = 0;
171
172 again:
173         ret = cache_block_group(root, cache);
174         if (ret)
175                 goto out;
176         while(1) {
177                 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
178                                             last, &start, &end, EXTENT_DIRTY);
179                 if (ret)
180                         goto out;
181
182                 start = max(last, start);
183                 last = end + 1;
184                 if (end + 1 - start < num)
185                         continue;
186                 if (start + num >= cache->key.objectid + cache->key.offset)
187                         goto new_group;
188                 return start;
189         }
190 out:
191         return search_start;
192
193 new_group:
194         cache = btrfs_lookup_block_group(root->fs_info,
195                                          last + cache->key.offset - 1);
196         if (!cache) {
197                 return search_start;
198         }
199         cache = btrfs_find_block_group(root, cache,
200                                        last + cache->key.offset - 1, data, 0);
201         *cache_ret = cache;
202         last = min(cache->key.objectid, last);
203         goto again;
204 }
205
206 static u64 div_factor(u64 num, int factor)
207 {
208         num *= factor;
209         do_div(num, 10);
210         return num;
211 }
212
213 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
214                                                  struct btrfs_block_group_cache
215                                                  *hint, u64 search_start,
216                                                  int data, int owner)
217 {
218         struct btrfs_block_group_cache *cache;
219         struct extent_map_tree *block_group_cache;
220         struct btrfs_block_group_cache *found_group = NULL;
221         struct btrfs_fs_info *info = root->fs_info;
222         u64 used;
223         u64 last = 0;
224         u64 hint_last;
225         u64 start;
226         u64 end;
227         u64 free_check;
228         u64 ptr;
229         int bit;
230         int ret;
231         int full_search = 0;
232         int factor = 8;
233         int data_swap = 0;
234
235         block_group_cache = &info->block_group_cache;
236
237         if (!owner)
238                 factor = 5;
239
240         if (data)
241                 bit = BLOCK_GROUP_DATA;
242         else
243                 bit = BLOCK_GROUP_METADATA;
244
245         if (search_start) {
246                 struct btrfs_block_group_cache *shint;
247                 shint = btrfs_lookup_block_group(info, search_start);
248                 if (shint && shint->data == data) {
249                         used = btrfs_block_group_used(&shint->item);
250                         if (used < div_factor(shint->key.offset, factor)) {
251                                 return shint;
252                         }
253                 }
254         }
255         if (hint && hint->data == data) {
256                 used = btrfs_block_group_used(&hint->item);
257                 if (used < div_factor(hint->key.offset, factor)) {
258                         return hint;
259                 }
260                 last = hint->key.offset * 3;
261                 if (hint->key.objectid >= last)
262                         last = max(search_start + hint->key.offset - 1,
263                                    hint->key.objectid - last);
264                 else
265                         last = hint->key.objectid + hint->key.offset;
266                 hint_last = last;
267         } else {
268                 if (hint)
269                         hint_last = max(hint->key.objectid, search_start);
270                 else
271                         hint_last = search_start;
272
273                 last = hint_last;
274         }
275 again:
276         while(1) {
277                 ret = find_first_extent_bit(block_group_cache, last,
278                                             &start, &end, bit);
279                 if (ret)
280                         break;
281
282                 ret = get_state_private(block_group_cache, start, &ptr);
283                 if (ret)
284                         break;
285
286                 cache = (struct btrfs_block_group_cache *)ptr;
287                 last = cache->key.objectid + cache->key.offset;
288                 used = btrfs_block_group_used(&cache->item);
289
290                 if (full_search)
291                         free_check = cache->key.offset;
292                 else
293                         free_check = div_factor(cache->key.offset, factor);
294
295                 if (used < free_check) {
296                         found_group = cache;
297                         goto found;
298                 }
299                 cond_resched();
300         }
301         if (!full_search) {
302                 last = search_start;
303                 full_search = 1;
304                 goto again;
305         }
306         if (!data_swap) {
307                 data_swap = 1;
308                 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
309                 last = search_start;
310                 goto again;
311         }
312 found:
313         return found_group;
314 }
315
316 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
317                                 struct btrfs_root *root,
318                                 u64 bytenr, u64 num_bytes)
319 {
320         struct btrfs_path *path;
321         int ret;
322         struct btrfs_key key;
323         struct extent_buffer *l;
324         struct btrfs_extent_item *item;
325         u32 refs;
326
327         WARN_ON(num_bytes < root->sectorsize);
328         path = btrfs_alloc_path();
329         if (!path)
330                 return -ENOMEM;
331
332         key.objectid = bytenr;
333         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
334         key.offset = num_bytes;
335         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
336                                 0, 1);
337         if (ret < 0)
338                 return ret;
339         if (ret != 0) {
340                 BUG();
341         }
342         BUG_ON(ret != 0);
343         l = path->nodes[0];
344         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
345         refs = btrfs_extent_refs(l, item);
346         btrfs_set_extent_refs(l, item, refs + 1);
347         btrfs_mark_buffer_dirty(path->nodes[0]);
348
349         btrfs_release_path(root->fs_info->extent_root, path);
350         btrfs_free_path(path);
351         finish_current_insert(trans, root->fs_info->extent_root);
352         del_pending_extents(trans, root->fs_info->extent_root);
353         return 0;
354 }
355
356 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
357                          struct btrfs_root *root)
358 {
359         finish_current_insert(trans, root->fs_info->extent_root);
360         del_pending_extents(trans, root->fs_info->extent_root);
361         return 0;
362 }
363
364 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
365                              struct btrfs_root *root, u64 bytenr,
366                              u64 num_bytes, u32 *refs)
367 {
368         struct btrfs_path *path;
369         int ret;
370         struct btrfs_key key;
371         struct extent_buffer *l;
372         struct btrfs_extent_item *item;
373
374         WARN_ON(num_bytes < root->sectorsize);
375         path = btrfs_alloc_path();
376         key.objectid = bytenr;
377         key.offset = num_bytes;
378         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
379         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
380                                 0, 0);
381         if (ret < 0)
382                 goto out;
383         if (ret != 0) {
384                 btrfs_print_leaf(root, path->nodes[0]);
385                 printk("failed to find block number %Lu\n", bytenr);
386                 BUG();
387         }
388         l = path->nodes[0];
389         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
390         *refs = btrfs_extent_refs(l, item);
391 out:
392         btrfs_free_path(path);
393         return 0;
394 }
395
396 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
397                        struct btrfs_root *root)
398 {
399         return btrfs_inc_extent_ref(trans, root, root->node->start,
400                                     root->node->len);
401 }
402
403 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
404                   struct extent_buffer *buf)
405 {
406         u64 bytenr;
407         u32 nritems;
408         struct btrfs_key key;
409         struct btrfs_file_extent_item *fi;
410         int i;
411         int level;
412         int ret;
413         int faili;
414         int err;
415
416         if (!root->ref_cows)
417                 return 0;
418
419         level = btrfs_header_level(buf);
420         nritems = btrfs_header_nritems(buf);
421         for (i = 0; i < nritems; i++) {
422                 if (level == 0) {
423                         u64 disk_bytenr;
424                         btrfs_item_key_to_cpu(buf, &key, i);
425                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
426                                 continue;
427                         fi = btrfs_item_ptr(buf, i,
428                                             struct btrfs_file_extent_item);
429                         if (btrfs_file_extent_type(buf, fi) ==
430                             BTRFS_FILE_EXTENT_INLINE)
431                                 continue;
432                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
433                         if (disk_bytenr == 0)
434                                 continue;
435                         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
436                                     btrfs_file_extent_disk_num_bytes(buf, fi));
437                         if (ret) {
438                                 faili = i;
439                                 goto fail;
440                         }
441                 } else {
442                         bytenr = btrfs_node_blockptr(buf, i);
443                         ret = btrfs_inc_extent_ref(trans, root, bytenr,
444                                            btrfs_level_size(root, level - 1));
445                         if (ret) {
446                                 faili = i;
447                                 goto fail;
448                         }
449                 }
450         }
451         return 0;
452 fail:
453         WARN_ON(1);
454         for (i =0; i < faili; i++) {
455                 if (level == 0) {
456                         u64 disk_bytenr;
457                         btrfs_item_key_to_cpu(buf, &key, i);
458                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
459                                 continue;
460                         fi = btrfs_item_ptr(buf, i,
461                                             struct btrfs_file_extent_item);
462                         if (btrfs_file_extent_type(buf, fi) ==
463                             BTRFS_FILE_EXTENT_INLINE)
464                                 continue;
465                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
466                         if (disk_bytenr == 0)
467                                 continue;
468                         err = btrfs_free_extent(trans, root, disk_bytenr,
469                                     btrfs_file_extent_disk_num_bytes(buf,
470                                                                       fi), 0);
471                         BUG_ON(err);
472                 } else {
473                         bytenr = btrfs_node_blockptr(buf, i);
474                         err = btrfs_free_extent(trans, root, bytenr,
475                                         btrfs_level_size(root, level - 1), 0);
476                         BUG_ON(err);
477                 }
478         }
479         return ret;
480 }
481
482 static int write_one_cache_group(struct btrfs_trans_handle *trans,
483                                  struct btrfs_root *root,
484                                  struct btrfs_path *path,
485                                  struct btrfs_block_group_cache *cache)
486 {
487         int ret;
488         int pending_ret;
489         struct btrfs_root *extent_root = root->fs_info->extent_root;
490         unsigned long bi;
491         struct extent_buffer *leaf;
492
493         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
494         if (ret < 0)
495                 goto fail;
496         BUG_ON(ret);
497
498         leaf = path->nodes[0];
499         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
500         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
501         btrfs_mark_buffer_dirty(leaf);
502         btrfs_release_path(extent_root, path);
503 fail:
504         finish_current_insert(trans, extent_root);
505         pending_ret = del_pending_extents(trans, extent_root);
506         if (ret)
507                 return ret;
508         if (pending_ret)
509                 return pending_ret;
510         return 0;
511
512 }
513
514 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
515                                    struct btrfs_root *root)
516 {
517         struct extent_map_tree *block_group_cache;
518         struct btrfs_block_group_cache *cache;
519         int ret;
520         int err = 0;
521         int werr = 0;
522         struct btrfs_path *path;
523         u64 last = 0;
524         u64 start;
525         u64 end;
526         u64 ptr;
527
528         block_group_cache = &root->fs_info->block_group_cache;
529         path = btrfs_alloc_path();
530         if (!path)
531                 return -ENOMEM;
532
533         while(1) {
534                 ret = find_first_extent_bit(block_group_cache, last,
535                                             &start, &end, BLOCK_GROUP_DIRTY);
536                 if (ret)
537                         break;
538
539                 last = end + 1;
540                 ret = get_state_private(block_group_cache, start, &ptr);
541                 if (ret)
542                         break;
543
544                 cache = (struct btrfs_block_group_cache *)ptr;
545                 err = write_one_cache_group(trans, root,
546                                             path, cache);
547                 /*
548                  * if we fail to write the cache group, we want
549                  * to keep it marked dirty in hopes that a later
550                  * write will work
551                  */
552                 if (err) {
553                         werr = err;
554                         continue;
555                 }
556                 clear_extent_bits(block_group_cache, start, end,
557                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
558         }
559         btrfs_free_path(path);
560         return werr;
561 }
562
563 static int update_block_group(struct btrfs_trans_handle *trans,
564                               struct btrfs_root *root,
565                               u64 bytenr, u64 num_bytes, int alloc,
566                               int mark_free, int data)
567 {
568         struct btrfs_block_group_cache *cache;
569         struct btrfs_fs_info *info = root->fs_info;
570         u64 total = num_bytes;
571         u64 old_val;
572         u64 byte_in_group;
573         u64 start;
574         u64 end;
575
576         while(total) {
577                 cache = btrfs_lookup_block_group(info, bytenr);
578                 if (!cache) {
579                         return -1;
580                 }
581                 byte_in_group = bytenr - cache->key.objectid;
582                 WARN_ON(byte_in_group > cache->key.offset);
583                 start = cache->key.objectid;
584                 end = start + cache->key.offset - 1;
585                 set_extent_bits(&info->block_group_cache, start, end,
586                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
587
588                 old_val = btrfs_block_group_used(&cache->item);
589                 num_bytes = min(total, cache->key.offset - byte_in_group);
590                 if (alloc) {
591                         if (cache->data != data &&
592                             old_val < (cache->key.offset >> 1)) {
593                                 int bit_to_clear;
594                                 int bit_to_set;
595
596                                 cache->data = data;
597                                 if (data) {
598                                         bit_to_clear = BLOCK_GROUP_DATA;
599                                         bit_to_set = BLOCK_GROUP_METADATA;
600                                         cache->item.flags |=
601                                                 BTRFS_BLOCK_GROUP_DATA;
602                                 } else {
603                                         bit_to_clear = BLOCK_GROUP_METADATA;
604                                         bit_to_set = BLOCK_GROUP_DATA;
605                                         cache->item.flags &=
606                                                 ~BTRFS_BLOCK_GROUP_DATA;
607                                 }
608                                 clear_extent_bits(&info->block_group_cache,
609                                                   start, end, bit_to_clear,
610                                                   GFP_NOFS);
611                                 set_extent_bits(&info->block_group_cache,
612                                                 start, end, bit_to_set,
613                                                 GFP_NOFS);
614                         }
615                         old_val += num_bytes;
616                 } else {
617                         old_val -= num_bytes;
618                         if (mark_free) {
619                                 set_extent_dirty(&info->free_space_cache,
620                                                  bytenr, bytenr + num_bytes - 1,
621                                                  GFP_NOFS);
622                         }
623                 }
624                 btrfs_set_block_group_used(&cache->item, old_val);
625                 total -= num_bytes;
626                 bytenr += num_bytes;
627         }
628         return 0;
629 }
630
631 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
632 {
633         u64 last = 0;
634         u64 start;
635         u64 end;
636         struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
637         int ret;
638
639         while(1) {
640                 ret = find_first_extent_bit(pinned_extents, last,
641                                             &start, &end, EXTENT_DIRTY);
642                 if (ret)
643                         break;
644                 set_extent_dirty(copy, start, end, GFP_NOFS);
645                 last = end + 1;
646         }
647         return 0;
648 }
649
650 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
651                                struct btrfs_root *root,
652                                struct extent_map_tree *unpin)
653 {
654         u64 start;
655         u64 end;
656         int ret;
657         struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
658         struct extent_map_tree *free_space_cache;
659
660         free_space_cache = &root->fs_info->free_space_cache;
661
662         while(1) {
663                 ret = find_first_extent_bit(unpin, 0, &start, &end,
664                                             EXTENT_DIRTY);
665                 if (ret)
666                         break;
667
668                 clear_extent_dirty(pinned_extents, start, end,
669                                    GFP_NOFS);
670                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
671                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
672         }
673         return 0;
674 }
675
676 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
677                                  btrfs_root *extent_root)
678 {
679         struct btrfs_key ins;
680         struct btrfs_extent_item extent_item;
681         int ret;
682         int err = 0;
683         u64 start;
684         u64 end;
685         struct btrfs_fs_info *info = extent_root->fs_info;
686
687         btrfs_set_stack_extent_refs(&extent_item, 1);
688         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
689         btrfs_set_stack_extent_owner(&extent_item,
690                                      extent_root->root_key.objectid);
691
692         while(1) {
693                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
694                                             &end, EXTENT_LOCKED);
695                 if (ret)
696                         break;
697
698                 ins.objectid = start;
699                 ins.offset = end + 1 - start;
700                 err = btrfs_insert_item(trans, extent_root, &ins,
701                                         &extent_item, sizeof(extent_item));
702                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
703                                   GFP_NOFS);
704         }
705         return 0;
706 }
707
708 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
709                           int pending)
710 {
711         int err = 0;
712         struct extent_buffer *buf;
713
714         if (!pending) {
715                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
716                 if (buf) {
717                         if (btrfs_buffer_uptodate(buf)) {
718                                 u64 transid =
719                                     root->fs_info->running_transaction->transid;
720                                 if (btrfs_header_generation(buf) == transid) {
721                                         free_extent_buffer(buf);
722                                         return 0;
723                                 }
724                         }
725                         free_extent_buffer(buf);
726                 }
727                 set_extent_dirty(&root->fs_info->pinned_extents,
728                                  bytenr, bytenr + num_bytes - 1, GFP_NOFS);
729         } else {
730                 set_extent_bits(&root->fs_info->pending_del,
731                                 bytenr, bytenr + num_bytes - 1,
732                                 EXTENT_LOCKED, GFP_NOFS);
733         }
734         BUG_ON(err < 0);
735         return 0;
736 }
737
738 /*
739  * remove an extent from the root, returns 0 on success
740  */
741 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
742                          *root, u64 bytenr, u64 num_bytes, int pin,
743                          int mark_free)
744 {
745         struct btrfs_path *path;
746         struct btrfs_key key;
747         struct btrfs_fs_info *info = root->fs_info;
748         struct btrfs_root *extent_root = info->extent_root;
749         struct extent_buffer *leaf;
750         int ret;
751         struct btrfs_extent_item *ei;
752         u32 refs;
753
754         key.objectid = bytenr;
755         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
756         key.offset = num_bytes;
757
758         path = btrfs_alloc_path();
759         if (!path)
760                 return -ENOMEM;
761
762         ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
763         if (ret < 0)
764                 return ret;
765         BUG_ON(ret);
766
767         leaf = path->nodes[0];
768         ei = btrfs_item_ptr(leaf, path->slots[0],
769                             struct btrfs_extent_item);
770         refs = btrfs_extent_refs(leaf, ei);
771         BUG_ON(refs == 0);
772         refs -= 1;
773         btrfs_set_extent_refs(leaf, ei, refs);
774         btrfs_mark_buffer_dirty(leaf);
775
776         if (refs == 0) {
777                 u64 super_used;
778                 u64 root_used;
779
780                 if (pin) {
781                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
782                         BUG_ON(ret);
783                 }
784
785                 /* block accounting for super block */
786                 super_used = btrfs_super_bytes_used(&info->super_copy);
787                 btrfs_set_super_bytes_used(&info->super_copy,
788                                            super_used - num_bytes);
789
790                 /* block accounting for root item */
791                 root_used = btrfs_root_used(&root->root_item);
792                 btrfs_set_root_used(&root->root_item,
793                                            root_used - num_bytes);
794
795                 ret = btrfs_del_item(trans, extent_root, path);
796                 if (ret) {
797                         return ret;
798                 }
799                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
800                                          mark_free, 0);
801                 BUG_ON(ret);
802         }
803         btrfs_free_path(path);
804         finish_current_insert(trans, extent_root);
805         return ret;
806 }
807
808 /*
809  * find all the blocks marked as pending in the radix tree and remove
810  * them from the extent map
811  */
812 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
813                                btrfs_root *extent_root)
814 {
815         int ret;
816         int err = 0;
817         u64 start;
818         u64 end;
819         struct extent_map_tree *pending_del;
820         struct extent_map_tree *pinned_extents;
821
822         pending_del = &extent_root->fs_info->pending_del;
823         pinned_extents = &extent_root->fs_info->pinned_extents;
824
825         while(1) {
826                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
827                                             EXTENT_LOCKED);
828                 if (ret)
829                         break;
830
831                 set_extent_dirty(pinned_extents, start, end, GFP_NOFS);
832                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
833                                   GFP_NOFS);
834                 ret = __free_extent(trans, extent_root,
835                                      start, end + 1 - start, 0, 0);
836                 if (ret)
837                         err = ret;
838         }
839         return err;
840 }
841
842 /*
843  * remove an extent from the root, returns 0 on success
844  */
845 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
846                       *root, u64 bytenr, u64 num_bytes, int pin)
847 {
848         struct btrfs_root *extent_root = root->fs_info->extent_root;
849         int pending_ret;
850         int ret;
851
852         WARN_ON(num_bytes < root->sectorsize);
853         if (root == extent_root) {
854                 pin_down_bytes(root, bytenr, num_bytes, 1);
855                 return 0;
856         }
857         ret = __free_extent(trans, root, bytenr, num_bytes, pin, pin == 0);
858         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
859         return ret ? ret : pending_ret;
860 }
861
862 /*
863  * walks the btree of allocated extents and find a hole of a given size.
864  * The key ins is changed to record the hole:
865  * ins->objectid == block start
866  * ins->flags = BTRFS_EXTENT_ITEM_KEY
867  * ins->offset == number of blocks
868  * Any available blocks before search_start are skipped.
869  */
870 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
871                             *orig_root, u64 num_bytes, u64 empty_size,
872                             u64 search_start, u64 search_end, u64 hint_byte,
873                             struct btrfs_key *ins, u64 exclude_start,
874                             u64 exclude_nr, int data)
875 {
876         struct btrfs_path *path;
877         struct btrfs_key key;
878         int ret;
879         u64 hole_size = 0;
880         int slot = 0;
881         u64 last_byte = 0;
882         u64 orig_search_start = search_start;
883         int start_found;
884         struct extent_buffer *l;
885         struct btrfs_root * root = orig_root->fs_info->extent_root;
886         struct btrfs_fs_info *info = root->fs_info;
887         u64 total_needed = num_bytes;
888         int level;
889         struct btrfs_block_group_cache *block_group;
890         int full_scan = 0;
891         int wrapped = 0;
892
893         WARN_ON(num_bytes < root->sectorsize);
894         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
895
896         level = btrfs_header_level(root->node);
897
898         if (search_end == (u64)-1)
899                 search_end = btrfs_super_total_bytes(&info->super_copy);
900         if (hint_byte) {
901                 block_group = btrfs_lookup_block_group(info, hint_byte);
902                 block_group = btrfs_find_block_group(root, block_group,
903                                                      hint_byte, data, 1);
904         } else {
905                 block_group = btrfs_find_block_group(root,
906                                                      trans->block_group, 0,
907                                                      data, 1);
908         }
909
910         total_needed += empty_size;
911         path = btrfs_alloc_path();
912
913 check_failed:
914         search_start = find_search_start(root, &block_group,
915                                          search_start, total_needed, data);
916
917         btrfs_init_path(path);
918         ins->objectid = search_start;
919         ins->offset = 0;
920         start_found = 0;
921         path->reada = 2;
922
923         ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
924         if (ret < 0)
925                 goto error;
926
927         if (path->slots[0] > 0) {
928                 path->slots[0]--;
929         }
930
931         l = path->nodes[0];
932         btrfs_item_key_to_cpu(l, &key, path->slots[0]);
933
934         /*
935          * a rare case, go back one key if we hit a block group item
936          * instead of an extent item
937          */
938         if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
939             key.objectid + key.offset >= search_start) {
940                 ins->objectid = key.objectid;
941                 ins->offset = key.offset - 1;
942                 btrfs_release_path(root, path);
943                 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
944                 if (ret < 0)
945                         goto error;
946
947                 if (path->slots[0] > 0) {
948                         path->slots[0]--;
949                 }
950         }
951
952         while (1) {
953                 l = path->nodes[0];
954                 slot = path->slots[0];
955                 if (slot >= btrfs_header_nritems(l)) {
956                         ret = btrfs_next_leaf(root, path);
957                         if (ret == 0)
958                                 continue;
959                         if (ret < 0)
960                                 goto error;
961                         if (!start_found) {
962                                 ins->objectid = search_start;
963                                 ins->offset = search_end - search_start;
964                                 start_found = 1;
965                                 goto check_pending;
966                         }
967                         ins->objectid = last_byte > search_start ?
968                                         last_byte : search_start;
969                         ins->offset = search_end - ins->objectid;
970                         goto check_pending;
971                 }
972                 btrfs_item_key_to_cpu(l, &key, slot);
973
974                 if (key.objectid >= search_start && key.objectid > last_byte &&
975                     start_found) {
976                         if (last_byte < search_start)
977                                 last_byte = search_start;
978                         hole_size = key.objectid - last_byte;
979                         if (hole_size >= num_bytes) {
980                                 ins->objectid = last_byte;
981                                 ins->offset = hole_size;
982                                 goto check_pending;
983                         }
984                 }
985                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
986                         if (!start_found) {
987                                 last_byte = key.objectid;
988                                 start_found = 1;
989                         }
990                         goto next;
991                 }
992
993
994                 start_found = 1;
995                 last_byte = key.objectid + key.offset;
996
997                 if (!full_scan && last_byte >= block_group->key.objectid +
998                     block_group->key.offset) {
999                         btrfs_release_path(root, path);
1000                         search_start = block_group->key.objectid +
1001                                 block_group->key.offset * 2;
1002                         goto new_group;
1003                 }
1004 next:
1005                 path->slots[0]++;
1006                 cond_resched();
1007         }
1008 check_pending:
1009         /* we have to make sure we didn't find an extent that has already
1010          * been allocated by the map tree or the original allocation
1011          */
1012         btrfs_release_path(root, path);
1013         BUG_ON(ins->objectid < search_start);
1014
1015         if (ins->objectid + num_bytes >= search_end)
1016                 goto enospc;
1017
1018         if (test_range_bit(&info->extent_ins, ins->objectid,
1019                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1020                 search_start = ins->objectid + num_bytes;
1021                 goto new_group;
1022         }
1023         if (test_range_bit(&info->pinned_extents, ins->objectid,
1024                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1025                 search_start = ins->objectid + num_bytes;
1026                 goto new_group;
1027         }
1028         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1029             ins->objectid < exclude_start + exclude_nr)) {
1030                 search_start = exclude_start + exclude_nr;
1031                 goto new_group;
1032         }
1033         if (!data) {
1034                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1035                 if (block_group)
1036                         trans->block_group = block_group;
1037         }
1038         ins->offset = num_bytes;
1039         btrfs_free_path(path);
1040         return 0;
1041
1042 new_group:
1043         if (search_start + num_bytes >= search_end) {
1044 enospc:
1045                 search_start = orig_search_start;
1046                 if (full_scan) {
1047                         ret = -ENOSPC;
1048                         goto error;
1049                 }
1050                 if (wrapped) {
1051                         if (!full_scan)
1052                                 total_needed -= empty_size;
1053                         full_scan = 1;
1054                 } else
1055                         wrapped = 1;
1056         }
1057         block_group = btrfs_lookup_block_group(info, search_start);
1058         cond_resched();
1059         if (!full_scan)
1060                 block_group = btrfs_find_block_group(root, block_group,
1061                                                      search_start, data, 0);
1062         goto check_failed;
1063
1064 error:
1065         btrfs_release_path(root, path);
1066         btrfs_free_path(path);
1067         return ret;
1068 }
1069 /*
1070  * finds a free extent and does all the dirty work required for allocation
1071  * returns the key for the extent through ins, and a tree buffer for
1072  * the first block of the extent through buf.
1073  *
1074  * returns 0 if everything worked, non-zero otherwise.
1075  */
1076 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1077                        struct btrfs_root *root, u64 owner,
1078                        u64 num_bytes, u64 empty_size, u64 hint_byte,
1079                        u64 search_end, struct btrfs_key *ins, int data)
1080 {
1081         int ret;
1082         int pending_ret;
1083         u64 super_used, root_used;
1084         u64 search_start = 0;
1085         struct btrfs_fs_info *info = root->fs_info;
1086         struct btrfs_root *extent_root = info->extent_root;
1087         struct btrfs_extent_item extent_item;
1088
1089         btrfs_set_stack_extent_refs(&extent_item, 1);
1090         btrfs_set_stack_extent_owner(&extent_item, owner);
1091
1092         WARN_ON(num_bytes < root->sectorsize);
1093         ret = find_free_extent(trans, root, num_bytes, empty_size,
1094                                search_start, search_end, hint_byte, ins,
1095                                trans->alloc_exclude_start,
1096                                trans->alloc_exclude_nr, data);
1097         BUG_ON(ret);
1098         if (ret)
1099                 return ret;
1100
1101         /* block accounting for super block */
1102         super_used = btrfs_super_bytes_used(&info->super_copy);
1103         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1104
1105         /* block accounting for root item */
1106         root_used = btrfs_root_used(&root->root_item);
1107         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1108
1109         clear_extent_dirty(&root->fs_info->free_space_cache,
1110                            ins->objectid, ins->objectid + ins->offset - 1,
1111                            GFP_NOFS);
1112
1113         if (root == extent_root) {
1114                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1115                                 ins->objectid + ins->offset - 1,
1116                                 EXTENT_LOCKED, GFP_NOFS);
1117                 goto update_block;
1118         }
1119
1120         WARN_ON(trans->alloc_exclude_nr);
1121         trans->alloc_exclude_start = ins->objectid;
1122         trans->alloc_exclude_nr = ins->offset;
1123         ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1124                                 sizeof(extent_item));
1125
1126         trans->alloc_exclude_start = 0;
1127         trans->alloc_exclude_nr = 0;
1128
1129         BUG_ON(ret);
1130         finish_current_insert(trans, extent_root);
1131         pending_ret = del_pending_extents(trans, extent_root);
1132
1133         if (ret) {
1134                 return ret;
1135         }
1136         if (pending_ret) {
1137                 return pending_ret;
1138         }
1139
1140 update_block:
1141         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1142                                  data);
1143         BUG_ON(ret);
1144         return 0;
1145 }
1146
1147 /*
1148  * helper function to allocate a block for a given tree
1149  * returns the tree buffer or NULL.
1150  */
1151 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1152                                              struct btrfs_root *root,
1153                                              u32 blocksize, u64 hint,
1154                                              u64 empty_size)
1155 {
1156         struct btrfs_key ins;
1157         int ret;
1158         struct extent_buffer *buf;
1159
1160         ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1161                                  blocksize, empty_size, hint,
1162                                  (u64)-1, &ins, 0);
1163         if (ret) {
1164                 BUG_ON(ret > 0);
1165                 return ERR_PTR(ret);
1166         }
1167         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1168         if (!buf) {
1169                 btrfs_free_extent(trans, root, ins.objectid, blocksize, 0);
1170                 return ERR_PTR(-ENOMEM);
1171         }
1172         btrfs_set_buffer_uptodate(buf);
1173         buf->alloc_addr = (unsigned long)__builtin_return_address(0);
1174         set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
1175                          buf->start + buf->len - 1, GFP_NOFS);
1176         /*
1177         set_buffer_checked(buf);
1178         set_buffer_defrag(buf);
1179         */
1180         /* FIXME!!!!!!!!!!!!!!!!
1181         set_radix_bit(&trans->transaction->dirty_pages, buf->pages[0]->index);
1182         */
1183         trans->blocks_used++;
1184         return buf;
1185 }
1186
1187 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1188                          struct btrfs_root *root, struct extent_buffer *leaf)
1189 {
1190         struct btrfs_key key;
1191         struct btrfs_file_extent_item *fi;
1192         int i;
1193         int nritems;
1194         int ret;
1195
1196         BUG_ON(!btrfs_is_leaf(leaf));
1197         nritems = btrfs_header_nritems(leaf);
1198         for (i = 0; i < nritems; i++) {
1199                 u64 disk_bytenr;
1200
1201                 btrfs_item_key_to_cpu(leaf, &key, i);
1202                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1203                         continue;
1204                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1205                 if (btrfs_file_extent_type(leaf, fi) ==
1206                     BTRFS_FILE_EXTENT_INLINE)
1207                         continue;
1208                 /*
1209                  * FIXME make sure to insert a trans record that
1210                  * repeats the snapshot del on crash
1211                  */
1212                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1213                 if (disk_bytenr == 0)
1214                         continue;
1215                 ret = btrfs_free_extent(trans, root, disk_bytenr,
1216                                 btrfs_file_extent_disk_num_bytes(leaf, fi), 0);
1217                 BUG_ON(ret);
1218         }
1219         return 0;
1220 }
1221
1222 static void reada_walk_down(struct btrfs_root *root,
1223                             struct extent_buffer *node)
1224 {
1225         int i;
1226         u32 nritems;
1227         u64 bytenr;
1228         int ret;
1229         u32 refs;
1230         int level;
1231         u32 blocksize;
1232
1233         nritems = btrfs_header_nritems(node);
1234         level = btrfs_header_level(node);
1235         for (i = 0; i < nritems; i++) {
1236                 bytenr = btrfs_node_blockptr(node, i);
1237                 blocksize = btrfs_level_size(root, level - 1);
1238                 ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs);
1239                 BUG_ON(ret);
1240                 if (refs != 1)
1241                         continue;
1242                 mutex_unlock(&root->fs_info->fs_mutex);
1243                 ret = readahead_tree_block(root, bytenr, blocksize);
1244                 cond_resched();
1245                 mutex_lock(&root->fs_info->fs_mutex);
1246                 if (ret)
1247                         break;
1248         }
1249 }
1250
1251 /*
1252  * helper function for drop_snapshot, this walks down the tree dropping ref
1253  * counts as it goes.
1254  */
1255 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1256                           *root, struct btrfs_path *path, int *level)
1257 {
1258         struct extent_buffer *next;
1259         struct extent_buffer *cur;
1260         u64 bytenr;
1261         u32 blocksize;
1262         int ret;
1263         u32 refs;
1264
1265         WARN_ON(*level < 0);
1266         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1267         ret = lookup_extent_ref(trans, root,
1268                                 path->nodes[*level]->start,
1269                                 path->nodes[*level]->len, &refs);
1270         BUG_ON(ret);
1271         if (refs > 1)
1272                 goto out;
1273
1274         /*
1275          * walk down to the last node level and free all the leaves
1276          */
1277         while(*level >= 0) {
1278                 WARN_ON(*level < 0);
1279                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1280                 cur = path->nodes[*level];
1281
1282                 if (*level > 0 && path->slots[*level] == 0)
1283                         reada_walk_down(root, cur);
1284
1285                 if (btrfs_header_level(cur) != *level)
1286                         WARN_ON(1);
1287
1288                 if (path->slots[*level] >=
1289                     btrfs_header_nritems(cur))
1290                         break;
1291                 if (*level == 0) {
1292                         ret = drop_leaf_ref(trans, root, cur);
1293                         BUG_ON(ret);
1294                         break;
1295                 }
1296                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1297                 blocksize = btrfs_level_size(root, *level - 1);
1298                 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
1299                 BUG_ON(ret);
1300                 if (refs != 1) {
1301                         path->slots[*level]++;
1302                         ret = btrfs_free_extent(trans, root, bytenr,
1303                                                 blocksize, 1);
1304                         BUG_ON(ret);
1305                         continue;
1306                 }
1307                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1308                 if (!next || !btrfs_buffer_uptodate(next)) {
1309                         free_extent_buffer(next);
1310                         mutex_unlock(&root->fs_info->fs_mutex);
1311                         next = read_tree_block(root, bytenr, blocksize);
1312                         mutex_lock(&root->fs_info->fs_mutex);
1313
1314                         /* we dropped the lock, check one more time */
1315                         ret = lookup_extent_ref(trans, root, bytenr,
1316                                                 blocksize, &refs);
1317                         BUG_ON(ret);
1318                         if (refs != 1) {
1319                                 path->slots[*level]++;
1320                                 free_extent_buffer(next);
1321                                 ret = btrfs_free_extent(trans, root,
1322                                                         bytenr, blocksize, 1);
1323                                 BUG_ON(ret);
1324                                 continue;
1325                         }
1326                 }
1327                 WARN_ON(*level <= 0);
1328                 if (path->nodes[*level-1])
1329                         free_extent_buffer(path->nodes[*level-1]);
1330                 path->nodes[*level-1] = next;
1331                 *level = btrfs_header_level(next);
1332                 path->slots[*level] = 0;
1333         }
1334 out:
1335         WARN_ON(*level < 0);
1336         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1337         ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
1338                                 path->nodes[*level]->len, 1);
1339         free_extent_buffer(path->nodes[*level]);
1340         path->nodes[*level] = NULL;
1341         *level += 1;
1342         BUG_ON(ret);
1343         return 0;
1344 }
1345
1346 /*
1347  * helper for dropping snapshots.  This walks back up the tree in the path
1348  * to find the first node higher up where we haven't yet gone through
1349  * all the slots
1350  */
1351 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1352                         *root, struct btrfs_path *path, int *level)
1353 {
1354         int i;
1355         int slot;
1356         int ret;
1357         struct btrfs_root_item *root_item = &root->root_item;
1358
1359         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1360                 slot = path->slots[i];
1361                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
1362                         struct extent_buffer *node;
1363                         struct btrfs_disk_key disk_key;
1364                         node = path->nodes[i];
1365                         path->slots[i]++;
1366                         *level = i;
1367                         WARN_ON(*level == 0);
1368                         btrfs_node_key(node, &disk_key, path->slots[i]);
1369                         memcpy(&root_item->drop_progress,
1370                                &disk_key, sizeof(disk_key));
1371                         root_item->drop_level = i;
1372                         return 0;
1373                 } else {
1374                         ret = btrfs_free_extent(trans, root,
1375                                                 path->nodes[*level]->start,
1376                                                 path->nodes[*level]->len, 1);
1377                         BUG_ON(ret);
1378                         free_extent_buffer(path->nodes[*level]);
1379                         path->nodes[*level] = NULL;
1380                         *level = i + 1;
1381                 }
1382         }
1383         return 1;
1384 }
1385
1386 /*
1387  * drop the reference count on the tree rooted at 'snap'.  This traverses
1388  * the tree freeing any blocks that have a ref count of zero after being
1389  * decremented.
1390  */
1391 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1392                         *root)
1393 {
1394         int ret = 0;
1395         int wret;
1396         int level;
1397         struct btrfs_path *path;
1398         int i;
1399         int orig_level;
1400         struct btrfs_root_item *root_item = &root->root_item;
1401
1402         path = btrfs_alloc_path();
1403         BUG_ON(!path);
1404
1405         level = btrfs_header_level(root->node);
1406         orig_level = level;
1407         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1408                 path->nodes[level] = root->node;
1409                 extent_buffer_get(root->node);
1410                 path->slots[level] = 0;
1411         } else {
1412                 struct btrfs_key key;
1413                 struct btrfs_disk_key found_key;
1414                 struct extent_buffer *node;
1415
1416                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1417                 level = root_item->drop_level;
1418                 path->lowest_level = level;
1419                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1420                 if (wret < 0) {
1421                         ret = wret;
1422                         goto out;
1423                 }
1424                 node = path->nodes[level];
1425                 btrfs_node_key(node, &found_key, path->slots[level]);
1426                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
1427                                sizeof(found_key)));
1428         }
1429         while(1) {
1430                 wret = walk_down_tree(trans, root, path, &level);
1431                 if (wret > 0)
1432                         break;
1433                 if (wret < 0)
1434                         ret = wret;
1435
1436                 wret = walk_up_tree(trans, root, path, &level);
1437                 if (wret > 0)
1438                         break;
1439                 if (wret < 0)
1440                         ret = wret;
1441                 ret = -EAGAIN;
1442                 break;
1443         }
1444         for (i = 0; i <= orig_level; i++) {
1445                 if (path->nodes[i]) {
1446                         free_extent_buffer(path->nodes[i]);
1447                         path->nodes[i] = 0;
1448                 }
1449         }
1450 out:
1451         btrfs_free_path(path);
1452         return ret;
1453 }
1454
1455 int btrfs_free_block_groups(struct btrfs_fs_info *info)
1456 {
1457         u64 start;
1458         u64 end;
1459         int ret;
1460
1461         while(1) {
1462                 ret = find_first_extent_bit(&info->block_group_cache, 0,
1463                                             &start, &end, (unsigned int)-1);
1464                 if (ret)
1465                         break;
1466                 clear_extent_bits(&info->block_group_cache, start,
1467                                   end, (unsigned int)-1, GFP_NOFS);
1468         }
1469         while(1) {
1470                 ret = find_first_extent_bit(&info->free_space_cache, 0,
1471                                             &start, &end, EXTENT_DIRTY);
1472                 if (ret)
1473                         break;
1474                 clear_extent_dirty(&info->free_space_cache, start,
1475                                    end, GFP_NOFS);
1476         }
1477         return 0;
1478 }
1479
1480 int btrfs_read_block_groups(struct btrfs_root *root)
1481 {
1482         struct btrfs_path *path;
1483         int ret;
1484         int err = 0;
1485         int bit;
1486         struct btrfs_block_group_cache *cache;
1487         struct btrfs_fs_info *info = root->fs_info;
1488         struct extent_map_tree *block_group_cache;
1489         struct btrfs_key key;
1490         struct btrfs_key found_key;
1491         struct extent_buffer *leaf;
1492
1493         block_group_cache = &info->block_group_cache;
1494
1495         root = info->extent_root;
1496         key.objectid = 0;
1497         key.offset = BTRFS_BLOCK_GROUP_SIZE;
1498         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1499
1500         path = btrfs_alloc_path();
1501         if (!path)
1502                 return -ENOMEM;
1503
1504         while(1) {
1505                 ret = btrfs_search_slot(NULL, info->extent_root,
1506                                         &key, path, 0, 0);
1507                 if (ret != 0) {
1508                         err = ret;
1509                         break;
1510                 }
1511                 leaf = path->nodes[0];
1512                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1513                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1514                 if (!cache) {
1515                         err = -1;
1516                         break;
1517                 }
1518
1519                 read_extent_buffer(leaf, &cache->item,
1520                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
1521                                    sizeof(cache->item));
1522                 memcpy(&cache->key, &found_key, sizeof(found_key));
1523                 cache->cached = 0;
1524
1525                 key.objectid = found_key.objectid + found_key.offset;
1526                 btrfs_release_path(root, path);
1527
1528                 if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
1529                         bit = BLOCK_GROUP_DATA;
1530                         cache->data = 1;
1531                 } else {
1532                         bit = BLOCK_GROUP_METADATA;
1533                         cache->data = 0;
1534                 }
1535
1536                 /* use EXTENT_LOCKED to prevent merging */
1537                 set_extent_bits(block_group_cache, found_key.objectid,
1538                                 found_key.objectid + found_key.offset - 1,
1539                                 bit | EXTENT_LOCKED, GFP_NOFS);
1540                 set_state_private(block_group_cache, found_key.objectid,
1541                                   (u64)cache);
1542
1543                 if (key.objectid >=
1544                     btrfs_super_total_bytes(&info->super_copy))
1545                         break;
1546         }
1547
1548         btrfs_free_path(path);
1549         return 0;
1550 }