Btrfs: Explicitly send a root objectid to count_snapshots_in_path
[linux-2.6-block.git] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include <linux/crc32c.h>
21 #include <linux/pagemap.h>
22 #include "hash.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27
28 #define BLOCK_GROUP_DATA EXTENT_WRITEBACK
29 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
30 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
31
32 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
33                                  btrfs_root *extent_root);
34 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
35                                btrfs_root *extent_root);
36
37 static int cache_block_group(struct btrfs_root *root,
38                              struct btrfs_block_group_cache *block_group)
39 {
40         struct btrfs_path *path;
41         int ret;
42         struct btrfs_key key;
43         struct extent_buffer *leaf;
44         struct extent_map_tree *free_space_cache;
45         int slot;
46         u64 last = 0;
47         u64 hole_size;
48         u64 first_free;
49         int found = 0;
50
51         if (!block_group)
52                 return 0;
53
54         root = root->fs_info->extent_root;
55         free_space_cache = &root->fs_info->free_space_cache;
56
57         if (block_group->cached)
58                 return 0;
59
60         path = btrfs_alloc_path();
61         if (!path)
62                 return -ENOMEM;
63
64         path->reada = 2;
65         first_free = block_group->key.objectid;
66         key.objectid = block_group->key.objectid;
67         key.offset = 0;
68
69         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
70         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
71
72         if (ret < 0)
73                 return ret;
74
75         if (ret && path->slots[0] > 0)
76                 path->slots[0]--;
77
78         while(1) {
79                 leaf = path->nodes[0];
80                 slot = path->slots[0];
81                 if (slot >= btrfs_header_nritems(leaf)) {
82                         ret = btrfs_next_leaf(root, path);
83                         if (ret < 0)
84                                 goto err;
85                         if (ret == 0) {
86                                 continue;
87                         } else {
88                                 break;
89                         }
90                 }
91
92                 btrfs_item_key_to_cpu(leaf, &key, slot);
93                 if (key.objectid < block_group->key.objectid) {
94                         if (btrfs_key_type(&key) != BTRFS_EXTENT_REF_KEY &&
95                             key.objectid + key.offset > first_free)
96                                 first_free = key.objectid + key.offset;
97                         goto next;
98                 }
99
100                 if (key.objectid >= block_group->key.objectid +
101                     block_group->key.offset) {
102                         break;
103                 }
104
105                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
106                         if (!found) {
107                                 last = first_free;
108                                 found = 1;
109                         }
110                         if (key.objectid > last) {
111                                 hole_size = key.objectid - last;
112                                 set_extent_dirty(free_space_cache, last,
113                                                  last + hole_size - 1,
114                                                  GFP_NOFS);
115                         }
116                         last = key.objectid + key.offset;
117                 }
118 next:
119                 path->slots[0]++;
120         }
121
122         if (!found)
123                 last = first_free;
124         if (block_group->key.objectid +
125             block_group->key.offset > last) {
126                 hole_size = block_group->key.objectid +
127                         block_group->key.offset - last;
128                 set_extent_dirty(free_space_cache, last,
129                                  last + hole_size - 1, GFP_NOFS);
130         }
131         block_group->cached = 1;
132 err:
133         btrfs_free_path(path);
134         return 0;
135 }
136
137 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
138                                                          btrfs_fs_info *info,
139                                                          u64 bytenr)
140 {
141         struct extent_map_tree *block_group_cache;
142         struct btrfs_block_group_cache *block_group = NULL;
143         u64 ptr;
144         u64 start;
145         u64 end;
146         int ret;
147
148         block_group_cache = &info->block_group_cache;
149         ret = find_first_extent_bit(block_group_cache,
150                                     bytenr, &start, &end,
151                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
152         if (ret) {
153                 return NULL;
154         }
155         ret = get_state_private(block_group_cache, start, &ptr);
156         if (ret)
157                 return NULL;
158
159         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
160         if (block_group->key.objectid <= bytenr && bytenr <
161             block_group->key.objectid + block_group->key.offset)
162                 return block_group;
163         return NULL;
164 }
165 static u64 find_search_start(struct btrfs_root *root,
166                              struct btrfs_block_group_cache **cache_ret,
167                              u64 search_start, int num,
168                              int data, int full_scan)
169 {
170         int ret;
171         struct btrfs_block_group_cache *cache = *cache_ret;
172         u64 last;
173         u64 start = 0;
174         u64 end = 0;
175         u64 cache_miss = 0;
176         int wrapped = 0;
177
178         if (!cache) {
179                 goto out;
180         }
181 again:
182         ret = cache_block_group(root, cache);
183         if (ret)
184                 goto out;
185
186         last = max(search_start, cache->key.objectid);
187
188         while(1) {
189                 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
190                                             last, &start, &end, EXTENT_DIRTY);
191                 if (ret) {
192                         if (!cache_miss)
193                                 cache_miss = last;
194                         goto new_group;
195                 }
196
197                 start = max(last, start);
198                 last = end + 1;
199                 if (last - start < num) {
200                         if (last == cache->key.objectid + cache->key.offset)
201                                 cache_miss = start;
202                         continue;
203                 }
204                 if (data != BTRFS_BLOCK_GROUP_MIXED &&
205                     start + num > cache->key.objectid + cache->key.offset)
206                         goto new_group;
207                 return start;
208         }
209 out:
210         cache = btrfs_lookup_block_group(root->fs_info, search_start);
211         if (!cache) {
212                 printk("Unable to find block group for %Lu\n",
213                        search_start);
214                 WARN_ON(1);
215                 return search_start;
216         }
217         return search_start;
218
219 new_group:
220         last = cache->key.objectid + cache->key.offset;
221 wrapped:
222         cache = btrfs_lookup_block_group(root->fs_info, last);
223         if (!cache) {
224 no_cache:
225                 if (!wrapped) {
226                         wrapped = 1;
227                         last = search_start;
228                         data = BTRFS_BLOCK_GROUP_MIXED;
229                         goto wrapped;
230                 }
231                 goto out;
232         }
233         if (cache_miss && !cache->cached) {
234                 cache_block_group(root, cache);
235                 last = cache_miss;
236                 cache = btrfs_lookup_block_group(root->fs_info, last);
237         }
238         cache = btrfs_find_block_group(root, cache, last, data, 0);
239         if (!cache)
240                 goto no_cache;
241         *cache_ret = cache;
242         cache_miss = 0;
243         goto again;
244 }
245
246 static u64 div_factor(u64 num, int factor)
247 {
248         if (factor == 10)
249                 return num;
250         num *= factor;
251         do_div(num, 10);
252         return num;
253 }
254
255 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
256                                                  struct btrfs_block_group_cache
257                                                  *hint, u64 search_start,
258                                                  int data, int owner)
259 {
260         struct btrfs_block_group_cache *cache;
261         struct extent_map_tree *block_group_cache;
262         struct btrfs_block_group_cache *found_group = NULL;
263         struct btrfs_fs_info *info = root->fs_info;
264         u64 used;
265         u64 last = 0;
266         u64 hint_last;
267         u64 start;
268         u64 end;
269         u64 free_check;
270         u64 ptr;
271         int bit;
272         int ret;
273         int full_search = 0;
274         int factor = 8;
275         int data_swap = 0;
276
277         block_group_cache = &info->block_group_cache;
278
279         if (!owner)
280                 factor = 8;
281
282         if (data == BTRFS_BLOCK_GROUP_MIXED) {
283                 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
284                 factor = 10;
285         } else if (data)
286                 bit = BLOCK_GROUP_DATA;
287         else
288                 bit = BLOCK_GROUP_METADATA;
289
290         if (search_start) {
291                 struct btrfs_block_group_cache *shint;
292                 shint = btrfs_lookup_block_group(info, search_start);
293                 if (shint && (shint->data == data ||
294                               shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
295                         used = btrfs_block_group_used(&shint->item);
296                         if (used + shint->pinned <
297                             div_factor(shint->key.offset, factor)) {
298                                 return shint;
299                         }
300                 }
301         }
302         if (hint && (hint->data == data ||
303                      hint->data == BTRFS_BLOCK_GROUP_MIXED)) {
304                 used = btrfs_block_group_used(&hint->item);
305                 if (used + hint->pinned <
306                     div_factor(hint->key.offset, factor)) {
307                         return hint;
308                 }
309                 last = hint->key.objectid + hint->key.offset;
310                 hint_last = last;
311         } else {
312                 if (hint)
313                         hint_last = max(hint->key.objectid, search_start);
314                 else
315                         hint_last = search_start;
316
317                 last = hint_last;
318         }
319 again:
320         while(1) {
321                 ret = find_first_extent_bit(block_group_cache, last,
322                                             &start, &end, bit);
323                 if (ret)
324                         break;
325
326                 ret = get_state_private(block_group_cache, start, &ptr);
327                 if (ret)
328                         break;
329
330                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
331                 last = cache->key.objectid + cache->key.offset;
332                 used = btrfs_block_group_used(&cache->item);
333
334                 if (full_search)
335                         free_check = cache->key.offset;
336                 else
337                         free_check = div_factor(cache->key.offset, factor);
338                 if (used + cache->pinned < free_check) {
339                         found_group = cache;
340                         goto found;
341                 }
342                 cond_resched();
343         }
344         if (!full_search) {
345                 last = search_start;
346                 full_search = 1;
347                 goto again;
348         }
349         if (!data_swap) {
350                 data_swap = 1;
351                 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
352                 last = search_start;
353                 goto again;
354         }
355 found:
356         return found_group;
357 }
358
359 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
360                            u64 owner, u64 owner_offset)
361 {
362         u32 high_crc = ~(u32)0;
363         u32 low_crc = ~(u32)0;
364         __le64 lenum;
365
366         lenum = cpu_to_le64(root_objectid);
367         high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
368         lenum = cpu_to_le64(ref_generation);
369         low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
370
371 #if 0
372         lenum = cpu_to_le64(owner);
373         low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
374         lenum = cpu_to_le64(owner_offset);
375         low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
376 #endif
377         return ((u64)high_crc << 32) | (u64)low_crc;
378 }
379
380 static int match_extent_ref(struct extent_buffer *leaf,
381                             struct btrfs_extent_ref *disk_ref,
382                             struct btrfs_extent_ref *cpu_ref)
383 {
384         int ret;
385         int len;
386
387         if (cpu_ref->objectid)
388                 len = sizeof(*cpu_ref);
389         else
390                 len = 2 * sizeof(u64);
391         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
392                                    len);
393         return ret == 0;
394 }
395
396 static int lookup_extent_backref(struct btrfs_trans_handle *trans,
397                                  struct btrfs_root *root,
398                                  struct btrfs_path *path, u64 bytenr,
399                                  u64 root_objectid, u64 ref_generation,
400                                  u64 owner, u64 owner_offset, int del)
401 {
402         u64 hash;
403         struct btrfs_key key;
404         struct btrfs_key found_key;
405         struct btrfs_extent_ref ref;
406         struct extent_buffer *leaf;
407         struct btrfs_extent_ref *disk_ref;
408         int ret;
409         int ret2;
410
411         btrfs_set_stack_ref_root(&ref, root_objectid);
412         btrfs_set_stack_ref_generation(&ref, ref_generation);
413         btrfs_set_stack_ref_objectid(&ref, owner);
414         btrfs_set_stack_ref_offset(&ref, owner_offset);
415
416         hash = hash_extent_ref(root_objectid, ref_generation, owner,
417                                owner_offset);
418         key.offset = hash;
419         key.objectid = bytenr;
420         key.type = BTRFS_EXTENT_REF_KEY;
421
422         while (1) {
423                 ret = btrfs_search_slot(trans, root, &key, path,
424                                         del ? -1 : 0, del);
425                 if (ret < 0)
426                         goto out;
427                 leaf = path->nodes[0];
428                 if (ret != 0) {
429                         u32 nritems = btrfs_header_nritems(leaf);
430                         if (path->slots[0] >= nritems) {
431                                 ret2 = btrfs_next_leaf(root, path);
432                                 if (ret2)
433                                         goto out;
434                                 leaf = path->nodes[0];
435                         }
436                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
437                         if (found_key.objectid != bytenr ||
438                             found_key.type != BTRFS_EXTENT_REF_KEY)
439                                 goto out;
440                         key.offset = found_key.offset;
441                         if (del) {
442                                 btrfs_release_path(root, path);
443                                 continue;
444                         }
445                 }
446                 disk_ref = btrfs_item_ptr(path->nodes[0],
447                                           path->slots[0],
448                                           struct btrfs_extent_ref);
449                 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
450                         ret = 0;
451                         goto out;
452                 }
453                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
454                 key.offset = found_key.offset + 1;
455                 btrfs_release_path(root, path);
456         }
457 out:
458         return ret;
459 }
460
461 /*
462  * Back reference rules.  Back refs have three main goals:
463  *
464  * 1) differentiate between all holders of references to an extent so that
465  *    when a reference is dropped we can make sure it was a valid reference
466  *    before freeing the extent.
467  *
468  * 2) Provide enough information to quickly find the holders of an extent
469  *    if we notice a given block is corrupted or bad.
470  *
471  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
472  *    maintenance.  This is actually the same as #2, but with a slightly
473  *    different use case.
474  *
475  * File extents can be referenced by:
476  *
477  * - multiple snapshots, subvolumes, or different generations in one subvol
478  * - different files inside a single subvolume (in theory, not implemented yet)
479  * - different offsets inside a file (bookend extents in file.c)
480  *
481  * The extent ref structure has fields for:
482  *
483  * - Objectid of the subvolume root
484  * - Generation number of the tree holding the reference
485  * - objectid of the file holding the reference
486  * - offset in the file corresponding to the key holding the reference
487  *
488  * When a file extent is allocated the fields are filled in:
489  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
490  *
491  * When a leaf is cow'd new references are added for every file extent found
492  * in the leaf.  It looks the same as the create case, but trans->transid
493  * will be different when the block is cow'd.
494  *
495  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
496  *
497  * When a file extent is removed either during snapshot deletion or file
498  * truncation, the corresponding back reference is found
499  * by searching for:
500  *
501  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
502  *      inode objectid, offset in file)
503  *
504  * Btree extents can be referenced by:
505  *
506  * - Different subvolumes
507  * - Different generations of the same subvolume
508  *
509  * Storing sufficient information for a full reverse mapping of a btree
510  * block would require storing the lowest key of the block in the backref,
511  * and it would require updating that lowest key either before write out or
512  * every time it changed.  Instead, the objectid of the lowest key is stored
513  * along with the level of the tree block.  This provides a hint
514  * about where in the btree the block can be found.  Searches through the
515  * btree only need to look for a pointer to that block, so they stop one
516  * level higher than the level recorded in the backref.
517  *
518  * Some btrees do not do reference counting on their extents.  These
519  * include the extent tree and the tree of tree roots.  Backrefs for these
520  * trees always have a generation of zero.
521  *
522  * When a tree block is created, back references are inserted:
523  *
524  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
525  *
526  * When a tree block is cow'd in a reference counted root,
527  * new back references are added for all the blocks it points to.
528  * These are of the form (trans->transid will have increased since creation):
529  *
530  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
531  *
532  * Because the lowest_key_objectid and the level are just hints
533  * they are not used when backrefs are deleted.  When a backref is deleted:
534  *
535  * if backref was for a tree root:
536  *     root_objectid = root->root_key.objectid
537  * else
538  *     root_objectid = btrfs_header_owner(parent)
539  *
540  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
541  *
542  * Back Reference Key hashing:
543  *
544  * Back references have four fields, each 64 bits long.  Unfortunately,
545  * This is hashed into a single 64 bit number and placed into the key offset.
546  * The key objectid corresponds to the first byte in the extent, and the
547  * key type is set to BTRFS_EXTENT_REF_KEY
548  */
549 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
550                                  struct btrfs_root *root,
551                                  struct btrfs_path *path, u64 bytenr,
552                                  u64 root_objectid, u64 ref_generation,
553                                  u64 owner, u64 owner_offset)
554 {
555         u64 hash;
556         struct btrfs_key key;
557         struct btrfs_extent_ref ref;
558         struct btrfs_extent_ref *disk_ref;
559         int ret;
560
561         btrfs_set_stack_ref_root(&ref, root_objectid);
562         btrfs_set_stack_ref_generation(&ref, ref_generation);
563         btrfs_set_stack_ref_objectid(&ref, owner);
564         btrfs_set_stack_ref_offset(&ref, owner_offset);
565
566         hash = hash_extent_ref(root_objectid, ref_generation, owner,
567                                owner_offset);
568         key.offset = hash;
569         key.objectid = bytenr;
570         key.type = BTRFS_EXTENT_REF_KEY;
571
572         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
573         while (ret == -EEXIST) {
574                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
575                                           struct btrfs_extent_ref);
576                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
577                         goto out;
578                 key.offset++;
579                 btrfs_release_path(root, path);
580                 ret = btrfs_insert_empty_item(trans, root, path, &key,
581                                               sizeof(ref));
582         }
583         if (ret)
584                 goto out;
585         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
586                                   struct btrfs_extent_ref);
587         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
588                             sizeof(ref));
589         btrfs_mark_buffer_dirty(path->nodes[0]);
590 out:
591         btrfs_release_path(root, path);
592         return ret;
593 }
594
595 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
596                                 struct btrfs_root *root,
597                                 u64 bytenr, u64 num_bytes,
598                                 u64 root_objectid, u64 ref_generation,
599                                 u64 owner, u64 owner_offset)
600 {
601         struct btrfs_path *path;
602         int ret;
603         struct btrfs_key key;
604         struct extent_buffer *l;
605         struct btrfs_extent_item *item;
606         u32 refs;
607
608         WARN_ON(num_bytes < root->sectorsize);
609         path = btrfs_alloc_path();
610         if (!path)
611                 return -ENOMEM;
612
613         key.objectid = bytenr;
614         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
615         key.offset = num_bytes;
616         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
617                                 0, 1);
618         if (ret < 0)
619                 return ret;
620         if (ret != 0) {
621                 BUG();
622         }
623         BUG_ON(ret != 0);
624         l = path->nodes[0];
625         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
626         refs = btrfs_extent_refs(l, item);
627         btrfs_set_extent_refs(l, item, refs + 1);
628         btrfs_mark_buffer_dirty(path->nodes[0]);
629
630         btrfs_release_path(root->fs_info->extent_root, path);
631
632         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
633                                           path, bytenr, root_objectid,
634                                           ref_generation, owner, owner_offset);
635         BUG_ON(ret);
636         finish_current_insert(trans, root->fs_info->extent_root);
637         del_pending_extents(trans, root->fs_info->extent_root);
638
639         btrfs_free_path(path);
640         return 0;
641 }
642
643 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
644                          struct btrfs_root *root)
645 {
646         finish_current_insert(trans, root->fs_info->extent_root);
647         del_pending_extents(trans, root->fs_info->extent_root);
648         return 0;
649 }
650
651 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
652                              struct btrfs_root *root, u64 bytenr,
653                              u64 num_bytes, u32 *refs)
654 {
655         struct btrfs_path *path;
656         int ret;
657         struct btrfs_key key;
658         struct extent_buffer *l;
659         struct btrfs_extent_item *item;
660
661         WARN_ON(num_bytes < root->sectorsize);
662         path = btrfs_alloc_path();
663         key.objectid = bytenr;
664         key.offset = num_bytes;
665         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
666         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
667                                 0, 0);
668         if (ret < 0)
669                 goto out;
670         if (ret != 0) {
671                 btrfs_print_leaf(root, path->nodes[0]);
672                 printk("failed to find block number %Lu\n", bytenr);
673                 BUG();
674         }
675         l = path->nodes[0];
676         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
677         *refs = btrfs_extent_refs(l, item);
678 out:
679         btrfs_free_path(path);
680         return 0;
681 }
682
683 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
684                                   struct btrfs_path *count_path,
685                                   u64 first_extent)
686 {
687         struct btrfs_root *extent_root = root->fs_info->extent_root;
688         struct btrfs_path *path;
689         u64 bytenr;
690         u64 found_objectid;
691         u64 root_objectid = root->root_key.objectid;
692         u32 total_count = 0;
693         u32 cur_count;
694         u32 refs;
695         u32 nritems;
696         int ret;
697         struct btrfs_key key;
698         struct btrfs_key found_key;
699         struct extent_buffer *l;
700         struct btrfs_extent_item *item;
701         struct btrfs_extent_ref *ref_item;
702         int level = -1;
703
704         path = btrfs_alloc_path();
705 again:
706         if (level == -1)
707                 bytenr = first_extent;
708         else
709                 bytenr = count_path->nodes[level]->start;
710
711         cur_count = 0;
712         key.objectid = bytenr;
713         key.offset = 0;
714
715         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
716         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
717         if (ret < 0)
718                 goto out;
719         BUG_ON(ret == 0);
720
721         l = path->nodes[0];
722         btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
723
724         if (found_key.objectid != bytenr ||
725             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
726                 goto out;
727         }
728
729         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
730         refs = btrfs_extent_refs(l, item);
731         while (1) {
732                 nritems = btrfs_header_nritems(l);
733                 if (path->slots[0] >= nritems) {
734                         ret = btrfs_next_leaf(extent_root, path);
735                         if (ret == 0)
736                                 continue;
737                         break;
738                 }
739                 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
740                 if (found_key.objectid != bytenr)
741                         break;
742                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
743                         path->slots[0]++;
744                         continue;
745                 }
746
747                 cur_count++;
748                 ref_item = btrfs_item_ptr(l, path->slots[0],
749                                           struct btrfs_extent_ref);
750                 found_objectid = btrfs_ref_root(l, ref_item);
751
752                 if (found_objectid != root_objectid) {
753                         total_count = 2;
754                         break;
755                 }
756                 total_count = 1;
757                 path->slots[0]++;
758         }
759         if (cur_count == 0) {
760                 total_count = 0;
761                 goto out;
762         }
763         if (total_count > 1)
764                 goto out;
765         if (level >= 0 && root->node == count_path->nodes[level])
766                 goto out;
767         level++;
768         btrfs_release_path(root, path);
769         goto again;
770
771 out:
772         btrfs_free_path(path);
773         return total_count;
774
775 }
776
777 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
778                        struct btrfs_root *root, u64 owner_objectid)
779 {
780         u64 generation;
781         u64 key_objectid;
782         u64 level;
783         u32 nritems;
784         struct btrfs_disk_key disk_key;
785
786         level = btrfs_header_level(root->node);
787         generation = trans->transid;
788         nritems = btrfs_header_nritems(root->node);
789         if (nritems > 0) {
790                 if (level == 0)
791                         btrfs_item_key(root->node, &disk_key, 0);
792                 else
793                         btrfs_node_key(root->node, &disk_key, 0);
794                 key_objectid = btrfs_disk_key_objectid(&disk_key);
795         } else {
796                 key_objectid = 0;
797         }
798         return btrfs_inc_extent_ref(trans, root, root->node->start,
799                                     root->node->len, owner_objectid,
800                                     generation, level, key_objectid);
801 }
802
803 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
804                   struct extent_buffer *buf)
805 {
806         u64 bytenr;
807         u32 nritems;
808         struct btrfs_key key;
809         struct btrfs_file_extent_item *fi;
810         int i;
811         int level;
812         int ret;
813         int faili;
814
815         if (!root->ref_cows)
816                 return 0;
817
818         level = btrfs_header_level(buf);
819         nritems = btrfs_header_nritems(buf);
820         for (i = 0; i < nritems; i++) {
821                 if (level == 0) {
822                         u64 disk_bytenr;
823                         btrfs_item_key_to_cpu(buf, &key, i);
824                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
825                                 continue;
826                         fi = btrfs_item_ptr(buf, i,
827                                             struct btrfs_file_extent_item);
828                         if (btrfs_file_extent_type(buf, fi) ==
829                             BTRFS_FILE_EXTENT_INLINE)
830                                 continue;
831                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
832                         if (disk_bytenr == 0)
833                                 continue;
834                         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
835                                     btrfs_file_extent_disk_num_bytes(buf, fi),
836                                     root->root_key.objectid, trans->transid,
837                                     key.objectid, key.offset);
838                         if (ret) {
839                                 faili = i;
840                                 goto fail;
841                         }
842                 } else {
843                         bytenr = btrfs_node_blockptr(buf, i);
844                         btrfs_node_key_to_cpu(buf, &key, i);
845                         ret = btrfs_inc_extent_ref(trans, root, bytenr,
846                                            btrfs_level_size(root, level - 1),
847                                            root->root_key.objectid,
848                                            trans->transid,
849                                            level - 1, key.objectid);
850                         if (ret) {
851                                 faili = i;
852                                 goto fail;
853                         }
854                 }
855         }
856         return 0;
857 fail:
858         WARN_ON(1);
859 #if 0
860         for (i =0; i < faili; i++) {
861                 if (level == 0) {
862                         u64 disk_bytenr;
863                         btrfs_item_key_to_cpu(buf, &key, i);
864                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
865                                 continue;
866                         fi = btrfs_item_ptr(buf, i,
867                                             struct btrfs_file_extent_item);
868                         if (btrfs_file_extent_type(buf, fi) ==
869                             BTRFS_FILE_EXTENT_INLINE)
870                                 continue;
871                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
872                         if (disk_bytenr == 0)
873                                 continue;
874                         err = btrfs_free_extent(trans, root, disk_bytenr,
875                                     btrfs_file_extent_disk_num_bytes(buf,
876                                                                       fi), 0);
877                         BUG_ON(err);
878                 } else {
879                         bytenr = btrfs_node_blockptr(buf, i);
880                         err = btrfs_free_extent(trans, root, bytenr,
881                                         btrfs_level_size(root, level - 1), 0);
882                         BUG_ON(err);
883                 }
884         }
885 #endif
886         return ret;
887 }
888
889 static int write_one_cache_group(struct btrfs_trans_handle *trans,
890                                  struct btrfs_root *root,
891                                  struct btrfs_path *path,
892                                  struct btrfs_block_group_cache *cache)
893 {
894         int ret;
895         int pending_ret;
896         struct btrfs_root *extent_root = root->fs_info->extent_root;
897         unsigned long bi;
898         struct extent_buffer *leaf;
899
900         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
901         if (ret < 0)
902                 goto fail;
903         BUG_ON(ret);
904
905         leaf = path->nodes[0];
906         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
907         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
908         btrfs_mark_buffer_dirty(leaf);
909         btrfs_release_path(extent_root, path);
910 fail:
911         finish_current_insert(trans, extent_root);
912         pending_ret = del_pending_extents(trans, extent_root);
913         if (ret)
914                 return ret;
915         if (pending_ret)
916                 return pending_ret;
917         return 0;
918
919 }
920
921 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
922                                    struct btrfs_root *root)
923 {
924         struct extent_map_tree *block_group_cache;
925         struct btrfs_block_group_cache *cache;
926         int ret;
927         int err = 0;
928         int werr = 0;
929         struct btrfs_path *path;
930         u64 last = 0;
931         u64 start;
932         u64 end;
933         u64 ptr;
934
935         block_group_cache = &root->fs_info->block_group_cache;
936         path = btrfs_alloc_path();
937         if (!path)
938                 return -ENOMEM;
939
940         while(1) {
941                 ret = find_first_extent_bit(block_group_cache, last,
942                                             &start, &end, BLOCK_GROUP_DIRTY);
943                 if (ret)
944                         break;
945
946                 last = end + 1;
947                 ret = get_state_private(block_group_cache, start, &ptr);
948                 if (ret)
949                         break;
950
951                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
952                 err = write_one_cache_group(trans, root,
953                                             path, cache);
954                 /*
955                  * if we fail to write the cache group, we want
956                  * to keep it marked dirty in hopes that a later
957                  * write will work
958                  */
959                 if (err) {
960                         werr = err;
961                         continue;
962                 }
963                 clear_extent_bits(block_group_cache, start, end,
964                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
965         }
966         btrfs_free_path(path);
967         return werr;
968 }
969
970 static int update_block_group(struct btrfs_trans_handle *trans,
971                               struct btrfs_root *root,
972                               u64 bytenr, u64 num_bytes, int alloc,
973                               int mark_free, int data)
974 {
975         struct btrfs_block_group_cache *cache;
976         struct btrfs_fs_info *info = root->fs_info;
977         u64 total = num_bytes;
978         u64 old_val;
979         u64 byte_in_group;
980         u64 start;
981         u64 end;
982
983         while(total) {
984                 cache = btrfs_lookup_block_group(info, bytenr);
985                 if (!cache) {
986                         return -1;
987                 }
988                 byte_in_group = bytenr - cache->key.objectid;
989                 WARN_ON(byte_in_group > cache->key.offset);
990                 start = cache->key.objectid;
991                 end = start + cache->key.offset - 1;
992                 set_extent_bits(&info->block_group_cache, start, end,
993                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
994
995                 old_val = btrfs_block_group_used(&cache->item);
996                 num_bytes = min(total, cache->key.offset - byte_in_group);
997                 if (alloc) {
998                         if (cache->data != data &&
999                             old_val < (cache->key.offset >> 1)) {
1000                                 int bit_to_clear;
1001                                 int bit_to_set;
1002                                 cache->data = data;
1003                                 if (data) {
1004                                         bit_to_clear = BLOCK_GROUP_METADATA;
1005                                         bit_to_set = BLOCK_GROUP_DATA;
1006                                         cache->item.flags &=
1007                                                 ~BTRFS_BLOCK_GROUP_MIXED;
1008                                         cache->item.flags |=
1009                                                 BTRFS_BLOCK_GROUP_DATA;
1010                                 } else {
1011                                         bit_to_clear = BLOCK_GROUP_DATA;
1012                                         bit_to_set = BLOCK_GROUP_METADATA;
1013                                         cache->item.flags &=
1014                                                 ~BTRFS_BLOCK_GROUP_MIXED;
1015                                         cache->item.flags &=
1016                                                 ~BTRFS_BLOCK_GROUP_DATA;
1017                                 }
1018                                 clear_extent_bits(&info->block_group_cache,
1019                                                   start, end, bit_to_clear,
1020                                                   GFP_NOFS);
1021                                 set_extent_bits(&info->block_group_cache,
1022                                                 start, end, bit_to_set,
1023                                                 GFP_NOFS);
1024                         } else if (cache->data != data &&
1025                                    cache->data != BTRFS_BLOCK_GROUP_MIXED) {
1026                                 cache->data = BTRFS_BLOCK_GROUP_MIXED;
1027                                 set_extent_bits(&info->block_group_cache,
1028                                                 start, end,
1029                                                 BLOCK_GROUP_DATA |
1030                                                 BLOCK_GROUP_METADATA,
1031                                                 GFP_NOFS);
1032                         }
1033                         old_val += num_bytes;
1034                 } else {
1035                         old_val -= num_bytes;
1036                         if (mark_free) {
1037                                 set_extent_dirty(&info->free_space_cache,
1038                                                  bytenr, bytenr + num_bytes - 1,
1039                                                  GFP_NOFS);
1040                         }
1041                 }
1042                 btrfs_set_block_group_used(&cache->item, old_val);
1043                 total -= num_bytes;
1044                 bytenr += num_bytes;
1045         }
1046         return 0;
1047 }
1048 static int update_pinned_extents(struct btrfs_root *root,
1049                                 u64 bytenr, u64 num, int pin)
1050 {
1051         u64 len;
1052         struct btrfs_block_group_cache *cache;
1053         struct btrfs_fs_info *fs_info = root->fs_info;
1054
1055         if (pin) {
1056                 set_extent_dirty(&fs_info->pinned_extents,
1057                                 bytenr, bytenr + num - 1, GFP_NOFS);
1058         } else {
1059                 clear_extent_dirty(&fs_info->pinned_extents,
1060                                 bytenr, bytenr + num - 1, GFP_NOFS);
1061         }
1062         while (num > 0) {
1063                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1064                 WARN_ON(!cache);
1065                 len = min(num, cache->key.offset -
1066                           (bytenr - cache->key.objectid));
1067                 if (pin) {
1068                         cache->pinned += len;
1069                         fs_info->total_pinned += len;
1070                 } else {
1071                         cache->pinned -= len;
1072                         fs_info->total_pinned -= len;
1073                 }
1074                 bytenr += len;
1075                 num -= len;
1076         }
1077         return 0;
1078 }
1079
1080 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
1081 {
1082         u64 last = 0;
1083         u64 start;
1084         u64 end;
1085         struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
1086         int ret;
1087
1088         while(1) {
1089                 ret = find_first_extent_bit(pinned_extents, last,
1090                                             &start, &end, EXTENT_DIRTY);
1091                 if (ret)
1092                         break;
1093                 set_extent_dirty(copy, start, end, GFP_NOFS);
1094                 last = end + 1;
1095         }
1096         return 0;
1097 }
1098
1099 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1100                                struct btrfs_root *root,
1101                                struct extent_map_tree *unpin)
1102 {
1103         u64 start;
1104         u64 end;
1105         int ret;
1106         struct extent_map_tree *free_space_cache;
1107         free_space_cache = &root->fs_info->free_space_cache;
1108
1109         while(1) {
1110                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1111                                             EXTENT_DIRTY);
1112                 if (ret)
1113                         break;
1114                 update_pinned_extents(root, start, end + 1 - start, 0);
1115                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1116                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1117         }
1118         return 0;
1119 }
1120
1121 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
1122                                  btrfs_root *extent_root)
1123 {
1124         u64 start;
1125         u64 end;
1126         struct btrfs_fs_info *info = extent_root->fs_info;
1127         struct extent_buffer *eb;
1128         struct btrfs_path *path;
1129         struct btrfs_key ins;
1130         struct btrfs_disk_key first;
1131         struct btrfs_extent_item extent_item;
1132         int ret;
1133         int level;
1134         int err = 0;
1135
1136         btrfs_set_stack_extent_refs(&extent_item, 1);
1137         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1138         path = btrfs_alloc_path();
1139
1140         while(1) {
1141                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1142                                             &end, EXTENT_LOCKED);
1143                 if (ret)
1144                         break;
1145
1146                 ins.objectid = start;
1147                 ins.offset = end + 1 - start;
1148                 err = btrfs_insert_item(trans, extent_root, &ins,
1149                                         &extent_item, sizeof(extent_item));
1150                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1151                                   GFP_NOFS);
1152                 eb = read_tree_block(extent_root, ins.objectid, ins.offset);
1153                 level = btrfs_header_level(eb);
1154                 if (level == 0) {
1155                         btrfs_item_key(eb, &first, 0);
1156                 } else {
1157                         btrfs_node_key(eb, &first, 0);
1158                 }
1159                 err = btrfs_insert_extent_backref(trans, extent_root, path,
1160                                           start, extent_root->root_key.objectid,
1161                                           0, level,
1162                                           btrfs_disk_key_objectid(&first));
1163                 BUG_ON(err);
1164                 free_extent_buffer(eb);
1165         }
1166         btrfs_free_path(path);
1167         return 0;
1168 }
1169
1170 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1171                           int pending)
1172 {
1173         int err = 0;
1174         struct extent_buffer *buf;
1175
1176         if (!pending) {
1177                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1178                 if (buf) {
1179                         if (btrfs_buffer_uptodate(buf)) {
1180                                 u64 transid =
1181                                     root->fs_info->running_transaction->transid;
1182                                 if (btrfs_header_generation(buf) == transid) {
1183                                         free_extent_buffer(buf);
1184                                         return 1;
1185                                 }
1186                         }
1187                         free_extent_buffer(buf);
1188                 }
1189                 update_pinned_extents(root, bytenr, num_bytes, 1);
1190         } else {
1191                 set_extent_bits(&root->fs_info->pending_del,
1192                                 bytenr, bytenr + num_bytes - 1,
1193                                 EXTENT_LOCKED, GFP_NOFS);
1194         }
1195         BUG_ON(err < 0);
1196         return 0;
1197 }
1198
1199 /*
1200  * remove an extent from the root, returns 0 on success
1201  */
1202 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1203                          *root, u64 bytenr, u64 num_bytes,
1204                          u64 root_objectid, u64 ref_generation,
1205                          u64 owner_objectid, u64 owner_offset, int pin,
1206                          int mark_free)
1207 {
1208         struct btrfs_path *path;
1209         struct btrfs_key key;
1210         struct btrfs_fs_info *info = root->fs_info;
1211         struct btrfs_root *extent_root = info->extent_root;
1212         struct extent_buffer *leaf;
1213         int ret;
1214         struct btrfs_extent_item *ei;
1215         u32 refs;
1216
1217         key.objectid = bytenr;
1218         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1219         key.offset = num_bytes;
1220
1221         path = btrfs_alloc_path();
1222         if (!path)
1223                 return -ENOMEM;
1224
1225         ret = lookup_extent_backref(trans, extent_root, path,
1226                                     bytenr, root_objectid,
1227                                     ref_generation,
1228                                     owner_objectid, owner_offset, 1);
1229         if (ret == 0) {
1230                 ret = btrfs_del_item(trans, extent_root, path);
1231         } else {
1232                 btrfs_print_leaf(extent_root, path->nodes[0]);
1233                 WARN_ON(1);
1234                 printk("Unable to find ref byte nr %Lu root %Lu "
1235                        " gen %Lu owner %Lu offset %Lu\n", bytenr,
1236                        root_objectid, ref_generation, owner_objectid,
1237                        owner_offset);
1238         }
1239         btrfs_release_path(extent_root, path);
1240         ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1241         if (ret < 0)
1242                 return ret;
1243         BUG_ON(ret);
1244
1245         leaf = path->nodes[0];
1246         ei = btrfs_item_ptr(leaf, path->slots[0],
1247                             struct btrfs_extent_item);
1248         refs = btrfs_extent_refs(leaf, ei);
1249         BUG_ON(refs == 0);
1250         refs -= 1;
1251         btrfs_set_extent_refs(leaf, ei, refs);
1252         btrfs_mark_buffer_dirty(leaf);
1253
1254         if (refs == 0) {
1255                 u64 super_used;
1256                 u64 root_used;
1257
1258                 if (pin) {
1259                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1260                         if (ret > 0)
1261                                 mark_free = 1;
1262                         BUG_ON(ret < 0);
1263                 }
1264
1265                 /* block accounting for super block */
1266                 super_used = btrfs_super_bytes_used(&info->super_copy);
1267                 btrfs_set_super_bytes_used(&info->super_copy,
1268                                            super_used - num_bytes);
1269
1270                 /* block accounting for root item */
1271                 root_used = btrfs_root_used(&root->root_item);
1272                 btrfs_set_root_used(&root->root_item,
1273                                            root_used - num_bytes);
1274
1275                 ret = btrfs_del_item(trans, extent_root, path);
1276                 if (ret) {
1277                         return ret;
1278                 }
1279                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1280                                          mark_free, 0);
1281                 BUG_ON(ret);
1282         }
1283         btrfs_free_path(path);
1284         finish_current_insert(trans, extent_root);
1285         return ret;
1286 }
1287
1288 /*
1289  * find all the blocks marked as pending in the radix tree and remove
1290  * them from the extent map
1291  */
1292 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1293                                btrfs_root *extent_root)
1294 {
1295         int ret;
1296         int err = 0;
1297         u64 start;
1298         u64 end;
1299         struct extent_map_tree *pending_del;
1300         struct extent_map_tree *pinned_extents;
1301
1302         pending_del = &extent_root->fs_info->pending_del;
1303         pinned_extents = &extent_root->fs_info->pinned_extents;
1304
1305         while(1) {
1306                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1307                                             EXTENT_LOCKED);
1308                 if (ret)
1309                         break;
1310                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1311                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1312                                   GFP_NOFS);
1313                 ret = __free_extent(trans, extent_root,
1314                                      start, end + 1 - start,
1315                                      extent_root->root_key.objectid,
1316                                      0, 0, 0, 0, 0);
1317                 if (ret)
1318                         err = ret;
1319         }
1320         return err;
1321 }
1322
1323 /*
1324  * remove an extent from the root, returns 0 on success
1325  */
1326 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1327                       *root, u64 bytenr, u64 num_bytes,
1328                       u64 root_objectid, u64 ref_generation,
1329                       u64 owner_objectid, u64 owner_offset, int pin)
1330 {
1331         struct btrfs_root *extent_root = root->fs_info->extent_root;
1332         int pending_ret;
1333         int ret;
1334
1335         WARN_ON(num_bytes < root->sectorsize);
1336         if (!root->ref_cows)
1337                 ref_generation = 0;
1338
1339         if (root == extent_root) {
1340                 pin_down_bytes(root, bytenr, num_bytes, 1);
1341                 return 0;
1342         }
1343         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1344                             ref_generation, owner_objectid, owner_offset,
1345                             pin, pin == 0);
1346         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1347         return ret ? ret : pending_ret;
1348 }
1349
1350 static u64 stripe_align(struct btrfs_root *root, u64 val)
1351 {
1352         u64 mask = ((u64)root->stripesize - 1);
1353         u64 ret = (val + mask) & ~mask;
1354         return ret;
1355 }
1356
1357 /*
1358  * walks the btree of allocated extents and find a hole of a given size.
1359  * The key ins is changed to record the hole:
1360  * ins->objectid == block start
1361  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1362  * ins->offset == number of blocks
1363  * Any available blocks before search_start are skipped.
1364  */
1365 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1366                             *orig_root, u64 num_bytes, u64 empty_size,
1367                             u64 search_start, u64 search_end, u64 hint_byte,
1368                             struct btrfs_key *ins, u64 exclude_start,
1369                             u64 exclude_nr, int data)
1370 {
1371         struct btrfs_path *path;
1372         struct btrfs_key key;
1373         u64 hole_size = 0;
1374         u64 aligned;
1375         int ret;
1376         int slot = 0;
1377         u64 last_byte = 0;
1378         u64 orig_search_start = search_start;
1379         int start_found;
1380         struct extent_buffer *l;
1381         struct btrfs_root * root = orig_root->fs_info->extent_root;
1382         struct btrfs_fs_info *info = root->fs_info;
1383         u64 total_needed = num_bytes;
1384         int level;
1385         struct btrfs_block_group_cache *block_group;
1386         int full_scan = 0;
1387         int wrapped = 0;
1388         u64 cached_start;
1389
1390         WARN_ON(num_bytes < root->sectorsize);
1391         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1392
1393         level = btrfs_header_level(root->node);
1394
1395         if (num_bytes >= 32 * 1024 * 1024 && hint_byte) {
1396                 data = BTRFS_BLOCK_GROUP_MIXED;
1397         }
1398
1399         if (search_end == (u64)-1)
1400                 search_end = btrfs_super_total_bytes(&info->super_copy);
1401         if (hint_byte) {
1402                 block_group = btrfs_lookup_block_group(info, hint_byte);
1403                 if (!block_group)
1404                         hint_byte = search_start;
1405                 block_group = btrfs_find_block_group(root, block_group,
1406                                                      hint_byte, data, 1);
1407         } else {
1408                 block_group = btrfs_find_block_group(root,
1409                                                      trans->block_group,
1410                                                      search_start, data, 1);
1411         }
1412
1413         total_needed += empty_size;
1414         path = btrfs_alloc_path();
1415 check_failed:
1416         if (!block_group) {
1417                 block_group = btrfs_lookup_block_group(info, search_start);
1418                 if (!block_group)
1419                         block_group = btrfs_lookup_block_group(info,
1420                                                        orig_search_start);
1421         }
1422         search_start = find_search_start(root, &block_group, search_start,
1423                                          total_needed, data, full_scan);
1424         search_start = stripe_align(root, search_start);
1425         cached_start = search_start;
1426         btrfs_init_path(path);
1427         ins->objectid = search_start;
1428         ins->offset = 0;
1429         start_found = 0;
1430         path->reada = 2;
1431
1432         ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1433         if (ret < 0)
1434                 goto error;
1435
1436         if (path->slots[0] > 0) {
1437                 path->slots[0]--;
1438         }
1439
1440         l = path->nodes[0];
1441         btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1442
1443         /*
1444          * walk backwards to find the first extent item key
1445          */
1446         while(btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
1447                 if (path->slots[0] == 0) {
1448                         ret = btrfs_prev_leaf(root, path);
1449                         if (ret != 0) {
1450                                 ret = btrfs_search_slot(trans, root, ins,
1451                                                         path, 0, 0);
1452                                 if (ret < 0)
1453                                         goto error;
1454                                 if (path->slots[0] > 0)
1455                                         path->slots[0]--;
1456                                 break;
1457                         }
1458                 } else {
1459                         path->slots[0]--;
1460                 }
1461                 l = path->nodes[0];
1462                 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1463         }
1464         while (1) {
1465                 l = path->nodes[0];
1466                 slot = path->slots[0];
1467                 if (slot >= btrfs_header_nritems(l)) {
1468                         ret = btrfs_next_leaf(root, path);
1469                         if (ret == 0)
1470                                 continue;
1471                         if (ret < 0)
1472                                 goto error;
1473
1474                         search_start = max(search_start,
1475                                            block_group->key.objectid);
1476                         if (!start_found) {
1477                                 aligned = stripe_align(root, search_start);
1478                                 ins->objectid = aligned;
1479                                 if (aligned >= search_end) {
1480                                         ret = -ENOSPC;
1481                                         goto error;
1482                                 }
1483                                 ins->offset = search_end - aligned;
1484                                 start_found = 1;
1485                                 goto check_pending;
1486                         }
1487                         ins->objectid = stripe_align(root,
1488                                                      last_byte > search_start ?
1489                                                      last_byte : search_start);
1490                         if (search_end <= ins->objectid) {
1491                                 ret = -ENOSPC;
1492                                 goto error;
1493                         }
1494                         ins->offset = search_end - ins->objectid;
1495                         BUG_ON(ins->objectid >= search_end);
1496                         goto check_pending;
1497                 }
1498                 btrfs_item_key_to_cpu(l, &key, slot);
1499
1500                 if (key.objectid >= search_start && key.objectid > last_byte &&
1501                     start_found) {
1502                         if (last_byte < search_start)
1503                                 last_byte = search_start;
1504                         aligned = stripe_align(root, last_byte);
1505                         hole_size = key.objectid - aligned;
1506                         if (key.objectid > aligned && hole_size >= num_bytes) {
1507                                 ins->objectid = aligned;
1508                                 ins->offset = hole_size;
1509                                 goto check_pending;
1510                         }
1511                 }
1512                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
1513                         if (!start_found && btrfs_key_type(&key) ==
1514                             BTRFS_BLOCK_GROUP_ITEM_KEY) {
1515                                 last_byte = key.objectid;
1516                                 start_found = 1;
1517                         }
1518                         goto next;
1519                 }
1520
1521
1522                 start_found = 1;
1523                 last_byte = key.objectid + key.offset;
1524
1525                 if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
1526                     last_byte >= block_group->key.objectid +
1527                     block_group->key.offset) {
1528                         btrfs_release_path(root, path);
1529                         search_start = block_group->key.objectid +
1530                                 block_group->key.offset;
1531                         goto new_group;
1532                 }
1533 next:
1534                 path->slots[0]++;
1535                 cond_resched();
1536         }
1537 check_pending:
1538         /* we have to make sure we didn't find an extent that has already
1539          * been allocated by the map tree or the original allocation
1540          */
1541         btrfs_release_path(root, path);
1542         BUG_ON(ins->objectid < search_start);
1543
1544         if (ins->objectid + num_bytes >= search_end)
1545                 goto enospc;
1546         if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
1547             ins->objectid + num_bytes > block_group->
1548             key.objectid + block_group->key.offset) {
1549                 search_start = block_group->key.objectid +
1550                         block_group->key.offset;
1551                 goto new_group;
1552         }
1553         if (test_range_bit(&info->extent_ins, ins->objectid,
1554                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1555                 search_start = ins->objectid + num_bytes;
1556                 goto new_group;
1557         }
1558         if (test_range_bit(&info->pinned_extents, ins->objectid,
1559                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1560                 search_start = ins->objectid + num_bytes;
1561                 goto new_group;
1562         }
1563         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1564             ins->objectid < exclude_start + exclude_nr)) {
1565                 search_start = exclude_start + exclude_nr;
1566                 goto new_group;
1567         }
1568         if (!data) {
1569                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1570                 if (block_group)
1571                         trans->block_group = block_group;
1572         }
1573         ins->offset = num_bytes;
1574         btrfs_free_path(path);
1575         return 0;
1576
1577 new_group:
1578         if (search_start + num_bytes >= search_end) {
1579 enospc:
1580                 search_start = orig_search_start;
1581                 if (full_scan) {
1582                         ret = -ENOSPC;
1583                         goto error;
1584                 }
1585                 if (wrapped) {
1586                         if (!full_scan)
1587                                 total_needed -= empty_size;
1588                         full_scan = 1;
1589                         data = BTRFS_BLOCK_GROUP_MIXED;
1590                 } else
1591                         wrapped = 1;
1592         }
1593         block_group = btrfs_lookup_block_group(info, search_start);
1594         cond_resched();
1595         block_group = btrfs_find_block_group(root, block_group,
1596                                              search_start, data, 0);
1597         goto check_failed;
1598
1599 error:
1600         btrfs_release_path(root, path);
1601         btrfs_free_path(path);
1602         return ret;
1603 }
1604 /*
1605  * finds a free extent and does all the dirty work required for allocation
1606  * returns the key for the extent through ins, and a tree buffer for
1607  * the first block of the extent through buf.
1608  *
1609  * returns 0 if everything worked, non-zero otherwise.
1610  */
1611 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1612                        struct btrfs_root *root,
1613                        u64 num_bytes, u64 root_objectid, u64 ref_generation,
1614                        u64 owner, u64 owner_offset,
1615                        u64 empty_size, u64 hint_byte,
1616                        u64 search_end, struct btrfs_key *ins, int data)
1617 {
1618         int ret;
1619         int pending_ret;
1620         u64 super_used, root_used;
1621         u64 search_start = 0;
1622         u64 new_hint;
1623         struct btrfs_fs_info *info = root->fs_info;
1624         struct btrfs_root *extent_root = info->extent_root;
1625         struct btrfs_extent_item extent_item;
1626         struct btrfs_path *path;
1627
1628         btrfs_set_stack_extent_refs(&extent_item, 1);
1629
1630         new_hint = max(hint_byte, root->fs_info->alloc_start);
1631         if (new_hint < btrfs_super_total_bytes(&info->super_copy))
1632                 hint_byte = new_hint;
1633
1634         WARN_ON(num_bytes < root->sectorsize);
1635         ret = find_free_extent(trans, root, num_bytes, empty_size,
1636                                search_start, search_end, hint_byte, ins,
1637                                trans->alloc_exclude_start,
1638                                trans->alloc_exclude_nr, data);
1639         BUG_ON(ret);
1640         if (ret)
1641                 return ret;
1642
1643         /* block accounting for super block */
1644         super_used = btrfs_super_bytes_used(&info->super_copy);
1645         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1646
1647         /* block accounting for root item */
1648         root_used = btrfs_root_used(&root->root_item);
1649         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1650
1651         clear_extent_dirty(&root->fs_info->free_space_cache,
1652                            ins->objectid, ins->objectid + ins->offset - 1,
1653                            GFP_NOFS);
1654
1655         if (root == extent_root) {
1656                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1657                                 ins->objectid + ins->offset - 1,
1658                                 EXTENT_LOCKED, GFP_NOFS);
1659                 WARN_ON(data == 1);
1660                 goto update_block;
1661         }
1662
1663         WARN_ON(trans->alloc_exclude_nr);
1664         trans->alloc_exclude_start = ins->objectid;
1665         trans->alloc_exclude_nr = ins->offset;
1666         ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1667                                 sizeof(extent_item));
1668
1669         trans->alloc_exclude_start = 0;
1670         trans->alloc_exclude_nr = 0;
1671         BUG_ON(ret);
1672
1673         path = btrfs_alloc_path();
1674         BUG_ON(!path);
1675         ret = btrfs_insert_extent_backref(trans, extent_root, path,
1676                                           ins->objectid, root_objectid,
1677                                           ref_generation, owner, owner_offset);
1678
1679         BUG_ON(ret);
1680         btrfs_free_path(path);
1681         finish_current_insert(trans, extent_root);
1682         pending_ret = del_pending_extents(trans, extent_root);
1683
1684         if (ret) {
1685                 return ret;
1686         }
1687         if (pending_ret) {
1688                 return pending_ret;
1689         }
1690
1691 update_block:
1692         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1693                                  data);
1694         BUG_ON(ret);
1695         return 0;
1696 }
1697
1698 /*
1699  * helper function to allocate a block for a given tree
1700  * returns the tree buffer or NULL.
1701  */
1702 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1703                                              struct btrfs_root *root,
1704                                              u32 blocksize,
1705                                              u64 root_objectid, u64 hint,
1706                                              u64 empty_size)
1707 {
1708         u64 ref_generation;
1709
1710         if (root->ref_cows)
1711                 ref_generation = trans->transid;
1712         else
1713                 ref_generation = 0;
1714
1715
1716         return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
1717                                         ref_generation, 0, 0, hint, empty_size);
1718 }
1719
1720 /*
1721  * helper function to allocate a block for a given tree
1722  * returns the tree buffer or NULL.
1723  */
1724 struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1725                                              struct btrfs_root *root,
1726                                              u32 blocksize,
1727                                              u64 root_objectid,
1728                                              u64 ref_generation,
1729                                              u64 first_objectid,
1730                                              int level,
1731                                              u64 hint,
1732                                              u64 empty_size)
1733 {
1734         struct btrfs_key ins;
1735         int ret;
1736         struct extent_buffer *buf;
1737
1738         ret = btrfs_alloc_extent(trans, root, blocksize,
1739                                  root_objectid, ref_generation,
1740                                  level, first_objectid, empty_size, hint,
1741                                  (u64)-1, &ins, 0);
1742         if (ret) {
1743                 BUG_ON(ret > 0);
1744                 return ERR_PTR(ret);
1745         }
1746         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1747         if (!buf) {
1748                 btrfs_free_extent(trans, root, ins.objectid, blocksize,
1749                                   root->root_key.objectid, ref_generation,
1750                                   0, 0, 0);
1751                 return ERR_PTR(-ENOMEM);
1752         }
1753         btrfs_set_buffer_uptodate(buf);
1754         set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
1755                          buf->start + buf->len - 1, GFP_NOFS);
1756         set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
1757                         buf->start, buf->start + buf->len - 1,
1758                         EXTENT_CSUM, GFP_NOFS);
1759         buf->flags |= EXTENT_CSUM;
1760         btrfs_set_buffer_defrag(buf);
1761         trans->blocks_used++;
1762         return buf;
1763 }
1764
1765 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1766                          struct btrfs_root *root, struct extent_buffer *leaf)
1767 {
1768         u64 leaf_owner;
1769         u64 leaf_generation;
1770         struct btrfs_key key;
1771         struct btrfs_file_extent_item *fi;
1772         int i;
1773         int nritems;
1774         int ret;
1775
1776         BUG_ON(!btrfs_is_leaf(leaf));
1777         nritems = btrfs_header_nritems(leaf);
1778         leaf_owner = btrfs_header_owner(leaf);
1779         leaf_generation = btrfs_header_generation(leaf);
1780
1781         for (i = 0; i < nritems; i++) {
1782                 u64 disk_bytenr;
1783
1784                 btrfs_item_key_to_cpu(leaf, &key, i);
1785                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1786                         continue;
1787                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1788                 if (btrfs_file_extent_type(leaf, fi) ==
1789                     BTRFS_FILE_EXTENT_INLINE)
1790                         continue;
1791                 /*
1792                  * FIXME make sure to insert a trans record that
1793                  * repeats the snapshot del on crash
1794                  */
1795                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1796                 if (disk_bytenr == 0)
1797                         continue;
1798                 ret = btrfs_free_extent(trans, root, disk_bytenr,
1799                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
1800                                 leaf_owner, leaf_generation,
1801                                 key.objectid, key.offset, 0);
1802                 BUG_ON(ret);
1803         }
1804         return 0;
1805 }
1806
1807 static void reada_walk_down(struct btrfs_root *root,
1808                             struct extent_buffer *node)
1809 {
1810         int i;
1811         u32 nritems;
1812         u64 bytenr;
1813         int ret;
1814         u32 refs;
1815         int level;
1816         u32 blocksize;
1817
1818         nritems = btrfs_header_nritems(node);
1819         level = btrfs_header_level(node);
1820         for (i = 0; i < nritems; i++) {
1821                 bytenr = btrfs_node_blockptr(node, i);
1822                 blocksize = btrfs_level_size(root, level - 1);
1823                 ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs);
1824                 BUG_ON(ret);
1825                 if (refs != 1)
1826                         continue;
1827                 mutex_unlock(&root->fs_info->fs_mutex);
1828                 ret = readahead_tree_block(root, bytenr, blocksize);
1829                 cond_resched();
1830                 mutex_lock(&root->fs_info->fs_mutex);
1831                 if (ret)
1832                         break;
1833         }
1834 }
1835
1836 /*
1837  * helper function for drop_snapshot, this walks down the tree dropping ref
1838  * counts as it goes.
1839  */
1840 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1841                           *root, struct btrfs_path *path, int *level)
1842 {
1843         u64 root_owner;
1844         u64 root_gen;
1845         u64 bytenr;
1846         struct extent_buffer *next;
1847         struct extent_buffer *cur;
1848         struct extent_buffer *parent;
1849         u32 blocksize;
1850         int ret;
1851         u32 refs;
1852
1853         WARN_ON(*level < 0);
1854         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1855         ret = lookup_extent_ref(trans, root,
1856                                 path->nodes[*level]->start,
1857                                 path->nodes[*level]->len, &refs);
1858         BUG_ON(ret);
1859         if (refs > 1)
1860                 goto out;
1861
1862         /*
1863          * walk down to the last node level and free all the leaves
1864          */
1865         while(*level >= 0) {
1866                 WARN_ON(*level < 0);
1867                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1868                 cur = path->nodes[*level];
1869
1870                 if (*level > 0 && path->slots[*level] == 0)
1871                         reada_walk_down(root, cur);
1872
1873                 if (btrfs_header_level(cur) != *level)
1874                         WARN_ON(1);
1875
1876                 if (path->slots[*level] >=
1877                     btrfs_header_nritems(cur))
1878                         break;
1879                 if (*level == 0) {
1880                         ret = drop_leaf_ref(trans, root, cur);
1881                         BUG_ON(ret);
1882                         break;
1883                 }
1884                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1885                 blocksize = btrfs_level_size(root, *level - 1);
1886                 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
1887                 BUG_ON(ret);
1888                 if (refs != 1) {
1889                         parent = path->nodes[*level];
1890                         root_owner = btrfs_header_owner(parent);
1891                         root_gen = btrfs_header_generation(parent);
1892                         path->slots[*level]++;
1893                         ret = btrfs_free_extent(trans, root, bytenr,
1894                                                 blocksize, root_owner,
1895                                                 root_gen, 0, 0, 1);
1896                         BUG_ON(ret);
1897                         continue;
1898                 }
1899                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1900                 if (!next || !btrfs_buffer_uptodate(next)) {
1901                         free_extent_buffer(next);
1902                         mutex_unlock(&root->fs_info->fs_mutex);
1903                         next = read_tree_block(root, bytenr, blocksize);
1904                         mutex_lock(&root->fs_info->fs_mutex);
1905
1906                         /* we dropped the lock, check one more time */
1907                         ret = lookup_extent_ref(trans, root, bytenr,
1908                                                 blocksize, &refs);
1909                         BUG_ON(ret);
1910                         if (refs != 1) {
1911                                 parent = path->nodes[*level];
1912                                 root_owner = btrfs_header_owner(parent);
1913                                 root_gen = btrfs_header_generation(parent);
1914
1915                                 path->slots[*level]++;
1916                                 free_extent_buffer(next);
1917                                 ret = btrfs_free_extent(trans, root, bytenr,
1918                                                         blocksize,
1919                                                         root_owner,
1920                                                         root_gen, 0, 0, 1);
1921                                 BUG_ON(ret);
1922                                 continue;
1923                         }
1924                 }
1925                 WARN_ON(*level <= 0);
1926                 if (path->nodes[*level-1])
1927                         free_extent_buffer(path->nodes[*level-1]);
1928                 path->nodes[*level-1] = next;
1929                 *level = btrfs_header_level(next);
1930                 path->slots[*level] = 0;
1931         }
1932 out:
1933         WARN_ON(*level < 0);
1934         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1935
1936         if (path->nodes[*level] == root->node) {
1937                 root_owner = root->root_key.objectid;
1938                 parent = path->nodes[*level];
1939         } else {
1940                 parent = path->nodes[*level + 1];
1941                 root_owner = btrfs_header_owner(parent);
1942         }
1943
1944         root_gen = btrfs_header_generation(parent);
1945         ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
1946                                 path->nodes[*level]->len,
1947                                 root_owner, root_gen, 0, 0, 1);
1948         free_extent_buffer(path->nodes[*level]);
1949         path->nodes[*level] = NULL;
1950         *level += 1;
1951         BUG_ON(ret);
1952         return 0;
1953 }
1954
1955 /*
1956  * helper for dropping snapshots.  This walks back up the tree in the path
1957  * to find the first node higher up where we haven't yet gone through
1958  * all the slots
1959  */
1960 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1961                         *root, struct btrfs_path *path, int *level)
1962 {
1963         u64 root_owner;
1964         u64 root_gen;
1965         struct btrfs_root_item *root_item = &root->root_item;
1966         int i;
1967         int slot;
1968         int ret;
1969
1970         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1971                 slot = path->slots[i];
1972                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
1973                         struct extent_buffer *node;
1974                         struct btrfs_disk_key disk_key;
1975                         node = path->nodes[i];
1976                         path->slots[i]++;
1977                         *level = i;
1978                         WARN_ON(*level == 0);
1979                         btrfs_node_key(node, &disk_key, path->slots[i]);
1980                         memcpy(&root_item->drop_progress,
1981                                &disk_key, sizeof(disk_key));
1982                         root_item->drop_level = i;
1983                         return 0;
1984                 } else {
1985                         if (path->nodes[*level] == root->node) {
1986                                 root_owner = root->root_key.objectid;
1987                                 root_gen =
1988                                    btrfs_header_generation(path->nodes[*level]);
1989                         } else {
1990                                 struct extent_buffer *node;
1991                                 node = path->nodes[*level + 1];
1992                                 root_owner = btrfs_header_owner(node);
1993                                 root_gen = btrfs_header_generation(node);
1994                         }
1995                         ret = btrfs_free_extent(trans, root,
1996                                                 path->nodes[*level]->start,
1997                                                 path->nodes[*level]->len,
1998                                                 root_owner, root_gen, 0, 0, 1);
1999                         BUG_ON(ret);
2000                         free_extent_buffer(path->nodes[*level]);
2001                         path->nodes[*level] = NULL;
2002                         *level = i + 1;
2003                 }
2004         }
2005         return 1;
2006 }
2007
2008 /*
2009  * drop the reference count on the tree rooted at 'snap'.  This traverses
2010  * the tree freeing any blocks that have a ref count of zero after being
2011  * decremented.
2012  */
2013 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2014                         *root)
2015 {
2016         int ret = 0;
2017         int wret;
2018         int level;
2019         struct btrfs_path *path;
2020         int i;
2021         int orig_level;
2022         struct btrfs_root_item *root_item = &root->root_item;
2023
2024         path = btrfs_alloc_path();
2025         BUG_ON(!path);
2026
2027         level = btrfs_header_level(root->node);
2028         orig_level = level;
2029         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2030                 path->nodes[level] = root->node;
2031                 extent_buffer_get(root->node);
2032                 path->slots[level] = 0;
2033         } else {
2034                 struct btrfs_key key;
2035                 struct btrfs_disk_key found_key;
2036                 struct extent_buffer *node;
2037
2038                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2039                 level = root_item->drop_level;
2040                 path->lowest_level = level;
2041                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2042                 if (wret < 0) {
2043                         ret = wret;
2044                         goto out;
2045                 }
2046                 node = path->nodes[level];
2047                 btrfs_node_key(node, &found_key, path->slots[level]);
2048                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2049                                sizeof(found_key)));
2050         }
2051         while(1) {
2052                 wret = walk_down_tree(trans, root, path, &level);
2053                 if (wret > 0)
2054                         break;
2055                 if (wret < 0)
2056                         ret = wret;
2057
2058                 wret = walk_up_tree(trans, root, path, &level);
2059                 if (wret > 0)
2060                         break;
2061                 if (wret < 0)
2062                         ret = wret;
2063                 ret = -EAGAIN;
2064                 break;
2065         }
2066         for (i = 0; i <= orig_level; i++) {
2067                 if (path->nodes[i]) {
2068                         free_extent_buffer(path->nodes[i]);
2069                         path->nodes[i] = NULL;
2070                 }
2071         }
2072 out:
2073         btrfs_free_path(path);
2074         return ret;
2075 }
2076
2077 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2078 {
2079         u64 start;
2080         u64 end;
2081         u64 ptr;
2082         int ret;
2083         while(1) {
2084                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2085                                             &start, &end, (unsigned int)-1);
2086                 if (ret)
2087                         break;
2088                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2089                 if (!ret)
2090                         kfree((void *)(unsigned long)ptr);
2091                 clear_extent_bits(&info->block_group_cache, start,
2092                                   end, (unsigned int)-1, GFP_NOFS);
2093         }
2094         while(1) {
2095                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2096                                             &start, &end, EXTENT_DIRTY);
2097                 if (ret)
2098                         break;
2099                 clear_extent_dirty(&info->free_space_cache, start,
2100                                    end, GFP_NOFS);
2101         }
2102         return 0;
2103 }
2104
2105 static int relocate_inode_pages(struct inode *inode, u64 start, u64 len)
2106 {
2107         u64 page_start;
2108         u64 page_end;
2109         u64 delalloc_start;
2110         u64 existing_delalloc;
2111         unsigned long last_index;
2112         unsigned long first_index;
2113         unsigned long i;
2114         struct page *page;
2115         struct btrfs_root *root = BTRFS_I(inode)->root;
2116         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2117         struct file_ra_state ra;
2118
2119         mutex_lock(&inode->i_mutex);
2120         first_index = start >> PAGE_CACHE_SHIFT;
2121         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2122
2123         memset(&ra, 0, sizeof(ra));
2124         file_ra_state_init(&ra, inode->i_mapping);
2125         btrfs_force_ra(inode->i_mapping, &ra, NULL, first_index, last_index);
2126
2127         for (i = first_index; i <= last_index; i++) {
2128                 page = grab_cache_page(inode->i_mapping, i);
2129                 if (!page)
2130                         goto out_unlock;
2131                 if (!PageUptodate(page)) {
2132                         btrfs_readpage(NULL, page);
2133                         lock_page(page);
2134                         if (!PageUptodate(page)) {
2135                                 unlock_page(page);
2136                                 page_cache_release(page);
2137                                 goto out_unlock;
2138                         }
2139                 }
2140                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2141                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2142
2143                 lock_extent(em_tree, page_start, page_end, GFP_NOFS);
2144
2145                 delalloc_start = page_start;
2146                 existing_delalloc =
2147                         count_range_bits(&BTRFS_I(inode)->extent_tree,
2148                                          &delalloc_start, page_end,
2149                                          PAGE_CACHE_SIZE, EXTENT_DELALLOC);
2150
2151                 set_extent_delalloc(em_tree, page_start,
2152                                     page_end, GFP_NOFS);
2153
2154                 spin_lock(&root->fs_info->delalloc_lock);
2155                 root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE -
2156                                                  existing_delalloc;
2157                 spin_unlock(&root->fs_info->delalloc_lock);
2158
2159                 unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
2160                 set_page_dirty(page);
2161                 unlock_page(page);
2162                 page_cache_release(page);
2163         }
2164
2165 out_unlock:
2166         mutex_unlock(&inode->i_mutex);
2167         return 0;
2168 }
2169
2170 static int relocate_one_reference(struct btrfs_root *extent_root,
2171                                   struct btrfs_path *path,
2172                                   struct btrfs_key *extent_key,
2173                                   u64 ref_root, u64 ref_gen, u64 ref_objectid,
2174                                   u64 ref_offset)
2175 {
2176         struct inode *inode;
2177         struct btrfs_root *found_root;
2178         struct btrfs_key root_location;
2179         int ret;
2180
2181         root_location.objectid = ref_root;
2182         if (ref_gen == 0)
2183                 root_location.offset = 0;
2184         else
2185                 root_location.offset = (u64)-1;
2186         root_location.type = BTRFS_ROOT_ITEM_KEY;
2187
2188         found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
2189                                                 &root_location);
2190         BUG_ON(!found_root);
2191
2192         if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2193                 mutex_unlock(&extent_root->fs_info->fs_mutex);
2194                 inode = btrfs_iget_locked(extent_root->fs_info->sb,
2195                                           ref_objectid, found_root);
2196                 if (inode->i_state & I_NEW) {
2197                         /* the inode and parent dir are two different roots */
2198                         BTRFS_I(inode)->root = found_root;
2199                         BTRFS_I(inode)->location.objectid = ref_objectid;
2200                         BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
2201                         BTRFS_I(inode)->location.offset = 0;
2202                         btrfs_read_locked_inode(inode);
2203                         unlock_new_inode(inode);
2204
2205                 }
2206                 /* this can happen if the reference is not against
2207                  * the latest version of the tree root
2208                  */
2209                 if (is_bad_inode(inode)) {
2210                         mutex_lock(&extent_root->fs_info->fs_mutex);
2211                         goto out;
2212                 }
2213                 relocate_inode_pages(inode, ref_offset, extent_key->offset);
2214                 /* FIXME, data=ordered will help get rid of this */
2215                 filemap_fdatawrite(inode->i_mapping);
2216                 iput(inode);
2217                 mutex_lock(&extent_root->fs_info->fs_mutex);
2218         } else {
2219                 struct btrfs_trans_handle *trans;
2220                 struct btrfs_key found_key;
2221                 struct extent_buffer *eb;
2222                 int level;
2223                 int i;
2224
2225                 trans = btrfs_start_transaction(found_root, 1);
2226                 eb = read_tree_block(found_root, extent_key->objectid,
2227                                      extent_key->offset);
2228                 level = btrfs_header_level(eb);
2229
2230                 if (level == 0)
2231                         btrfs_item_key_to_cpu(eb, &found_key, 0);
2232                 else
2233                         btrfs_node_key_to_cpu(eb, &found_key, 0);
2234
2235                 free_extent_buffer(eb);
2236
2237                 path->lowest_level = level;
2238                 path->reada = 2;
2239                 ret = btrfs_search_slot(trans, found_root, &found_key, path,
2240                                         0, 1);
2241                 path->lowest_level = 0;
2242                 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2243                         if (!path->nodes[i])
2244                                 break;
2245                         free_extent_buffer(path->nodes[i]);
2246                         path->nodes[i] = NULL;
2247                 }
2248                 btrfs_release_path(found_root, path);
2249                 btrfs_end_transaction(trans, found_root);
2250         }
2251
2252 out:
2253         return 0;
2254 }
2255
2256 static int relocate_one_extent(struct btrfs_root *extent_root,
2257                                struct btrfs_path *path,
2258                                struct btrfs_key *extent_key)
2259 {
2260         struct btrfs_key key;
2261         struct btrfs_key found_key;
2262         struct btrfs_extent_ref *ref;
2263         struct extent_buffer *leaf;
2264         u64 ref_root;
2265         u64 ref_gen;
2266         u64 ref_objectid;
2267         u64 ref_offset;
2268         u32 nritems;
2269         u32 item_size;
2270         int ret = 0;
2271
2272         key.objectid = extent_key->objectid;
2273         key.type = BTRFS_EXTENT_REF_KEY;
2274         key.offset = 0;
2275
2276         while(1) {
2277                 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2278
2279                 BUG_ON(ret == 0);
2280
2281                 if (ret < 0)
2282                         goto out;
2283
2284                 ret = 0;
2285                 leaf = path->nodes[0];
2286                 nritems = btrfs_header_nritems(leaf);
2287                 if (path->slots[0] == nritems)
2288                         goto out;
2289
2290                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2291                 if (found_key.objectid != extent_key->objectid)
2292                         break;
2293
2294                 if (found_key.type != BTRFS_EXTENT_REF_KEY)
2295                         break;
2296
2297                 key.offset = found_key.offset + 1;
2298                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2299
2300                 ref = btrfs_item_ptr(leaf, path->slots[0],
2301                                      struct btrfs_extent_ref);
2302                 ref_root = btrfs_ref_root(leaf, ref);
2303                 ref_gen = btrfs_ref_generation(leaf, ref);
2304                 ref_objectid = btrfs_ref_objectid(leaf, ref);
2305                 ref_offset = btrfs_ref_offset(leaf, ref);
2306                 btrfs_release_path(extent_root, path);
2307
2308                 ret = relocate_one_reference(extent_root, path,
2309                                              extent_key, ref_root, ref_gen,
2310                                              ref_objectid, ref_offset);
2311                 if (ret)
2312                         goto out;
2313         }
2314         ret = 0;
2315 out:
2316         btrfs_release_path(extent_root, path);
2317         return ret;
2318 }
2319
2320 static int find_overlapping_extent(struct btrfs_root *root,
2321                                    struct btrfs_path *path, u64 new_size)
2322 {
2323         struct btrfs_key found_key;
2324         struct extent_buffer *leaf;
2325         int ret;
2326
2327         while(1) {
2328                 if (path->slots[0] == 0) {
2329                         ret = btrfs_prev_leaf(root, path);
2330                         if (ret == 1) {
2331                                 return 1;
2332                         }
2333                         if (ret < 0)
2334                                 return ret;
2335                 } else {
2336                         path->slots[0]--;
2337                 }
2338                 leaf = path->nodes[0];
2339                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2340                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY) {
2341                         if (found_key.objectid + found_key.offset > new_size)
2342                                 return 0;
2343                         else
2344                                 return 1;
2345                 }
2346         }
2347         return 1;
2348 }
2349
2350 int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
2351 {
2352         struct btrfs_trans_handle *trans;
2353         struct btrfs_root *tree_root = root->fs_info->tree_root;
2354         struct btrfs_path *path;
2355         u64 cur_byte;
2356         u64 total_found;
2357         u64 ptr;
2358         struct btrfs_fs_info *info = root->fs_info;
2359         struct extent_map_tree *block_group_cache;
2360         struct btrfs_key key;
2361         struct btrfs_key found_key = { 0, 0, 0 };
2362         struct extent_buffer *leaf;
2363         u32 nritems;
2364         int ret;
2365         int slot;
2366
2367         btrfs_set_super_total_bytes(&info->super_copy, new_size);
2368         block_group_cache = &info->block_group_cache;
2369         path = btrfs_alloc_path();
2370         root = root->fs_info->extent_root;
2371         path->reada = 2;
2372
2373 again:
2374         total_found = 0;
2375         key.objectid = new_size;
2376         cur_byte = key.objectid;
2377         key.offset = 0;
2378         key.type = 0;
2379         while(1) {
2380                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2381                 if (ret < 0)
2382                         goto out;
2383 next:
2384                 leaf = path->nodes[0];
2385                 if (key.objectid == new_size - 1) {
2386                         ret = find_overlapping_extent(root, path, new_size);
2387                         if (ret != 0) {
2388                                 btrfs_release_path(root, path);
2389                                 ret = btrfs_search_slot(NULL, root, &key,
2390                                                         path, 0, 0);
2391                                 if (ret < 0)
2392                                         goto out;
2393                         }
2394                 }
2395                 nritems = btrfs_header_nritems(leaf);
2396                 ret = 0;
2397                 slot = path->slots[0];
2398                 if (slot < nritems)
2399                         btrfs_item_key_to_cpu(leaf, &found_key, slot);
2400                 if (slot == nritems ||
2401                     btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY) {
2402                         path->slots[0]++;
2403                         if (path->slots[0] >= nritems) {
2404                                 ret = btrfs_next_leaf(root, path);
2405                                 if (ret < 0)
2406                                         goto out;
2407                                 if (ret == 1) {
2408                                         ret = 0;
2409                                         break;
2410                                 }
2411                         }
2412                         goto next;
2413                 }
2414                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2415                 if (found_key.objectid + found_key.offset <= cur_byte)
2416                         continue;
2417                 total_found++;
2418                 cur_byte = found_key.objectid + found_key.offset;
2419                 key.objectid = cur_byte;
2420                 btrfs_release_path(root, path);
2421                 ret = relocate_one_extent(root, path, &found_key);
2422         }
2423
2424         btrfs_release_path(root, path);
2425
2426         if (total_found > 0) {
2427                 trans = btrfs_start_transaction(tree_root, 1);
2428                 btrfs_commit_transaction(trans, tree_root);
2429
2430                 mutex_unlock(&root->fs_info->fs_mutex);
2431                 btrfs_clean_old_snapshots(tree_root);
2432                 mutex_lock(&root->fs_info->fs_mutex);
2433
2434                 trans = btrfs_start_transaction(tree_root, 1);
2435                 btrfs_commit_transaction(trans, tree_root);
2436                 goto again;
2437         }
2438
2439         trans = btrfs_start_transaction(root, 1);
2440         key.objectid = new_size;
2441         key.offset = 0;
2442         key.type = 0;
2443         while(1) {
2444                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2445                 if (ret < 0)
2446                         goto out;
2447 bg_next:
2448                 leaf = path->nodes[0];
2449                 nritems = btrfs_header_nritems(leaf);
2450                 ret = 0;
2451                 slot = path->slots[0];
2452                 if (slot < nritems)
2453                         btrfs_item_key_to_cpu(leaf, &found_key, slot);
2454                 if (slot == nritems ||
2455                     btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) {
2456                         if (slot < nritems) {
2457                                 printk("shrinker found key %Lu %u %Lu\n",
2458                                        found_key.objectid, found_key.type,
2459                                        found_key.offset);
2460                                 path->slots[0]++;
2461                         }
2462                         if (path->slots[0] >= nritems) {
2463                                 ret = btrfs_next_leaf(root, path);
2464                                 if (ret < 0)
2465                                         break;
2466                                 if (ret == 1) {
2467                                         ret = 0;
2468                                         break;
2469                                 }
2470                         }
2471                         goto bg_next;
2472                 }
2473                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2474                 ret = get_state_private(&info->block_group_cache,
2475                                         found_key.objectid, &ptr);
2476                 if (!ret)
2477                         kfree((void *)(unsigned long)ptr);
2478
2479                 clear_extent_bits(&info->block_group_cache, found_key.objectid,
2480                                   found_key.objectid + found_key.offset - 1,
2481                                   (unsigned int)-1, GFP_NOFS);
2482
2483                 key.objectid = found_key.objectid + 1;
2484                 btrfs_del_item(trans, root, path);
2485                 btrfs_release_path(root, path);
2486         }
2487         clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
2488                            GFP_NOFS);
2489         btrfs_commit_transaction(trans, root);
2490 out:
2491         btrfs_free_path(path);
2492         return ret;
2493 }
2494
2495 int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
2496                            struct btrfs_root *root, u64 new_size)
2497 {
2498         struct btrfs_path *path;
2499         u64 nr = 0;
2500         u64 cur_byte;
2501         u64 old_size;
2502         struct btrfs_block_group_cache *cache;
2503         struct btrfs_block_group_item *item;
2504         struct btrfs_fs_info *info = root->fs_info;
2505         struct extent_map_tree *block_group_cache;
2506         struct btrfs_key key;
2507         struct extent_buffer *leaf;
2508         int ret;
2509         int bit;
2510
2511         old_size = btrfs_super_total_bytes(&info->super_copy);
2512         block_group_cache = &info->block_group_cache;
2513
2514         root = info->extent_root;
2515
2516         cache = btrfs_lookup_block_group(root->fs_info, old_size - 1);
2517
2518         cur_byte = cache->key.objectid + cache->key.offset;
2519         if (cur_byte >= new_size)
2520                 goto set_size;
2521
2522         key.offset = BTRFS_BLOCK_GROUP_SIZE;
2523         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2524
2525         path = btrfs_alloc_path();
2526         if (!path)
2527                 return -ENOMEM;
2528
2529         while(cur_byte < new_size) {
2530                 key.objectid = cur_byte;
2531                 ret = btrfs_insert_empty_item(trans, root, path, &key,
2532                                         sizeof(struct btrfs_block_group_item));
2533                 BUG_ON(ret);
2534                 leaf = path->nodes[0];
2535                 item = btrfs_item_ptr(leaf, path->slots[0],
2536                                       struct btrfs_block_group_item);
2537
2538                 btrfs_set_disk_block_group_used(leaf, item, 0);
2539                 if (nr % 3) {
2540                         btrfs_set_disk_block_group_flags(leaf, item,
2541                                                  BTRFS_BLOCK_GROUP_DATA);
2542                 } else {
2543                         btrfs_set_disk_block_group_flags(leaf, item, 0);
2544                 }
2545                 nr++;
2546
2547                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2548                 BUG_ON(!cache);
2549
2550                 read_extent_buffer(leaf, &cache->item, (unsigned long)item,
2551                                    sizeof(cache->item));
2552
2553                 memcpy(&cache->key, &key, sizeof(key));
2554                 cache->cached = 0;
2555                 cache->pinned = 0;
2556                 cur_byte = key.objectid + key.offset;
2557                 btrfs_release_path(root, path);
2558
2559                 if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
2560                         bit = BLOCK_GROUP_DATA;
2561                         cache->data = BTRFS_BLOCK_GROUP_DATA;
2562                 } else {
2563                         bit = BLOCK_GROUP_METADATA;
2564                         cache->data = 0;
2565                 }
2566
2567                 /* use EXTENT_LOCKED to prevent merging */
2568                 set_extent_bits(block_group_cache, key.objectid,
2569                                 key.objectid + key.offset - 1,
2570                                 bit | EXTENT_LOCKED, GFP_NOFS);
2571                 set_state_private(block_group_cache, key.objectid,
2572                                   (unsigned long)cache);
2573         }
2574         btrfs_free_path(path);
2575 set_size:
2576         btrfs_set_super_total_bytes(&info->super_copy, new_size);
2577         return 0;
2578 }
2579
2580 int btrfs_read_block_groups(struct btrfs_root *root)
2581 {
2582         struct btrfs_path *path;
2583         int ret;
2584         int err = 0;
2585         int bit;
2586         struct btrfs_block_group_cache *cache;
2587         struct btrfs_fs_info *info = root->fs_info;
2588         struct extent_map_tree *block_group_cache;
2589         struct btrfs_key key;
2590         struct btrfs_key found_key;
2591         struct extent_buffer *leaf;
2592
2593         block_group_cache = &info->block_group_cache;
2594
2595         root = info->extent_root;
2596         key.objectid = 0;
2597         key.offset = BTRFS_BLOCK_GROUP_SIZE;
2598         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2599
2600         path = btrfs_alloc_path();
2601         if (!path)
2602                 return -ENOMEM;
2603
2604         while(1) {
2605                 ret = btrfs_search_slot(NULL, info->extent_root,
2606                                         &key, path, 0, 0);
2607                 if (ret != 0) {
2608                         err = ret;
2609                         break;
2610                 }
2611                 leaf = path->nodes[0];
2612                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2613                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2614                 if (!cache) {
2615                         err = -1;
2616                         break;
2617                 }
2618
2619                 read_extent_buffer(leaf, &cache->item,
2620                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
2621                                    sizeof(cache->item));
2622                 memcpy(&cache->key, &found_key, sizeof(found_key));
2623                 cache->cached = 0;
2624                 cache->pinned = 0;
2625                 key.objectid = found_key.objectid + found_key.offset;
2626                 btrfs_release_path(root, path);
2627
2628                 if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) {
2629                         bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
2630                         cache->data = BTRFS_BLOCK_GROUP_MIXED;
2631                 } else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
2632                         bit = BLOCK_GROUP_DATA;
2633                         cache->data = BTRFS_BLOCK_GROUP_DATA;
2634                 } else {
2635                         bit = BLOCK_GROUP_METADATA;
2636                         cache->data = 0;
2637                 }
2638
2639                 /* use EXTENT_LOCKED to prevent merging */
2640                 set_extent_bits(block_group_cache, found_key.objectid,
2641                                 found_key.objectid + found_key.offset - 1,
2642                                 bit | EXTENT_LOCKED, GFP_NOFS);
2643                 set_state_private(block_group_cache, found_key.objectid,
2644                                   (unsigned long)cache);
2645
2646                 if (key.objectid >=
2647                     btrfs_super_total_bytes(&info->super_copy))
2648                         break;
2649         }
2650
2651         btrfs_free_path(path);
2652         return 0;
2653 }