Add a min size parameter to btrfs_alloc_extent
[linux-2.6-block.git] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include "hash.h"
21 #include "crc32c.h"
22 #include "ctree.h"
23 #include "disk-io.h"
24 #include "print-tree.h"
25 #include "transaction.h"
26 #include "volumes.h"
27
28 #define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
29 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
30 #define BLOCK_GROUP_SYSTEM   EXTENT_NEW
31
32 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
33
34 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
35                                  btrfs_root *extent_root);
36 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
37                                btrfs_root *extent_root);
38 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
39                            struct btrfs_root *root, u64 bytes_used,
40                            u64 type, u64 chunk_tree, u64 chunk_objectid,
41                            u64 size);
42
43
44 static int cache_block_group(struct btrfs_root *root,
45                              struct btrfs_block_group_cache *block_group)
46 {
47         struct btrfs_path *path;
48         int ret;
49         struct btrfs_key key;
50         struct extent_buffer *leaf;
51         struct extent_io_tree *free_space_cache;
52         int slot;
53         u64 last = 0;
54         u64 hole_size;
55         u64 first_free;
56         int found = 0;
57
58         if (!block_group)
59                 return 0;
60
61         root = root->fs_info->extent_root;
62         free_space_cache = &root->fs_info->free_space_cache;
63
64         if (block_group->cached)
65                 return 0;
66
67         path = btrfs_alloc_path();
68         if (!path)
69                 return -ENOMEM;
70
71         path->reada = 2;
72         first_free = block_group->key.objectid;
73         key.objectid = block_group->key.objectid;
74         key.offset = 0;
75         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
76         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
77         if (ret < 0)
78                 return ret;
79         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
80         if (ret < 0)
81                 return ret;
82         if (ret == 0) {
83                 leaf = path->nodes[0];
84                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
85                 if (key.objectid + key.offset > first_free)
86                         first_free = key.objectid + key.offset;
87         }
88         while(1) {
89                 leaf = path->nodes[0];
90                 slot = path->slots[0];
91                 if (slot >= btrfs_header_nritems(leaf)) {
92                         ret = btrfs_next_leaf(root, path);
93                         if (ret < 0)
94                                 goto err;
95                         if (ret == 0) {
96                                 continue;
97                         } else {
98                                 break;
99                         }
100                 }
101                 btrfs_item_key_to_cpu(leaf, &key, slot);
102                 if (key.objectid < block_group->key.objectid) {
103                         goto next;
104                 }
105                 if (key.objectid >= block_group->key.objectid +
106                     block_group->key.offset) {
107                         break;
108                 }
109
110                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
111                         if (!found) {
112                                 last = first_free;
113                                 found = 1;
114                         }
115                         if (key.objectid > last) {
116                                 hole_size = key.objectid - last;
117                                 set_extent_dirty(free_space_cache, last,
118                                                  last + hole_size - 1,
119                                                  GFP_NOFS);
120                         }
121                         last = key.objectid + key.offset;
122                 }
123 next:
124                 path->slots[0]++;
125         }
126
127         if (!found)
128                 last = first_free;
129         if (block_group->key.objectid +
130             block_group->key.offset > last) {
131                 hole_size = block_group->key.objectid +
132                         block_group->key.offset - last;
133                 set_extent_dirty(free_space_cache, last,
134                                  last + hole_size - 1, GFP_NOFS);
135         }
136         block_group->cached = 1;
137 err:
138         btrfs_free_path(path);
139         return 0;
140 }
141
142 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
143                                                          btrfs_fs_info *info,
144                                                          u64 bytenr)
145 {
146         struct extent_io_tree *block_group_cache;
147         struct btrfs_block_group_cache *block_group = NULL;
148         u64 ptr;
149         u64 start;
150         u64 end;
151         int ret;
152
153         block_group_cache = &info->block_group_cache;
154         ret = find_first_extent_bit(block_group_cache,
155                                     bytenr, &start, &end,
156                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
157                                     BLOCK_GROUP_SYSTEM);
158         if (ret) {
159                 return NULL;
160         }
161         ret = get_state_private(block_group_cache, start, &ptr);
162         if (ret)
163                 return NULL;
164
165         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
166         if (block_group->key.objectid <= bytenr && bytenr <
167             block_group->key.objectid + block_group->key.offset)
168                 return block_group;
169         return NULL;
170 }
171
172 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
173 {
174         return (cache->flags & bits) == bits;
175 }
176
177 static int noinline find_search_start(struct btrfs_root *root,
178                               struct btrfs_block_group_cache **cache_ret,
179                               u64 *start_ret, int num, int data)
180 {
181         int ret;
182         struct btrfs_block_group_cache *cache = *cache_ret;
183         struct extent_io_tree *free_space_cache;
184         struct extent_state *state;
185         u64 last;
186         u64 start = 0;
187         u64 cache_miss = 0;
188         u64 total_fs_bytes;
189         u64 search_start = *start_ret;
190         int wrapped = 0;
191
192         if (!cache)
193                 goto out;
194         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
195         free_space_cache = &root->fs_info->free_space_cache;
196
197 again:
198         ret = cache_block_group(root, cache);
199         if (ret)
200                 goto out;
201
202         last = max(search_start, cache->key.objectid);
203         if (!block_group_bits(cache, data)) {
204                 goto new_group;
205         }
206
207         spin_lock_irq(&free_space_cache->lock);
208         state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY);
209         while(1) {
210                 if (!state) {
211                         if (!cache_miss)
212                                 cache_miss = last;
213                         spin_unlock_irq(&free_space_cache->lock);
214                         goto new_group;
215                 }
216
217                 start = max(last, state->start);
218                 last = state->end + 1;
219                 if (last - start < num) {
220                         if (last == cache->key.objectid + cache->key.offset)
221                                 cache_miss = start;
222                         do {
223                                 state = extent_state_next(state);
224                         } while(state && !(state->state & EXTENT_DIRTY));
225                         continue;
226                 }
227                 spin_unlock_irq(&free_space_cache->lock);
228                 if (start + num > cache->key.objectid + cache->key.offset)
229                         goto new_group;
230                 if (start + num  > total_fs_bytes)
231                         goto new_group;
232                 if (!block_group_bits(cache, data)) {
233                         printk("block group bits don't match %Lu %d\n", cache->flags, data);
234                 }
235                 *start_ret = start;
236                 return 0;
237         }
238 out:
239         cache = btrfs_lookup_block_group(root->fs_info, search_start);
240         if (!cache) {
241                 printk("Unable to find block group for %Lu\n", search_start);
242                 WARN_ON(1);
243         }
244         return -ENOSPC;
245
246 new_group:
247         last = cache->key.objectid + cache->key.offset;
248 wrapped:
249         cache = btrfs_lookup_block_group(root->fs_info, last);
250         if (!cache || cache->key.objectid >= total_fs_bytes) {
251 no_cache:
252                 if (!wrapped) {
253                         wrapped = 1;
254                         last = search_start;
255                         goto wrapped;
256                 }
257                 goto out;
258         }
259         if (cache_miss && !cache->cached) {
260                 cache_block_group(root, cache);
261                 last = cache_miss;
262                 cache = btrfs_lookup_block_group(root->fs_info, last);
263         }
264         cache = btrfs_find_block_group(root, cache, last, data, 0);
265         if (!cache)
266                 goto no_cache;
267         *cache_ret = cache;
268         cache_miss = 0;
269         goto again;
270 }
271
272 static u64 div_factor(u64 num, int factor)
273 {
274         if (factor == 10)
275                 return num;
276         num *= factor;
277         do_div(num, 10);
278         return num;
279 }
280
281 static int block_group_state_bits(u64 flags)
282 {
283         int bits = 0;
284         if (flags & BTRFS_BLOCK_GROUP_DATA)
285                 bits |= BLOCK_GROUP_DATA;
286         if (flags & BTRFS_BLOCK_GROUP_METADATA)
287                 bits |= BLOCK_GROUP_METADATA;
288         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
289                 bits |= BLOCK_GROUP_SYSTEM;
290         return bits;
291 }
292
293 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
294                                                  struct btrfs_block_group_cache
295                                                  *hint, u64 search_start,
296                                                  int data, int owner)
297 {
298         struct btrfs_block_group_cache *cache;
299         struct extent_io_tree *block_group_cache;
300         struct btrfs_block_group_cache *found_group = NULL;
301         struct btrfs_fs_info *info = root->fs_info;
302         u64 used;
303         u64 last = 0;
304         u64 hint_last;
305         u64 start;
306         u64 end;
307         u64 free_check;
308         u64 ptr;
309         u64 total_fs_bytes;
310         int bit;
311         int ret;
312         int full_search = 0;
313         int factor = 8;
314
315         block_group_cache = &info->block_group_cache;
316         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
317
318         if (!owner)
319                 factor = 8;
320
321         bit = block_group_state_bits(data);
322
323         if (search_start && search_start < total_fs_bytes) {
324                 struct btrfs_block_group_cache *shint;
325                 shint = btrfs_lookup_block_group(info, search_start);
326                 if (shint && block_group_bits(shint, data)) {
327                         used = btrfs_block_group_used(&shint->item);
328                         if (used + shint->pinned <
329                             div_factor(shint->key.offset, factor)) {
330                                 return shint;
331                         }
332                 }
333         }
334         if (hint && block_group_bits(hint, data) &&
335             hint->key.objectid < total_fs_bytes) {
336                 used = btrfs_block_group_used(&hint->item);
337                 if (used + hint->pinned <
338                     div_factor(hint->key.offset, factor)) {
339                         return hint;
340                 }
341                 last = hint->key.objectid + hint->key.offset;
342                 hint_last = last;
343         } else {
344                 if (hint)
345                         hint_last = max(hint->key.objectid, search_start);
346                 else
347                         hint_last = search_start;
348
349                 if (hint_last >= total_fs_bytes)
350                         hint_last = search_start;
351                 last = hint_last;
352         }
353 again:
354         while(1) {
355                 ret = find_first_extent_bit(block_group_cache, last,
356                                             &start, &end, bit);
357                 if (ret)
358                         break;
359
360                 ret = get_state_private(block_group_cache, start, &ptr);
361                 if (ret)
362                         break;
363
364                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
365                 last = cache->key.objectid + cache->key.offset;
366                 used = btrfs_block_group_used(&cache->item);
367
368                 if (cache->key.objectid > total_fs_bytes)
369                         break;
370
371                 if (block_group_bits(cache, data)) {
372                         if (full_search)
373                                 free_check = cache->key.offset;
374                         else
375                                 free_check = div_factor(cache->key.offset,
376                                                         factor);
377
378                         if (used + cache->pinned < free_check) {
379                                 found_group = cache;
380                                 goto found;
381                         }
382                 }
383                 cond_resched();
384         }
385         if (!full_search) {
386                 last = search_start;
387                 full_search = 1;
388                 goto again;
389         }
390 found:
391         return found_group;
392 }
393
394 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
395                            u64 owner, u64 owner_offset)
396 {
397         u32 high_crc = ~(u32)0;
398         u32 low_crc = ~(u32)0;
399         __le64 lenum;
400         lenum = cpu_to_le64(root_objectid);
401         high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
402         lenum = cpu_to_le64(ref_generation);
403         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
404         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
405                 lenum = cpu_to_le64(owner);
406                 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
407                 lenum = cpu_to_le64(owner_offset);
408                 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
409         }
410         return ((u64)high_crc << 32) | (u64)low_crc;
411 }
412
413 static int match_extent_ref(struct extent_buffer *leaf,
414                             struct btrfs_extent_ref *disk_ref,
415                             struct btrfs_extent_ref *cpu_ref)
416 {
417         int ret;
418         int len;
419
420         if (cpu_ref->objectid)
421                 len = sizeof(*cpu_ref);
422         else
423                 len = 2 * sizeof(u64);
424         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
425                                    len);
426         return ret == 0;
427 }
428
429 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
430                                           struct btrfs_root *root,
431                                           struct btrfs_path *path, u64 bytenr,
432                                           u64 root_objectid,
433                                           u64 ref_generation, u64 owner,
434                                           u64 owner_offset, int del)
435 {
436         u64 hash;
437         struct btrfs_key key;
438         struct btrfs_key found_key;
439         struct btrfs_extent_ref ref;
440         struct extent_buffer *leaf;
441         struct btrfs_extent_ref *disk_ref;
442         int ret;
443         int ret2;
444
445         btrfs_set_stack_ref_root(&ref, root_objectid);
446         btrfs_set_stack_ref_generation(&ref, ref_generation);
447         btrfs_set_stack_ref_objectid(&ref, owner);
448         btrfs_set_stack_ref_offset(&ref, owner_offset);
449
450         hash = hash_extent_ref(root_objectid, ref_generation, owner,
451                                owner_offset);
452         key.offset = hash;
453         key.objectid = bytenr;
454         key.type = BTRFS_EXTENT_REF_KEY;
455
456         while (1) {
457                 ret = btrfs_search_slot(trans, root, &key, path,
458                                         del ? -1 : 0, del);
459                 if (ret < 0)
460                         goto out;
461                 leaf = path->nodes[0];
462                 if (ret != 0) {
463                         u32 nritems = btrfs_header_nritems(leaf);
464                         if (path->slots[0] >= nritems) {
465                                 ret2 = btrfs_next_leaf(root, path);
466                                 if (ret2)
467                                         goto out;
468                                 leaf = path->nodes[0];
469                         }
470                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
471                         if (found_key.objectid != bytenr ||
472                             found_key.type != BTRFS_EXTENT_REF_KEY)
473                                 goto out;
474                         key.offset = found_key.offset;
475                         if (del) {
476                                 btrfs_release_path(root, path);
477                                 continue;
478                         }
479                 }
480                 disk_ref = btrfs_item_ptr(path->nodes[0],
481                                           path->slots[0],
482                                           struct btrfs_extent_ref);
483                 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
484                         ret = 0;
485                         goto out;
486                 }
487                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
488                 key.offset = found_key.offset + 1;
489                 btrfs_release_path(root, path);
490         }
491 out:
492         return ret;
493 }
494
495 /*
496  * Back reference rules.  Back refs have three main goals:
497  *
498  * 1) differentiate between all holders of references to an extent so that
499  *    when a reference is dropped we can make sure it was a valid reference
500  *    before freeing the extent.
501  *
502  * 2) Provide enough information to quickly find the holders of an extent
503  *    if we notice a given block is corrupted or bad.
504  *
505  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
506  *    maintenance.  This is actually the same as #2, but with a slightly
507  *    different use case.
508  *
509  * File extents can be referenced by:
510  *
511  * - multiple snapshots, subvolumes, or different generations in one subvol
512  * - different files inside a single subvolume (in theory, not implemented yet)
513  * - different offsets inside a file (bookend extents in file.c)
514  *
515  * The extent ref structure has fields for:
516  *
517  * - Objectid of the subvolume root
518  * - Generation number of the tree holding the reference
519  * - objectid of the file holding the reference
520  * - offset in the file corresponding to the key holding the reference
521  *
522  * When a file extent is allocated the fields are filled in:
523  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
524  *
525  * When a leaf is cow'd new references are added for every file extent found
526  * in the leaf.  It looks the same as the create case, but trans->transid
527  * will be different when the block is cow'd.
528  *
529  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
530  *
531  * When a file extent is removed either during snapshot deletion or file
532  * truncation, the corresponding back reference is found
533  * by searching for:
534  *
535  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
536  *      inode objectid, offset in file)
537  *
538  * Btree extents can be referenced by:
539  *
540  * - Different subvolumes
541  * - Different generations of the same subvolume
542  *
543  * Storing sufficient information for a full reverse mapping of a btree
544  * block would require storing the lowest key of the block in the backref,
545  * and it would require updating that lowest key either before write out or
546  * every time it changed.  Instead, the objectid of the lowest key is stored
547  * along with the level of the tree block.  This provides a hint
548  * about where in the btree the block can be found.  Searches through the
549  * btree only need to look for a pointer to that block, so they stop one
550  * level higher than the level recorded in the backref.
551  *
552  * Some btrees do not do reference counting on their extents.  These
553  * include the extent tree and the tree of tree roots.  Backrefs for these
554  * trees always have a generation of zero.
555  *
556  * When a tree block is created, back references are inserted:
557  *
558  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
559  *
560  * When a tree block is cow'd in a reference counted root,
561  * new back references are added for all the blocks it points to.
562  * These are of the form (trans->transid will have increased since creation):
563  *
564  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
565  *
566  * Because the lowest_key_objectid and the level are just hints
567  * they are not used when backrefs are deleted.  When a backref is deleted:
568  *
569  * if backref was for a tree root:
570  *     root_objectid = root->root_key.objectid
571  * else
572  *     root_objectid = btrfs_header_owner(parent)
573  *
574  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
575  *
576  * Back Reference Key hashing:
577  *
578  * Back references have four fields, each 64 bits long.  Unfortunately,
579  * This is hashed into a single 64 bit number and placed into the key offset.
580  * The key objectid corresponds to the first byte in the extent, and the
581  * key type is set to BTRFS_EXTENT_REF_KEY
582  */
583 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
584                                  struct btrfs_root *root,
585                                  struct btrfs_path *path, u64 bytenr,
586                                  u64 root_objectid, u64 ref_generation,
587                                  u64 owner, u64 owner_offset)
588 {
589         u64 hash;
590         struct btrfs_key key;
591         struct btrfs_extent_ref ref;
592         struct btrfs_extent_ref *disk_ref;
593         int ret;
594
595         btrfs_set_stack_ref_root(&ref, root_objectid);
596         btrfs_set_stack_ref_generation(&ref, ref_generation);
597         btrfs_set_stack_ref_objectid(&ref, owner);
598         btrfs_set_stack_ref_offset(&ref, owner_offset);
599
600         hash = hash_extent_ref(root_objectid, ref_generation, owner,
601                                owner_offset);
602         key.offset = hash;
603         key.objectid = bytenr;
604         key.type = BTRFS_EXTENT_REF_KEY;
605
606         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
607         while (ret == -EEXIST) {
608                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
609                                           struct btrfs_extent_ref);
610                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
611                         goto out;
612                 key.offset++;
613                 btrfs_release_path(root, path);
614                 ret = btrfs_insert_empty_item(trans, root, path, &key,
615                                               sizeof(ref));
616         }
617         if (ret)
618                 goto out;
619         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
620                                   struct btrfs_extent_ref);
621         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
622                             sizeof(ref));
623         btrfs_mark_buffer_dirty(path->nodes[0]);
624 out:
625         btrfs_release_path(root, path);
626         return ret;
627 }
628
629 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
630                                 struct btrfs_root *root,
631                                 u64 bytenr, u64 num_bytes,
632                                 u64 root_objectid, u64 ref_generation,
633                                 u64 owner, u64 owner_offset)
634 {
635         struct btrfs_path *path;
636         int ret;
637         struct btrfs_key key;
638         struct extent_buffer *l;
639         struct btrfs_extent_item *item;
640         u32 refs;
641
642         WARN_ON(num_bytes < root->sectorsize);
643         path = btrfs_alloc_path();
644         if (!path)
645                 return -ENOMEM;
646
647         path->reada = 0;
648         key.objectid = bytenr;
649         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
650         key.offset = num_bytes;
651         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
652                                 0, 1);
653         if (ret < 0)
654                 return ret;
655         if (ret != 0) {
656                 BUG();
657         }
658         BUG_ON(ret != 0);
659         l = path->nodes[0];
660         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
661         refs = btrfs_extent_refs(l, item);
662         btrfs_set_extent_refs(l, item, refs + 1);
663         btrfs_mark_buffer_dirty(path->nodes[0]);
664
665         btrfs_release_path(root->fs_info->extent_root, path);
666
667         path->reada = 0;
668         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
669                                           path, bytenr, root_objectid,
670                                           ref_generation, owner, owner_offset);
671         BUG_ON(ret);
672         finish_current_insert(trans, root->fs_info->extent_root);
673         del_pending_extents(trans, root->fs_info->extent_root);
674
675         btrfs_free_path(path);
676         return 0;
677 }
678
679 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
680                          struct btrfs_root *root)
681 {
682         finish_current_insert(trans, root->fs_info->extent_root);
683         del_pending_extents(trans, root->fs_info->extent_root);
684         return 0;
685 }
686
687 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
688                              struct btrfs_root *root, u64 bytenr,
689                              u64 num_bytes, u32 *refs)
690 {
691         struct btrfs_path *path;
692         int ret;
693         struct btrfs_key key;
694         struct extent_buffer *l;
695         struct btrfs_extent_item *item;
696
697         WARN_ON(num_bytes < root->sectorsize);
698         path = btrfs_alloc_path();
699         path->reada = 0;
700         key.objectid = bytenr;
701         key.offset = num_bytes;
702         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
703         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
704                                 0, 0);
705         if (ret < 0)
706                 goto out;
707         if (ret != 0) {
708                 btrfs_print_leaf(root, path->nodes[0]);
709                 printk("failed to find block number %Lu\n", bytenr);
710                 BUG();
711         }
712         l = path->nodes[0];
713         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
714         *refs = btrfs_extent_refs(l, item);
715 out:
716         btrfs_free_path(path);
717         return 0;
718 }
719
720 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
721                                   struct btrfs_path *count_path,
722                                   u64 first_extent)
723 {
724         struct btrfs_root *extent_root = root->fs_info->extent_root;
725         struct btrfs_path *path;
726         u64 bytenr;
727         u64 found_objectid;
728         u64 root_objectid = root->root_key.objectid;
729         u32 total_count = 0;
730         u32 cur_count;
731         u32 nritems;
732         int ret;
733         struct btrfs_key key;
734         struct btrfs_key found_key;
735         struct extent_buffer *l;
736         struct btrfs_extent_item *item;
737         struct btrfs_extent_ref *ref_item;
738         int level = -1;
739
740         path = btrfs_alloc_path();
741 again:
742         if (level == -1)
743                 bytenr = first_extent;
744         else
745                 bytenr = count_path->nodes[level]->start;
746
747         cur_count = 0;
748         key.objectid = bytenr;
749         key.offset = 0;
750
751         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
752         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
753         if (ret < 0)
754                 goto out;
755         BUG_ON(ret == 0);
756
757         l = path->nodes[0];
758         btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
759
760         if (found_key.objectid != bytenr ||
761             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
762                 goto out;
763         }
764
765         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
766         while (1) {
767                 l = path->nodes[0];
768                 nritems = btrfs_header_nritems(l);
769                 if (path->slots[0] >= nritems) {
770                         ret = btrfs_next_leaf(extent_root, path);
771                         if (ret == 0)
772                                 continue;
773                         break;
774                 }
775                 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
776                 if (found_key.objectid != bytenr)
777                         break;
778
779                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
780                         path->slots[0]++;
781                         continue;
782                 }
783
784                 cur_count++;
785                 ref_item = btrfs_item_ptr(l, path->slots[0],
786                                           struct btrfs_extent_ref);
787                 found_objectid = btrfs_ref_root(l, ref_item);
788
789                 if (found_objectid != root_objectid) {
790                         total_count = 2;
791                         goto out;
792                 }
793                 total_count = 1;
794                 path->slots[0]++;
795         }
796         if (cur_count == 0) {
797                 total_count = 0;
798                 goto out;
799         }
800         if (level >= 0 && root->node == count_path->nodes[level])
801                 goto out;
802         level++;
803         btrfs_release_path(root, path);
804         goto again;
805
806 out:
807         btrfs_free_path(path);
808         return total_count;
809 }
810 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
811                        struct btrfs_root *root, u64 owner_objectid)
812 {
813         u64 generation;
814         u64 key_objectid;
815         u64 level;
816         u32 nritems;
817         struct btrfs_disk_key disk_key;
818
819         level = btrfs_header_level(root->node);
820         generation = trans->transid;
821         nritems = btrfs_header_nritems(root->node);
822         if (nritems > 0) {
823                 if (level == 0)
824                         btrfs_item_key(root->node, &disk_key, 0);
825                 else
826                         btrfs_node_key(root->node, &disk_key, 0);
827                 key_objectid = btrfs_disk_key_objectid(&disk_key);
828         } else {
829                 key_objectid = 0;
830         }
831         return btrfs_inc_extent_ref(trans, root, root->node->start,
832                                     root->node->len, owner_objectid,
833                                     generation, level, key_objectid);
834 }
835
836 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
837                   struct extent_buffer *buf)
838 {
839         u64 bytenr;
840         u32 nritems;
841         struct btrfs_key key;
842         struct btrfs_file_extent_item *fi;
843         int i;
844         int level;
845         int ret;
846         int faili;
847
848         if (!root->ref_cows)
849                 return 0;
850
851         level = btrfs_header_level(buf);
852         nritems = btrfs_header_nritems(buf);
853         for (i = 0; i < nritems; i++) {
854                 if (level == 0) {
855                         u64 disk_bytenr;
856                         btrfs_item_key_to_cpu(buf, &key, i);
857                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
858                                 continue;
859                         fi = btrfs_item_ptr(buf, i,
860                                             struct btrfs_file_extent_item);
861                         if (btrfs_file_extent_type(buf, fi) ==
862                             BTRFS_FILE_EXTENT_INLINE)
863                                 continue;
864                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
865                         if (disk_bytenr == 0)
866                                 continue;
867                         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
868                                     btrfs_file_extent_disk_num_bytes(buf, fi),
869                                     root->root_key.objectid, trans->transid,
870                                     key.objectid, key.offset);
871                         if (ret) {
872                                 faili = i;
873                                 goto fail;
874                         }
875                 } else {
876                         bytenr = btrfs_node_blockptr(buf, i);
877                         btrfs_node_key_to_cpu(buf, &key, i);
878                         ret = btrfs_inc_extent_ref(trans, root, bytenr,
879                                            btrfs_level_size(root, level - 1),
880                                            root->root_key.objectid,
881                                            trans->transid,
882                                            level - 1, key.objectid);
883                         if (ret) {
884                                 faili = i;
885                                 goto fail;
886                         }
887                 }
888         }
889         return 0;
890 fail:
891         WARN_ON(1);
892 #if 0
893         for (i =0; i < faili; i++) {
894                 if (level == 0) {
895                         u64 disk_bytenr;
896                         btrfs_item_key_to_cpu(buf, &key, i);
897                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
898                                 continue;
899                         fi = btrfs_item_ptr(buf, i,
900                                             struct btrfs_file_extent_item);
901                         if (btrfs_file_extent_type(buf, fi) ==
902                             BTRFS_FILE_EXTENT_INLINE)
903                                 continue;
904                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
905                         if (disk_bytenr == 0)
906                                 continue;
907                         err = btrfs_free_extent(trans, root, disk_bytenr,
908                                     btrfs_file_extent_disk_num_bytes(buf,
909                                                                       fi), 0);
910                         BUG_ON(err);
911                 } else {
912                         bytenr = btrfs_node_blockptr(buf, i);
913                         err = btrfs_free_extent(trans, root, bytenr,
914                                         btrfs_level_size(root, level - 1), 0);
915                         BUG_ON(err);
916                 }
917         }
918 #endif
919         return ret;
920 }
921
922 static int write_one_cache_group(struct btrfs_trans_handle *trans,
923                                  struct btrfs_root *root,
924                                  struct btrfs_path *path,
925                                  struct btrfs_block_group_cache *cache)
926 {
927         int ret;
928         int pending_ret;
929         struct btrfs_root *extent_root = root->fs_info->extent_root;
930         unsigned long bi;
931         struct extent_buffer *leaf;
932
933         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
934         if (ret < 0)
935                 goto fail;
936         BUG_ON(ret);
937
938         leaf = path->nodes[0];
939         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
940         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
941         btrfs_mark_buffer_dirty(leaf);
942         btrfs_release_path(extent_root, path);
943 fail:
944         finish_current_insert(trans, extent_root);
945         pending_ret = del_pending_extents(trans, extent_root);
946         if (ret)
947                 return ret;
948         if (pending_ret)
949                 return pending_ret;
950         return 0;
951
952 }
953
954 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
955                                    struct btrfs_root *root)
956 {
957         struct extent_io_tree *block_group_cache;
958         struct btrfs_block_group_cache *cache;
959         int ret;
960         int err = 0;
961         int werr = 0;
962         struct btrfs_path *path;
963         u64 last = 0;
964         u64 start;
965         u64 end;
966         u64 ptr;
967
968         block_group_cache = &root->fs_info->block_group_cache;
969         path = btrfs_alloc_path();
970         if (!path)
971                 return -ENOMEM;
972
973         while(1) {
974                 ret = find_first_extent_bit(block_group_cache, last,
975                                             &start, &end, BLOCK_GROUP_DIRTY);
976                 if (ret)
977                         break;
978
979                 last = end + 1;
980                 ret = get_state_private(block_group_cache, start, &ptr);
981                 if (ret)
982                         break;
983
984                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
985                 err = write_one_cache_group(trans, root,
986                                             path, cache);
987                 /*
988                  * if we fail to write the cache group, we want
989                  * to keep it marked dirty in hopes that a later
990                  * write will work
991                  */
992                 if (err) {
993                         werr = err;
994                         continue;
995                 }
996                 clear_extent_bits(block_group_cache, start, end,
997                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
998         }
999         btrfs_free_path(path);
1000         return werr;
1001 }
1002
1003 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
1004                                                   u64 flags)
1005 {
1006         struct list_head *head = &info->space_info;
1007         struct list_head *cur;
1008         struct btrfs_space_info *found;
1009         list_for_each(cur, head) {
1010                 found = list_entry(cur, struct btrfs_space_info, list);
1011                 if (found->flags == flags)
1012                         return found;
1013         }
1014         return NULL;
1015
1016 }
1017
1018 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1019                              u64 total_bytes, u64 bytes_used,
1020                              struct btrfs_space_info **space_info)
1021 {
1022         struct btrfs_space_info *found;
1023
1024         found = __find_space_info(info, flags);
1025         if (found) {
1026                 found->total_bytes += total_bytes;
1027                 found->bytes_used += bytes_used;
1028                 WARN_ON(found->total_bytes < found->bytes_used);
1029                 *space_info = found;
1030                 return 0;
1031         }
1032         found = kmalloc(sizeof(*found), GFP_NOFS);
1033         if (!found)
1034                 return -ENOMEM;
1035
1036         list_add(&found->list, &info->space_info);
1037         found->flags = flags;
1038         found->total_bytes = total_bytes;
1039         found->bytes_used = bytes_used;
1040         found->bytes_pinned = 0;
1041         found->full = 0;
1042         *space_info = found;
1043         return 0;
1044 }
1045
1046 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1047 {
1048         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1049                                    BTRFS_BLOCK_GROUP_RAID1 |
1050                                    BTRFS_BLOCK_GROUP_DUP);
1051         if (extra_flags) {
1052                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1053                         fs_info->avail_data_alloc_bits |= extra_flags;
1054                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1055                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1056                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1057                         fs_info->avail_system_alloc_bits |= extra_flags;
1058         }
1059 }
1060
1061 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1062                           struct btrfs_root *extent_root, u64 alloc_bytes,
1063                           u64 flags)
1064 {
1065         struct btrfs_space_info *space_info;
1066         u64 thresh;
1067         u64 start;
1068         u64 num_bytes;
1069         int ret;
1070
1071         space_info = __find_space_info(extent_root->fs_info, flags);
1072         if (!space_info) {
1073                 ret = update_space_info(extent_root->fs_info, flags,
1074                                         0, 0, &space_info);
1075                 BUG_ON(ret);
1076         }
1077         BUG_ON(!space_info);
1078
1079         if (space_info->full)
1080                 return 0;
1081
1082         thresh = div_factor(space_info->total_bytes, 6);
1083         if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1084             thresh)
1085                 return 0;
1086
1087         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1088         if (ret == -ENOSPC) {
1089 printk("space info full %Lu\n", flags);
1090                 space_info->full = 1;
1091                 return 0;
1092         }
1093
1094         BUG_ON(ret);
1095
1096         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1097                      extent_root->fs_info->chunk_root->root_key.objectid,
1098                      start, num_bytes);
1099         BUG_ON(ret);
1100
1101         return 0;
1102 }
1103
1104 static int update_block_group(struct btrfs_trans_handle *trans,
1105                               struct btrfs_root *root,
1106                               u64 bytenr, u64 num_bytes, int alloc,
1107                               int mark_free)
1108 {
1109         struct btrfs_block_group_cache *cache;
1110         struct btrfs_fs_info *info = root->fs_info;
1111         u64 total = num_bytes;
1112         u64 old_val;
1113         u64 byte_in_group;
1114         u64 start;
1115         u64 end;
1116
1117         while(total) {
1118                 cache = btrfs_lookup_block_group(info, bytenr);
1119                 if (!cache) {
1120                         return -1;
1121                 }
1122                 byte_in_group = bytenr - cache->key.objectid;
1123                 WARN_ON(byte_in_group > cache->key.offset);
1124                 start = cache->key.objectid;
1125                 end = start + cache->key.offset - 1;
1126                 set_extent_bits(&info->block_group_cache, start, end,
1127                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
1128
1129                 old_val = btrfs_block_group_used(&cache->item);
1130                 num_bytes = min(total, cache->key.offset - byte_in_group);
1131                 if (alloc) {
1132                         old_val += num_bytes;
1133                         cache->space_info->bytes_used += num_bytes;
1134                 } else {
1135                         old_val -= num_bytes;
1136                         cache->space_info->bytes_used -= num_bytes;
1137                         if (mark_free) {
1138                                 set_extent_dirty(&info->free_space_cache,
1139                                                  bytenr, bytenr + num_bytes - 1,
1140                                                  GFP_NOFS);
1141                         }
1142                 }
1143                 btrfs_set_block_group_used(&cache->item, old_val);
1144                 total -= num_bytes;
1145                 bytenr += num_bytes;
1146         }
1147         return 0;
1148 }
1149
1150 static int update_pinned_extents(struct btrfs_root *root,
1151                                 u64 bytenr, u64 num, int pin)
1152 {
1153         u64 len;
1154         struct btrfs_block_group_cache *cache;
1155         struct btrfs_fs_info *fs_info = root->fs_info;
1156
1157         if (pin) {
1158                 set_extent_dirty(&fs_info->pinned_extents,
1159                                 bytenr, bytenr + num - 1, GFP_NOFS);
1160         } else {
1161                 clear_extent_dirty(&fs_info->pinned_extents,
1162                                 bytenr, bytenr + num - 1, GFP_NOFS);
1163         }
1164         while (num > 0) {
1165                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1166                 WARN_ON(!cache);
1167                 len = min(num, cache->key.offset -
1168                           (bytenr - cache->key.objectid));
1169                 if (pin) {
1170                         cache->pinned += len;
1171                         cache->space_info->bytes_pinned += len;
1172                         fs_info->total_pinned += len;
1173                 } else {
1174                         cache->pinned -= len;
1175                         cache->space_info->bytes_pinned -= len;
1176                         fs_info->total_pinned -= len;
1177                 }
1178                 bytenr += len;
1179                 num -= len;
1180         }
1181         return 0;
1182 }
1183
1184 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1185 {
1186         u64 last = 0;
1187         u64 start;
1188         u64 end;
1189         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1190         int ret;
1191
1192         while(1) {
1193                 ret = find_first_extent_bit(pinned_extents, last,
1194                                             &start, &end, EXTENT_DIRTY);
1195                 if (ret)
1196                         break;
1197                 set_extent_dirty(copy, start, end, GFP_NOFS);
1198                 last = end + 1;
1199         }
1200         return 0;
1201 }
1202
1203 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1204                                struct btrfs_root *root,
1205                                struct extent_io_tree *unpin)
1206 {
1207         u64 start;
1208         u64 end;
1209         int ret;
1210         struct extent_io_tree *free_space_cache;
1211         free_space_cache = &root->fs_info->free_space_cache;
1212
1213         while(1) {
1214                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1215                                             EXTENT_DIRTY);
1216                 if (ret)
1217                         break;
1218                 update_pinned_extents(root, start, end + 1 - start, 0);
1219                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1220                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1221         }
1222         return 0;
1223 }
1224
1225 static int finish_current_insert(struct btrfs_trans_handle *trans,
1226                                  struct btrfs_root *extent_root)
1227 {
1228         u64 start;
1229         u64 end;
1230         struct btrfs_fs_info *info = extent_root->fs_info;
1231         struct extent_buffer *eb;
1232         struct btrfs_path *path;
1233         struct btrfs_key ins;
1234         struct btrfs_disk_key first;
1235         struct btrfs_extent_item extent_item;
1236         int ret;
1237         int level;
1238         int err = 0;
1239
1240         btrfs_set_stack_extent_refs(&extent_item, 1);
1241         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1242         path = btrfs_alloc_path();
1243
1244         while(1) {
1245                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1246                                             &end, EXTENT_LOCKED);
1247                 if (ret)
1248                         break;
1249
1250                 ins.objectid = start;
1251                 ins.offset = end + 1 - start;
1252                 err = btrfs_insert_item(trans, extent_root, &ins,
1253                                         &extent_item, sizeof(extent_item));
1254                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1255                                   GFP_NOFS);
1256                 eb = read_tree_block(extent_root, ins.objectid, ins.offset);
1257                 level = btrfs_header_level(eb);
1258                 if (level == 0) {
1259                         btrfs_item_key(eb, &first, 0);
1260                 } else {
1261                         btrfs_node_key(eb, &first, 0);
1262                 }
1263                 err = btrfs_insert_extent_backref(trans, extent_root, path,
1264                                           start, extent_root->root_key.objectid,
1265                                           0, level,
1266                                           btrfs_disk_key_objectid(&first));
1267                 BUG_ON(err);
1268                 free_extent_buffer(eb);
1269         }
1270         btrfs_free_path(path);
1271         return 0;
1272 }
1273
1274 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1275                           int pending)
1276 {
1277         int err = 0;
1278         struct extent_buffer *buf;
1279
1280         if (!pending) {
1281                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1282                 if (buf) {
1283                         if (btrfs_buffer_uptodate(buf)) {
1284                                 u64 transid =
1285                                     root->fs_info->running_transaction->transid;
1286                                 u64 header_transid =
1287                                         btrfs_header_generation(buf);
1288                                 if (header_transid == transid &&
1289                                     !btrfs_header_flag(buf,
1290                                                BTRFS_HEADER_FLAG_WRITTEN)) {
1291                                         clean_tree_block(NULL, root, buf);
1292                                         free_extent_buffer(buf);
1293                                         return 1;
1294                                 }
1295                         }
1296                         free_extent_buffer(buf);
1297                 }
1298                 update_pinned_extents(root, bytenr, num_bytes, 1);
1299         } else {
1300                 set_extent_bits(&root->fs_info->pending_del,
1301                                 bytenr, bytenr + num_bytes - 1,
1302                                 EXTENT_LOCKED, GFP_NOFS);
1303         }
1304         BUG_ON(err < 0);
1305         return 0;
1306 }
1307
1308 /*
1309  * remove an extent from the root, returns 0 on success
1310  */
1311 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1312                          *root, u64 bytenr, u64 num_bytes,
1313                          u64 root_objectid, u64 ref_generation,
1314                          u64 owner_objectid, u64 owner_offset, int pin,
1315                          int mark_free)
1316 {
1317         struct btrfs_path *path;
1318         struct btrfs_key key;
1319         struct btrfs_fs_info *info = root->fs_info;
1320         struct btrfs_root *extent_root = info->extent_root;
1321         struct extent_buffer *leaf;
1322         int ret;
1323         int extent_slot = 0;
1324         int found_extent = 0;
1325         int num_to_del = 1;
1326         struct btrfs_extent_item *ei;
1327         u32 refs;
1328
1329         key.objectid = bytenr;
1330         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1331         key.offset = num_bytes;
1332         path = btrfs_alloc_path();
1333         if (!path)
1334                 return -ENOMEM;
1335
1336         path->reada = 0;
1337         ret = lookup_extent_backref(trans, extent_root, path,
1338                                     bytenr, root_objectid,
1339                                     ref_generation,
1340                                     owner_objectid, owner_offset, 1);
1341         if (ret == 0) {
1342                 struct btrfs_key found_key;
1343                 extent_slot = path->slots[0];
1344                 while(extent_slot > 0) {
1345                         extent_slot--;
1346                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1347                                               extent_slot);
1348                         if (found_key.objectid != bytenr)
1349                                 break;
1350                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1351                             found_key.offset == num_bytes) {
1352                                 found_extent = 1;
1353                                 break;
1354                         }
1355                         if (path->slots[0] - extent_slot > 5)
1356                                 break;
1357                 }
1358                 if (!found_extent)
1359                         ret = btrfs_del_item(trans, extent_root, path);
1360         } else {
1361                 btrfs_print_leaf(extent_root, path->nodes[0]);
1362                 WARN_ON(1);
1363                 printk("Unable to find ref byte nr %Lu root %Lu "
1364                        " gen %Lu owner %Lu offset %Lu\n", bytenr,
1365                        root_objectid, ref_generation, owner_objectid,
1366                        owner_offset);
1367         }
1368         if (!found_extent) {
1369                 btrfs_release_path(extent_root, path);
1370                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1371                 if (ret < 0)
1372                         return ret;
1373                 BUG_ON(ret);
1374                 extent_slot = path->slots[0];
1375         }
1376
1377         leaf = path->nodes[0];
1378         ei = btrfs_item_ptr(leaf, extent_slot,
1379                             struct btrfs_extent_item);
1380         refs = btrfs_extent_refs(leaf, ei);
1381         BUG_ON(refs == 0);
1382         refs -= 1;
1383         btrfs_set_extent_refs(leaf, ei, refs);
1384
1385         btrfs_mark_buffer_dirty(leaf);
1386
1387         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1388                 /* if the back ref and the extent are next to each other
1389                  * they get deleted below in one shot
1390                  */
1391                 path->slots[0] = extent_slot;
1392                 num_to_del = 2;
1393         } else if (found_extent) {
1394                 /* otherwise delete the extent back ref */
1395                 ret = btrfs_del_item(trans, extent_root, path);
1396                 BUG_ON(ret);
1397                 /* if refs are 0, we need to setup the path for deletion */
1398                 if (refs == 0) {
1399                         btrfs_release_path(extent_root, path);
1400                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1401                                                 -1, 1);
1402                         if (ret < 0)
1403                                 return ret;
1404                         BUG_ON(ret);
1405                 }
1406         }
1407
1408         if (refs == 0) {
1409                 u64 super_used;
1410                 u64 root_used;
1411
1412                 if (pin) {
1413                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1414                         if (ret > 0)
1415                                 mark_free = 1;
1416                         BUG_ON(ret < 0);
1417                 }
1418
1419                 /* block accounting for super block */
1420                 super_used = btrfs_super_bytes_used(&info->super_copy);
1421                 btrfs_set_super_bytes_used(&info->super_copy,
1422                                            super_used - num_bytes);
1423
1424                 /* block accounting for root item */
1425                 root_used = btrfs_root_used(&root->root_item);
1426                 btrfs_set_root_used(&root->root_item,
1427                                            root_used - num_bytes);
1428                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1429                                       num_to_del);
1430                 if (ret) {
1431                         return ret;
1432                 }
1433                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1434                                          mark_free);
1435                 BUG_ON(ret);
1436         }
1437         btrfs_free_path(path);
1438         finish_current_insert(trans, extent_root);
1439         return ret;
1440 }
1441
1442 /*
1443  * find all the blocks marked as pending in the radix tree and remove
1444  * them from the extent map
1445  */
1446 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1447                                btrfs_root *extent_root)
1448 {
1449         int ret;
1450         int err = 0;
1451         u64 start;
1452         u64 end;
1453         struct extent_io_tree *pending_del;
1454         struct extent_io_tree *pinned_extents;
1455
1456         pending_del = &extent_root->fs_info->pending_del;
1457         pinned_extents = &extent_root->fs_info->pinned_extents;
1458
1459         while(1) {
1460                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1461                                             EXTENT_LOCKED);
1462                 if (ret)
1463                         break;
1464                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1465                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1466                                   GFP_NOFS);
1467                 ret = __free_extent(trans, extent_root,
1468                                      start, end + 1 - start,
1469                                      extent_root->root_key.objectid,
1470                                      0, 0, 0, 0, 0);
1471                 if (ret)
1472                         err = ret;
1473         }
1474         return err;
1475 }
1476
1477 /*
1478  * remove an extent from the root, returns 0 on success
1479  */
1480 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1481                       *root, u64 bytenr, u64 num_bytes,
1482                       u64 root_objectid, u64 ref_generation,
1483                       u64 owner_objectid, u64 owner_offset, int pin)
1484 {
1485         struct btrfs_root *extent_root = root->fs_info->extent_root;
1486         int pending_ret;
1487         int ret;
1488
1489         WARN_ON(num_bytes < root->sectorsize);
1490         if (!root->ref_cows)
1491                 ref_generation = 0;
1492
1493         if (root == extent_root) {
1494                 pin_down_bytes(root, bytenr, num_bytes, 1);
1495                 return 0;
1496         }
1497         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1498                             ref_generation, owner_objectid, owner_offset,
1499                             pin, pin == 0);
1500         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1501         return ret ? ret : pending_ret;
1502 }
1503
1504 static u64 stripe_align(struct btrfs_root *root, u64 val)
1505 {
1506         u64 mask = ((u64)root->stripesize - 1);
1507         u64 ret = (val + mask) & ~mask;
1508         return ret;
1509 }
1510
1511 /*
1512  * walks the btree of allocated extents and find a hole of a given size.
1513  * The key ins is changed to record the hole:
1514  * ins->objectid == block start
1515  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1516  * ins->offset == number of blocks
1517  * Any available blocks before search_start are skipped.
1518  */
1519 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1520                                      struct btrfs_root *orig_root,
1521                                      u64 num_bytes, u64 empty_size,
1522                                      u64 search_start, u64 search_end,
1523                                      u64 hint_byte, struct btrfs_key *ins,
1524                                      u64 exclude_start, u64 exclude_nr,
1525                                      int data)
1526 {
1527         int ret;
1528         u64 orig_search_start = search_start;
1529         struct btrfs_root * root = orig_root->fs_info->extent_root;
1530         struct btrfs_fs_info *info = root->fs_info;
1531         u64 total_needed = num_bytes;
1532         u64 *last_ptr = NULL;
1533         struct btrfs_block_group_cache *block_group;
1534         int full_scan = 0;
1535         int wrapped = 0;
1536         int empty_cluster = 2 * 1024 * 1024;
1537
1538         WARN_ON(num_bytes < root->sectorsize);
1539         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1540
1541         if (data & BTRFS_BLOCK_GROUP_METADATA) {
1542                 last_ptr = &root->fs_info->last_alloc;
1543                 empty_cluster = 256 * 1024;
1544         }
1545
1546         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
1547                 last_ptr = &root->fs_info->last_data_alloc;
1548         }
1549
1550         if (last_ptr) {
1551                 if (*last_ptr)
1552                         hint_byte = *last_ptr;
1553                 else {
1554                         empty_size += empty_cluster;
1555                 }
1556         }
1557
1558         if (search_end == (u64)-1)
1559                 search_end = btrfs_super_total_bytes(&info->super_copy);
1560
1561         if (hint_byte) {
1562                 block_group = btrfs_lookup_block_group(info, hint_byte);
1563                 if (!block_group)
1564                         hint_byte = search_start;
1565                 block_group = btrfs_find_block_group(root, block_group,
1566                                                      hint_byte, data, 1);
1567                 if (last_ptr && *last_ptr == 0 && block_group)
1568                         hint_byte = block_group->key.objectid;
1569         } else {
1570                 block_group = btrfs_find_block_group(root,
1571                                                      trans->block_group,
1572                                                      search_start, data, 1);
1573         }
1574         search_start = max(search_start, hint_byte);
1575
1576         total_needed += empty_size;
1577
1578 check_failed:
1579         if (!block_group) {
1580                 block_group = btrfs_lookup_block_group(info, search_start);
1581                 if (!block_group)
1582                         block_group = btrfs_lookup_block_group(info,
1583                                                        orig_search_start);
1584         }
1585         ret = find_search_start(root, &block_group, &search_start,
1586                                 total_needed, data);
1587         if (ret == -ENOSPC && last_ptr && *last_ptr) {
1588                 *last_ptr = 0;
1589                 block_group = btrfs_lookup_block_group(info,
1590                                                        orig_search_start);
1591                 search_start = orig_search_start;
1592                 ret = find_search_start(root, &block_group, &search_start,
1593                                         total_needed, data);
1594         }
1595         if (ret == -ENOSPC)
1596                 goto enospc;
1597         if (ret)
1598                 goto error;
1599
1600         if (last_ptr && *last_ptr && search_start != *last_ptr) {
1601                 *last_ptr = 0;
1602                 if (!empty_size) {
1603                         empty_size += empty_cluster;
1604                         total_needed += empty_size;
1605                 }
1606                 block_group = btrfs_lookup_block_group(info,
1607                                                        orig_search_start);
1608                 search_start = orig_search_start;
1609                 ret = find_search_start(root, &block_group,
1610                                         &search_start, total_needed, data);
1611                 if (ret == -ENOSPC)
1612                         goto enospc;
1613                 if (ret)
1614                         goto error;
1615         }
1616
1617         search_start = stripe_align(root, search_start);
1618         ins->objectid = search_start;
1619         ins->offset = num_bytes;
1620
1621         if (ins->objectid + num_bytes >= search_end)
1622                 goto enospc;
1623
1624         if (ins->objectid + num_bytes >
1625             block_group->key.objectid + block_group->key.offset) {
1626                 search_start = block_group->key.objectid +
1627                         block_group->key.offset;
1628                 goto new_group;
1629         }
1630
1631         if (test_range_bit(&info->extent_ins, ins->objectid,
1632                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1633                 search_start = ins->objectid + num_bytes;
1634                 goto new_group;
1635         }
1636
1637         if (test_range_bit(&info->pinned_extents, ins->objectid,
1638                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1639                 search_start = ins->objectid + num_bytes;
1640                 goto new_group;
1641         }
1642
1643         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1644             ins->objectid < exclude_start + exclude_nr)) {
1645                 search_start = exclude_start + exclude_nr;
1646                 goto new_group;
1647         }
1648
1649         if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
1650                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1651                 if (block_group)
1652                         trans->block_group = block_group;
1653         }
1654         ins->offset = num_bytes;
1655         if (last_ptr) {
1656                 *last_ptr = ins->objectid + ins->offset;
1657                 if (*last_ptr ==
1658                     btrfs_super_total_bytes(&root->fs_info->super_copy)) {
1659                         *last_ptr = 0;
1660                 }
1661         }
1662         return 0;
1663
1664 new_group:
1665         if (search_start + num_bytes >= search_end) {
1666 enospc:
1667                 search_start = orig_search_start;
1668                 if (full_scan) {
1669                         ret = -ENOSPC;
1670                         goto error;
1671                 }
1672                 if (wrapped) {
1673                         if (!full_scan)
1674                                 total_needed -= empty_size;
1675                         full_scan = 1;
1676                 } else
1677                         wrapped = 1;
1678         }
1679         block_group = btrfs_lookup_block_group(info, search_start);
1680         cond_resched();
1681         block_group = btrfs_find_block_group(root, block_group,
1682                                              search_start, data, 0);
1683         goto check_failed;
1684
1685 error:
1686         return ret;
1687 }
1688 /*
1689  * finds a free extent and does all the dirty work required for allocation
1690  * returns the key for the extent through ins, and a tree buffer for
1691  * the first block of the extent through buf.
1692  *
1693  * returns 0 if everything worked, non-zero otherwise.
1694  */
1695 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1696                        struct btrfs_root *root,
1697                        u64 num_bytes, u64 min_alloc_size,
1698                        u64 root_objectid, u64 ref_generation,
1699                        u64 owner, u64 owner_offset,
1700                        u64 empty_size, u64 hint_byte,
1701                        u64 search_end, struct btrfs_key *ins, int data)
1702 {
1703         int ret;
1704         int pending_ret;
1705         u64 super_used;
1706         u64 root_used;
1707         u64 search_start = 0;
1708         u64 new_hint;
1709         u64 alloc_profile;
1710         u32 sizes[2];
1711         struct btrfs_fs_info *info = root->fs_info;
1712         struct btrfs_root *extent_root = info->extent_root;
1713         struct btrfs_extent_item *extent_item;
1714         struct btrfs_extent_ref *ref;
1715         struct btrfs_path *path;
1716         struct btrfs_key keys[2];
1717
1718         if (data) {
1719                 alloc_profile = info->avail_data_alloc_bits &
1720                                 info->data_alloc_profile;
1721                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
1722         } else if (root == root->fs_info->chunk_root) {
1723                 alloc_profile = info->avail_system_alloc_bits &
1724                                 info->system_alloc_profile;
1725                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
1726         } else {
1727                 alloc_profile = info->avail_metadata_alloc_bits &
1728                                 info->metadata_alloc_profile;
1729                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
1730         }
1731 again:
1732         if (root->ref_cows) {
1733                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
1734                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1735                                              2 * 1024 * 1024,
1736                                              BTRFS_BLOCK_GROUP_METADATA |
1737                                              (info->metadata_alloc_profile &
1738                                               info->avail_metadata_alloc_bits));
1739                         BUG_ON(ret);
1740                 }
1741                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1742                                      num_bytes + 2 * 1024 * 1024, data);
1743                 BUG_ON(ret);
1744         }
1745
1746         new_hint = max(hint_byte, root->fs_info->alloc_start);
1747         if (new_hint < btrfs_super_total_bytes(&info->super_copy))
1748                 hint_byte = new_hint;
1749
1750         WARN_ON(num_bytes < root->sectorsize);
1751         ret = find_free_extent(trans, root, num_bytes, empty_size,
1752                                search_start, search_end, hint_byte, ins,
1753                                trans->alloc_exclude_start,
1754                                trans->alloc_exclude_nr, data);
1755         if (ret == -ENOSPC && num_bytes > min_alloc_size) {
1756                 num_bytes = num_bytes >> 1;
1757                 num_bytes = max(num_bytes, min_alloc_size);
1758                 goto again;
1759         }
1760         BUG_ON(ret);
1761         if (ret)
1762                 return ret;
1763
1764         /* block accounting for super block */
1765         super_used = btrfs_super_bytes_used(&info->super_copy);
1766         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1767
1768         /* block accounting for root item */
1769         root_used = btrfs_root_used(&root->root_item);
1770         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1771
1772         clear_extent_dirty(&root->fs_info->free_space_cache,
1773                            ins->objectid, ins->objectid + ins->offset - 1,
1774                            GFP_NOFS);
1775
1776         if (root == extent_root) {
1777                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1778                                 ins->objectid + ins->offset - 1,
1779                                 EXTENT_LOCKED, GFP_NOFS);
1780                 goto update_block;
1781         }
1782
1783         WARN_ON(trans->alloc_exclude_nr);
1784         trans->alloc_exclude_start = ins->objectid;
1785         trans->alloc_exclude_nr = ins->offset;
1786
1787         memcpy(&keys[0], ins, sizeof(*ins));
1788         keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
1789                                          owner, owner_offset);
1790         keys[1].objectid = ins->objectid;
1791         keys[1].type = BTRFS_EXTENT_REF_KEY;
1792         sizes[0] = sizeof(*extent_item);
1793         sizes[1] = sizeof(*ref);
1794
1795         path = btrfs_alloc_path();
1796         BUG_ON(!path);
1797
1798         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
1799                                        sizes, 2);
1800
1801         BUG_ON(ret);
1802         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1803                                      struct btrfs_extent_item);
1804         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
1805         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1806                              struct btrfs_extent_ref);
1807
1808         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
1809         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
1810         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
1811         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
1812
1813         btrfs_mark_buffer_dirty(path->nodes[0]);
1814
1815         trans->alloc_exclude_start = 0;
1816         trans->alloc_exclude_nr = 0;
1817         btrfs_free_path(path);
1818         finish_current_insert(trans, extent_root);
1819         pending_ret = del_pending_extents(trans, extent_root);
1820
1821         if (ret) {
1822                 return ret;
1823         }
1824         if (pending_ret) {
1825                 return pending_ret;
1826         }
1827
1828 update_block:
1829         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
1830         if (ret) {
1831                 printk("update block group failed for %Lu %Lu\n",
1832                        ins->objectid, ins->offset);
1833                 BUG();
1834         }
1835         return 0;
1836 }
1837
1838 /*
1839  * helper function to allocate a block for a given tree
1840  * returns the tree buffer or NULL.
1841  */
1842 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1843                                              struct btrfs_root *root,
1844                                              u32 blocksize,
1845                                              u64 root_objectid, u64 hint,
1846                                              u64 empty_size)
1847 {
1848         u64 ref_generation;
1849
1850         if (root->ref_cows)
1851                 ref_generation = trans->transid;
1852         else
1853                 ref_generation = 0;
1854
1855
1856         return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
1857                                         ref_generation, 0, 0, hint, empty_size);
1858 }
1859
1860 /*
1861  * helper function to allocate a block for a given tree
1862  * returns the tree buffer or NULL.
1863  */
1864 struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1865                                              struct btrfs_root *root,
1866                                              u32 blocksize,
1867                                              u64 root_objectid,
1868                                              u64 ref_generation,
1869                                              u64 first_objectid,
1870                                              int level,
1871                                              u64 hint,
1872                                              u64 empty_size)
1873 {
1874         struct btrfs_key ins;
1875         int ret;
1876         struct extent_buffer *buf;
1877
1878         ret = btrfs_alloc_extent(trans, root, blocksize, blocksize,
1879                                  root_objectid, ref_generation,
1880                                  level, first_objectid, empty_size, hint,
1881                                  (u64)-1, &ins, 0);
1882         if (ret) {
1883                 BUG_ON(ret > 0);
1884                 return ERR_PTR(ret);
1885         }
1886         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1887         if (!buf) {
1888                 btrfs_free_extent(trans, root, ins.objectid, blocksize,
1889                                   root->root_key.objectid, ref_generation,
1890                                   0, 0, 0);
1891                 return ERR_PTR(-ENOMEM);
1892         }
1893         btrfs_set_header_generation(buf, trans->transid);
1894         clean_tree_block(trans, root, buf);
1895         wait_on_tree_block_writeback(root, buf);
1896         btrfs_set_buffer_uptodate(buf);
1897
1898         if (PageDirty(buf->first_page)) {
1899                 printk("page %lu dirty\n", buf->first_page->index);
1900                 WARN_ON(1);
1901         }
1902
1903         set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
1904                          buf->start + buf->len - 1, GFP_NOFS);
1905         if (!btrfs_test_opt(root, SSD))
1906                 btrfs_set_buffer_defrag(buf);
1907         trans->blocks_used++;
1908         return buf;
1909 }
1910
1911 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
1912                                   struct btrfs_root *root,
1913                                   struct extent_buffer *leaf)
1914 {
1915         u64 leaf_owner;
1916         u64 leaf_generation;
1917         struct btrfs_key key;
1918         struct btrfs_file_extent_item *fi;
1919         int i;
1920         int nritems;
1921         int ret;
1922
1923         BUG_ON(!btrfs_is_leaf(leaf));
1924         nritems = btrfs_header_nritems(leaf);
1925         leaf_owner = btrfs_header_owner(leaf);
1926         leaf_generation = btrfs_header_generation(leaf);
1927
1928         for (i = 0; i < nritems; i++) {
1929                 u64 disk_bytenr;
1930
1931                 btrfs_item_key_to_cpu(leaf, &key, i);
1932                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1933                         continue;
1934                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1935                 if (btrfs_file_extent_type(leaf, fi) ==
1936                     BTRFS_FILE_EXTENT_INLINE)
1937                         continue;
1938                 /*
1939                  * FIXME make sure to insert a trans record that
1940                  * repeats the snapshot del on crash
1941                  */
1942                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1943                 if (disk_bytenr == 0)
1944                         continue;
1945                 ret = btrfs_free_extent(trans, root, disk_bytenr,
1946                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
1947                                 leaf_owner, leaf_generation,
1948                                 key.objectid, key.offset, 0);
1949                 BUG_ON(ret);
1950         }
1951         return 0;
1952 }
1953
1954 static void noinline reada_walk_down(struct btrfs_root *root,
1955                                      struct extent_buffer *node,
1956                                      int slot)
1957 {
1958         u64 bytenr;
1959         u64 last = 0;
1960         u32 nritems;
1961         u32 refs;
1962         u32 blocksize;
1963         int ret;
1964         int i;
1965         int level;
1966         int skipped = 0;
1967
1968         nritems = btrfs_header_nritems(node);
1969         level = btrfs_header_level(node);
1970         if (level)
1971                 return;
1972
1973         for (i = slot; i < nritems && skipped < 32; i++) {
1974                 bytenr = btrfs_node_blockptr(node, i);
1975                 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
1976                              (last > bytenr && last - bytenr > 32 * 1024))) {
1977                         skipped++;
1978                         continue;
1979                 }
1980                 blocksize = btrfs_level_size(root, level - 1);
1981                 if (i != slot) {
1982                         ret = lookup_extent_ref(NULL, root, bytenr,
1983                                                 blocksize, &refs);
1984                         BUG_ON(ret);
1985                         if (refs != 1) {
1986                                 skipped++;
1987                                 continue;
1988                         }
1989                 }
1990                 mutex_unlock(&root->fs_info->fs_mutex);
1991                 ret = readahead_tree_block(root, bytenr, blocksize);
1992                 last = bytenr + blocksize;
1993                 cond_resched();
1994                 mutex_lock(&root->fs_info->fs_mutex);
1995                 if (ret)
1996                         break;
1997         }
1998 }
1999
2000 /*
2001  * helper function for drop_snapshot, this walks down the tree dropping ref
2002  * counts as it goes.
2003  */
2004 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2005                                    struct btrfs_root *root,
2006                                    struct btrfs_path *path, int *level)
2007 {
2008         u64 root_owner;
2009         u64 root_gen;
2010         u64 bytenr;
2011         struct extent_buffer *next;
2012         struct extent_buffer *cur;
2013         struct extent_buffer *parent;
2014         u32 blocksize;
2015         int ret;
2016         u32 refs;
2017
2018         WARN_ON(*level < 0);
2019         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2020         ret = lookup_extent_ref(trans, root,
2021                                 path->nodes[*level]->start,
2022                                 path->nodes[*level]->len, &refs);
2023         BUG_ON(ret);
2024         if (refs > 1)
2025                 goto out;
2026
2027         /*
2028          * walk down to the last node level and free all the leaves
2029          */
2030         while(*level >= 0) {
2031                 WARN_ON(*level < 0);
2032                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2033                 cur = path->nodes[*level];
2034
2035                 if (btrfs_header_level(cur) != *level)
2036                         WARN_ON(1);
2037
2038                 if (path->slots[*level] >=
2039                     btrfs_header_nritems(cur))
2040                         break;
2041                 if (*level == 0) {
2042                         ret = drop_leaf_ref(trans, root, cur);
2043                         BUG_ON(ret);
2044                         break;
2045                 }
2046                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2047                 blocksize = btrfs_level_size(root, *level - 1);
2048                 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
2049                 BUG_ON(ret);
2050                 if (refs != 1) {
2051                         parent = path->nodes[*level];
2052                         root_owner = btrfs_header_owner(parent);
2053                         root_gen = btrfs_header_generation(parent);
2054                         path->slots[*level]++;
2055                         ret = btrfs_free_extent(trans, root, bytenr,
2056                                                 blocksize, root_owner,
2057                                                 root_gen, 0, 0, 1);
2058                         BUG_ON(ret);
2059                         continue;
2060                 }
2061                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2062                 if (!next || !btrfs_buffer_uptodate(next)) {
2063                         free_extent_buffer(next);
2064                         reada_walk_down(root, cur, path->slots[*level]);
2065
2066                         mutex_unlock(&root->fs_info->fs_mutex);
2067                         next = read_tree_block(root, bytenr, blocksize);
2068                         mutex_lock(&root->fs_info->fs_mutex);
2069
2070                         /* we've dropped the lock, double check */
2071                         ret = lookup_extent_ref(trans, root, bytenr,
2072                                                 blocksize, &refs);
2073                         BUG_ON(ret);
2074                         if (refs != 1) {
2075                                 parent = path->nodes[*level];
2076                                 root_owner = btrfs_header_owner(parent);
2077                                 root_gen = btrfs_header_generation(parent);
2078
2079                                 path->slots[*level]++;
2080                                 free_extent_buffer(next);
2081                                 ret = btrfs_free_extent(trans, root, bytenr,
2082                                                         blocksize,
2083                                                         root_owner,
2084                                                         root_gen, 0, 0, 1);
2085                                 BUG_ON(ret);
2086                                 continue;
2087                         }
2088                 } else if (next) {
2089                         btrfs_verify_block_csum(root, next);
2090                 }
2091                 WARN_ON(*level <= 0);
2092                 if (path->nodes[*level-1])
2093                         free_extent_buffer(path->nodes[*level-1]);
2094                 path->nodes[*level-1] = next;
2095                 *level = btrfs_header_level(next);
2096                 path->slots[*level] = 0;
2097         }
2098 out:
2099         WARN_ON(*level < 0);
2100         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2101
2102         if (path->nodes[*level] == root->node) {
2103                 root_owner = root->root_key.objectid;
2104                 parent = path->nodes[*level];
2105         } else {
2106                 parent = path->nodes[*level + 1];
2107                 root_owner = btrfs_header_owner(parent);
2108         }
2109
2110         root_gen = btrfs_header_generation(parent);
2111         ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
2112                                 path->nodes[*level]->len,
2113                                 root_owner, root_gen, 0, 0, 1);
2114         free_extent_buffer(path->nodes[*level]);
2115         path->nodes[*level] = NULL;
2116         *level += 1;
2117         BUG_ON(ret);
2118         return 0;
2119 }
2120
2121 /*
2122  * helper for dropping snapshots.  This walks back up the tree in the path
2123  * to find the first node higher up where we haven't yet gone through
2124  * all the slots
2125  */
2126 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2127                                  struct btrfs_root *root,
2128                                  struct btrfs_path *path, int *level)
2129 {
2130         u64 root_owner;
2131         u64 root_gen;
2132         struct btrfs_root_item *root_item = &root->root_item;
2133         int i;
2134         int slot;
2135         int ret;
2136
2137         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2138                 slot = path->slots[i];
2139                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2140                         struct extent_buffer *node;
2141                         struct btrfs_disk_key disk_key;
2142                         node = path->nodes[i];
2143                         path->slots[i]++;
2144                         *level = i;
2145                         WARN_ON(*level == 0);
2146                         btrfs_node_key(node, &disk_key, path->slots[i]);
2147                         memcpy(&root_item->drop_progress,
2148                                &disk_key, sizeof(disk_key));
2149                         root_item->drop_level = i;
2150                         return 0;
2151                 } else {
2152                         if (path->nodes[*level] == root->node) {
2153                                 root_owner = root->root_key.objectid;
2154                                 root_gen =
2155                                    btrfs_header_generation(path->nodes[*level]);
2156                         } else {
2157                                 struct extent_buffer *node;
2158                                 node = path->nodes[*level + 1];
2159                                 root_owner = btrfs_header_owner(node);
2160                                 root_gen = btrfs_header_generation(node);
2161                         }
2162                         ret = btrfs_free_extent(trans, root,
2163                                                 path->nodes[*level]->start,
2164                                                 path->nodes[*level]->len,
2165                                                 root_owner, root_gen, 0, 0, 1);
2166                         BUG_ON(ret);
2167                         free_extent_buffer(path->nodes[*level]);
2168                         path->nodes[*level] = NULL;
2169                         *level = i + 1;
2170                 }
2171         }
2172         return 1;
2173 }
2174
2175 /*
2176  * drop the reference count on the tree rooted at 'snap'.  This traverses
2177  * the tree freeing any blocks that have a ref count of zero after being
2178  * decremented.
2179  */
2180 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2181                         *root)
2182 {
2183         int ret = 0;
2184         int wret;
2185         int level;
2186         struct btrfs_path *path;
2187         int i;
2188         int orig_level;
2189         struct btrfs_root_item *root_item = &root->root_item;
2190
2191         path = btrfs_alloc_path();
2192         BUG_ON(!path);
2193
2194         level = btrfs_header_level(root->node);
2195         orig_level = level;
2196         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2197                 path->nodes[level] = root->node;
2198                 extent_buffer_get(root->node);
2199                 path->slots[level] = 0;
2200         } else {
2201                 struct btrfs_key key;
2202                 struct btrfs_disk_key found_key;
2203                 struct extent_buffer *node;
2204
2205                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2206                 level = root_item->drop_level;
2207                 path->lowest_level = level;
2208                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2209                 if (wret < 0) {
2210                         ret = wret;
2211                         goto out;
2212                 }
2213                 node = path->nodes[level];
2214                 btrfs_node_key(node, &found_key, path->slots[level]);
2215                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2216                                sizeof(found_key)));
2217         }
2218         while(1) {
2219                 wret = walk_down_tree(trans, root, path, &level);
2220                 if (wret > 0)
2221                         break;
2222                 if (wret < 0)
2223                         ret = wret;
2224
2225                 wret = walk_up_tree(trans, root, path, &level);
2226                 if (wret > 0)
2227                         break;
2228                 if (wret < 0)
2229                         ret = wret;
2230                 ret = -EAGAIN;
2231                 break;
2232         }
2233         for (i = 0; i <= orig_level; i++) {
2234                 if (path->nodes[i]) {
2235                         free_extent_buffer(path->nodes[i]);
2236                         path->nodes[i] = NULL;
2237                 }
2238         }
2239 out:
2240         btrfs_free_path(path);
2241         return ret;
2242 }
2243
2244 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2245 {
2246         u64 start;
2247         u64 end;
2248         u64 ptr;
2249         int ret;
2250         while(1) {
2251                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2252                                             &start, &end, (unsigned int)-1);
2253                 if (ret)
2254                         break;
2255                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2256                 if (!ret)
2257                         kfree((void *)(unsigned long)ptr);
2258                 clear_extent_bits(&info->block_group_cache, start,
2259                                   end, (unsigned int)-1, GFP_NOFS);
2260         }
2261         while(1) {
2262                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2263                                             &start, &end, EXTENT_DIRTY);
2264                 if (ret)
2265                         break;
2266                 clear_extent_dirty(&info->free_space_cache, start,
2267                                    end, GFP_NOFS);
2268         }
2269         return 0;
2270 }
2271
2272 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2273                                          u64 len)
2274 {
2275         u64 page_start;
2276         u64 page_end;
2277         u64 delalloc_start;
2278         u64 existing_delalloc;
2279         unsigned long last_index;
2280         unsigned long i;
2281         struct page *page;
2282         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2283         struct file_ra_state *ra;
2284
2285         ra = kzalloc(sizeof(*ra), GFP_NOFS);
2286
2287         mutex_lock(&inode->i_mutex);
2288         i = start >> PAGE_CACHE_SHIFT;
2289         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2290
2291         file_ra_state_init(ra, inode->i_mapping);
2292         btrfs_force_ra(inode->i_mapping, ra, NULL, i, last_index);
2293         kfree(ra);
2294
2295         for (; i <= last_index; i++) {
2296                 page = grab_cache_page(inode->i_mapping, i);
2297                 if (!page)
2298                         goto out_unlock;
2299                 if (!PageUptodate(page)) {
2300                         btrfs_readpage(NULL, page);
2301                         lock_page(page);
2302                         if (!PageUptodate(page)) {
2303                                 unlock_page(page);
2304                                 page_cache_release(page);
2305                                 goto out_unlock;
2306                         }
2307                 }
2308                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2309                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2310
2311                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2312
2313                 delalloc_start = page_start;
2314                 existing_delalloc = count_range_bits(io_tree,
2315                                              &delalloc_start, page_end,
2316                                              PAGE_CACHE_SIZE, EXTENT_DELALLOC);
2317
2318                 set_extent_delalloc(io_tree, page_start,
2319                                     page_end, GFP_NOFS);
2320
2321                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2322                 set_page_dirty(page);
2323                 unlock_page(page);
2324                 page_cache_release(page);
2325         }
2326
2327 out_unlock:
2328         mutex_unlock(&inode->i_mutex);
2329         return 0;
2330 }
2331
2332 /*
2333  * note, this releases the path
2334  */
2335 static int noinline relocate_one_reference(struct btrfs_root *extent_root,
2336                                   struct btrfs_path *path,
2337                                   struct btrfs_key *extent_key)
2338 {
2339         struct inode *inode;
2340         struct btrfs_root *found_root;
2341         struct btrfs_key *root_location;
2342         struct btrfs_extent_ref *ref;
2343         u64 ref_root;
2344         u64 ref_gen;
2345         u64 ref_objectid;
2346         u64 ref_offset;
2347         int ret;
2348
2349         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
2350                              struct btrfs_extent_ref);
2351         ref_root = btrfs_ref_root(path->nodes[0], ref);
2352         ref_gen = btrfs_ref_generation(path->nodes[0], ref);
2353         ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
2354         ref_offset = btrfs_ref_offset(path->nodes[0], ref);
2355         btrfs_release_path(extent_root, path);
2356
2357         root_location = kmalloc(sizeof(*root_location), GFP_NOFS);
2358         root_location->objectid = ref_root;
2359         if (ref_gen == 0)
2360                 root_location->offset = 0;
2361         else
2362                 root_location->offset = (u64)-1;
2363         root_location->type = BTRFS_ROOT_ITEM_KEY;
2364
2365         found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
2366                                                 root_location);
2367         BUG_ON(!found_root);
2368         kfree(root_location);
2369
2370         if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2371                 mutex_unlock(&extent_root->fs_info->fs_mutex);
2372                 inode = btrfs_iget_locked(extent_root->fs_info->sb,
2373                                           ref_objectid, found_root);
2374                 if (inode->i_state & I_NEW) {
2375                         /* the inode and parent dir are two different roots */
2376                         BTRFS_I(inode)->root = found_root;
2377                         BTRFS_I(inode)->location.objectid = ref_objectid;
2378                         BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
2379                         BTRFS_I(inode)->location.offset = 0;
2380                         btrfs_read_locked_inode(inode);
2381                         unlock_new_inode(inode);
2382
2383                 }
2384                 /* this can happen if the reference is not against
2385                  * the latest version of the tree root
2386                  */
2387                 if (is_bad_inode(inode)) {
2388                         mutex_lock(&extent_root->fs_info->fs_mutex);
2389                         goto out;
2390                 }
2391                 relocate_inode_pages(inode, ref_offset, extent_key->offset);
2392                 /* FIXME, data=ordered will help get rid of this */
2393                 filemap_fdatawrite(inode->i_mapping);
2394                 iput(inode);
2395                 mutex_lock(&extent_root->fs_info->fs_mutex);
2396         } else {
2397                 struct btrfs_trans_handle *trans;
2398                 struct btrfs_key found_key;
2399                 struct extent_buffer *eb;
2400                 int level;
2401                 int i;
2402
2403                 trans = btrfs_start_transaction(found_root, 1);
2404                 eb = read_tree_block(found_root, extent_key->objectid,
2405                                      extent_key->offset);
2406                 level = btrfs_header_level(eb);
2407
2408                 if (level == 0)
2409                         btrfs_item_key_to_cpu(eb, &found_key, 0);
2410                 else
2411                         btrfs_node_key_to_cpu(eb, &found_key, 0);
2412
2413                 free_extent_buffer(eb);
2414
2415                 path->lowest_level = level;
2416                 path->reada = 2;
2417                 ret = btrfs_search_slot(trans, found_root, &found_key, path,
2418                                         0, 1);
2419                 path->lowest_level = 0;
2420                 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2421                         if (!path->nodes[i])
2422                                 break;
2423                         free_extent_buffer(path->nodes[i]);
2424                         path->nodes[i] = NULL;
2425                 }
2426                 btrfs_release_path(found_root, path);
2427                 btrfs_end_transaction(trans, found_root);
2428         }
2429
2430 out:
2431         return 0;
2432 }
2433
2434 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
2435                                         struct btrfs_path *path,
2436                                         struct btrfs_key *extent_key)
2437 {
2438         struct btrfs_key key;
2439         struct btrfs_key found_key;
2440         struct extent_buffer *leaf;
2441         u32 nritems;
2442         u32 item_size;
2443         int ret = 0;
2444
2445         key.objectid = extent_key->objectid;
2446         key.type = BTRFS_EXTENT_REF_KEY;
2447         key.offset = 0;
2448
2449         while(1) {
2450                 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2451
2452                 if (ret < 0)
2453                         goto out;
2454
2455                 ret = 0;
2456                 leaf = path->nodes[0];
2457                 nritems = btrfs_header_nritems(leaf);
2458                 if (path->slots[0] == nritems)
2459                         goto out;
2460
2461                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2462                 if (found_key.objectid != extent_key->objectid)
2463                         break;
2464
2465                 if (found_key.type != BTRFS_EXTENT_REF_KEY)
2466                         break;
2467
2468                 key.offset = found_key.offset + 1;
2469                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2470
2471                 ret = relocate_one_reference(extent_root, path, extent_key);
2472                 if (ret)
2473                         goto out;
2474         }
2475         ret = 0;
2476 out:
2477         btrfs_release_path(extent_root, path);
2478         return ret;
2479 }
2480
2481 int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
2482 {
2483         struct btrfs_trans_handle *trans;
2484         struct btrfs_root *tree_root = root->fs_info->tree_root;
2485         struct btrfs_path *path;
2486         u64 cur_byte;
2487         u64 total_found;
2488         struct btrfs_fs_info *info = root->fs_info;
2489         struct extent_io_tree *block_group_cache;
2490         struct btrfs_key key;
2491         struct btrfs_key found_key;
2492         struct extent_buffer *leaf;
2493         u32 nritems;
2494         int ret;
2495         int progress = 0;
2496
2497         btrfs_set_super_total_bytes(&info->super_copy, new_size);
2498         clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
2499                            GFP_NOFS);
2500         block_group_cache = &info->block_group_cache;
2501         path = btrfs_alloc_path();
2502         root = root->fs_info->extent_root;
2503         path->reada = 2;
2504
2505 again:
2506         total_found = 0;
2507         key.objectid = new_size;
2508         key.offset = 0;
2509         key.type = 0;
2510         cur_byte = key.objectid;
2511
2512         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2513         if (ret < 0)
2514                 goto out;
2515
2516         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
2517         if (ret < 0)
2518                 goto out;
2519         if (ret == 0) {
2520                 leaf = path->nodes[0];
2521                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2522                 if (found_key.objectid + found_key.offset > new_size) {
2523                         cur_byte = found_key.objectid;
2524                         key.objectid = cur_byte;
2525                 }
2526         }
2527         btrfs_release_path(root, path);
2528
2529         while(1) {
2530                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2531                 if (ret < 0)
2532                         goto out;
2533
2534                 leaf = path->nodes[0];
2535                 nritems = btrfs_header_nritems(leaf);
2536 next:
2537                 if (path->slots[0] >= nritems) {
2538                         ret = btrfs_next_leaf(root, path);
2539                         if (ret < 0)
2540                                 goto out;
2541                         if (ret == 1) {
2542                                 ret = 0;
2543                                 break;
2544                         }
2545                         leaf = path->nodes[0];
2546                         nritems = btrfs_header_nritems(leaf);
2547                 }
2548
2549                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2550
2551                 if (progress && need_resched()) {
2552                         memcpy(&key, &found_key, sizeof(key));
2553                         mutex_unlock(&root->fs_info->fs_mutex);
2554                         cond_resched();
2555                         mutex_lock(&root->fs_info->fs_mutex);
2556                         btrfs_release_path(root, path);
2557                         btrfs_search_slot(NULL, root, &key, path, 0, 0);
2558                         progress = 0;
2559                         goto next;
2560                 }
2561                 progress = 1;
2562
2563                 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
2564                     found_key.objectid + found_key.offset <= cur_byte) {
2565                         path->slots[0]++;
2566                         goto next;
2567                 }
2568
2569                 total_found++;
2570                 cur_byte = found_key.objectid + found_key.offset;
2571                 key.objectid = cur_byte;
2572                 btrfs_release_path(root, path);
2573                 ret = relocate_one_extent(root, path, &found_key);
2574         }
2575
2576         btrfs_release_path(root, path);
2577
2578         if (total_found > 0) {
2579                 trans = btrfs_start_transaction(tree_root, 1);
2580                 btrfs_commit_transaction(trans, tree_root);
2581
2582                 mutex_unlock(&root->fs_info->fs_mutex);
2583                 btrfs_clean_old_snapshots(tree_root);
2584                 mutex_lock(&root->fs_info->fs_mutex);
2585
2586                 trans = btrfs_start_transaction(tree_root, 1);
2587                 btrfs_commit_transaction(trans, tree_root);
2588                 goto again;
2589         }
2590
2591         trans = btrfs_start_transaction(root, 1);
2592         key.objectid = new_size;
2593         key.offset = 0;
2594         key.type = 0;
2595         while(1) {
2596                 u64 ptr;
2597
2598                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2599                 if (ret < 0)
2600                         goto out;
2601
2602                 leaf = path->nodes[0];
2603                 nritems = btrfs_header_nritems(leaf);
2604 bg_next:
2605                 if (path->slots[0] >= nritems) {
2606                         ret = btrfs_next_leaf(root, path);
2607                         if (ret < 0)
2608                                 break;
2609                         if (ret == 1) {
2610                                 ret = 0;
2611                                 break;
2612                         }
2613                         leaf = path->nodes[0];
2614                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2615
2616                         /*
2617                          * btrfs_next_leaf doesn't cow buffers, we have to
2618                          * do the search again
2619                          */
2620                         memcpy(&key, &found_key, sizeof(key));
2621                         btrfs_release_path(root, path);
2622                         goto resched_check;
2623                 }
2624
2625                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2626                 if (btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) {
2627                         printk("shrinker found key %Lu %u %Lu\n",
2628                                 found_key.objectid, found_key.type,
2629                                 found_key.offset);
2630                         path->slots[0]++;
2631                         goto bg_next;
2632                 }
2633                 ret = get_state_private(&info->block_group_cache,
2634                                         found_key.objectid, &ptr);
2635                 if (!ret)
2636                         kfree((void *)(unsigned long)ptr);
2637
2638                 clear_extent_bits(&info->block_group_cache, found_key.objectid,
2639                                   found_key.objectid + found_key.offset - 1,
2640                                   (unsigned int)-1, GFP_NOFS);
2641
2642                 key.objectid = found_key.objectid + 1;
2643                 btrfs_del_item(trans, root, path);
2644                 btrfs_release_path(root, path);
2645 resched_check:
2646                 if (need_resched()) {
2647                         mutex_unlock(&root->fs_info->fs_mutex);
2648                         cond_resched();
2649                         mutex_lock(&root->fs_info->fs_mutex);
2650                 }
2651         }
2652         clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
2653                            GFP_NOFS);
2654         btrfs_commit_transaction(trans, root);
2655 out:
2656         btrfs_free_path(path);
2657         return ret;
2658 }
2659
2660 int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
2661                            struct btrfs_root *root, u64 new_size)
2662 {
2663         btrfs_set_super_total_bytes(&root->fs_info->super_copy, new_size);
2664         return 0;
2665 }
2666
2667 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
2668                            struct btrfs_key *key)
2669 {
2670         int ret;
2671         struct btrfs_key found_key;
2672         struct extent_buffer *leaf;
2673         int slot;
2674
2675         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
2676         if (ret < 0)
2677                 return ret;
2678         while(1) {
2679                 slot = path->slots[0];
2680                 leaf = path->nodes[0];
2681                 if (slot >= btrfs_header_nritems(leaf)) {
2682                         ret = btrfs_next_leaf(root, path);
2683                         if (ret == 0)
2684                                 continue;
2685                         if (ret < 0)
2686                                 goto error;
2687                         break;
2688                 }
2689                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2690
2691                 if (found_key.objectid >= key->objectid &&
2692                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
2693                         return 0;
2694                 path->slots[0]++;
2695         }
2696         ret = -ENOENT;
2697 error:
2698         return ret;
2699 }
2700
2701 int btrfs_read_block_groups(struct btrfs_root *root)
2702 {
2703         struct btrfs_path *path;
2704         int ret;
2705         int bit;
2706         struct btrfs_block_group_cache *cache;
2707         struct btrfs_fs_info *info = root->fs_info;
2708         struct btrfs_space_info *space_info;
2709         struct extent_io_tree *block_group_cache;
2710         struct btrfs_key key;
2711         struct btrfs_key found_key;
2712         struct extent_buffer *leaf;
2713
2714         block_group_cache = &info->block_group_cache;
2715         root = info->extent_root;
2716         key.objectid = 0;
2717         key.offset = 0;
2718         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2719         path = btrfs_alloc_path();
2720         if (!path)
2721                 return -ENOMEM;
2722
2723         while(1) {
2724                 ret = find_first_block_group(root, path, &key);
2725                 if (ret > 0) {
2726                         ret = 0;
2727                         goto error;
2728                 }
2729                 if (ret != 0)
2730                         goto error;
2731
2732                 leaf = path->nodes[0];
2733                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2734                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2735                 if (!cache) {
2736                         ret = -ENOMEM;
2737                         break;
2738                 }
2739
2740                 read_extent_buffer(leaf, &cache->item,
2741                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
2742                                    sizeof(cache->item));
2743                 memcpy(&cache->key, &found_key, sizeof(found_key));
2744                 cache->cached = 0;
2745                 cache->pinned = 0;
2746
2747                 key.objectid = found_key.objectid + found_key.offset;
2748                 btrfs_release_path(root, path);
2749                 cache->flags = btrfs_block_group_flags(&cache->item);
2750                 bit = 0;
2751                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
2752                         bit = BLOCK_GROUP_DATA;
2753                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2754                         bit = BLOCK_GROUP_SYSTEM;
2755                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
2756                         bit = BLOCK_GROUP_METADATA;
2757                 }
2758                 set_avail_alloc_bits(info, cache->flags);
2759
2760                 ret = update_space_info(info, cache->flags, found_key.offset,
2761                                         btrfs_block_group_used(&cache->item),
2762                                         &space_info);
2763                 BUG_ON(ret);
2764                 cache->space_info = space_info;
2765
2766                 /* use EXTENT_LOCKED to prevent merging */
2767                 set_extent_bits(block_group_cache, found_key.objectid,
2768                                 found_key.objectid + found_key.offset - 1,
2769                                 bit | EXTENT_LOCKED, GFP_NOFS);
2770                 set_state_private(block_group_cache, found_key.objectid,
2771                                   (unsigned long)cache);
2772
2773                 if (key.objectid >=
2774                     btrfs_super_total_bytes(&info->super_copy))
2775                         break;
2776         }
2777         ret = 0;
2778 error:
2779         btrfs_free_path(path);
2780         return ret;
2781 }
2782
2783 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
2784                            struct btrfs_root *root, u64 bytes_used,
2785                            u64 type, u64 chunk_tree, u64 chunk_objectid,
2786                            u64 size)
2787 {
2788         int ret;
2789         int bit = 0;
2790         struct btrfs_root *extent_root;
2791         struct btrfs_block_group_cache *cache;
2792         struct extent_io_tree *block_group_cache;
2793
2794         extent_root = root->fs_info->extent_root;
2795         block_group_cache = &root->fs_info->block_group_cache;
2796
2797         cache = kmalloc(sizeof(*cache), GFP_NOFS);
2798         BUG_ON(!cache);
2799         cache->key.objectid = chunk_objectid;
2800         cache->key.offset = size;
2801         cache->cached = 0;
2802         cache->pinned = 0;
2803         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2804         memset(&cache->item, 0, sizeof(cache->item));
2805         btrfs_set_block_group_used(&cache->item, bytes_used);
2806         btrfs_set_block_group_chunk_tree(&cache->item, chunk_tree);
2807         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
2808         cache->flags = type;
2809         btrfs_set_block_group_flags(&cache->item, type);
2810
2811         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
2812                                 &cache->space_info);
2813         BUG_ON(ret);
2814
2815         bit = block_group_state_bits(type);
2816         set_extent_bits(block_group_cache, chunk_objectid,
2817                         chunk_objectid + size - 1,
2818                         bit | EXTENT_LOCKED, GFP_NOFS);
2819         set_state_private(block_group_cache, chunk_objectid,
2820                           (unsigned long)cache);
2821
2822         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
2823                                 sizeof(cache->item));
2824         BUG_ON(ret);
2825
2826         finish_current_insert(trans, extent_root);
2827         ret = del_pending_extents(trans, extent_root);
2828         BUG_ON(ret);
2829         set_avail_alloc_bits(extent_root->fs_info, type);
2830         return 0;
2831 }