Btrfs: Add support for mirroring across drives
[linux-2.6-block.git] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include <linux/crc32c.h>
21 #include <linux/pagemap.h>
22 #include "hash.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 #include "volumes.h"
28
29 #define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
30 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
31 #define BLOCK_GROUP_SYSTEM   EXTENT_NEW
32
33 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
34
35 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
36                                  btrfs_root *extent_root);
37 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
38                                btrfs_root *extent_root);
39 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
40                            struct btrfs_root *root, u64 bytes_used,
41                            u64 type, u64 chunk_tree, u64 chunk_objectid,
42                            u64 size);
43
44
45 static int cache_block_group(struct btrfs_root *root,
46                              struct btrfs_block_group_cache *block_group)
47 {
48         struct btrfs_path *path;
49         int ret;
50         struct btrfs_key key;
51         struct extent_buffer *leaf;
52         struct extent_io_tree *free_space_cache;
53         int slot;
54         u64 last = 0;
55         u64 hole_size;
56         u64 first_free;
57         int found = 0;
58
59         if (!block_group)
60                 return 0;
61
62         root = root->fs_info->extent_root;
63         free_space_cache = &root->fs_info->free_space_cache;
64
65         if (block_group->cached)
66                 return 0;
67
68         path = btrfs_alloc_path();
69         if (!path)
70                 return -ENOMEM;
71
72         path->reada = 2;
73         first_free = block_group->key.objectid;
74         key.objectid = block_group->key.objectid;
75         key.offset = 0;
76         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
77         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
78         if (ret < 0)
79                 return ret;
80         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
81         if (ret < 0)
82                 return ret;
83         if (ret == 0) {
84                 leaf = path->nodes[0];
85                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
86                 if (key.objectid + key.offset > first_free)
87                         first_free = key.objectid + key.offset;
88         }
89         while(1) {
90                 leaf = path->nodes[0];
91                 slot = path->slots[0];
92                 if (slot >= btrfs_header_nritems(leaf)) {
93                         ret = btrfs_next_leaf(root, path);
94                         if (ret < 0)
95                                 goto err;
96                         if (ret == 0) {
97                                 continue;
98                         } else {
99                                 break;
100                         }
101                 }
102                 btrfs_item_key_to_cpu(leaf, &key, slot);
103                 if (key.objectid < block_group->key.objectid) {
104                         goto next;
105                 }
106                 if (key.objectid >= block_group->key.objectid +
107                     block_group->key.offset) {
108                         break;
109                 }
110
111                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
112                         if (!found) {
113                                 last = first_free;
114                                 found = 1;
115                         }
116                         if (key.objectid > last) {
117                                 hole_size = key.objectid - last;
118                                 set_extent_dirty(free_space_cache, last,
119                                                  last + hole_size - 1,
120                                                  GFP_NOFS);
121                         }
122                         last = key.objectid + key.offset;
123                 }
124 next:
125                 path->slots[0]++;
126         }
127
128         if (!found)
129                 last = first_free;
130         if (block_group->key.objectid +
131             block_group->key.offset > last) {
132                 hole_size = block_group->key.objectid +
133                         block_group->key.offset - last;
134                 set_extent_dirty(free_space_cache, last,
135                                  last + hole_size - 1, GFP_NOFS);
136         }
137         block_group->cached = 1;
138 err:
139         btrfs_free_path(path);
140         return 0;
141 }
142
143 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
144                                                          btrfs_fs_info *info,
145                                                          u64 bytenr)
146 {
147         struct extent_io_tree *block_group_cache;
148         struct btrfs_block_group_cache *block_group = NULL;
149         u64 ptr;
150         u64 start;
151         u64 end;
152         int ret;
153
154         block_group_cache = &info->block_group_cache;
155         ret = find_first_extent_bit(block_group_cache,
156                                     bytenr, &start, &end,
157                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
158                                     BLOCK_GROUP_SYSTEM);
159         if (ret) {
160                 return NULL;
161         }
162         ret = get_state_private(block_group_cache, start, &ptr);
163         if (ret)
164                 return NULL;
165
166         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
167         if (block_group->key.objectid <= bytenr && bytenr <
168             block_group->key.objectid + block_group->key.offset)
169                 return block_group;
170         return NULL;
171 }
172
173 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
174 {
175         return (cache->flags & bits) == bits;
176 }
177
178 static int noinline find_search_start(struct btrfs_root *root,
179                               struct btrfs_block_group_cache **cache_ret,
180                               u64 *start_ret, int num, int data)
181 {
182         int ret;
183         struct btrfs_block_group_cache *cache = *cache_ret;
184         struct extent_io_tree *free_space_cache;
185         struct extent_state *state;
186         u64 last;
187         u64 start = 0;
188         u64 cache_miss = 0;
189         u64 total_fs_bytes;
190         u64 search_start = *start_ret;
191         int wrapped = 0;
192
193         if (!cache)
194                 goto out;
195         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
196         free_space_cache = &root->fs_info->free_space_cache;
197
198 again:
199         ret = cache_block_group(root, cache);
200         if (ret)
201                 goto out;
202
203         last = max(search_start, cache->key.objectid);
204         if (!block_group_bits(cache, data)) {
205                 goto new_group;
206         }
207
208         spin_lock_irq(&free_space_cache->lock);
209         state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY);
210         while(1) {
211                 if (!state) {
212                         if (!cache_miss)
213                                 cache_miss = last;
214                         spin_unlock_irq(&free_space_cache->lock);
215                         goto new_group;
216                 }
217
218                 start = max(last, state->start);
219                 last = state->end + 1;
220                 if (last - start < num) {
221                         if (last == cache->key.objectid + cache->key.offset)
222                                 cache_miss = start;
223                         do {
224                                 state = extent_state_next(state);
225                         } while(state && !(state->state & EXTENT_DIRTY));
226                         continue;
227                 }
228                 spin_unlock_irq(&free_space_cache->lock);
229                 if (start + num > cache->key.objectid + cache->key.offset)
230                         goto new_group;
231                 if (start + num  > total_fs_bytes)
232                         goto new_group;
233                 if (!block_group_bits(cache, data)) {
234                         printk("block group bits don't match %Lu %Lu\n", cache->flags, data);
235                 }
236                 *start_ret = start;
237                 return 0;
238         }
239 out:
240         cache = btrfs_lookup_block_group(root->fs_info, search_start);
241         if (!cache) {
242                 printk("Unable to find block group for %Lu\n", search_start);
243                 WARN_ON(1);
244         }
245         return -ENOSPC;
246
247 new_group:
248         last = cache->key.objectid + cache->key.offset;
249 wrapped:
250         cache = btrfs_lookup_block_group(root->fs_info, last);
251         if (!cache || cache->key.objectid >= total_fs_bytes) {
252 no_cache:
253                 if (!wrapped) {
254                         wrapped = 1;
255                         last = search_start;
256                         goto wrapped;
257                 }
258                 goto out;
259         }
260         if (cache_miss && !cache->cached) {
261                 cache_block_group(root, cache);
262                 last = cache_miss;
263                 cache = btrfs_lookup_block_group(root->fs_info, last);
264         }
265         cache = btrfs_find_block_group(root, cache, last, data, 0);
266         if (!cache)
267                 goto no_cache;
268         *cache_ret = cache;
269         cache_miss = 0;
270         goto again;
271 }
272
273 static u64 div_factor(u64 num, int factor)
274 {
275         if (factor == 10)
276                 return num;
277         num *= factor;
278         do_div(num, 10);
279         return num;
280 }
281
282 static int block_group_state_bits(u64 flags)
283 {
284         int bits = 0;
285         if (flags & BTRFS_BLOCK_GROUP_DATA)
286                 bits |= BLOCK_GROUP_DATA;
287         if (flags & BTRFS_BLOCK_GROUP_METADATA)
288                 bits |= BLOCK_GROUP_METADATA;
289         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
290                 bits |= BLOCK_GROUP_SYSTEM;
291         return bits;
292 }
293
294 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
295                                                  struct btrfs_block_group_cache
296                                                  *hint, u64 search_start,
297                                                  int data, int owner)
298 {
299         struct btrfs_block_group_cache *cache;
300         struct extent_io_tree *block_group_cache;
301         struct btrfs_block_group_cache *found_group = NULL;
302         struct btrfs_fs_info *info = root->fs_info;
303         u64 used;
304         u64 last = 0;
305         u64 hint_last;
306         u64 start;
307         u64 end;
308         u64 free_check;
309         u64 ptr;
310         u64 total_fs_bytes;
311         int bit;
312         int ret;
313         int full_search = 0;
314         int factor = 8;
315
316         block_group_cache = &info->block_group_cache;
317         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
318
319         if (!owner)
320                 factor = 8;
321
322         bit = block_group_state_bits(data);
323
324         if (search_start && search_start < total_fs_bytes) {
325                 struct btrfs_block_group_cache *shint;
326                 shint = btrfs_lookup_block_group(info, search_start);
327                 if (shint && block_group_bits(shint, data)) {
328                         used = btrfs_block_group_used(&shint->item);
329                         if (used + shint->pinned <
330                             div_factor(shint->key.offset, factor)) {
331                                 return shint;
332                         }
333                 }
334         }
335         if (hint && block_group_bits(hint, data) &&
336             hint->key.objectid < total_fs_bytes) {
337                 used = btrfs_block_group_used(&hint->item);
338                 if (used + hint->pinned <
339                     div_factor(hint->key.offset, factor)) {
340                         return hint;
341                 }
342                 last = hint->key.objectid + hint->key.offset;
343                 hint_last = last;
344         } else {
345                 if (hint)
346                         hint_last = max(hint->key.objectid, search_start);
347                 else
348                         hint_last = search_start;
349
350                 if (hint_last >= total_fs_bytes)
351                         hint_last = search_start;
352                 last = hint_last;
353         }
354 again:
355         while(1) {
356                 ret = find_first_extent_bit(block_group_cache, last,
357                                             &start, &end, bit);
358                 if (ret)
359                         break;
360
361                 ret = get_state_private(block_group_cache, start, &ptr);
362                 if (ret)
363                         break;
364
365                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
366                 last = cache->key.objectid + cache->key.offset;
367                 used = btrfs_block_group_used(&cache->item);
368
369                 if (cache->key.objectid > total_fs_bytes)
370                         break;
371
372                 if (block_group_bits(cache, data)) {
373                         if (full_search)
374                                 free_check = cache->key.offset;
375                         else
376                                 free_check = div_factor(cache->key.offset,
377                                                         factor);
378
379                         if (used + cache->pinned < free_check) {
380                                 found_group = cache;
381                                 goto found;
382                         }
383                 }
384                 cond_resched();
385         }
386         if (!full_search) {
387                 last = search_start;
388                 full_search = 1;
389                 goto again;
390         }
391 found:
392         return found_group;
393 }
394
395 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
396                            u64 owner, u64 owner_offset)
397 {
398         u32 high_crc = ~(u32)0;
399         u32 low_crc = ~(u32)0;
400         __le64 lenum;
401
402         lenum = cpu_to_le64(root_objectid);
403         high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
404         lenum = cpu_to_le64(ref_generation);
405         low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
406         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
407                 lenum = cpu_to_le64(owner);
408                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
409                 lenum = cpu_to_le64(owner_offset);
410                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
411         }
412         return ((u64)high_crc << 32) | (u64)low_crc;
413 }
414
415 static int match_extent_ref(struct extent_buffer *leaf,
416                             struct btrfs_extent_ref *disk_ref,
417                             struct btrfs_extent_ref *cpu_ref)
418 {
419         int ret;
420         int len;
421
422         if (cpu_ref->objectid)
423                 len = sizeof(*cpu_ref);
424         else
425                 len = 2 * sizeof(u64);
426         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
427                                    len);
428         return ret == 0;
429 }
430
431 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
432                                           struct btrfs_root *root,
433                                           struct btrfs_path *path, u64 bytenr,
434                                           u64 root_objectid,
435                                           u64 ref_generation, u64 owner,
436                                           u64 owner_offset, int del)
437 {
438         u64 hash;
439         struct btrfs_key key;
440         struct btrfs_key found_key;
441         struct btrfs_extent_ref ref;
442         struct extent_buffer *leaf;
443         struct btrfs_extent_ref *disk_ref;
444         int ret;
445         int ret2;
446
447         btrfs_set_stack_ref_root(&ref, root_objectid);
448         btrfs_set_stack_ref_generation(&ref, ref_generation);
449         btrfs_set_stack_ref_objectid(&ref, owner);
450         btrfs_set_stack_ref_offset(&ref, owner_offset);
451
452         hash = hash_extent_ref(root_objectid, ref_generation, owner,
453                                owner_offset);
454         key.offset = hash;
455         key.objectid = bytenr;
456         key.type = BTRFS_EXTENT_REF_KEY;
457
458         while (1) {
459                 ret = btrfs_search_slot(trans, root, &key, path,
460                                         del ? -1 : 0, del);
461                 if (ret < 0)
462                         goto out;
463                 leaf = path->nodes[0];
464                 if (ret != 0) {
465                         u32 nritems = btrfs_header_nritems(leaf);
466                         if (path->slots[0] >= nritems) {
467                                 ret2 = btrfs_next_leaf(root, path);
468                                 if (ret2)
469                                         goto out;
470                                 leaf = path->nodes[0];
471                         }
472                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
473                         if (found_key.objectid != bytenr ||
474                             found_key.type != BTRFS_EXTENT_REF_KEY)
475                                 goto out;
476                         key.offset = found_key.offset;
477                         if (del) {
478                                 btrfs_release_path(root, path);
479                                 continue;
480                         }
481                 }
482                 disk_ref = btrfs_item_ptr(path->nodes[0],
483                                           path->slots[0],
484                                           struct btrfs_extent_ref);
485                 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
486                         ret = 0;
487                         goto out;
488                 }
489                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
490                 key.offset = found_key.offset + 1;
491                 btrfs_release_path(root, path);
492         }
493 out:
494         return ret;
495 }
496
497 /*
498  * Back reference rules.  Back refs have three main goals:
499  *
500  * 1) differentiate between all holders of references to an extent so that
501  *    when a reference is dropped we can make sure it was a valid reference
502  *    before freeing the extent.
503  *
504  * 2) Provide enough information to quickly find the holders of an extent
505  *    if we notice a given block is corrupted or bad.
506  *
507  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
508  *    maintenance.  This is actually the same as #2, but with a slightly
509  *    different use case.
510  *
511  * File extents can be referenced by:
512  *
513  * - multiple snapshots, subvolumes, or different generations in one subvol
514  * - different files inside a single subvolume (in theory, not implemented yet)
515  * - different offsets inside a file (bookend extents in file.c)
516  *
517  * The extent ref structure has fields for:
518  *
519  * - Objectid of the subvolume root
520  * - Generation number of the tree holding the reference
521  * - objectid of the file holding the reference
522  * - offset in the file corresponding to the key holding the reference
523  *
524  * When a file extent is allocated the fields are filled in:
525  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
526  *
527  * When a leaf is cow'd new references are added for every file extent found
528  * in the leaf.  It looks the same as the create case, but trans->transid
529  * will be different when the block is cow'd.
530  *
531  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
532  *
533  * When a file extent is removed either during snapshot deletion or file
534  * truncation, the corresponding back reference is found
535  * by searching for:
536  *
537  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
538  *      inode objectid, offset in file)
539  *
540  * Btree extents can be referenced by:
541  *
542  * - Different subvolumes
543  * - Different generations of the same subvolume
544  *
545  * Storing sufficient information for a full reverse mapping of a btree
546  * block would require storing the lowest key of the block in the backref,
547  * and it would require updating that lowest key either before write out or
548  * every time it changed.  Instead, the objectid of the lowest key is stored
549  * along with the level of the tree block.  This provides a hint
550  * about where in the btree the block can be found.  Searches through the
551  * btree only need to look for a pointer to that block, so they stop one
552  * level higher than the level recorded in the backref.
553  *
554  * Some btrees do not do reference counting on their extents.  These
555  * include the extent tree and the tree of tree roots.  Backrefs for these
556  * trees always have a generation of zero.
557  *
558  * When a tree block is created, back references are inserted:
559  *
560  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
561  *
562  * When a tree block is cow'd in a reference counted root,
563  * new back references are added for all the blocks it points to.
564  * These are of the form (trans->transid will have increased since creation):
565  *
566  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
567  *
568  * Because the lowest_key_objectid and the level are just hints
569  * they are not used when backrefs are deleted.  When a backref is deleted:
570  *
571  * if backref was for a tree root:
572  *     root_objectid = root->root_key.objectid
573  * else
574  *     root_objectid = btrfs_header_owner(parent)
575  *
576  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
577  *
578  * Back Reference Key hashing:
579  *
580  * Back references have four fields, each 64 bits long.  Unfortunately,
581  * This is hashed into a single 64 bit number and placed into the key offset.
582  * The key objectid corresponds to the first byte in the extent, and the
583  * key type is set to BTRFS_EXTENT_REF_KEY
584  */
585 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
586                                  struct btrfs_root *root,
587                                  struct btrfs_path *path, u64 bytenr,
588                                  u64 root_objectid, u64 ref_generation,
589                                  u64 owner, u64 owner_offset)
590 {
591         u64 hash;
592         struct btrfs_key key;
593         struct btrfs_extent_ref ref;
594         struct btrfs_extent_ref *disk_ref;
595         int ret;
596
597         btrfs_set_stack_ref_root(&ref, root_objectid);
598         btrfs_set_stack_ref_generation(&ref, ref_generation);
599         btrfs_set_stack_ref_objectid(&ref, owner);
600         btrfs_set_stack_ref_offset(&ref, owner_offset);
601
602         hash = hash_extent_ref(root_objectid, ref_generation, owner,
603                                owner_offset);
604         key.offset = hash;
605         key.objectid = bytenr;
606         key.type = BTRFS_EXTENT_REF_KEY;
607
608         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
609         while (ret == -EEXIST) {
610                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
611                                           struct btrfs_extent_ref);
612                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
613                         goto out;
614                 key.offset++;
615                 btrfs_release_path(root, path);
616                 ret = btrfs_insert_empty_item(trans, root, path, &key,
617                                               sizeof(ref));
618         }
619         if (ret)
620                 goto out;
621         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
622                                   struct btrfs_extent_ref);
623         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
624                             sizeof(ref));
625         btrfs_mark_buffer_dirty(path->nodes[0]);
626 out:
627         btrfs_release_path(root, path);
628         return ret;
629 }
630
631 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
632                                 struct btrfs_root *root,
633                                 u64 bytenr, u64 num_bytes,
634                                 u64 root_objectid, u64 ref_generation,
635                                 u64 owner, u64 owner_offset)
636 {
637         struct btrfs_path *path;
638         int ret;
639         struct btrfs_key key;
640         struct extent_buffer *l;
641         struct btrfs_extent_item *item;
642         u32 refs;
643
644         WARN_ON(num_bytes < root->sectorsize);
645         path = btrfs_alloc_path();
646         if (!path)
647                 return -ENOMEM;
648
649         path->reada = 0;
650         key.objectid = bytenr;
651         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
652         key.offset = num_bytes;
653         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
654                                 0, 1);
655         if (ret < 0)
656                 return ret;
657         if (ret != 0) {
658                 BUG();
659         }
660         BUG_ON(ret != 0);
661         l = path->nodes[0];
662         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
663         refs = btrfs_extent_refs(l, item);
664         btrfs_set_extent_refs(l, item, refs + 1);
665         btrfs_mark_buffer_dirty(path->nodes[0]);
666
667         btrfs_release_path(root->fs_info->extent_root, path);
668
669         path->reada = 0;
670         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
671                                           path, bytenr, root_objectid,
672                                           ref_generation, owner, owner_offset);
673         BUG_ON(ret);
674         finish_current_insert(trans, root->fs_info->extent_root);
675         del_pending_extents(trans, root->fs_info->extent_root);
676
677         btrfs_free_path(path);
678         return 0;
679 }
680
681 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
682                          struct btrfs_root *root)
683 {
684         finish_current_insert(trans, root->fs_info->extent_root);
685         del_pending_extents(trans, root->fs_info->extent_root);
686         return 0;
687 }
688
689 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
690                              struct btrfs_root *root, u64 bytenr,
691                              u64 num_bytes, u32 *refs)
692 {
693         struct btrfs_path *path;
694         int ret;
695         struct btrfs_key key;
696         struct extent_buffer *l;
697         struct btrfs_extent_item *item;
698
699         WARN_ON(num_bytes < root->sectorsize);
700         path = btrfs_alloc_path();
701         path->reada = 0;
702         key.objectid = bytenr;
703         key.offset = num_bytes;
704         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
705         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
706                                 0, 0);
707         if (ret < 0)
708                 goto out;
709         if (ret != 0) {
710                 btrfs_print_leaf(root, path->nodes[0]);
711                 printk("failed to find block number %Lu\n", bytenr);
712                 BUG();
713         }
714         l = path->nodes[0];
715         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
716         *refs = btrfs_extent_refs(l, item);
717 out:
718         btrfs_free_path(path);
719         return 0;
720 }
721
722 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
723                                   struct btrfs_path *count_path,
724                                   u64 first_extent)
725 {
726         struct btrfs_root *extent_root = root->fs_info->extent_root;
727         struct btrfs_path *path;
728         u64 bytenr;
729         u64 found_objectid;
730         u64 root_objectid = root->root_key.objectid;
731         u32 total_count = 0;
732         u32 cur_count;
733         u32 nritems;
734         int ret;
735         struct btrfs_key key;
736         struct btrfs_key found_key;
737         struct extent_buffer *l;
738         struct btrfs_extent_item *item;
739         struct btrfs_extent_ref *ref_item;
740         int level = -1;
741
742         path = btrfs_alloc_path();
743 again:
744         if (level == -1)
745                 bytenr = first_extent;
746         else
747                 bytenr = count_path->nodes[level]->start;
748
749         cur_count = 0;
750         key.objectid = bytenr;
751         key.offset = 0;
752
753         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
754         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
755         if (ret < 0)
756                 goto out;
757         BUG_ON(ret == 0);
758
759         l = path->nodes[0];
760         btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
761
762         if (found_key.objectid != bytenr ||
763             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
764                 goto out;
765         }
766
767         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
768         while (1) {
769                 l = path->nodes[0];
770                 nritems = btrfs_header_nritems(l);
771                 if (path->slots[0] >= nritems) {
772                         ret = btrfs_next_leaf(extent_root, path);
773                         if (ret == 0)
774                                 continue;
775                         break;
776                 }
777                 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
778                 if (found_key.objectid != bytenr)
779                         break;
780
781                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
782                         path->slots[0]++;
783                         continue;
784                 }
785
786                 cur_count++;
787                 ref_item = btrfs_item_ptr(l, path->slots[0],
788                                           struct btrfs_extent_ref);
789                 found_objectid = btrfs_ref_root(l, ref_item);
790
791                 if (found_objectid != root_objectid) {
792                         total_count = 2;
793                         goto out;
794                 }
795                 total_count = 1;
796                 path->slots[0]++;
797         }
798         if (cur_count == 0) {
799                 total_count = 0;
800                 goto out;
801         }
802         if (level >= 0 && root->node == count_path->nodes[level])
803                 goto out;
804         level++;
805         btrfs_release_path(root, path);
806         goto again;
807
808 out:
809         btrfs_free_path(path);
810         return total_count;
811 }
812 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
813                        struct btrfs_root *root, u64 owner_objectid)
814 {
815         u64 generation;
816         u64 key_objectid;
817         u64 level;
818         u32 nritems;
819         struct btrfs_disk_key disk_key;
820
821         level = btrfs_header_level(root->node);
822         generation = trans->transid;
823         nritems = btrfs_header_nritems(root->node);
824         if (nritems > 0) {
825                 if (level == 0)
826                         btrfs_item_key(root->node, &disk_key, 0);
827                 else
828                         btrfs_node_key(root->node, &disk_key, 0);
829                 key_objectid = btrfs_disk_key_objectid(&disk_key);
830         } else {
831                 key_objectid = 0;
832         }
833         return btrfs_inc_extent_ref(trans, root, root->node->start,
834                                     root->node->len, owner_objectid,
835                                     generation, level, key_objectid);
836 }
837
838 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
839                   struct extent_buffer *buf)
840 {
841         u64 bytenr;
842         u32 nritems;
843         struct btrfs_key key;
844         struct btrfs_file_extent_item *fi;
845         int i;
846         int level;
847         int ret;
848         int faili;
849
850         if (!root->ref_cows)
851                 return 0;
852
853         level = btrfs_header_level(buf);
854         nritems = btrfs_header_nritems(buf);
855         for (i = 0; i < nritems; i++) {
856                 if (level == 0) {
857                         u64 disk_bytenr;
858                         btrfs_item_key_to_cpu(buf, &key, i);
859                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
860                                 continue;
861                         fi = btrfs_item_ptr(buf, i,
862                                             struct btrfs_file_extent_item);
863                         if (btrfs_file_extent_type(buf, fi) ==
864                             BTRFS_FILE_EXTENT_INLINE)
865                                 continue;
866                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
867                         if (disk_bytenr == 0)
868                                 continue;
869                         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
870                                     btrfs_file_extent_disk_num_bytes(buf, fi),
871                                     root->root_key.objectid, trans->transid,
872                                     key.objectid, key.offset);
873                         if (ret) {
874                                 faili = i;
875                                 goto fail;
876                         }
877                 } else {
878                         bytenr = btrfs_node_blockptr(buf, i);
879                         btrfs_node_key_to_cpu(buf, &key, i);
880                         ret = btrfs_inc_extent_ref(trans, root, bytenr,
881                                            btrfs_level_size(root, level - 1),
882                                            root->root_key.objectid,
883                                            trans->transid,
884                                            level - 1, key.objectid);
885                         if (ret) {
886                                 faili = i;
887                                 goto fail;
888                         }
889                 }
890         }
891         return 0;
892 fail:
893         WARN_ON(1);
894 #if 0
895         for (i =0; i < faili; i++) {
896                 if (level == 0) {
897                         u64 disk_bytenr;
898                         btrfs_item_key_to_cpu(buf, &key, i);
899                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
900                                 continue;
901                         fi = btrfs_item_ptr(buf, i,
902                                             struct btrfs_file_extent_item);
903                         if (btrfs_file_extent_type(buf, fi) ==
904                             BTRFS_FILE_EXTENT_INLINE)
905                                 continue;
906                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
907                         if (disk_bytenr == 0)
908                                 continue;
909                         err = btrfs_free_extent(trans, root, disk_bytenr,
910                                     btrfs_file_extent_disk_num_bytes(buf,
911                                                                       fi), 0);
912                         BUG_ON(err);
913                 } else {
914                         bytenr = btrfs_node_blockptr(buf, i);
915                         err = btrfs_free_extent(trans, root, bytenr,
916                                         btrfs_level_size(root, level - 1), 0);
917                         BUG_ON(err);
918                 }
919         }
920 #endif
921         return ret;
922 }
923
924 static int write_one_cache_group(struct btrfs_trans_handle *trans,
925                                  struct btrfs_root *root,
926                                  struct btrfs_path *path,
927                                  struct btrfs_block_group_cache *cache)
928 {
929         int ret;
930         int pending_ret;
931         struct btrfs_root *extent_root = root->fs_info->extent_root;
932         unsigned long bi;
933         struct extent_buffer *leaf;
934
935         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
936         if (ret < 0)
937                 goto fail;
938         BUG_ON(ret);
939
940         leaf = path->nodes[0];
941         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
942         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
943         btrfs_mark_buffer_dirty(leaf);
944         btrfs_release_path(extent_root, path);
945 fail:
946         finish_current_insert(trans, extent_root);
947         pending_ret = del_pending_extents(trans, extent_root);
948         if (ret)
949                 return ret;
950         if (pending_ret)
951                 return pending_ret;
952         return 0;
953
954 }
955
956 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
957                                    struct btrfs_root *root)
958 {
959         struct extent_io_tree *block_group_cache;
960         struct btrfs_block_group_cache *cache;
961         int ret;
962         int err = 0;
963         int werr = 0;
964         struct btrfs_path *path;
965         u64 last = 0;
966         u64 start;
967         u64 end;
968         u64 ptr;
969
970         block_group_cache = &root->fs_info->block_group_cache;
971         path = btrfs_alloc_path();
972         if (!path)
973                 return -ENOMEM;
974
975         while(1) {
976                 ret = find_first_extent_bit(block_group_cache, last,
977                                             &start, &end, BLOCK_GROUP_DIRTY);
978                 if (ret)
979                         break;
980
981                 last = end + 1;
982                 ret = get_state_private(block_group_cache, start, &ptr);
983                 if (ret)
984                         break;
985
986                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
987                 err = write_one_cache_group(trans, root,
988                                             path, cache);
989                 /*
990                  * if we fail to write the cache group, we want
991                  * to keep it marked dirty in hopes that a later
992                  * write will work
993                  */
994                 if (err) {
995                         werr = err;
996                         continue;
997                 }
998                 clear_extent_bits(block_group_cache, start, end,
999                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
1000         }
1001         btrfs_free_path(path);
1002         return werr;
1003 }
1004
1005 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
1006                                                   u64 flags)
1007 {
1008         struct list_head *head = &info->space_info;
1009         struct list_head *cur;
1010         struct btrfs_space_info *found;
1011         list_for_each(cur, head) {
1012                 found = list_entry(cur, struct btrfs_space_info, list);
1013                 if (found->flags == flags)
1014                         return found;
1015         }
1016         return NULL;
1017
1018 }
1019
1020 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1021                              u64 total_bytes, u64 bytes_used,
1022                              struct btrfs_space_info **space_info)
1023 {
1024         struct btrfs_space_info *found;
1025
1026         found = __find_space_info(info, flags);
1027         if (found) {
1028                 found->total_bytes += total_bytes;
1029                 found->bytes_used += bytes_used;
1030                 WARN_ON(found->total_bytes < found->bytes_used);
1031                 *space_info = found;
1032                 return 0;
1033         }
1034         found = kmalloc(sizeof(*found), GFP_NOFS);
1035         if (!found)
1036                 return -ENOMEM;
1037
1038         list_add(&found->list, &info->space_info);
1039         found->flags = flags;
1040         found->total_bytes = total_bytes;
1041         found->bytes_used = bytes_used;
1042         found->bytes_pinned = 0;
1043         found->full = 0;
1044         *space_info = found;
1045         return 0;
1046 }
1047
1048 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1049 {
1050         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1051                                    BTRFS_BLOCK_GROUP_RAID1);
1052         if (extra_flags) {
1053                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1054                         fs_info->avail_data_alloc_bits |= extra_flags;
1055                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1056                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1057                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1058                         fs_info->avail_system_alloc_bits |= extra_flags;
1059         }
1060 }
1061
1062 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1063                           struct btrfs_root *extent_root, u64 alloc_bytes,
1064                           u64 flags)
1065 {
1066         struct btrfs_space_info *space_info;
1067         u64 thresh;
1068         u64 start;
1069         u64 num_bytes;
1070         int ret;
1071
1072         space_info = __find_space_info(extent_root->fs_info, flags);
1073         if (!space_info) {
1074                 ret = update_space_info(extent_root->fs_info, flags,
1075                                         0, 0, &space_info);
1076                 BUG_ON(ret);
1077         }
1078         BUG_ON(!space_info);
1079
1080         if (space_info->full)
1081                 return 0;
1082
1083         thresh = div_factor(space_info->total_bytes, 6);
1084         if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1085             thresh)
1086                 return 0;
1087
1088         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1089         if (ret == -ENOSPC) {
1090 printk("space info full %Lu\n", flags);
1091                 space_info->full = 1;
1092                 return 0;
1093         }
1094
1095         BUG_ON(ret);
1096
1097         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1098                      extent_root->fs_info->chunk_root->root_key.objectid,
1099                      start, num_bytes);
1100         BUG_ON(ret);
1101
1102         set_avail_alloc_bits(extent_root->fs_info, flags);
1103         return 0;
1104 }
1105
1106 static int update_block_group(struct btrfs_trans_handle *trans,
1107                               struct btrfs_root *root,
1108                               u64 bytenr, u64 num_bytes, int alloc,
1109                               int mark_free)
1110 {
1111         struct btrfs_block_group_cache *cache;
1112         struct btrfs_fs_info *info = root->fs_info;
1113         u64 total = num_bytes;
1114         u64 old_val;
1115         u64 byte_in_group;
1116         u64 start;
1117         u64 end;
1118
1119         while(total) {
1120                 cache = btrfs_lookup_block_group(info, bytenr);
1121                 if (!cache) {
1122                         return -1;
1123                 }
1124                 byte_in_group = bytenr - cache->key.objectid;
1125                 WARN_ON(byte_in_group > cache->key.offset);
1126                 start = cache->key.objectid;
1127                 end = start + cache->key.offset - 1;
1128                 set_extent_bits(&info->block_group_cache, start, end,
1129                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
1130
1131                 old_val = btrfs_block_group_used(&cache->item);
1132                 num_bytes = min(total, cache->key.offset - byte_in_group);
1133                 if (alloc) {
1134                         old_val += num_bytes;
1135                         cache->space_info->bytes_used += num_bytes;
1136                 } else {
1137                         old_val -= num_bytes;
1138                         cache->space_info->bytes_used -= num_bytes;
1139                         if (mark_free) {
1140                                 set_extent_dirty(&info->free_space_cache,
1141                                                  bytenr, bytenr + num_bytes - 1,
1142                                                  GFP_NOFS);
1143                         }
1144                 }
1145                 btrfs_set_block_group_used(&cache->item, old_val);
1146                 total -= num_bytes;
1147                 bytenr += num_bytes;
1148         }
1149         return 0;
1150 }
1151
1152 static int update_pinned_extents(struct btrfs_root *root,
1153                                 u64 bytenr, u64 num, int pin)
1154 {
1155         u64 len;
1156         struct btrfs_block_group_cache *cache;
1157         struct btrfs_fs_info *fs_info = root->fs_info;
1158
1159         if (pin) {
1160                 set_extent_dirty(&fs_info->pinned_extents,
1161                                 bytenr, bytenr + num - 1, GFP_NOFS);
1162         } else {
1163                 clear_extent_dirty(&fs_info->pinned_extents,
1164                                 bytenr, bytenr + num - 1, GFP_NOFS);
1165         }
1166         while (num > 0) {
1167                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1168                 WARN_ON(!cache);
1169                 len = min(num, cache->key.offset -
1170                           (bytenr - cache->key.objectid));
1171                 if (pin) {
1172                         cache->pinned += len;
1173                         cache->space_info->bytes_pinned += len;
1174                         fs_info->total_pinned += len;
1175                 } else {
1176                         cache->pinned -= len;
1177                         cache->space_info->bytes_pinned -= len;
1178                         fs_info->total_pinned -= len;
1179                 }
1180                 bytenr += len;
1181                 num -= len;
1182         }
1183         return 0;
1184 }
1185
1186 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1187 {
1188         u64 last = 0;
1189         u64 start;
1190         u64 end;
1191         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1192         int ret;
1193
1194         while(1) {
1195                 ret = find_first_extent_bit(pinned_extents, last,
1196                                             &start, &end, EXTENT_DIRTY);
1197                 if (ret)
1198                         break;
1199                 set_extent_dirty(copy, start, end, GFP_NOFS);
1200                 last = end + 1;
1201         }
1202         return 0;
1203 }
1204
1205 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1206                                struct btrfs_root *root,
1207                                struct extent_io_tree *unpin)
1208 {
1209         u64 start;
1210         u64 end;
1211         int ret;
1212         struct extent_io_tree *free_space_cache;
1213         free_space_cache = &root->fs_info->free_space_cache;
1214
1215         while(1) {
1216                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1217                                             EXTENT_DIRTY);
1218                 if (ret)
1219                         break;
1220                 update_pinned_extents(root, start, end + 1 - start, 0);
1221                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1222                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1223         }
1224         return 0;
1225 }
1226
1227 static int finish_current_insert(struct btrfs_trans_handle *trans,
1228                                  struct btrfs_root *extent_root)
1229 {
1230         u64 start;
1231         u64 end;
1232         struct btrfs_fs_info *info = extent_root->fs_info;
1233         struct extent_buffer *eb;
1234         struct btrfs_path *path;
1235         struct btrfs_key ins;
1236         struct btrfs_disk_key first;
1237         struct btrfs_extent_item extent_item;
1238         int ret;
1239         int level;
1240         int err = 0;
1241
1242         btrfs_set_stack_extent_refs(&extent_item, 1);
1243         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1244         path = btrfs_alloc_path();
1245
1246         while(1) {
1247                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1248                                             &end, EXTENT_LOCKED);
1249                 if (ret)
1250                         break;
1251
1252                 ins.objectid = start;
1253                 ins.offset = end + 1 - start;
1254                 err = btrfs_insert_item(trans, extent_root, &ins,
1255                                         &extent_item, sizeof(extent_item));
1256                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1257                                   GFP_NOFS);
1258                 eb = read_tree_block(extent_root, ins.objectid, ins.offset);
1259                 level = btrfs_header_level(eb);
1260                 if (level == 0) {
1261                         btrfs_item_key(eb, &first, 0);
1262                 } else {
1263                         btrfs_node_key(eb, &first, 0);
1264                 }
1265                 err = btrfs_insert_extent_backref(trans, extent_root, path,
1266                                           start, extent_root->root_key.objectid,
1267                                           0, level,
1268                                           btrfs_disk_key_objectid(&first));
1269                 BUG_ON(err);
1270                 free_extent_buffer(eb);
1271         }
1272         btrfs_free_path(path);
1273         return 0;
1274 }
1275
1276 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1277                           int pending)
1278 {
1279         int err = 0;
1280         struct extent_buffer *buf;
1281
1282         if (!pending) {
1283                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1284                 if (buf) {
1285                         if (btrfs_buffer_uptodate(buf)) {
1286                                 u64 transid =
1287                                     root->fs_info->running_transaction->transid;
1288                                 u64 header_transid =
1289                                         btrfs_header_generation(buf);
1290                                 if (header_transid == transid) {
1291                                         clean_tree_block(NULL, root, buf);
1292                                         free_extent_buffer(buf);
1293                                         return 1;
1294                                 }
1295                         }
1296                         free_extent_buffer(buf);
1297                 }
1298                 update_pinned_extents(root, bytenr, num_bytes, 1);
1299         } else {
1300                 set_extent_bits(&root->fs_info->pending_del,
1301                                 bytenr, bytenr + num_bytes - 1,
1302                                 EXTENT_LOCKED, GFP_NOFS);
1303         }
1304         BUG_ON(err < 0);
1305         return 0;
1306 }
1307
1308 /*
1309  * remove an extent from the root, returns 0 on success
1310  */
1311 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1312                          *root, u64 bytenr, u64 num_bytes,
1313                          u64 root_objectid, u64 ref_generation,
1314                          u64 owner_objectid, u64 owner_offset, int pin,
1315                          int mark_free)
1316 {
1317         struct btrfs_path *path;
1318         struct btrfs_key key;
1319         struct btrfs_fs_info *info = root->fs_info;
1320         struct btrfs_root *extent_root = info->extent_root;
1321         struct extent_buffer *leaf;
1322         int ret;
1323         int extent_slot = 0;
1324         int found_extent = 0;
1325         int num_to_del = 1;
1326         struct btrfs_extent_item *ei;
1327         u32 refs;
1328
1329         key.objectid = bytenr;
1330         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1331         key.offset = num_bytes;
1332         path = btrfs_alloc_path();
1333         if (!path)
1334                 return -ENOMEM;
1335
1336         path->reada = 0;
1337         ret = lookup_extent_backref(trans, extent_root, path,
1338                                     bytenr, root_objectid,
1339                                     ref_generation,
1340                                     owner_objectid, owner_offset, 1);
1341         if (ret == 0) {
1342                 struct btrfs_key found_key;
1343                 extent_slot = path->slots[0];
1344                 while(extent_slot > 0) {
1345                         extent_slot--;
1346                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1347                                               extent_slot);
1348                         if (found_key.objectid != bytenr)
1349                                 break;
1350                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1351                             found_key.offset == num_bytes) {
1352                                 found_extent = 1;
1353                                 break;
1354                         }
1355                         if (path->slots[0] - extent_slot > 5)
1356                                 break;
1357                 }
1358                 if (!found_extent)
1359                         ret = btrfs_del_item(trans, extent_root, path);
1360         } else {
1361                 btrfs_print_leaf(extent_root, path->nodes[0]);
1362                 WARN_ON(1);
1363                 printk("Unable to find ref byte nr %Lu root %Lu "
1364                        " gen %Lu owner %Lu offset %Lu\n", bytenr,
1365                        root_objectid, ref_generation, owner_objectid,
1366                        owner_offset);
1367         }
1368         if (!found_extent) {
1369                 btrfs_release_path(extent_root, path);
1370                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1371                 if (ret < 0)
1372                         return ret;
1373                 BUG_ON(ret);
1374                 extent_slot = path->slots[0];
1375         }
1376
1377         leaf = path->nodes[0];
1378         ei = btrfs_item_ptr(leaf, extent_slot,
1379                             struct btrfs_extent_item);
1380         refs = btrfs_extent_refs(leaf, ei);
1381         BUG_ON(refs == 0);
1382         refs -= 1;
1383         btrfs_set_extent_refs(leaf, ei, refs);
1384
1385         btrfs_mark_buffer_dirty(leaf);
1386
1387         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1388                 /* if the back ref and the extent are next to each other
1389                  * they get deleted below in one shot
1390                  */
1391                 path->slots[0] = extent_slot;
1392                 num_to_del = 2;
1393         } else if (found_extent) {
1394                 /* otherwise delete the extent back ref */
1395                 ret = btrfs_del_item(trans, extent_root, path);
1396                 BUG_ON(ret);
1397                 /* if refs are 0, we need to setup the path for deletion */
1398                 if (refs == 0) {
1399                         btrfs_release_path(extent_root, path);
1400                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1401                                                 -1, 1);
1402                         if (ret < 0)
1403                                 return ret;
1404                         BUG_ON(ret);
1405                 }
1406         }
1407
1408         if (refs == 0) {
1409                 u64 super_used;
1410                 u64 root_used;
1411
1412                 if (pin) {
1413                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1414                         if (ret > 0)
1415                                 mark_free = 1;
1416                         BUG_ON(ret < 0);
1417                 }
1418
1419                 /* block accounting for super block */
1420                 super_used = btrfs_super_bytes_used(&info->super_copy);
1421                 btrfs_set_super_bytes_used(&info->super_copy,
1422                                            super_used - num_bytes);
1423
1424                 /* block accounting for root item */
1425                 root_used = btrfs_root_used(&root->root_item);
1426                 btrfs_set_root_used(&root->root_item,
1427                                            root_used - num_bytes);
1428                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1429                                       num_to_del);
1430                 if (ret) {
1431                         return ret;
1432                 }
1433                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1434                                          mark_free);
1435                 BUG_ON(ret);
1436         }
1437         btrfs_free_path(path);
1438         finish_current_insert(trans, extent_root);
1439         return ret;
1440 }
1441
1442 /*
1443  * find all the blocks marked as pending in the radix tree and remove
1444  * them from the extent map
1445  */
1446 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1447                                btrfs_root *extent_root)
1448 {
1449         int ret;
1450         int err = 0;
1451         u64 start;
1452         u64 end;
1453         struct extent_io_tree *pending_del;
1454         struct extent_io_tree *pinned_extents;
1455
1456         pending_del = &extent_root->fs_info->pending_del;
1457         pinned_extents = &extent_root->fs_info->pinned_extents;
1458
1459         while(1) {
1460                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1461                                             EXTENT_LOCKED);
1462                 if (ret)
1463                         break;
1464                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1465                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1466                                   GFP_NOFS);
1467                 ret = __free_extent(trans, extent_root,
1468                                      start, end + 1 - start,
1469                                      extent_root->root_key.objectid,
1470                                      0, 0, 0, 0, 0);
1471                 if (ret)
1472                         err = ret;
1473         }
1474         return err;
1475 }
1476
1477 /*
1478  * remove an extent from the root, returns 0 on success
1479  */
1480 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1481                       *root, u64 bytenr, u64 num_bytes,
1482                       u64 root_objectid, u64 ref_generation,
1483                       u64 owner_objectid, u64 owner_offset, int pin)
1484 {
1485         struct btrfs_root *extent_root = root->fs_info->extent_root;
1486         int pending_ret;
1487         int ret;
1488
1489         WARN_ON(num_bytes < root->sectorsize);
1490         if (!root->ref_cows)
1491                 ref_generation = 0;
1492
1493         if (root == extent_root) {
1494                 pin_down_bytes(root, bytenr, num_bytes, 1);
1495                 return 0;
1496         }
1497         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1498                             ref_generation, owner_objectid, owner_offset,
1499                             pin, pin == 0);
1500         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1501         return ret ? ret : pending_ret;
1502 }
1503
1504 static u64 stripe_align(struct btrfs_root *root, u64 val)
1505 {
1506         u64 mask = ((u64)root->stripesize - 1);
1507         u64 ret = (val + mask) & ~mask;
1508         return ret;
1509 }
1510
1511 /*
1512  * walks the btree of allocated extents and find a hole of a given size.
1513  * The key ins is changed to record the hole:
1514  * ins->objectid == block start
1515  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1516  * ins->offset == number of blocks
1517  * Any available blocks before search_start are skipped.
1518  */
1519 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1520                                      struct btrfs_root *orig_root,
1521                                      u64 num_bytes, u64 empty_size,
1522                                      u64 search_start, u64 search_end,
1523                                      u64 hint_byte, struct btrfs_key *ins,
1524                                      u64 exclude_start, u64 exclude_nr,
1525                                      int data)
1526 {
1527         int ret;
1528         u64 orig_search_start = search_start;
1529         struct btrfs_root * root = orig_root->fs_info->extent_root;
1530         struct btrfs_fs_info *info = root->fs_info;
1531         u64 total_needed = num_bytes;
1532         u64 *last_ptr = NULL;
1533         struct btrfs_block_group_cache *block_group;
1534         int full_scan = 0;
1535         int wrapped = 0;
1536         int empty_cluster = 2 * 1024 * 1024;
1537
1538         WARN_ON(num_bytes < root->sectorsize);
1539         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1540
1541         if (data & BTRFS_BLOCK_GROUP_METADATA) {
1542                 last_ptr = &root->fs_info->last_alloc;
1543                 empty_cluster = 256 * 1024;
1544         }
1545
1546         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
1547                 last_ptr = &root->fs_info->last_data_alloc;
1548         }
1549
1550         if (last_ptr) {
1551                 if (*last_ptr)
1552                         hint_byte = *last_ptr;
1553                 else {
1554                         empty_size += empty_cluster;
1555                 }
1556         }
1557
1558         if (search_end == (u64)-1)
1559                 search_end = btrfs_super_total_bytes(&info->super_copy);
1560
1561         if (hint_byte) {
1562                 block_group = btrfs_lookup_block_group(info, hint_byte);
1563                 if (!block_group)
1564                         hint_byte = search_start;
1565                 block_group = btrfs_find_block_group(root, block_group,
1566                                                      hint_byte, data, 1);
1567                 if (last_ptr && *last_ptr == 0 && block_group)
1568                         hint_byte = block_group->key.objectid;
1569         } else {
1570                 block_group = btrfs_find_block_group(root,
1571                                                      trans->block_group,
1572                                                      search_start, data, 1);
1573         }
1574         search_start = max(search_start, hint_byte);
1575
1576         total_needed += empty_size;
1577
1578 check_failed:
1579         if (!block_group) {
1580                 block_group = btrfs_lookup_block_group(info, search_start);
1581                 if (!block_group)
1582                         block_group = btrfs_lookup_block_group(info,
1583                                                        orig_search_start);
1584         }
1585         ret = find_search_start(root, &block_group, &search_start,
1586                                 total_needed, data);
1587         if (ret == -ENOSPC && last_ptr && *last_ptr) {
1588                 *last_ptr = 0;
1589                 block_group = btrfs_lookup_block_group(info,
1590                                                        orig_search_start);
1591                 search_start = orig_search_start;
1592                 ret = find_search_start(root, &block_group, &search_start,
1593                                         total_needed, data);
1594         }
1595         if (ret == -ENOSPC)
1596                 goto enospc;
1597         if (ret)
1598                 goto error;
1599
1600         if (last_ptr && *last_ptr && search_start != *last_ptr) {
1601                 *last_ptr = 0;
1602                 if (!empty_size) {
1603                         empty_size += empty_cluster;
1604                         total_needed += empty_size;
1605                 }
1606                 block_group = btrfs_lookup_block_group(info,
1607                                                        orig_search_start);
1608                 search_start = orig_search_start;
1609                 ret = find_search_start(root, &block_group,
1610                                         &search_start, total_needed, data);
1611                 if (ret == -ENOSPC)
1612                         goto enospc;
1613                 if (ret)
1614                         goto error;
1615         }
1616
1617         search_start = stripe_align(root, search_start);
1618         ins->objectid = search_start;
1619         ins->offset = num_bytes;
1620
1621         if (ins->objectid + num_bytes >= search_end)
1622                 goto enospc;
1623
1624         if (ins->objectid + num_bytes >
1625             block_group->key.objectid + block_group->key.offset) {
1626                 search_start = block_group->key.objectid +
1627                         block_group->key.offset;
1628                 goto new_group;
1629         }
1630
1631         if (test_range_bit(&info->extent_ins, ins->objectid,
1632                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1633                 search_start = ins->objectid + num_bytes;
1634                 goto new_group;
1635         }
1636
1637         if (test_range_bit(&info->pinned_extents, ins->objectid,
1638                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1639                 search_start = ins->objectid + num_bytes;
1640                 goto new_group;
1641         }
1642
1643         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1644             ins->objectid < exclude_start + exclude_nr)) {
1645                 search_start = exclude_start + exclude_nr;
1646                 goto new_group;
1647         }
1648
1649         if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
1650                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1651                 if (block_group)
1652                         trans->block_group = block_group;
1653         }
1654         ins->offset = num_bytes;
1655         if (last_ptr) {
1656                 *last_ptr = ins->objectid + ins->offset;
1657                 if (*last_ptr ==
1658                     btrfs_super_total_bytes(&root->fs_info->super_copy)) {
1659                         *last_ptr = 0;
1660                 }
1661         }
1662         return 0;
1663
1664 new_group:
1665         if (search_start + num_bytes >= search_end) {
1666 enospc:
1667                 search_start = orig_search_start;
1668                 if (full_scan) {
1669                         ret = -ENOSPC;
1670                         goto error;
1671                 }
1672                 if (wrapped) {
1673                         if (!full_scan)
1674                                 total_needed -= empty_size;
1675                         full_scan = 1;
1676                 } else
1677                         wrapped = 1;
1678         }
1679         block_group = btrfs_lookup_block_group(info, search_start);
1680         cond_resched();
1681         block_group = btrfs_find_block_group(root, block_group,
1682                                              search_start, data, 0);
1683         goto check_failed;
1684
1685 error:
1686         return ret;
1687 }
1688 /*
1689  * finds a free extent and does all the dirty work required for allocation
1690  * returns the key for the extent through ins, and a tree buffer for
1691  * the first block of the extent through buf.
1692  *
1693  * returns 0 if everything worked, non-zero otherwise.
1694  */
1695 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1696                        struct btrfs_root *root,
1697                        u64 num_bytes, u64 root_objectid, u64 ref_generation,
1698                        u64 owner, u64 owner_offset,
1699                        u64 empty_size, u64 hint_byte,
1700                        u64 search_end, struct btrfs_key *ins, int data)
1701 {
1702         int ret;
1703         int pending_ret;
1704         u64 super_used;
1705         u64 root_used;
1706         u64 search_start = 0;
1707         u64 new_hint;
1708         u64 alloc_profile;
1709         u32 sizes[2];
1710         struct btrfs_fs_info *info = root->fs_info;
1711         struct btrfs_root *extent_root = info->extent_root;
1712         struct btrfs_extent_item *extent_item;
1713         struct btrfs_extent_ref *ref;
1714         struct btrfs_path *path;
1715         struct btrfs_key keys[2];
1716
1717         if (data) {
1718                 alloc_profile = info->avail_data_alloc_bits &
1719                                 info->data_alloc_profile;
1720                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
1721         } else if (root == root->fs_info->chunk_root) {
1722                 alloc_profile = info->avail_system_alloc_bits &
1723                                 info->system_alloc_profile;
1724                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
1725         } else {
1726                 alloc_profile = info->avail_metadata_alloc_bits &
1727                                 info->metadata_alloc_profile;
1728                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
1729         }
1730
1731         if (root->ref_cows) {
1732                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
1733                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1734                                              2 * 1024 * 1024,
1735                                              BTRFS_BLOCK_GROUP_METADATA |
1736                                              (info->metadata_alloc_profile &
1737                                               info->avail_metadata_alloc_bits));
1738                         BUG_ON(ret);
1739                 }
1740                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1741                                      num_bytes + 2 * 1024 * 1024, data);
1742                 BUG_ON(ret);
1743         }
1744
1745         new_hint = max(hint_byte, root->fs_info->alloc_start);
1746         if (new_hint < btrfs_super_total_bytes(&info->super_copy))
1747                 hint_byte = new_hint;
1748
1749         WARN_ON(num_bytes < root->sectorsize);
1750         ret = find_free_extent(trans, root, num_bytes, empty_size,
1751                                search_start, search_end, hint_byte, ins,
1752                                trans->alloc_exclude_start,
1753                                trans->alloc_exclude_nr, data);
1754         BUG_ON(ret);
1755         if (ret)
1756                 return ret;
1757
1758         /* block accounting for super block */
1759         super_used = btrfs_super_bytes_used(&info->super_copy);
1760         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1761
1762         /* block accounting for root item */
1763         root_used = btrfs_root_used(&root->root_item);
1764         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1765
1766         clear_extent_dirty(&root->fs_info->free_space_cache,
1767                            ins->objectid, ins->objectid + ins->offset - 1,
1768                            GFP_NOFS);
1769
1770         if (root == extent_root) {
1771                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1772                                 ins->objectid + ins->offset - 1,
1773                                 EXTENT_LOCKED, GFP_NOFS);
1774                 goto update_block;
1775         }
1776
1777         WARN_ON(trans->alloc_exclude_nr);
1778         trans->alloc_exclude_start = ins->objectid;
1779         trans->alloc_exclude_nr = ins->offset;
1780
1781         memcpy(&keys[0], ins, sizeof(*ins));
1782         keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
1783                                          owner, owner_offset);
1784         keys[1].objectid = ins->objectid;
1785         keys[1].type = BTRFS_EXTENT_REF_KEY;
1786         sizes[0] = sizeof(*extent_item);
1787         sizes[1] = sizeof(*ref);
1788
1789         path = btrfs_alloc_path();
1790         BUG_ON(!path);
1791
1792         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
1793                                        sizes, 2);
1794
1795         BUG_ON(ret);
1796         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1797                                      struct btrfs_extent_item);
1798         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
1799         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1800                              struct btrfs_extent_ref);
1801
1802         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
1803         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
1804         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
1805         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
1806
1807         btrfs_mark_buffer_dirty(path->nodes[0]);
1808
1809         trans->alloc_exclude_start = 0;
1810         trans->alloc_exclude_nr = 0;
1811         btrfs_free_path(path);
1812         finish_current_insert(trans, extent_root);
1813         pending_ret = del_pending_extents(trans, extent_root);
1814
1815         if (ret) {
1816                 return ret;
1817         }
1818         if (pending_ret) {
1819                 return pending_ret;
1820         }
1821
1822 update_block:
1823         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
1824         if (ret) {
1825                 printk("update block group failed for %Lu %Lu\n",
1826                        ins->objectid, ins->offset);
1827                 BUG();
1828         }
1829         return 0;
1830 }
1831
1832 /*
1833  * helper function to allocate a block for a given tree
1834  * returns the tree buffer or NULL.
1835  */
1836 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1837                                              struct btrfs_root *root,
1838                                              u32 blocksize,
1839                                              u64 root_objectid, u64 hint,
1840                                              u64 empty_size)
1841 {
1842         u64 ref_generation;
1843
1844         if (root->ref_cows)
1845                 ref_generation = trans->transid;
1846         else
1847                 ref_generation = 0;
1848
1849
1850         return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
1851                                         ref_generation, 0, 0, hint, empty_size);
1852 }
1853
1854 /*
1855  * helper function to allocate a block for a given tree
1856  * returns the tree buffer or NULL.
1857  */
1858 struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1859                                              struct btrfs_root *root,
1860                                              u32 blocksize,
1861                                              u64 root_objectid,
1862                                              u64 ref_generation,
1863                                              u64 first_objectid,
1864                                              int level,
1865                                              u64 hint,
1866                                              u64 empty_size)
1867 {
1868         struct btrfs_key ins;
1869         int ret;
1870         struct extent_buffer *buf;
1871
1872         ret = btrfs_alloc_extent(trans, root, blocksize,
1873                                  root_objectid, ref_generation,
1874                                  level, first_objectid, empty_size, hint,
1875                                  (u64)-1, &ins, 0);
1876         if (ret) {
1877                 BUG_ON(ret > 0);
1878                 return ERR_PTR(ret);
1879         }
1880         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1881         if (!buf) {
1882                 btrfs_free_extent(trans, root, ins.objectid, blocksize,
1883                                   root->root_key.objectid, ref_generation,
1884                                   0, 0, 0);
1885                 return ERR_PTR(-ENOMEM);
1886         }
1887         btrfs_set_header_generation(buf, trans->transid);
1888         clean_tree_block(trans, root, buf);
1889         wait_on_tree_block_writeback(root, buf);
1890         btrfs_set_buffer_uptodate(buf);
1891
1892         if (PageDirty(buf->first_page)) {
1893                 printk("page %lu dirty\n", buf->first_page->index);
1894                 WARN_ON(1);
1895         }
1896
1897         set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
1898                          buf->start + buf->len - 1, GFP_NOFS);
1899         set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->io_tree,
1900                         buf->start, buf->start + buf->len - 1,
1901                         EXTENT_CSUM, GFP_NOFS);
1902         buf->flags |= EXTENT_CSUM;
1903         if (!btrfs_test_opt(root, SSD))
1904                 btrfs_set_buffer_defrag(buf);
1905         trans->blocks_used++;
1906         return buf;
1907 }
1908
1909 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
1910                                   struct btrfs_root *root,
1911                                   struct extent_buffer *leaf)
1912 {
1913         u64 leaf_owner;
1914         u64 leaf_generation;
1915         struct btrfs_key key;
1916         struct btrfs_file_extent_item *fi;
1917         int i;
1918         int nritems;
1919         int ret;
1920
1921         BUG_ON(!btrfs_is_leaf(leaf));
1922         nritems = btrfs_header_nritems(leaf);
1923         leaf_owner = btrfs_header_owner(leaf);
1924         leaf_generation = btrfs_header_generation(leaf);
1925
1926         for (i = 0; i < nritems; i++) {
1927                 u64 disk_bytenr;
1928
1929                 btrfs_item_key_to_cpu(leaf, &key, i);
1930                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1931                         continue;
1932                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1933                 if (btrfs_file_extent_type(leaf, fi) ==
1934                     BTRFS_FILE_EXTENT_INLINE)
1935                         continue;
1936                 /*
1937                  * FIXME make sure to insert a trans record that
1938                  * repeats the snapshot del on crash
1939                  */
1940                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1941                 if (disk_bytenr == 0)
1942                         continue;
1943                 ret = btrfs_free_extent(trans, root, disk_bytenr,
1944                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
1945                                 leaf_owner, leaf_generation,
1946                                 key.objectid, key.offset, 0);
1947                 BUG_ON(ret);
1948         }
1949         return 0;
1950 }
1951
1952 static void noinline reada_walk_down(struct btrfs_root *root,
1953                                      struct extent_buffer *node,
1954                                      int slot)
1955 {
1956         u64 bytenr;
1957         u64 last = 0;
1958         u32 nritems;
1959         u32 refs;
1960         u32 blocksize;
1961         int ret;
1962         int i;
1963         int level;
1964         int skipped = 0;
1965
1966         nritems = btrfs_header_nritems(node);
1967         level = btrfs_header_level(node);
1968         if (level)
1969                 return;
1970
1971         for (i = slot; i < nritems && skipped < 32; i++) {
1972                 bytenr = btrfs_node_blockptr(node, i);
1973                 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
1974                              (last > bytenr && last - bytenr > 32 * 1024))) {
1975                         skipped++;
1976                         continue;
1977                 }
1978                 blocksize = btrfs_level_size(root, level - 1);
1979                 if (i != slot) {
1980                         ret = lookup_extent_ref(NULL, root, bytenr,
1981                                                 blocksize, &refs);
1982                         BUG_ON(ret);
1983                         if (refs != 1) {
1984                                 skipped++;
1985                                 continue;
1986                         }
1987                 }
1988                 mutex_unlock(&root->fs_info->fs_mutex);
1989                 ret = readahead_tree_block(root, bytenr, blocksize);
1990                 last = bytenr + blocksize;
1991                 cond_resched();
1992                 mutex_lock(&root->fs_info->fs_mutex);
1993                 if (ret)
1994                         break;
1995         }
1996 }
1997
1998 /*
1999  * helper function for drop_snapshot, this walks down the tree dropping ref
2000  * counts as it goes.
2001  */
2002 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2003                                    struct btrfs_root *root,
2004                                    struct btrfs_path *path, int *level)
2005 {
2006         u64 root_owner;
2007         u64 root_gen;
2008         u64 bytenr;
2009         struct extent_buffer *next;
2010         struct extent_buffer *cur;
2011         struct extent_buffer *parent;
2012         u32 blocksize;
2013         int ret;
2014         u32 refs;
2015
2016         WARN_ON(*level < 0);
2017         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2018         ret = lookup_extent_ref(trans, root,
2019                                 path->nodes[*level]->start,
2020                                 path->nodes[*level]->len, &refs);
2021         BUG_ON(ret);
2022         if (refs > 1)
2023                 goto out;
2024
2025         /*
2026          * walk down to the last node level and free all the leaves
2027          */
2028         while(*level >= 0) {
2029                 WARN_ON(*level < 0);
2030                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2031                 cur = path->nodes[*level];
2032
2033                 if (btrfs_header_level(cur) != *level)
2034                         WARN_ON(1);
2035
2036                 if (path->slots[*level] >=
2037                     btrfs_header_nritems(cur))
2038                         break;
2039                 if (*level == 0) {
2040                         ret = drop_leaf_ref(trans, root, cur);
2041                         BUG_ON(ret);
2042                         break;
2043                 }
2044                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2045                 blocksize = btrfs_level_size(root, *level - 1);
2046                 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
2047                 BUG_ON(ret);
2048                 if (refs != 1) {
2049                         parent = path->nodes[*level];
2050                         root_owner = btrfs_header_owner(parent);
2051                         root_gen = btrfs_header_generation(parent);
2052                         path->slots[*level]++;
2053                         ret = btrfs_free_extent(trans, root, bytenr,
2054                                                 blocksize, root_owner,
2055                                                 root_gen, 0, 0, 1);
2056                         BUG_ON(ret);
2057                         continue;
2058                 }
2059                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2060                 if (!next || !btrfs_buffer_uptodate(next)) {
2061                         free_extent_buffer(next);
2062                         reada_walk_down(root, cur, path->slots[*level]);
2063
2064                         mutex_unlock(&root->fs_info->fs_mutex);
2065                         next = read_tree_block(root, bytenr, blocksize);
2066                         mutex_lock(&root->fs_info->fs_mutex);
2067
2068                         /* we've dropped the lock, double check */
2069                         ret = lookup_extent_ref(trans, root, bytenr,
2070                                                 blocksize, &refs);
2071                         BUG_ON(ret);
2072                         if (refs != 1) {
2073                                 parent = path->nodes[*level];
2074                                 root_owner = btrfs_header_owner(parent);
2075                                 root_gen = btrfs_header_generation(parent);
2076
2077                                 path->slots[*level]++;
2078                                 free_extent_buffer(next);
2079                                 ret = btrfs_free_extent(trans, root, bytenr,
2080                                                         blocksize,
2081                                                         root_owner,
2082                                                         root_gen, 0, 0, 1);
2083                                 BUG_ON(ret);
2084                                 continue;
2085                         }
2086                 } else if (next) {
2087                         btrfs_verify_block_csum(root, next);
2088                 }
2089                 WARN_ON(*level <= 0);
2090                 if (path->nodes[*level-1])
2091                         free_extent_buffer(path->nodes[*level-1]);
2092                 path->nodes[*level-1] = next;
2093                 *level = btrfs_header_level(next);
2094                 path->slots[*level] = 0;
2095         }
2096 out:
2097         WARN_ON(*level < 0);
2098         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2099
2100         if (path->nodes[*level] == root->node) {
2101                 root_owner = root->root_key.objectid;
2102                 parent = path->nodes[*level];
2103         } else {
2104                 parent = path->nodes[*level + 1];
2105                 root_owner = btrfs_header_owner(parent);
2106         }
2107
2108         root_gen = btrfs_header_generation(parent);
2109         ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
2110                                 path->nodes[*level]->len,
2111                                 root_owner, root_gen, 0, 0, 1);
2112         free_extent_buffer(path->nodes[*level]);
2113         path->nodes[*level] = NULL;
2114         *level += 1;
2115         BUG_ON(ret);
2116         return 0;
2117 }
2118
2119 /*
2120  * helper for dropping snapshots.  This walks back up the tree in the path
2121  * to find the first node higher up where we haven't yet gone through
2122  * all the slots
2123  */
2124 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2125                                  struct btrfs_root *root,
2126                                  struct btrfs_path *path, int *level)
2127 {
2128         u64 root_owner;
2129         u64 root_gen;
2130         struct btrfs_root_item *root_item = &root->root_item;
2131         int i;
2132         int slot;
2133         int ret;
2134
2135         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2136                 slot = path->slots[i];
2137                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2138                         struct extent_buffer *node;
2139                         struct btrfs_disk_key disk_key;
2140                         node = path->nodes[i];
2141                         path->slots[i]++;
2142                         *level = i;
2143                         WARN_ON(*level == 0);
2144                         btrfs_node_key(node, &disk_key, path->slots[i]);
2145                         memcpy(&root_item->drop_progress,
2146                                &disk_key, sizeof(disk_key));
2147                         root_item->drop_level = i;
2148                         return 0;
2149                 } else {
2150                         if (path->nodes[*level] == root->node) {
2151                                 root_owner = root->root_key.objectid;
2152                                 root_gen =
2153                                    btrfs_header_generation(path->nodes[*level]);
2154                         } else {
2155                                 struct extent_buffer *node;
2156                                 node = path->nodes[*level + 1];
2157                                 root_owner = btrfs_header_owner(node);
2158                                 root_gen = btrfs_header_generation(node);
2159                         }
2160                         ret = btrfs_free_extent(trans, root,
2161                                                 path->nodes[*level]->start,
2162                                                 path->nodes[*level]->len,
2163                                                 root_owner, root_gen, 0, 0, 1);
2164                         BUG_ON(ret);
2165                         free_extent_buffer(path->nodes[*level]);
2166                         path->nodes[*level] = NULL;
2167                         *level = i + 1;
2168                 }
2169         }
2170         return 1;
2171 }
2172
2173 /*
2174  * drop the reference count on the tree rooted at 'snap'.  This traverses
2175  * the tree freeing any blocks that have a ref count of zero after being
2176  * decremented.
2177  */
2178 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2179                         *root)
2180 {
2181         int ret = 0;
2182         int wret;
2183         int level;
2184         struct btrfs_path *path;
2185         int i;
2186         int orig_level;
2187         struct btrfs_root_item *root_item = &root->root_item;
2188
2189         path = btrfs_alloc_path();
2190         BUG_ON(!path);
2191
2192         level = btrfs_header_level(root->node);
2193         orig_level = level;
2194         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2195                 path->nodes[level] = root->node;
2196                 extent_buffer_get(root->node);
2197                 path->slots[level] = 0;
2198         } else {
2199                 struct btrfs_key key;
2200                 struct btrfs_disk_key found_key;
2201                 struct extent_buffer *node;
2202
2203                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2204                 level = root_item->drop_level;
2205                 path->lowest_level = level;
2206                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2207                 if (wret < 0) {
2208                         ret = wret;
2209                         goto out;
2210                 }
2211                 node = path->nodes[level];
2212                 btrfs_node_key(node, &found_key, path->slots[level]);
2213                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2214                                sizeof(found_key)));
2215         }
2216         while(1) {
2217                 wret = walk_down_tree(trans, root, path, &level);
2218                 if (wret > 0)
2219                         break;
2220                 if (wret < 0)
2221                         ret = wret;
2222
2223                 wret = walk_up_tree(trans, root, path, &level);
2224                 if (wret > 0)
2225                         break;
2226                 if (wret < 0)
2227                         ret = wret;
2228                 ret = -EAGAIN;
2229                 break;
2230         }
2231         for (i = 0; i <= orig_level; i++) {
2232                 if (path->nodes[i]) {
2233                         free_extent_buffer(path->nodes[i]);
2234                         path->nodes[i] = NULL;
2235                 }
2236         }
2237 out:
2238         btrfs_free_path(path);
2239         return ret;
2240 }
2241
2242 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2243 {
2244         u64 start;
2245         u64 end;
2246         u64 ptr;
2247         int ret;
2248         while(1) {
2249                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2250                                             &start, &end, (unsigned int)-1);
2251                 if (ret)
2252                         break;
2253                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2254                 if (!ret)
2255                         kfree((void *)(unsigned long)ptr);
2256                 clear_extent_bits(&info->block_group_cache, start,
2257                                   end, (unsigned int)-1, GFP_NOFS);
2258         }
2259         while(1) {
2260                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2261                                             &start, &end, EXTENT_DIRTY);
2262                 if (ret)
2263                         break;
2264                 clear_extent_dirty(&info->free_space_cache, start,
2265                                    end, GFP_NOFS);
2266         }
2267         return 0;
2268 }
2269
2270 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2271                                          u64 len)
2272 {
2273         u64 page_start;
2274         u64 page_end;
2275         u64 delalloc_start;
2276         u64 existing_delalloc;
2277         unsigned long last_index;
2278         unsigned long i;
2279         struct page *page;
2280         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2281         struct file_ra_state *ra;
2282
2283         ra = kzalloc(sizeof(*ra), GFP_NOFS);
2284
2285         mutex_lock(&inode->i_mutex);
2286         i = start >> PAGE_CACHE_SHIFT;
2287         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2288
2289         file_ra_state_init(ra, inode->i_mapping);
2290         btrfs_force_ra(inode->i_mapping, ra, NULL, i, last_index);
2291         kfree(ra);
2292
2293         for (; i <= last_index; i++) {
2294                 page = grab_cache_page(inode->i_mapping, i);
2295                 if (!page)
2296                         goto out_unlock;
2297                 if (!PageUptodate(page)) {
2298                         btrfs_readpage(NULL, page);
2299                         lock_page(page);
2300                         if (!PageUptodate(page)) {
2301                                 unlock_page(page);
2302                                 page_cache_release(page);
2303                                 goto out_unlock;
2304                         }
2305                 }
2306                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2307                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2308
2309                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2310
2311                 delalloc_start = page_start;
2312                 existing_delalloc = count_range_bits(io_tree,
2313                                              &delalloc_start, page_end,
2314                                              PAGE_CACHE_SIZE, EXTENT_DELALLOC);
2315
2316                 set_extent_delalloc(io_tree, page_start,
2317                                     page_end, GFP_NOFS);
2318
2319                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2320                 set_page_dirty(page);
2321                 unlock_page(page);
2322                 page_cache_release(page);
2323         }
2324
2325 out_unlock:
2326         mutex_unlock(&inode->i_mutex);
2327         return 0;
2328 }
2329
2330 /*
2331  * note, this releases the path
2332  */
2333 static int noinline relocate_one_reference(struct btrfs_root *extent_root,
2334                                   struct btrfs_path *path,
2335                                   struct btrfs_key *extent_key)
2336 {
2337         struct inode *inode;
2338         struct btrfs_root *found_root;
2339         struct btrfs_key *root_location;
2340         struct btrfs_extent_ref *ref;
2341         u64 ref_root;
2342         u64 ref_gen;
2343         u64 ref_objectid;
2344         u64 ref_offset;
2345         int ret;
2346
2347         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
2348                              struct btrfs_extent_ref);
2349         ref_root = btrfs_ref_root(path->nodes[0], ref);
2350         ref_gen = btrfs_ref_generation(path->nodes[0], ref);
2351         ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
2352         ref_offset = btrfs_ref_offset(path->nodes[0], ref);
2353         btrfs_release_path(extent_root, path);
2354
2355         root_location = kmalloc(sizeof(*root_location), GFP_NOFS);
2356         root_location->objectid = ref_root;
2357         if (ref_gen == 0)
2358                 root_location->offset = 0;
2359         else
2360                 root_location->offset = (u64)-1;
2361         root_location->type = BTRFS_ROOT_ITEM_KEY;
2362
2363         found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
2364                                                 root_location);
2365         BUG_ON(!found_root);
2366         kfree(root_location);
2367
2368         if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2369                 mutex_unlock(&extent_root->fs_info->fs_mutex);
2370                 inode = btrfs_iget_locked(extent_root->fs_info->sb,
2371                                           ref_objectid, found_root);
2372                 if (inode->i_state & I_NEW) {
2373                         /* the inode and parent dir are two different roots */
2374                         BTRFS_I(inode)->root = found_root;
2375                         BTRFS_I(inode)->location.objectid = ref_objectid;
2376                         BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
2377                         BTRFS_I(inode)->location.offset = 0;
2378                         btrfs_read_locked_inode(inode);
2379                         unlock_new_inode(inode);
2380
2381                 }
2382                 /* this can happen if the reference is not against
2383                  * the latest version of the tree root
2384                  */
2385                 if (is_bad_inode(inode)) {
2386                         mutex_lock(&extent_root->fs_info->fs_mutex);
2387                         goto out;
2388                 }
2389                 relocate_inode_pages(inode, ref_offset, extent_key->offset);
2390                 /* FIXME, data=ordered will help get rid of this */
2391                 filemap_fdatawrite(inode->i_mapping);
2392                 iput(inode);
2393                 mutex_lock(&extent_root->fs_info->fs_mutex);
2394         } else {
2395                 struct btrfs_trans_handle *trans;
2396                 struct btrfs_key found_key;
2397                 struct extent_buffer *eb;
2398                 int level;
2399                 int i;
2400
2401                 trans = btrfs_start_transaction(found_root, 1);
2402                 eb = read_tree_block(found_root, extent_key->objectid,
2403                                      extent_key->offset);
2404                 level = btrfs_header_level(eb);
2405
2406                 if (level == 0)
2407                         btrfs_item_key_to_cpu(eb, &found_key, 0);
2408                 else
2409                         btrfs_node_key_to_cpu(eb, &found_key, 0);
2410
2411                 free_extent_buffer(eb);
2412
2413                 path->lowest_level = level;
2414                 path->reada = 2;
2415                 ret = btrfs_search_slot(trans, found_root, &found_key, path,
2416                                         0, 1);
2417                 path->lowest_level = 0;
2418                 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2419                         if (!path->nodes[i])
2420                                 break;
2421                         free_extent_buffer(path->nodes[i]);
2422                         path->nodes[i] = NULL;
2423                 }
2424                 btrfs_release_path(found_root, path);
2425                 btrfs_end_transaction(trans, found_root);
2426         }
2427
2428 out:
2429         return 0;
2430 }
2431
2432 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
2433                                         struct btrfs_path *path,
2434                                         struct btrfs_key *extent_key)
2435 {
2436         struct btrfs_key key;
2437         struct btrfs_key found_key;
2438         struct extent_buffer *leaf;
2439         u32 nritems;
2440         u32 item_size;
2441         int ret = 0;
2442
2443         key.objectid = extent_key->objectid;
2444         key.type = BTRFS_EXTENT_REF_KEY;
2445         key.offset = 0;
2446
2447         while(1) {
2448                 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2449
2450                 if (ret < 0)
2451                         goto out;
2452
2453                 ret = 0;
2454                 leaf = path->nodes[0];
2455                 nritems = btrfs_header_nritems(leaf);
2456                 if (path->slots[0] == nritems)
2457                         goto out;
2458
2459                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2460                 if (found_key.objectid != extent_key->objectid)
2461                         break;
2462
2463                 if (found_key.type != BTRFS_EXTENT_REF_KEY)
2464                         break;
2465
2466                 key.offset = found_key.offset + 1;
2467                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2468
2469                 ret = relocate_one_reference(extent_root, path, extent_key);
2470                 if (ret)
2471                         goto out;
2472         }
2473         ret = 0;
2474 out:
2475         btrfs_release_path(extent_root, path);
2476         return ret;
2477 }
2478
2479 int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
2480 {
2481         struct btrfs_trans_handle *trans;
2482         struct btrfs_root *tree_root = root->fs_info->tree_root;
2483         struct btrfs_path *path;
2484         u64 cur_byte;
2485         u64 total_found;
2486         struct btrfs_fs_info *info = root->fs_info;
2487         struct extent_io_tree *block_group_cache;
2488         struct btrfs_key key;
2489         struct btrfs_key found_key;
2490         struct extent_buffer *leaf;
2491         u32 nritems;
2492         int ret;
2493         int progress = 0;
2494
2495         btrfs_set_super_total_bytes(&info->super_copy, new_size);
2496         clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
2497                            GFP_NOFS);
2498         block_group_cache = &info->block_group_cache;
2499         path = btrfs_alloc_path();
2500         root = root->fs_info->extent_root;
2501         path->reada = 2;
2502
2503 again:
2504         total_found = 0;
2505         key.objectid = new_size;
2506         key.offset = 0;
2507         key.type = 0;
2508         cur_byte = key.objectid;
2509
2510         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2511         if (ret < 0)
2512                 goto out;
2513
2514         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
2515         if (ret < 0)
2516                 goto out;
2517         if (ret == 0) {
2518                 leaf = path->nodes[0];
2519                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2520                 if (found_key.objectid + found_key.offset > new_size) {
2521                         cur_byte = found_key.objectid;
2522                         key.objectid = cur_byte;
2523                 }
2524         }
2525         btrfs_release_path(root, path);
2526
2527         while(1) {
2528                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2529                 if (ret < 0)
2530                         goto out;
2531
2532                 leaf = path->nodes[0];
2533                 nritems = btrfs_header_nritems(leaf);
2534 next:
2535                 if (path->slots[0] >= nritems) {
2536                         ret = btrfs_next_leaf(root, path);
2537                         if (ret < 0)
2538                                 goto out;
2539                         if (ret == 1) {
2540                                 ret = 0;
2541                                 break;
2542                         }
2543                         leaf = path->nodes[0];
2544                         nritems = btrfs_header_nritems(leaf);
2545                 }
2546
2547                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2548
2549                 if (progress && need_resched()) {
2550                         memcpy(&key, &found_key, sizeof(key));
2551                         mutex_unlock(&root->fs_info->fs_mutex);
2552                         cond_resched();
2553                         mutex_lock(&root->fs_info->fs_mutex);
2554                         btrfs_release_path(root, path);
2555                         btrfs_search_slot(NULL, root, &key, path, 0, 0);
2556                         progress = 0;
2557                         goto next;
2558                 }
2559                 progress = 1;
2560
2561                 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
2562                     found_key.objectid + found_key.offset <= cur_byte) {
2563                         path->slots[0]++;
2564                         goto next;
2565                 }
2566
2567                 total_found++;
2568                 cur_byte = found_key.objectid + found_key.offset;
2569                 key.objectid = cur_byte;
2570                 btrfs_release_path(root, path);
2571                 ret = relocate_one_extent(root, path, &found_key);
2572         }
2573
2574         btrfs_release_path(root, path);
2575
2576         if (total_found > 0) {
2577                 trans = btrfs_start_transaction(tree_root, 1);
2578                 btrfs_commit_transaction(trans, tree_root);
2579
2580                 mutex_unlock(&root->fs_info->fs_mutex);
2581                 btrfs_clean_old_snapshots(tree_root);
2582                 mutex_lock(&root->fs_info->fs_mutex);
2583
2584                 trans = btrfs_start_transaction(tree_root, 1);
2585                 btrfs_commit_transaction(trans, tree_root);
2586                 goto again;
2587         }
2588
2589         trans = btrfs_start_transaction(root, 1);
2590         key.objectid = new_size;
2591         key.offset = 0;
2592         key.type = 0;
2593         while(1) {
2594                 u64 ptr;
2595
2596                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2597                 if (ret < 0)
2598                         goto out;
2599
2600                 leaf = path->nodes[0];
2601                 nritems = btrfs_header_nritems(leaf);
2602 bg_next:
2603                 if (path->slots[0] >= nritems) {
2604                         ret = btrfs_next_leaf(root, path);
2605                         if (ret < 0)
2606                                 break;
2607                         if (ret == 1) {
2608                                 ret = 0;
2609                                 break;
2610                         }
2611                         leaf = path->nodes[0];
2612                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2613
2614                         /*
2615                          * btrfs_next_leaf doesn't cow buffers, we have to
2616                          * do the search again
2617                          */
2618                         memcpy(&key, &found_key, sizeof(key));
2619                         btrfs_release_path(root, path);
2620                         goto resched_check;
2621                 }
2622
2623                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2624                 if (btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) {
2625                         printk("shrinker found key %Lu %u %Lu\n",
2626                                 found_key.objectid, found_key.type,
2627                                 found_key.offset);
2628                         path->slots[0]++;
2629                         goto bg_next;
2630                 }
2631                 ret = get_state_private(&info->block_group_cache,
2632                                         found_key.objectid, &ptr);
2633                 if (!ret)
2634                         kfree((void *)(unsigned long)ptr);
2635
2636                 clear_extent_bits(&info->block_group_cache, found_key.objectid,
2637                                   found_key.objectid + found_key.offset - 1,
2638                                   (unsigned int)-1, GFP_NOFS);
2639
2640                 key.objectid = found_key.objectid + 1;
2641                 btrfs_del_item(trans, root, path);
2642                 btrfs_release_path(root, path);
2643 resched_check:
2644                 if (need_resched()) {
2645                         mutex_unlock(&root->fs_info->fs_mutex);
2646                         cond_resched();
2647                         mutex_lock(&root->fs_info->fs_mutex);
2648                 }
2649         }
2650         clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
2651                            GFP_NOFS);
2652         btrfs_commit_transaction(trans, root);
2653 out:
2654         btrfs_free_path(path);
2655         return ret;
2656 }
2657
2658 int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
2659                            struct btrfs_root *root, u64 new_size)
2660 {
2661         btrfs_set_super_total_bytes(&root->fs_info->super_copy, new_size);
2662         return 0;
2663 }
2664
2665 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
2666                            struct btrfs_key *key)
2667 {
2668         int ret;
2669         struct btrfs_key found_key;
2670         struct extent_buffer *leaf;
2671         int slot;
2672
2673         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
2674         if (ret < 0)
2675                 return ret;
2676         while(1) {
2677                 slot = path->slots[0];
2678                 leaf = path->nodes[0];
2679                 if (slot >= btrfs_header_nritems(leaf)) {
2680                         ret = btrfs_next_leaf(root, path);
2681                         if (ret == 0)
2682                                 continue;
2683                         if (ret < 0)
2684                                 goto error;
2685                         break;
2686                 }
2687                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2688
2689                 if (found_key.objectid >= key->objectid &&
2690                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
2691                         return 0;
2692                 path->slots[0]++;
2693         }
2694         ret = -ENOENT;
2695 error:
2696         return ret;
2697 }
2698
2699 int btrfs_read_block_groups(struct btrfs_root *root)
2700 {
2701         struct btrfs_path *path;
2702         int ret;
2703         int bit;
2704         struct btrfs_block_group_cache *cache;
2705         struct btrfs_fs_info *info = root->fs_info;
2706         struct btrfs_space_info *space_info;
2707         struct extent_io_tree *block_group_cache;
2708         struct btrfs_key key;
2709         struct btrfs_key found_key;
2710         struct extent_buffer *leaf;
2711
2712         block_group_cache = &info->block_group_cache;
2713         root = info->extent_root;
2714         key.objectid = 0;
2715         key.offset = 0;
2716         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2717         path = btrfs_alloc_path();
2718         if (!path)
2719                 return -ENOMEM;
2720
2721         while(1) {
2722                 ret = find_first_block_group(root, path, &key);
2723                 if (ret > 0) {
2724                         ret = 0;
2725                         goto error;
2726                 }
2727                 if (ret != 0)
2728                         goto error;
2729
2730                 leaf = path->nodes[0];
2731                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2732                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2733                 if (!cache) {
2734                         ret = -ENOMEM;
2735                         break;
2736                 }
2737
2738                 read_extent_buffer(leaf, &cache->item,
2739                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
2740                                    sizeof(cache->item));
2741                 memcpy(&cache->key, &found_key, sizeof(found_key));
2742                 cache->cached = 0;
2743                 cache->pinned = 0;
2744
2745                 key.objectid = found_key.objectid + found_key.offset;
2746                 btrfs_release_path(root, path);
2747                 cache->flags = btrfs_block_group_flags(&cache->item);
2748                 bit = 0;
2749                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
2750                         bit = BLOCK_GROUP_DATA;
2751                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2752                         bit = BLOCK_GROUP_SYSTEM;
2753                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
2754                         bit = BLOCK_GROUP_METADATA;
2755                 }
2756                 set_avail_alloc_bits(info, cache->flags);
2757
2758                 ret = update_space_info(info, cache->flags, found_key.offset,
2759                                         btrfs_block_group_used(&cache->item),
2760                                         &space_info);
2761                 BUG_ON(ret);
2762                 cache->space_info = space_info;
2763
2764                 /* use EXTENT_LOCKED to prevent merging */
2765                 set_extent_bits(block_group_cache, found_key.objectid,
2766                                 found_key.objectid + found_key.offset - 1,
2767                                 bit | EXTENT_LOCKED, GFP_NOFS);
2768                 set_state_private(block_group_cache, found_key.objectid,
2769                                   (unsigned long)cache);
2770
2771                 if (key.objectid >=
2772                     btrfs_super_total_bytes(&info->super_copy))
2773                         break;
2774         }
2775         ret = 0;
2776 error:
2777         btrfs_free_path(path);
2778         return ret;
2779 }
2780
2781 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
2782                            struct btrfs_root *root, u64 bytes_used,
2783                            u64 type, u64 chunk_tree, u64 chunk_objectid,
2784                            u64 size)
2785 {
2786         int ret;
2787         int bit = 0;
2788         struct btrfs_root *extent_root;
2789         struct btrfs_block_group_cache *cache;
2790         struct extent_io_tree *block_group_cache;
2791
2792         extent_root = root->fs_info->extent_root;
2793         block_group_cache = &root->fs_info->block_group_cache;
2794
2795         cache = kmalloc(sizeof(*cache), GFP_NOFS);
2796         BUG_ON(!cache);
2797         cache->key.objectid = chunk_objectid;
2798         cache->key.offset = size;
2799         cache->cached = 0;
2800         cache->pinned = 0;
2801         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2802         memset(&cache->item, 0, sizeof(cache->item));
2803         btrfs_set_block_group_used(&cache->item, bytes_used);
2804         btrfs_set_block_group_chunk_tree(&cache->item, chunk_tree);
2805         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
2806         cache->flags = type;
2807         btrfs_set_block_group_flags(&cache->item, type);
2808
2809         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
2810                                 &cache->space_info);
2811         BUG_ON(ret);
2812
2813         if (type & BTRFS_BLOCK_GROUP_DATA) {
2814                 bit = BLOCK_GROUP_DATA;
2815         } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
2816                 bit = BLOCK_GROUP_SYSTEM;
2817         } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
2818                 bit = BLOCK_GROUP_METADATA;
2819         }
2820         set_extent_bits(block_group_cache, chunk_objectid,
2821                         chunk_objectid + size - 1,
2822                         bit | EXTENT_LOCKED, GFP_NOFS);
2823         set_state_private(block_group_cache, chunk_objectid,
2824                           (unsigned long)cache);
2825
2826         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
2827                                 sizeof(cache->item));
2828         BUG_ON(ret);
2829
2830         finish_current_insert(trans, extent_root);
2831         ret = del_pending_extents(trans, extent_root);
2832         BUG_ON(ret);
2833         return 0;
2834 }