8f441783a6ed7f4d4ac7e4e4b55502deb9a715d8
[linux-2.6-block.git] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include <linux/crc32c.h>
21 #include <linux/pagemap.h>
22 #include "hash.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27
28 #define BLOCK_GROUP_DATA EXTENT_WRITEBACK
29 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
30 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
31
32 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
33                                  btrfs_root *extent_root);
34 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
35                                btrfs_root *extent_root);
36 static int find_previous_extent(struct btrfs_root *root,
37                                 struct btrfs_path *path)
38 {
39         struct btrfs_key found_key;
40         struct extent_buffer *leaf;
41         int ret;
42
43         while(1) {
44                 if (path->slots[0] == 0) {
45                         ret = btrfs_prev_leaf(root, path);
46                         if (ret != 0)
47                                 return ret;
48                 } else {
49                         path->slots[0]--;
50                 }
51                 leaf = path->nodes[0];
52                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
53                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY)
54                         return 0;
55         }
56         return 1;
57 }
58
59 static int cache_block_group(struct btrfs_root *root,
60                              struct btrfs_block_group_cache *block_group)
61 {
62         struct btrfs_path *path;
63         int ret;
64         struct btrfs_key key;
65         struct extent_buffer *leaf;
66         struct extent_io_tree *free_space_cache;
67         int slot;
68         u64 last = 0;
69         u64 hole_size;
70         u64 first_free;
71         int found = 0;
72
73         if (!block_group)
74                 return 0;
75
76         root = root->fs_info->extent_root;
77         free_space_cache = &root->fs_info->free_space_cache;
78
79         if (block_group->cached)
80                 return 0;
81
82         path = btrfs_alloc_path();
83         if (!path)
84                 return -ENOMEM;
85
86         path->reada = 2;
87         first_free = block_group->key.objectid;
88         key.objectid = block_group->key.objectid;
89         key.offset = 0;
90         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
91         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
92         if (ret < 0)
93                 return ret;
94         ret = find_previous_extent(root, path);
95         if (ret < 0)
96                 return ret;
97         if (ret == 0) {
98                 leaf = path->nodes[0];
99                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
100                 if (key.objectid + key.offset > first_free)
101                         first_free = key.objectid + key.offset;
102         }
103         while(1) {
104                 leaf = path->nodes[0];
105                 slot = path->slots[0];
106                 if (slot >= btrfs_header_nritems(leaf)) {
107                         ret = btrfs_next_leaf(root, path);
108                         if (ret < 0)
109                                 goto err;
110                         if (ret == 0) {
111                                 continue;
112                         } else {
113                                 break;
114                         }
115                 }
116                 btrfs_item_key_to_cpu(leaf, &key, slot);
117                 if (key.objectid < block_group->key.objectid) {
118                         goto next;
119                 }
120                 if (key.objectid >= block_group->key.objectid +
121                     block_group->key.offset) {
122                         break;
123                 }
124
125                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
126                         if (!found) {
127                                 last = first_free;
128                                 found = 1;
129                         }
130                         if (key.objectid > last) {
131                                 hole_size = key.objectid - last;
132                                 set_extent_dirty(free_space_cache, last,
133                                                  last + hole_size - 1,
134                                                  GFP_NOFS);
135                         }
136                         last = key.objectid + key.offset;
137                 }
138 next:
139                 path->slots[0]++;
140         }
141
142         if (!found)
143                 last = first_free;
144         if (block_group->key.objectid +
145             block_group->key.offset > last) {
146                 hole_size = block_group->key.objectid +
147                         block_group->key.offset - last;
148                 set_extent_dirty(free_space_cache, last,
149                                  last + hole_size - 1, GFP_NOFS);
150         }
151         block_group->cached = 1;
152 err:
153         btrfs_free_path(path);
154         return 0;
155 }
156
157 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
158                                                          btrfs_fs_info *info,
159                                                          u64 bytenr)
160 {
161         struct extent_io_tree *block_group_cache;
162         struct btrfs_block_group_cache *block_group = NULL;
163         u64 ptr;
164         u64 start;
165         u64 end;
166         int ret;
167
168         block_group_cache = &info->block_group_cache;
169         ret = find_first_extent_bit(block_group_cache,
170                                     bytenr, &start, &end,
171                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
172         if (ret) {
173                 return NULL;
174         }
175         ret = get_state_private(block_group_cache, start, &ptr);
176         if (ret)
177                 return NULL;
178
179         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
180         if (block_group->key.objectid <= bytenr && bytenr <
181             block_group->key.objectid + block_group->key.offset)
182                 return block_group;
183         return NULL;
184 }
185 static u64 noinline find_search_start(struct btrfs_root *root,
186                               struct btrfs_block_group_cache **cache_ret,
187                               u64 search_start, int num, int data)
188 {
189         int ret;
190         struct btrfs_block_group_cache *cache = *cache_ret;
191         struct extent_io_tree *free_space_cache;
192         struct extent_state *state;
193         u64 last;
194         u64 start = 0;
195         u64 cache_miss = 0;
196         u64 total_fs_bytes;
197         int wrapped = 0;
198
199         if (!cache) {
200                 goto out;
201         }
202         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
203         free_space_cache = &root->fs_info->free_space_cache;
204
205 again:
206         ret = cache_block_group(root, cache);
207         if (ret)
208                 goto out;
209
210         last = max(search_start, cache->key.objectid);
211
212         spin_lock_irq(&free_space_cache->lock);
213         state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY);
214         while(1) {
215                 if (!state) {
216                         if (!cache_miss)
217                                 cache_miss = last;
218                         spin_unlock_irq(&free_space_cache->lock);
219                         goto new_group;
220                 }
221
222                 start = max(last, state->start);
223                 last = state->end + 1;
224                 if (last - start < num) {
225                         if (last == cache->key.objectid + cache->key.offset)
226                                 cache_miss = start;
227                         do {
228                                 state = extent_state_next(state);
229                         } while(state && !(state->state & EXTENT_DIRTY));
230                         continue;
231                 }
232                 spin_unlock_irq(&free_space_cache->lock);
233                 if (data != BTRFS_BLOCK_GROUP_MIXED &&
234                     start + num > cache->key.objectid + cache->key.offset)
235                         goto new_group;
236                 if (start + num  > total_fs_bytes)
237                         goto new_group;
238                 return start;
239         }
240 out:
241         cache = btrfs_lookup_block_group(root->fs_info, search_start);
242         if (!cache) {
243                 printk("Unable to find block group for %Lu\n",
244                        search_start);
245                 WARN_ON(1);
246                 return search_start;
247         }
248         return search_start;
249
250 new_group:
251         last = cache->key.objectid + cache->key.offset;
252 wrapped:
253         cache = btrfs_lookup_block_group(root->fs_info, last);
254         if (!cache || cache->key.objectid >= total_fs_bytes) {
255 no_cache:
256                 if (!wrapped) {
257                         wrapped = 1;
258                         last = search_start;
259                         data = BTRFS_BLOCK_GROUP_MIXED;
260                         goto wrapped;
261                 }
262                 goto out;
263         }
264         if (cache_miss && !cache->cached) {
265                 cache_block_group(root, cache);
266                 last = cache_miss;
267                 cache = btrfs_lookup_block_group(root->fs_info, last);
268         }
269         cache = btrfs_find_block_group(root, cache, last, data, 0);
270         if (!cache)
271                 goto no_cache;
272         *cache_ret = cache;
273         cache_miss = 0;
274         goto again;
275 }
276
277 static u64 div_factor(u64 num, int factor)
278 {
279         if (factor == 10)
280                 return num;
281         num *= factor;
282         do_div(num, 10);
283         return num;
284 }
285
286 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
287                                                  struct btrfs_block_group_cache
288                                                  *hint, u64 search_start,
289                                                  int data, int owner)
290 {
291         struct btrfs_block_group_cache *cache;
292         struct extent_io_tree *block_group_cache;
293         struct btrfs_block_group_cache *found_group = NULL;
294         struct btrfs_fs_info *info = root->fs_info;
295         u64 used;
296         u64 last = 0;
297         u64 hint_last;
298         u64 start;
299         u64 end;
300         u64 free_check;
301         u64 ptr;
302         u64 total_fs_bytes;
303         int bit;
304         int ret;
305         int full_search = 0;
306         int factor = 8;
307         int data_swap = 0;
308
309         block_group_cache = &info->block_group_cache;
310         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
311
312         if (!owner)
313                 factor = 8;
314
315         if (data == BTRFS_BLOCK_GROUP_MIXED) {
316                 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
317                 factor = 10;
318         } else if (data)
319                 bit = BLOCK_GROUP_DATA;
320         else
321                 bit = BLOCK_GROUP_METADATA;
322
323         if (search_start && search_start < total_fs_bytes) {
324                 struct btrfs_block_group_cache *shint;
325                 shint = btrfs_lookup_block_group(info, search_start);
326                 if (shint && (shint->data == data ||
327                               shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
328                         used = btrfs_block_group_used(&shint->item);
329                         if (used + shint->pinned <
330                             div_factor(shint->key.offset, factor)) {
331                                 return shint;
332                         }
333                 }
334         }
335         if (hint && hint->key.objectid < total_fs_bytes &&
336             (hint->data == data || hint->data == BTRFS_BLOCK_GROUP_MIXED)) {
337                 used = btrfs_block_group_used(&hint->item);
338                 if (used + hint->pinned <
339                     div_factor(hint->key.offset, factor)) {
340                         return hint;
341                 }
342                 last = hint->key.objectid + hint->key.offset;
343                 hint_last = last;
344         } else {
345                 if (hint)
346                         hint_last = max(hint->key.objectid, search_start);
347                 else
348                         hint_last = search_start;
349
350                 if (hint_last >= total_fs_bytes)
351                         hint_last = search_start;
352                 last = hint_last;
353         }
354 again:
355         while(1) {
356                 ret = find_first_extent_bit(block_group_cache, last,
357                                             &start, &end, bit);
358                 if (ret)
359                         break;
360
361                 ret = get_state_private(block_group_cache, start, &ptr);
362                 if (ret)
363                         break;
364
365                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
366                 last = cache->key.objectid + cache->key.offset;
367                 used = btrfs_block_group_used(&cache->item);
368
369                 if (cache->key.objectid > total_fs_bytes)
370                         break;
371
372                 if (full_search)
373                         free_check = cache->key.offset;
374                 else
375                         free_check = div_factor(cache->key.offset, factor);
376                 if (used + cache->pinned < free_check) {
377                         found_group = cache;
378                         goto found;
379                 }
380                 cond_resched();
381         }
382         if (!full_search) {
383                 last = search_start;
384                 full_search = 1;
385                 goto again;
386         }
387         if (!data_swap) {
388                 data_swap = 1;
389                 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
390                 last = search_start;
391                 goto again;
392         }
393 found:
394         return found_group;
395 }
396
397 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
398                            u64 owner, u64 owner_offset)
399 {
400         u32 high_crc = ~(u32)0;
401         u32 low_crc = ~(u32)0;
402         __le64 lenum;
403
404         lenum = cpu_to_le64(root_objectid);
405         high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
406         lenum = cpu_to_le64(ref_generation);
407         low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
408         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
409                 lenum = cpu_to_le64(owner);
410                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
411                 lenum = cpu_to_le64(owner_offset);
412                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
413         }
414         return ((u64)high_crc << 32) | (u64)low_crc;
415 }
416
417 static int match_extent_ref(struct extent_buffer *leaf,
418                             struct btrfs_extent_ref *disk_ref,
419                             struct btrfs_extent_ref *cpu_ref)
420 {
421         int ret;
422         int len;
423
424         if (cpu_ref->objectid)
425                 len = sizeof(*cpu_ref);
426         else
427                 len = 2 * sizeof(u64);
428         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
429                                    len);
430         return ret == 0;
431 }
432
433 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
434                                           struct btrfs_root *root,
435                                           struct btrfs_path *path, u64 bytenr,
436                                           u64 root_objectid,
437                                           u64 ref_generation, u64 owner,
438                                           u64 owner_offset, int del)
439 {
440         u64 hash;
441         struct btrfs_key key;
442         struct btrfs_key found_key;
443         struct btrfs_extent_ref ref;
444         struct extent_buffer *leaf;
445         struct btrfs_extent_ref *disk_ref;
446         int ret;
447         int ret2;
448
449         btrfs_set_stack_ref_root(&ref, root_objectid);
450         btrfs_set_stack_ref_generation(&ref, ref_generation);
451         btrfs_set_stack_ref_objectid(&ref, owner);
452         btrfs_set_stack_ref_offset(&ref, owner_offset);
453
454         hash = hash_extent_ref(root_objectid, ref_generation, owner,
455                                owner_offset);
456         key.offset = hash;
457         key.objectid = bytenr;
458         key.type = BTRFS_EXTENT_REF_KEY;
459
460         while (1) {
461                 ret = btrfs_search_slot(trans, root, &key, path,
462                                         del ? -1 : 0, del);
463                 if (ret < 0)
464                         goto out;
465                 leaf = path->nodes[0];
466                 if (ret != 0) {
467                         u32 nritems = btrfs_header_nritems(leaf);
468                         if (path->slots[0] >= nritems) {
469                                 ret2 = btrfs_next_leaf(root, path);
470                                 if (ret2)
471                                         goto out;
472                                 leaf = path->nodes[0];
473                         }
474                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
475                         if (found_key.objectid != bytenr ||
476                             found_key.type != BTRFS_EXTENT_REF_KEY)
477                                 goto out;
478                         key.offset = found_key.offset;
479                         if (del) {
480                                 btrfs_release_path(root, path);
481                                 continue;
482                         }
483                 }
484                 disk_ref = btrfs_item_ptr(path->nodes[0],
485                                           path->slots[0],
486                                           struct btrfs_extent_ref);
487                 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
488                         ret = 0;
489                         goto out;
490                 }
491                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
492                 key.offset = found_key.offset + 1;
493                 btrfs_release_path(root, path);
494         }
495 out:
496         return ret;
497 }
498
499 /*
500  * Back reference rules.  Back refs have three main goals:
501  *
502  * 1) differentiate between all holders of references to an extent so that
503  *    when a reference is dropped we can make sure it was a valid reference
504  *    before freeing the extent.
505  *
506  * 2) Provide enough information to quickly find the holders of an extent
507  *    if we notice a given block is corrupted or bad.
508  *
509  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
510  *    maintenance.  This is actually the same as #2, but with a slightly
511  *    different use case.
512  *
513  * File extents can be referenced by:
514  *
515  * - multiple snapshots, subvolumes, or different generations in one subvol
516  * - different files inside a single subvolume (in theory, not implemented yet)
517  * - different offsets inside a file (bookend extents in file.c)
518  *
519  * The extent ref structure has fields for:
520  *
521  * - Objectid of the subvolume root
522  * - Generation number of the tree holding the reference
523  * - objectid of the file holding the reference
524  * - offset in the file corresponding to the key holding the reference
525  *
526  * When a file extent is allocated the fields are filled in:
527  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
528  *
529  * When a leaf is cow'd new references are added for every file extent found
530  * in the leaf.  It looks the same as the create case, but trans->transid
531  * will be different when the block is cow'd.
532  *
533  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
534  *
535  * When a file extent is removed either during snapshot deletion or file
536  * truncation, the corresponding back reference is found
537  * by searching for:
538  *
539  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
540  *      inode objectid, offset in file)
541  *
542  * Btree extents can be referenced by:
543  *
544  * - Different subvolumes
545  * - Different generations of the same subvolume
546  *
547  * Storing sufficient information for a full reverse mapping of a btree
548  * block would require storing the lowest key of the block in the backref,
549  * and it would require updating that lowest key either before write out or
550  * every time it changed.  Instead, the objectid of the lowest key is stored
551  * along with the level of the tree block.  This provides a hint
552  * about where in the btree the block can be found.  Searches through the
553  * btree only need to look for a pointer to that block, so they stop one
554  * level higher than the level recorded in the backref.
555  *
556  * Some btrees do not do reference counting on their extents.  These
557  * include the extent tree and the tree of tree roots.  Backrefs for these
558  * trees always have a generation of zero.
559  *
560  * When a tree block is created, back references are inserted:
561  *
562  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
563  *
564  * When a tree block is cow'd in a reference counted root,
565  * new back references are added for all the blocks it points to.
566  * These are of the form (trans->transid will have increased since creation):
567  *
568  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
569  *
570  * Because the lowest_key_objectid and the level are just hints
571  * they are not used when backrefs are deleted.  When a backref is deleted:
572  *
573  * if backref was for a tree root:
574  *     root_objectid = root->root_key.objectid
575  * else
576  *     root_objectid = btrfs_header_owner(parent)
577  *
578  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
579  *
580  * Back Reference Key hashing:
581  *
582  * Back references have four fields, each 64 bits long.  Unfortunately,
583  * This is hashed into a single 64 bit number and placed into the key offset.
584  * The key objectid corresponds to the first byte in the extent, and the
585  * key type is set to BTRFS_EXTENT_REF_KEY
586  */
587 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
588                                  struct btrfs_root *root,
589                                  struct btrfs_path *path, u64 bytenr,
590                                  u64 root_objectid, u64 ref_generation,
591                                  u64 owner, u64 owner_offset)
592 {
593         u64 hash;
594         struct btrfs_key key;
595         struct btrfs_extent_ref ref;
596         struct btrfs_extent_ref *disk_ref;
597         int ret;
598
599         btrfs_set_stack_ref_root(&ref, root_objectid);
600         btrfs_set_stack_ref_generation(&ref, ref_generation);
601         btrfs_set_stack_ref_objectid(&ref, owner);
602         btrfs_set_stack_ref_offset(&ref, owner_offset);
603
604         hash = hash_extent_ref(root_objectid, ref_generation, owner,
605                                owner_offset);
606         key.offset = hash;
607         key.objectid = bytenr;
608         key.type = BTRFS_EXTENT_REF_KEY;
609
610         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
611         while (ret == -EEXIST) {
612                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
613                                           struct btrfs_extent_ref);
614                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
615                         goto out;
616                 key.offset++;
617                 btrfs_release_path(root, path);
618                 ret = btrfs_insert_empty_item(trans, root, path, &key,
619                                               sizeof(ref));
620         }
621         if (ret)
622                 goto out;
623         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
624                                   struct btrfs_extent_ref);
625         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
626                             sizeof(ref));
627         btrfs_mark_buffer_dirty(path->nodes[0]);
628 out:
629         btrfs_release_path(root, path);
630         return ret;
631 }
632
633 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
634                                 struct btrfs_root *root,
635                                 u64 bytenr, u64 num_bytes,
636                                 u64 root_objectid, u64 ref_generation,
637                                 u64 owner, u64 owner_offset)
638 {
639         struct btrfs_path *path;
640         int ret;
641         struct btrfs_key key;
642         struct extent_buffer *l;
643         struct btrfs_extent_item *item;
644         u32 refs;
645
646         WARN_ON(num_bytes < root->sectorsize);
647         path = btrfs_alloc_path();
648         if (!path)
649                 return -ENOMEM;
650
651         path->reada = 0;
652         key.objectid = bytenr;
653         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
654         key.offset = num_bytes;
655         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
656                                 0, 1);
657         if (ret < 0)
658                 return ret;
659         if (ret != 0) {
660                 BUG();
661         }
662         BUG_ON(ret != 0);
663         l = path->nodes[0];
664         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
665         refs = btrfs_extent_refs(l, item);
666         btrfs_set_extent_refs(l, item, refs + 1);
667         btrfs_mark_buffer_dirty(path->nodes[0]);
668
669         btrfs_release_path(root->fs_info->extent_root, path);
670
671         path->reada = 0;
672         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
673                                           path, bytenr, root_objectid,
674                                           ref_generation, owner, owner_offset);
675         BUG_ON(ret);
676         finish_current_insert(trans, root->fs_info->extent_root);
677         del_pending_extents(trans, root->fs_info->extent_root);
678
679         btrfs_free_path(path);
680         return 0;
681 }
682
683 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
684                          struct btrfs_root *root)
685 {
686         finish_current_insert(trans, root->fs_info->extent_root);
687         del_pending_extents(trans, root->fs_info->extent_root);
688         return 0;
689 }
690
691 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
692                              struct btrfs_root *root, u64 bytenr,
693                              u64 num_bytes, u32 *refs)
694 {
695         struct btrfs_path *path;
696         int ret;
697         struct btrfs_key key;
698         struct extent_buffer *l;
699         struct btrfs_extent_item *item;
700
701         WARN_ON(num_bytes < root->sectorsize);
702         path = btrfs_alloc_path();
703         path->reada = 0;
704         key.objectid = bytenr;
705         key.offset = num_bytes;
706         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
707         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
708                                 0, 0);
709         if (ret < 0)
710                 goto out;
711         if (ret != 0) {
712                 btrfs_print_leaf(root, path->nodes[0]);
713                 printk("failed to find block number %Lu\n", bytenr);
714                 BUG();
715         }
716         l = path->nodes[0];
717         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
718         *refs = btrfs_extent_refs(l, item);
719 out:
720         btrfs_free_path(path);
721         return 0;
722 }
723
724 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
725                                   struct btrfs_path *count_path,
726                                   u64 first_extent)
727 {
728         struct btrfs_root *extent_root = root->fs_info->extent_root;
729         struct btrfs_path *path;
730         u64 bytenr;
731         u64 found_objectid;
732         u64 root_objectid = root->root_key.objectid;
733         u32 total_count = 0;
734         u32 cur_count;
735         u32 nritems;
736         int ret;
737         struct btrfs_key key;
738         struct btrfs_key found_key;
739         struct extent_buffer *l;
740         struct btrfs_extent_item *item;
741         struct btrfs_extent_ref *ref_item;
742         int level = -1;
743
744         path = btrfs_alloc_path();
745 again:
746         if (level == -1)
747                 bytenr = first_extent;
748         else
749                 bytenr = count_path->nodes[level]->start;
750
751         cur_count = 0;
752         key.objectid = bytenr;
753         key.offset = 0;
754
755         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
756         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
757         if (ret < 0)
758                 goto out;
759         BUG_ON(ret == 0);
760
761         l = path->nodes[0];
762         btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
763
764         if (found_key.objectid != bytenr ||
765             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
766                 goto out;
767         }
768
769         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
770         while (1) {
771                 l = path->nodes[0];
772                 nritems = btrfs_header_nritems(l);
773                 if (path->slots[0] >= nritems) {
774                         ret = btrfs_next_leaf(extent_root, path);
775                         if (ret == 0)
776                                 continue;
777                         break;
778                 }
779                 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
780                 if (found_key.objectid != bytenr)
781                         break;
782
783                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
784                         path->slots[0]++;
785                         continue;
786                 }
787
788                 cur_count++;
789                 ref_item = btrfs_item_ptr(l, path->slots[0],
790                                           struct btrfs_extent_ref);
791                 found_objectid = btrfs_ref_root(l, ref_item);
792
793                 if (found_objectid != root_objectid) {
794                         total_count = 2;
795                         goto out;
796                 }
797                 total_count = 1;
798                 path->slots[0]++;
799         }
800         if (cur_count == 0) {
801                 total_count = 0;
802                 goto out;
803         }
804         if (level >= 0 && root->node == count_path->nodes[level])
805                 goto out;
806         level++;
807         btrfs_release_path(root, path);
808         goto again;
809
810 out:
811         btrfs_free_path(path);
812         return total_count;
813 }
814 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
815                        struct btrfs_root *root, u64 owner_objectid)
816 {
817         u64 generation;
818         u64 key_objectid;
819         u64 level;
820         u32 nritems;
821         struct btrfs_disk_key disk_key;
822
823         level = btrfs_header_level(root->node);
824         generation = trans->transid;
825         nritems = btrfs_header_nritems(root->node);
826         if (nritems > 0) {
827                 if (level == 0)
828                         btrfs_item_key(root->node, &disk_key, 0);
829                 else
830                         btrfs_node_key(root->node, &disk_key, 0);
831                 key_objectid = btrfs_disk_key_objectid(&disk_key);
832         } else {
833                 key_objectid = 0;
834         }
835         return btrfs_inc_extent_ref(trans, root, root->node->start,
836                                     root->node->len, owner_objectid,
837                                     generation, level, key_objectid);
838 }
839
840 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
841                   struct extent_buffer *buf)
842 {
843         u64 bytenr;
844         u32 nritems;
845         struct btrfs_key key;
846         struct btrfs_file_extent_item *fi;
847         int i;
848         int level;
849         int ret;
850         int faili;
851
852         if (!root->ref_cows)
853                 return 0;
854
855         level = btrfs_header_level(buf);
856         nritems = btrfs_header_nritems(buf);
857         for (i = 0; i < nritems; i++) {
858                 if (level == 0) {
859                         u64 disk_bytenr;
860                         btrfs_item_key_to_cpu(buf, &key, i);
861                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
862                                 continue;
863                         fi = btrfs_item_ptr(buf, i,
864                                             struct btrfs_file_extent_item);
865                         if (btrfs_file_extent_type(buf, fi) ==
866                             BTRFS_FILE_EXTENT_INLINE)
867                                 continue;
868                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
869                         if (disk_bytenr == 0)
870                                 continue;
871                         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
872                                     btrfs_file_extent_disk_num_bytes(buf, fi),
873                                     root->root_key.objectid, trans->transid,
874                                     key.objectid, key.offset);
875                         if (ret) {
876                                 faili = i;
877                                 goto fail;
878                         }
879                 } else {
880                         bytenr = btrfs_node_blockptr(buf, i);
881                         btrfs_node_key_to_cpu(buf, &key, i);
882                         ret = btrfs_inc_extent_ref(trans, root, bytenr,
883                                            btrfs_level_size(root, level - 1),
884                                            root->root_key.objectid,
885                                            trans->transid,
886                                            level - 1, key.objectid);
887                         if (ret) {
888                                 faili = i;
889                                 goto fail;
890                         }
891                 }
892         }
893         return 0;
894 fail:
895         WARN_ON(1);
896 #if 0
897         for (i =0; i < faili; i++) {
898                 if (level == 0) {
899                         u64 disk_bytenr;
900                         btrfs_item_key_to_cpu(buf, &key, i);
901                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
902                                 continue;
903                         fi = btrfs_item_ptr(buf, i,
904                                             struct btrfs_file_extent_item);
905                         if (btrfs_file_extent_type(buf, fi) ==
906                             BTRFS_FILE_EXTENT_INLINE)
907                                 continue;
908                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
909                         if (disk_bytenr == 0)
910                                 continue;
911                         err = btrfs_free_extent(trans, root, disk_bytenr,
912                                     btrfs_file_extent_disk_num_bytes(buf,
913                                                                       fi), 0);
914                         BUG_ON(err);
915                 } else {
916                         bytenr = btrfs_node_blockptr(buf, i);
917                         err = btrfs_free_extent(trans, root, bytenr,
918                                         btrfs_level_size(root, level - 1), 0);
919                         BUG_ON(err);
920                 }
921         }
922 #endif
923         return ret;
924 }
925
926 static int write_one_cache_group(struct btrfs_trans_handle *trans,
927                                  struct btrfs_root *root,
928                                  struct btrfs_path *path,
929                                  struct btrfs_block_group_cache *cache)
930 {
931         int ret;
932         int pending_ret;
933         struct btrfs_root *extent_root = root->fs_info->extent_root;
934         unsigned long bi;
935         struct extent_buffer *leaf;
936
937         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
938         if (ret < 0)
939                 goto fail;
940         BUG_ON(ret);
941
942         leaf = path->nodes[0];
943         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
944         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
945         btrfs_mark_buffer_dirty(leaf);
946         btrfs_release_path(extent_root, path);
947 fail:
948         finish_current_insert(trans, extent_root);
949         pending_ret = del_pending_extents(trans, extent_root);
950         if (ret)
951                 return ret;
952         if (pending_ret)
953                 return pending_ret;
954         return 0;
955
956 }
957
958 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
959                                    struct btrfs_root *root)
960 {
961         struct extent_io_tree *block_group_cache;
962         struct btrfs_block_group_cache *cache;
963         int ret;
964         int err = 0;
965         int werr = 0;
966         struct btrfs_path *path;
967         u64 last = 0;
968         u64 start;
969         u64 end;
970         u64 ptr;
971
972         block_group_cache = &root->fs_info->block_group_cache;
973         path = btrfs_alloc_path();
974         if (!path)
975                 return -ENOMEM;
976
977         while(1) {
978                 ret = find_first_extent_bit(block_group_cache, last,
979                                             &start, &end, BLOCK_GROUP_DIRTY);
980                 if (ret)
981                         break;
982
983                 last = end + 1;
984                 ret = get_state_private(block_group_cache, start, &ptr);
985                 if (ret)
986                         break;
987
988                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
989                 err = write_one_cache_group(trans, root,
990                                             path, cache);
991                 /*
992                  * if we fail to write the cache group, we want
993                  * to keep it marked dirty in hopes that a later
994                  * write will work
995                  */
996                 if (err) {
997                         werr = err;
998                         continue;
999                 }
1000                 clear_extent_bits(block_group_cache, start, end,
1001                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
1002         }
1003         btrfs_free_path(path);
1004         return werr;
1005 }
1006
1007 static int update_block_group(struct btrfs_trans_handle *trans,
1008                               struct btrfs_root *root,
1009                               u64 bytenr, u64 num_bytes, int alloc,
1010                               int mark_free, int data)
1011 {
1012         struct btrfs_block_group_cache *cache;
1013         struct btrfs_fs_info *info = root->fs_info;
1014         u64 total = num_bytes;
1015         u64 old_val;
1016         u64 byte_in_group;
1017         u64 start;
1018         u64 end;
1019
1020         while(total) {
1021                 cache = btrfs_lookup_block_group(info, bytenr);
1022                 if (!cache) {
1023                         return -1;
1024                 }
1025                 byte_in_group = bytenr - cache->key.objectid;
1026                 WARN_ON(byte_in_group > cache->key.offset);
1027                 start = cache->key.objectid;
1028                 end = start + cache->key.offset - 1;
1029                 set_extent_bits(&info->block_group_cache, start, end,
1030                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
1031
1032                 old_val = btrfs_block_group_used(&cache->item);
1033                 num_bytes = min(total, cache->key.offset - byte_in_group);
1034                 if (alloc) {
1035                         if (cache->data != data &&
1036                             old_val < (cache->key.offset >> 1)) {
1037                                 int bit_to_clear;
1038                                 int bit_to_set;
1039                                 cache->data = data;
1040                                 if (data) {
1041                                         bit_to_clear = BLOCK_GROUP_METADATA;
1042                                         bit_to_set = BLOCK_GROUP_DATA;
1043                                         cache->item.flags &=
1044                                                 ~BTRFS_BLOCK_GROUP_MIXED;
1045                                         cache->item.flags |=
1046                                                 BTRFS_BLOCK_GROUP_DATA;
1047                                 } else {
1048                                         bit_to_clear = BLOCK_GROUP_DATA;
1049                                         bit_to_set = BLOCK_GROUP_METADATA;
1050                                         cache->item.flags &=
1051                                                 ~BTRFS_BLOCK_GROUP_MIXED;
1052                                         cache->item.flags &=
1053                                                 ~BTRFS_BLOCK_GROUP_DATA;
1054                                 }
1055                                 clear_extent_bits(&info->block_group_cache,
1056                                                   start, end, bit_to_clear,
1057                                                   GFP_NOFS);
1058                                 set_extent_bits(&info->block_group_cache,
1059                                                 start, end, bit_to_set,
1060                                                 GFP_NOFS);
1061                         } else if (cache->data != data &&
1062                                    cache->data != BTRFS_BLOCK_GROUP_MIXED) {
1063                                 cache->data = BTRFS_BLOCK_GROUP_MIXED;
1064                                 set_extent_bits(&info->block_group_cache,
1065                                                 start, end,
1066                                                 BLOCK_GROUP_DATA |
1067                                                 BLOCK_GROUP_METADATA,
1068                                                 GFP_NOFS);
1069                         }
1070                         old_val += num_bytes;
1071                 } else {
1072                         old_val -= num_bytes;
1073                         if (mark_free) {
1074                                 set_extent_dirty(&info->free_space_cache,
1075                                                  bytenr, bytenr + num_bytes - 1,
1076                                                  GFP_NOFS);
1077                         }
1078                 }
1079                 btrfs_set_block_group_used(&cache->item, old_val);
1080                 total -= num_bytes;
1081                 bytenr += num_bytes;
1082         }
1083         return 0;
1084 }
1085 static int update_pinned_extents(struct btrfs_root *root,
1086                                 u64 bytenr, u64 num, int pin)
1087 {
1088         u64 len;
1089         struct btrfs_block_group_cache *cache;
1090         struct btrfs_fs_info *fs_info = root->fs_info;
1091
1092         if (pin) {
1093                 set_extent_dirty(&fs_info->pinned_extents,
1094                                 bytenr, bytenr + num - 1, GFP_NOFS);
1095         } else {
1096                 clear_extent_dirty(&fs_info->pinned_extents,
1097                                 bytenr, bytenr + num - 1, GFP_NOFS);
1098         }
1099         while (num > 0) {
1100                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1101                 WARN_ON(!cache);
1102                 len = min(num, cache->key.offset -
1103                           (bytenr - cache->key.objectid));
1104                 if (pin) {
1105                         cache->pinned += len;
1106                         fs_info->total_pinned += len;
1107                 } else {
1108                         cache->pinned -= len;
1109                         fs_info->total_pinned -= len;
1110                 }
1111                 bytenr += len;
1112                 num -= len;
1113         }
1114         return 0;
1115 }
1116
1117 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1118 {
1119         u64 last = 0;
1120         u64 start;
1121         u64 end;
1122         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1123         int ret;
1124
1125         while(1) {
1126                 ret = find_first_extent_bit(pinned_extents, last,
1127                                             &start, &end, EXTENT_DIRTY);
1128                 if (ret)
1129                         break;
1130                 set_extent_dirty(copy, start, end, GFP_NOFS);
1131                 last = end + 1;
1132         }
1133         return 0;
1134 }
1135
1136 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1137                                struct btrfs_root *root,
1138                                struct extent_io_tree *unpin)
1139 {
1140         u64 start;
1141         u64 end;
1142         int ret;
1143         struct extent_io_tree *free_space_cache;
1144         free_space_cache = &root->fs_info->free_space_cache;
1145
1146         while(1) {
1147                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1148                                             EXTENT_DIRTY);
1149                 if (ret)
1150                         break;
1151                 update_pinned_extents(root, start, end + 1 - start, 0);
1152                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1153                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1154         }
1155         return 0;
1156 }
1157
1158 static int finish_current_insert(struct btrfs_trans_handle *trans,
1159                                  struct btrfs_root *extent_root)
1160 {
1161         u64 start;
1162         u64 end;
1163         struct btrfs_fs_info *info = extent_root->fs_info;
1164         struct extent_buffer *eb;
1165         struct btrfs_path *path;
1166         struct btrfs_key ins;
1167         struct btrfs_disk_key first;
1168         struct btrfs_extent_item extent_item;
1169         int ret;
1170         int level;
1171         int err = 0;
1172
1173         btrfs_set_stack_extent_refs(&extent_item, 1);
1174         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1175         path = btrfs_alloc_path();
1176
1177         while(1) {
1178                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1179                                             &end, EXTENT_LOCKED);
1180                 if (ret)
1181                         break;
1182
1183                 ins.objectid = start;
1184                 ins.offset = end + 1 - start;
1185                 err = btrfs_insert_item(trans, extent_root, &ins,
1186                                         &extent_item, sizeof(extent_item));
1187                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1188                                   GFP_NOFS);
1189                 eb = read_tree_block(extent_root, ins.objectid, ins.offset);
1190                 level = btrfs_header_level(eb);
1191                 if (level == 0) {
1192                         btrfs_item_key(eb, &first, 0);
1193                 } else {
1194                         btrfs_node_key(eb, &first, 0);
1195                 }
1196                 err = btrfs_insert_extent_backref(trans, extent_root, path,
1197                                           start, extent_root->root_key.objectid,
1198                                           0, level,
1199                                           btrfs_disk_key_objectid(&first));
1200                 BUG_ON(err);
1201                 free_extent_buffer(eb);
1202         }
1203         btrfs_free_path(path);
1204         return 0;
1205 }
1206
1207 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1208                           int pending)
1209 {
1210         int err = 0;
1211         struct extent_buffer *buf;
1212
1213         if (!pending) {
1214                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1215                 if (buf) {
1216                         if (btrfs_buffer_uptodate(buf)) {
1217                                 u64 transid =
1218                                     root->fs_info->running_transaction->transid;
1219                                 u64 header_transid =
1220                                         btrfs_header_generation(buf);
1221                                 if (header_transid == transid) {
1222                                         clean_tree_block(NULL, root, buf);
1223                                         free_extent_buffer(buf);
1224                                         return 1;
1225                                 }
1226                         }
1227                         free_extent_buffer(buf);
1228                 }
1229                 update_pinned_extents(root, bytenr, num_bytes, 1);
1230         } else {
1231                 set_extent_bits(&root->fs_info->pending_del,
1232                                 bytenr, bytenr + num_bytes - 1,
1233                                 EXTENT_LOCKED, GFP_NOFS);
1234         }
1235         BUG_ON(err < 0);
1236         return 0;
1237 }
1238
1239 /*
1240  * remove an extent from the root, returns 0 on success
1241  */
1242 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1243                          *root, u64 bytenr, u64 num_bytes,
1244                          u64 root_objectid, u64 ref_generation,
1245                          u64 owner_objectid, u64 owner_offset, int pin,
1246                          int mark_free)
1247 {
1248         struct btrfs_path *path;
1249         struct btrfs_key key;
1250         struct btrfs_fs_info *info = root->fs_info;
1251         struct btrfs_root *extent_root = info->extent_root;
1252         struct extent_buffer *leaf;
1253         int ret;
1254         int extent_slot = 0;
1255         int found_extent = 0;
1256         int num_to_del = 1;
1257         struct btrfs_extent_item *ei;
1258         u32 refs;
1259
1260         key.objectid = bytenr;
1261         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1262         key.offset = num_bytes;
1263         path = btrfs_alloc_path();
1264         if (!path)
1265                 return -ENOMEM;
1266
1267         path->reada = 0;
1268         ret = lookup_extent_backref(trans, extent_root, path,
1269                                     bytenr, root_objectid,
1270                                     ref_generation,
1271                                     owner_objectid, owner_offset, 1);
1272         if (ret == 0) {
1273                 struct btrfs_key found_key;
1274                 extent_slot = path->slots[0];
1275                 while(extent_slot > 0) {
1276                         extent_slot--;
1277                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1278                                               extent_slot);
1279                         if (found_key.objectid != bytenr)
1280                                 break;
1281                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1282                             found_key.offset == num_bytes) {
1283                                 found_extent = 1;
1284                                 break;
1285                         }
1286                         if (path->slots[0] - extent_slot > 5)
1287                                 break;
1288                 }
1289                 if (!found_extent)
1290                         ret = btrfs_del_item(trans, extent_root, path);
1291         } else {
1292                 btrfs_print_leaf(extent_root, path->nodes[0]);
1293                 WARN_ON(1);
1294                 printk("Unable to find ref byte nr %Lu root %Lu "
1295                        " gen %Lu owner %Lu offset %Lu\n", bytenr,
1296                        root_objectid, ref_generation, owner_objectid,
1297                        owner_offset);
1298         }
1299         if (!found_extent) {
1300                 btrfs_release_path(extent_root, path);
1301                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1302                 if (ret < 0)
1303                         return ret;
1304                 BUG_ON(ret);
1305                 extent_slot = path->slots[0];
1306         }
1307
1308         leaf = path->nodes[0];
1309         ei = btrfs_item_ptr(leaf, extent_slot,
1310                             struct btrfs_extent_item);
1311         refs = btrfs_extent_refs(leaf, ei);
1312         BUG_ON(refs == 0);
1313         refs -= 1;
1314         btrfs_set_extent_refs(leaf, ei, refs);
1315
1316         btrfs_mark_buffer_dirty(leaf);
1317
1318         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1319                 /* if the back ref and the extent are next to each other
1320                  * they get deleted below in one shot
1321                  */
1322                 path->slots[0] = extent_slot;
1323                 num_to_del = 2;
1324         } else if (found_extent) {
1325                 /* otherwise delete the extent back ref */
1326                 ret = btrfs_del_item(trans, extent_root, path);
1327                 BUG_ON(ret);
1328                 /* if refs are 0, we need to setup the path for deletion */
1329                 if (refs == 0) {
1330                         btrfs_release_path(extent_root, path);
1331                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1332                                                 -1, 1);
1333                         if (ret < 0)
1334                                 return ret;
1335                         BUG_ON(ret);
1336                 }
1337         }
1338
1339         if (refs == 0) {
1340                 u64 super_used;
1341                 u64 root_used;
1342
1343                 if (pin) {
1344                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1345                         if (ret > 0)
1346                                 mark_free = 1;
1347                         BUG_ON(ret < 0);
1348                 }
1349
1350                 /* block accounting for super block */
1351                 super_used = btrfs_super_bytes_used(&info->super_copy);
1352                 btrfs_set_super_bytes_used(&info->super_copy,
1353                                            super_used - num_bytes);
1354
1355                 /* block accounting for root item */
1356                 root_used = btrfs_root_used(&root->root_item);
1357                 btrfs_set_root_used(&root->root_item,
1358                                            root_used - num_bytes);
1359                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1360                                       num_to_del);
1361                 if (ret) {
1362                         return ret;
1363                 }
1364                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1365                                          mark_free, 0);
1366                 BUG_ON(ret);
1367         }
1368         btrfs_free_path(path);
1369         finish_current_insert(trans, extent_root);
1370         return ret;
1371 }
1372
1373 /*
1374  * find all the blocks marked as pending in the radix tree and remove
1375  * them from the extent map
1376  */
1377 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1378                                btrfs_root *extent_root)
1379 {
1380         int ret;
1381         int err = 0;
1382         u64 start;
1383         u64 end;
1384         struct extent_io_tree *pending_del;
1385         struct extent_io_tree *pinned_extents;
1386
1387         pending_del = &extent_root->fs_info->pending_del;
1388         pinned_extents = &extent_root->fs_info->pinned_extents;
1389
1390         while(1) {
1391                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1392                                             EXTENT_LOCKED);
1393                 if (ret)
1394                         break;
1395                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1396                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1397                                   GFP_NOFS);
1398                 ret = __free_extent(trans, extent_root,
1399                                      start, end + 1 - start,
1400                                      extent_root->root_key.objectid,
1401                                      0, 0, 0, 0, 0);
1402                 if (ret)
1403                         err = ret;
1404         }
1405         return err;
1406 }
1407
1408 /*
1409  * remove an extent from the root, returns 0 on success
1410  */
1411 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1412                       *root, u64 bytenr, u64 num_bytes,
1413                       u64 root_objectid, u64 ref_generation,
1414                       u64 owner_objectid, u64 owner_offset, int pin)
1415 {
1416         struct btrfs_root *extent_root = root->fs_info->extent_root;
1417         int pending_ret;
1418         int ret;
1419
1420         WARN_ON(num_bytes < root->sectorsize);
1421         if (!root->ref_cows)
1422                 ref_generation = 0;
1423
1424         if (root == extent_root) {
1425                 pin_down_bytes(root, bytenr, num_bytes, 1);
1426                 return 0;
1427         }
1428         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1429                             ref_generation, owner_objectid, owner_offset,
1430                             pin, pin == 0);
1431         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1432         return ret ? ret : pending_ret;
1433 }
1434
1435 static u64 stripe_align(struct btrfs_root *root, u64 val)
1436 {
1437         u64 mask = ((u64)root->stripesize - 1);
1438         u64 ret = (val + mask) & ~mask;
1439         return ret;
1440 }
1441
1442 /*
1443  * walks the btree of allocated extents and find a hole of a given size.
1444  * The key ins is changed to record the hole:
1445  * ins->objectid == block start
1446  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1447  * ins->offset == number of blocks
1448  * Any available blocks before search_start are skipped.
1449  */
1450 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1451                                      struct btrfs_root *orig_root,
1452                                      u64 num_bytes, u64 empty_size,
1453                                      u64 search_start, u64 search_end,
1454                                      u64 hint_byte, struct btrfs_key *ins,
1455                                      u64 exclude_start, u64 exclude_nr,
1456                                      int data)
1457 {
1458         struct btrfs_path *path;
1459         struct btrfs_key key;
1460         u64 hole_size = 0;
1461         u64 aligned;
1462         int ret;
1463         int slot = 0;
1464         u64 last_byte = 0;
1465         u64 *last_ptr = NULL;
1466         u64 orig_search_start = search_start;
1467         int start_found;
1468         struct extent_buffer *l;
1469         struct btrfs_root * root = orig_root->fs_info->extent_root;
1470         struct btrfs_fs_info *info = root->fs_info;
1471         u64 total_needed = num_bytes;
1472         int level;
1473         struct btrfs_block_group_cache *block_group;
1474         int full_scan = 0;
1475         int wrapped = 0;
1476         int empty_cluster;
1477         u64 cached_start;
1478
1479         WARN_ON(num_bytes < root->sectorsize);
1480         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1481
1482         level = btrfs_header_level(root->node);
1483
1484         if (num_bytes >= 32 * 1024 * 1024 && hint_byte) {
1485                 data = BTRFS_BLOCK_GROUP_MIXED;
1486         }
1487
1488         if (!data) {
1489                 last_ptr = &root->fs_info->last_alloc;
1490                 empty_cluster = 128 * 1024;
1491         }
1492
1493         if (data && btrfs_test_opt(root, SSD)) {
1494                 last_ptr = &root->fs_info->last_data_alloc;
1495                 empty_cluster = 2 * 1024 * 1024;
1496         }
1497
1498         if (last_ptr) {
1499                 if (*last_ptr)
1500                         hint_byte = *last_ptr;
1501                 else {
1502                         hint_byte = hint_byte &
1503                                 ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
1504                         empty_size += empty_cluster;
1505                 }
1506                 search_start = max(search_start, hint_byte);
1507         }
1508
1509         search_end = min(search_end,
1510                          btrfs_super_total_bytes(&info->super_copy));
1511         if (hint_byte) {
1512                 block_group = btrfs_lookup_block_group(info, hint_byte);
1513                 if (!block_group)
1514                         hint_byte = search_start;
1515                 block_group = btrfs_find_block_group(root, block_group,
1516                                                      hint_byte, data, 1);
1517         } else {
1518                 block_group = btrfs_find_block_group(root,
1519                                                      trans->block_group,
1520                                                      search_start, data, 1);
1521         }
1522
1523         total_needed += empty_size;
1524         path = btrfs_alloc_path();
1525 check_failed:
1526         if (!block_group) {
1527                 block_group = btrfs_lookup_block_group(info, search_start);
1528                 if (!block_group)
1529                         block_group = btrfs_lookup_block_group(info,
1530                                                        orig_search_start);
1531         }
1532         search_start = find_search_start(root, &block_group, search_start,
1533                                          total_needed, data);
1534
1535         if (last_ptr && *last_ptr && search_start != *last_ptr) {
1536                 *last_ptr = 0;
1537                 if (!empty_size) {
1538                         empty_size += empty_cluster;
1539                         total_needed += empty_size;
1540                 }
1541                 search_start = find_search_start(root, &block_group,
1542                                                  search_start, total_needed,
1543                                                  data);
1544         }
1545
1546         search_start = stripe_align(root, search_start);
1547         cached_start = search_start;
1548         btrfs_init_path(path);
1549         ins->objectid = search_start;
1550         ins->offset = 0;
1551         start_found = 0;
1552         path->reada = 2;
1553
1554         ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1555         if (ret < 0)
1556                 goto error;
1557         ret = find_previous_extent(root, path);
1558         if (ret < 0)
1559                 goto error;
1560         l = path->nodes[0];
1561         btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1562         while (1) {
1563                 l = path->nodes[0];
1564                 slot = path->slots[0];
1565                 if (slot >= btrfs_header_nritems(l)) {
1566                         ret = btrfs_next_leaf(root, path);
1567                         if (ret == 0)
1568                                 continue;
1569                         if (ret < 0)
1570                                 goto error;
1571
1572                         search_start = max(search_start,
1573                                            block_group->key.objectid);
1574                         if (!start_found) {
1575                                 aligned = stripe_align(root, search_start);
1576                                 ins->objectid = aligned;
1577                                 if (aligned >= search_end) {
1578                                         ret = -ENOSPC;
1579                                         goto error;
1580                                 }
1581                                 ins->offset = search_end - aligned;
1582                                 start_found = 1;
1583                                 goto check_pending;
1584                         }
1585                         ins->objectid = stripe_align(root,
1586                                                      last_byte > search_start ?
1587                                                      last_byte : search_start);
1588                         if (search_end <= ins->objectid) {
1589                                 ret = -ENOSPC;
1590                                 goto error;
1591                         }
1592                         ins->offset = search_end - ins->objectid;
1593                         BUG_ON(ins->objectid >= search_end);
1594                         goto check_pending;
1595                 }
1596                 btrfs_item_key_to_cpu(l, &key, slot);
1597
1598                 if (key.objectid >= search_start && key.objectid > last_byte &&
1599                     start_found) {
1600                         if (last_byte < search_start)
1601                                 last_byte = search_start;
1602                         aligned = stripe_align(root, last_byte);
1603                         hole_size = key.objectid - aligned;
1604                         if (key.objectid > aligned && hole_size >= num_bytes) {
1605                                 ins->objectid = aligned;
1606                                 ins->offset = hole_size;
1607                                 goto check_pending;
1608                         }
1609                 }
1610                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
1611                         if (!start_found && btrfs_key_type(&key) ==
1612                             BTRFS_BLOCK_GROUP_ITEM_KEY) {
1613                                 last_byte = key.objectid;
1614                                 start_found = 1;
1615                         }
1616                         goto next;
1617                 }
1618
1619
1620                 start_found = 1;
1621                 last_byte = key.objectid + key.offset;
1622
1623                 if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
1624                     last_byte >= block_group->key.objectid +
1625                     block_group->key.offset) {
1626                         btrfs_release_path(root, path);
1627                         search_start = block_group->key.objectid +
1628                                 block_group->key.offset;
1629                         goto new_group;
1630                 }
1631 next:
1632                 path->slots[0]++;
1633                 cond_resched();
1634         }
1635 check_pending:
1636         /* we have to make sure we didn't find an extent that has already
1637          * been allocated by the map tree or the original allocation
1638          */
1639         btrfs_release_path(root, path);
1640         BUG_ON(ins->objectid < search_start);
1641
1642         if (ins->objectid + num_bytes >= search_end)
1643                 goto enospc;
1644         if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
1645             ins->objectid + num_bytes > block_group->
1646             key.objectid + block_group->key.offset) {
1647                 search_start = block_group->key.objectid +
1648                         block_group->key.offset;
1649                 goto new_group;
1650         }
1651         if (test_range_bit(&info->extent_ins, ins->objectid,
1652                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1653                 search_start = ins->objectid + num_bytes;
1654                 goto new_group;
1655         }
1656         if (test_range_bit(&info->pinned_extents, ins->objectid,
1657                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1658                 search_start = ins->objectid + num_bytes;
1659                 goto new_group;
1660         }
1661         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1662             ins->objectid < exclude_start + exclude_nr)) {
1663                 search_start = exclude_start + exclude_nr;
1664                 goto new_group;
1665         }
1666         if (!data) {
1667                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1668                 if (block_group)
1669                         trans->block_group = block_group;
1670         }
1671         ins->offset = num_bytes;
1672         btrfs_free_path(path);
1673         if (last_ptr) {
1674                 *last_ptr = ins->objectid + ins->offset;
1675                 if (*last_ptr ==
1676                     btrfs_super_total_bytes(&root->fs_info->super_copy)) {
1677                         *last_ptr = 0;
1678                 }
1679         }
1680         return 0;
1681
1682 new_group:
1683         if (search_start + num_bytes >= search_end) {
1684 enospc:
1685                 search_start = orig_search_start;
1686                 if (full_scan) {
1687                         ret = -ENOSPC;
1688                         goto error;
1689                 }
1690                 if (wrapped) {
1691                         if (!full_scan)
1692                                 total_needed -= empty_size;
1693                         full_scan = 1;
1694                         data = BTRFS_BLOCK_GROUP_MIXED;
1695                 } else
1696                         wrapped = 1;
1697         }
1698         block_group = btrfs_lookup_block_group(info, search_start);
1699         cond_resched();
1700         block_group = btrfs_find_block_group(root, block_group,
1701                                              search_start, data, 0);
1702         goto check_failed;
1703
1704 error:
1705         btrfs_release_path(root, path);
1706         btrfs_free_path(path);
1707         return ret;
1708 }
1709 /*
1710  * finds a free extent and does all the dirty work required for allocation
1711  * returns the key for the extent through ins, and a tree buffer for
1712  * the first block of the extent through buf.
1713  *
1714  * returns 0 if everything worked, non-zero otherwise.
1715  */
1716 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1717                        struct btrfs_root *root,
1718                        u64 num_bytes, u64 root_objectid, u64 ref_generation,
1719                        u64 owner, u64 owner_offset,
1720                        u64 empty_size, u64 hint_byte,
1721                        u64 search_end, struct btrfs_key *ins, int data)
1722 {
1723         int ret;
1724         int pending_ret;
1725         u64 super_used;
1726         u64 root_used;
1727         u64 search_start = 0;
1728         u64 new_hint;
1729         u32 sizes[2];
1730         struct btrfs_fs_info *info = root->fs_info;
1731         struct btrfs_root *extent_root = info->extent_root;
1732         struct btrfs_extent_item *extent_item;
1733         struct btrfs_extent_ref *ref;
1734         struct btrfs_path *path;
1735         struct btrfs_key keys[2];
1736
1737         new_hint = max(hint_byte, root->fs_info->alloc_start);
1738         if (new_hint < btrfs_super_total_bytes(&info->super_copy))
1739                 hint_byte = new_hint;
1740
1741         WARN_ON(num_bytes < root->sectorsize);
1742         ret = find_free_extent(trans, root, num_bytes, empty_size,
1743                                search_start, search_end, hint_byte, ins,
1744                                trans->alloc_exclude_start,
1745                                trans->alloc_exclude_nr, data);
1746         BUG_ON(ret);
1747         if (ret)
1748                 return ret;
1749
1750         /* block accounting for super block */
1751         super_used = btrfs_super_bytes_used(&info->super_copy);
1752         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1753
1754         /* block accounting for root item */
1755         root_used = btrfs_root_used(&root->root_item);
1756         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1757
1758         clear_extent_dirty(&root->fs_info->free_space_cache,
1759                            ins->objectid, ins->objectid + ins->offset - 1,
1760                            GFP_NOFS);
1761
1762         if (root == extent_root) {
1763                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1764                                 ins->objectid + ins->offset - 1,
1765                                 EXTENT_LOCKED, GFP_NOFS);
1766                 WARN_ON(data == 1);
1767                 goto update_block;
1768         }
1769
1770         WARN_ON(trans->alloc_exclude_nr);
1771         trans->alloc_exclude_start = ins->objectid;
1772         trans->alloc_exclude_nr = ins->offset;
1773
1774         memcpy(&keys[0], ins, sizeof(*ins));
1775         keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
1776                                          owner, owner_offset);
1777         keys[1].objectid = ins->objectid;
1778         keys[1].type = BTRFS_EXTENT_REF_KEY;
1779         sizes[0] = sizeof(*extent_item);
1780         sizes[1] = sizeof(*ref);
1781
1782         path = btrfs_alloc_path();
1783         BUG_ON(!path);
1784
1785         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
1786                                        sizes, 2);
1787
1788         BUG_ON(ret);
1789         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1790                                      struct btrfs_extent_item);
1791         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
1792         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1793                              struct btrfs_extent_ref);
1794
1795         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
1796         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
1797         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
1798         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
1799
1800         btrfs_mark_buffer_dirty(path->nodes[0]);
1801
1802         trans->alloc_exclude_start = 0;
1803         trans->alloc_exclude_nr = 0;
1804         btrfs_free_path(path);
1805         finish_current_insert(trans, extent_root);
1806         pending_ret = del_pending_extents(trans, extent_root);
1807
1808         if (ret) {
1809                 return ret;
1810         }
1811         if (pending_ret) {
1812                 return pending_ret;
1813         }
1814
1815 update_block:
1816         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1817                                  data);
1818         if (ret) {
1819                 printk("update block group failed for %Lu %Lu\n",
1820                        ins->objectid, ins->offset);
1821                 BUG();
1822         }
1823         return 0;
1824 }
1825
1826 /*
1827  * helper function to allocate a block for a given tree
1828  * returns the tree buffer or NULL.
1829  */
1830 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1831                                              struct btrfs_root *root,
1832                                              u32 blocksize,
1833                                              u64 root_objectid, u64 hint,
1834                                              u64 empty_size)
1835 {
1836         u64 ref_generation;
1837
1838         if (root->ref_cows)
1839                 ref_generation = trans->transid;
1840         else
1841                 ref_generation = 0;
1842
1843
1844         return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
1845                                         ref_generation, 0, 0, hint, empty_size);
1846 }
1847
1848 /*
1849  * helper function to allocate a block for a given tree
1850  * returns the tree buffer or NULL.
1851  */
1852 struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1853                                              struct btrfs_root *root,
1854                                              u32 blocksize,
1855                                              u64 root_objectid,
1856                                              u64 ref_generation,
1857                                              u64 first_objectid,
1858                                              int level,
1859                                              u64 hint,
1860                                              u64 empty_size)
1861 {
1862         struct btrfs_key ins;
1863         int ret;
1864         struct extent_buffer *buf;
1865
1866         ret = btrfs_alloc_extent(trans, root, blocksize,
1867                                  root_objectid, ref_generation,
1868                                  level, first_objectid, empty_size, hint,
1869                                  (u64)-1, &ins, 0);
1870         if (ret) {
1871                 BUG_ON(ret > 0);
1872                 return ERR_PTR(ret);
1873         }
1874         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1875         if (!buf) {
1876                 btrfs_free_extent(trans, root, ins.objectid, blocksize,
1877                                   root->root_key.objectid, ref_generation,
1878                                   0, 0, 0);
1879                 return ERR_PTR(-ENOMEM);
1880         }
1881         btrfs_set_header_generation(buf, trans->transid);
1882         clean_tree_block(trans, root, buf);
1883         wait_on_tree_block_writeback(root, buf);
1884         btrfs_set_buffer_uptodate(buf);
1885
1886         if (PageDirty(buf->first_page)) {
1887                 printk("page %lu dirty\n", buf->first_page->index);
1888                 WARN_ON(1);
1889         }
1890
1891         set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
1892                          buf->start + buf->len - 1, GFP_NOFS);
1893         set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->io_tree,
1894                         buf->start, buf->start + buf->len - 1,
1895                         EXTENT_CSUM, GFP_NOFS);
1896         buf->flags |= EXTENT_CSUM;
1897         if (!btrfs_test_opt(root, SSD))
1898                 btrfs_set_buffer_defrag(buf);
1899         trans->blocks_used++;
1900         return buf;
1901 }
1902
1903 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
1904                                   struct btrfs_root *root,
1905                                   struct extent_buffer *leaf)
1906 {
1907         u64 leaf_owner;
1908         u64 leaf_generation;
1909         struct btrfs_key key;
1910         struct btrfs_file_extent_item *fi;
1911         int i;
1912         int nritems;
1913         int ret;
1914
1915         BUG_ON(!btrfs_is_leaf(leaf));
1916         nritems = btrfs_header_nritems(leaf);
1917         leaf_owner = btrfs_header_owner(leaf);
1918         leaf_generation = btrfs_header_generation(leaf);
1919
1920         for (i = 0; i < nritems; i++) {
1921                 u64 disk_bytenr;
1922
1923                 btrfs_item_key_to_cpu(leaf, &key, i);
1924                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1925                         continue;
1926                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1927                 if (btrfs_file_extent_type(leaf, fi) ==
1928                     BTRFS_FILE_EXTENT_INLINE)
1929                         continue;
1930                 /*
1931                  * FIXME make sure to insert a trans record that
1932                  * repeats the snapshot del on crash
1933                  */
1934                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1935                 if (disk_bytenr == 0)
1936                         continue;
1937                 ret = btrfs_free_extent(trans, root, disk_bytenr,
1938                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
1939                                 leaf_owner, leaf_generation,
1940                                 key.objectid, key.offset, 0);
1941                 BUG_ON(ret);
1942         }
1943         return 0;
1944 }
1945
1946 static void noinline reada_walk_down(struct btrfs_root *root,
1947                                      struct extent_buffer *node,
1948                                      int slot)
1949 {
1950         u64 bytenr;
1951         u64 last = 0;
1952         u32 nritems;
1953         u32 refs;
1954         u32 blocksize;
1955         int ret;
1956         int i;
1957         int level;
1958         int skipped = 0;
1959
1960         nritems = btrfs_header_nritems(node);
1961         level = btrfs_header_level(node);
1962         if (level)
1963                 return;
1964
1965         for (i = slot; i < nritems && skipped < 32; i++) {
1966                 bytenr = btrfs_node_blockptr(node, i);
1967                 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
1968                              (last > bytenr && last - bytenr > 32 * 1024))) {
1969                         skipped++;
1970                         continue;
1971                 }
1972                 blocksize = btrfs_level_size(root, level - 1);
1973                 if (i != slot) {
1974                         ret = lookup_extent_ref(NULL, root, bytenr,
1975                                                 blocksize, &refs);
1976                         BUG_ON(ret);
1977                         if (refs != 1) {
1978                                 skipped++;
1979                                 continue;
1980                         }
1981                 }
1982                 mutex_unlock(&root->fs_info->fs_mutex);
1983                 ret = readahead_tree_block(root, bytenr, blocksize);
1984                 last = bytenr + blocksize;
1985                 cond_resched();
1986                 mutex_lock(&root->fs_info->fs_mutex);
1987                 if (ret)
1988                         break;
1989         }
1990 }
1991
1992 /*
1993  * helper function for drop_snapshot, this walks down the tree dropping ref
1994  * counts as it goes.
1995  */
1996 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
1997                                    struct btrfs_root *root,
1998                                    struct btrfs_path *path, int *level)
1999 {
2000         u64 root_owner;
2001         u64 root_gen;
2002         u64 bytenr;
2003         struct extent_buffer *next;
2004         struct extent_buffer *cur;
2005         struct extent_buffer *parent;
2006         u32 blocksize;
2007         int ret;
2008         u32 refs;
2009
2010         WARN_ON(*level < 0);
2011         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2012         ret = lookup_extent_ref(trans, root,
2013                                 path->nodes[*level]->start,
2014                                 path->nodes[*level]->len, &refs);
2015         BUG_ON(ret);
2016         if (refs > 1)
2017                 goto out;
2018
2019         /*
2020          * walk down to the last node level and free all the leaves
2021          */
2022         while(*level >= 0) {
2023                 WARN_ON(*level < 0);
2024                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2025                 cur = path->nodes[*level];
2026
2027                 if (btrfs_header_level(cur) != *level)
2028                         WARN_ON(1);
2029
2030                 if (path->slots[*level] >=
2031                     btrfs_header_nritems(cur))
2032                         break;
2033                 if (*level == 0) {
2034                         ret = drop_leaf_ref(trans, root, cur);
2035                         BUG_ON(ret);
2036                         break;
2037                 }
2038                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2039                 blocksize = btrfs_level_size(root, *level - 1);
2040                 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
2041                 BUG_ON(ret);
2042                 if (refs != 1) {
2043                         parent = path->nodes[*level];
2044                         root_owner = btrfs_header_owner(parent);
2045                         root_gen = btrfs_header_generation(parent);
2046                         path->slots[*level]++;
2047                         ret = btrfs_free_extent(trans, root, bytenr,
2048                                                 blocksize, root_owner,
2049                                                 root_gen, 0, 0, 1);
2050                         BUG_ON(ret);
2051                         continue;
2052                 }
2053                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2054                 if (!next || !btrfs_buffer_uptodate(next)) {
2055                         free_extent_buffer(next);
2056                         reada_walk_down(root, cur, path->slots[*level]);
2057                         mutex_unlock(&root->fs_info->fs_mutex);
2058                         next = read_tree_block(root, bytenr, blocksize);
2059                         mutex_lock(&root->fs_info->fs_mutex);
2060
2061                         /* we dropped the lock, check one more time */
2062                         ret = lookup_extent_ref(trans, root, bytenr,
2063                                                 blocksize, &refs);
2064                         BUG_ON(ret);
2065                         if (refs != 1) {
2066                                 parent = path->nodes[*level];
2067                                 root_owner = btrfs_header_owner(parent);
2068                                 root_gen = btrfs_header_generation(parent);
2069
2070                                 path->slots[*level]++;
2071                                 free_extent_buffer(next);
2072                                 ret = btrfs_free_extent(trans, root, bytenr,
2073                                                         blocksize,
2074                                                         root_owner,
2075                                                         root_gen, 0, 0, 1);
2076                                 BUG_ON(ret);
2077                                 continue;
2078                         }
2079                 }
2080                 WARN_ON(*level <= 0);
2081                 if (path->nodes[*level-1])
2082                         free_extent_buffer(path->nodes[*level-1]);
2083                 path->nodes[*level-1] = next;
2084                 *level = btrfs_header_level(next);
2085                 path->slots[*level] = 0;
2086         }
2087 out:
2088         WARN_ON(*level < 0);
2089         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2090
2091         if (path->nodes[*level] == root->node) {
2092                 root_owner = root->root_key.objectid;
2093                 parent = path->nodes[*level];
2094         } else {
2095                 parent = path->nodes[*level + 1];
2096                 root_owner = btrfs_header_owner(parent);
2097         }
2098
2099         root_gen = btrfs_header_generation(parent);
2100         ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
2101                                 path->nodes[*level]->len,
2102                                 root_owner, root_gen, 0, 0, 1);
2103         free_extent_buffer(path->nodes[*level]);
2104         path->nodes[*level] = NULL;
2105         *level += 1;
2106         BUG_ON(ret);
2107         return 0;
2108 }
2109
2110 /*
2111  * helper for dropping snapshots.  This walks back up the tree in the path
2112  * to find the first node higher up where we haven't yet gone through
2113  * all the slots
2114  */
2115 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2116                                  struct btrfs_root *root,
2117                                  struct btrfs_path *path, int *level)
2118 {
2119         u64 root_owner;
2120         u64 root_gen;
2121         struct btrfs_root_item *root_item = &root->root_item;
2122         int i;
2123         int slot;
2124         int ret;
2125
2126         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2127                 slot = path->slots[i];
2128                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2129                         struct extent_buffer *node;
2130                         struct btrfs_disk_key disk_key;
2131                         node = path->nodes[i];
2132                         path->slots[i]++;
2133                         *level = i;
2134                         WARN_ON(*level == 0);
2135                         btrfs_node_key(node, &disk_key, path->slots[i]);
2136                         memcpy(&root_item->drop_progress,
2137                                &disk_key, sizeof(disk_key));
2138                         root_item->drop_level = i;
2139                         return 0;
2140                 } else {
2141                         if (path->nodes[*level] == root->node) {
2142                                 root_owner = root->root_key.objectid;
2143                                 root_gen =
2144                                    btrfs_header_generation(path->nodes[*level]);
2145                         } else {
2146                                 struct extent_buffer *node;
2147                                 node = path->nodes[*level + 1];
2148                                 root_owner = btrfs_header_owner(node);
2149                                 root_gen = btrfs_header_generation(node);
2150                         }
2151                         ret = btrfs_free_extent(trans, root,
2152                                                 path->nodes[*level]->start,
2153                                                 path->nodes[*level]->len,
2154                                                 root_owner, root_gen, 0, 0, 1);
2155                         BUG_ON(ret);
2156                         free_extent_buffer(path->nodes[*level]);
2157                         path->nodes[*level] = NULL;
2158                         *level = i + 1;
2159                 }
2160         }
2161         return 1;
2162 }
2163
2164 /*
2165  * drop the reference count on the tree rooted at 'snap'.  This traverses
2166  * the tree freeing any blocks that have a ref count of zero after being
2167  * decremented.
2168  */
2169 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2170                         *root)
2171 {
2172         int ret = 0;
2173         int wret;
2174         int level;
2175         struct btrfs_path *path;
2176         int i;
2177         int orig_level;
2178         struct btrfs_root_item *root_item = &root->root_item;
2179
2180         path = btrfs_alloc_path();
2181         BUG_ON(!path);
2182
2183         level = btrfs_header_level(root->node);
2184         orig_level = level;
2185         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2186                 path->nodes[level] = root->node;
2187                 extent_buffer_get(root->node);
2188                 path->slots[level] = 0;
2189         } else {
2190                 struct btrfs_key key;
2191                 struct btrfs_disk_key found_key;
2192                 struct extent_buffer *node;
2193
2194                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2195                 level = root_item->drop_level;
2196                 path->lowest_level = level;
2197                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2198                 if (wret < 0) {
2199                         ret = wret;
2200                         goto out;
2201                 }
2202                 node = path->nodes[level];
2203                 btrfs_node_key(node, &found_key, path->slots[level]);
2204                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2205                                sizeof(found_key)));
2206         }
2207         while(1) {
2208                 wret = walk_down_tree(trans, root, path, &level);
2209                 if (wret > 0)
2210                         break;
2211                 if (wret < 0)
2212                         ret = wret;
2213
2214                 wret = walk_up_tree(trans, root, path, &level);
2215                 if (wret > 0)
2216                         break;
2217                 if (wret < 0)
2218                         ret = wret;
2219                 ret = -EAGAIN;
2220                 break;
2221         }
2222         for (i = 0; i <= orig_level; i++) {
2223                 if (path->nodes[i]) {
2224                         free_extent_buffer(path->nodes[i]);
2225                         path->nodes[i] = NULL;
2226                 }
2227         }
2228 out:
2229         btrfs_free_path(path);
2230         return ret;
2231 }
2232
2233 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2234 {
2235         u64 start;
2236         u64 end;
2237         u64 ptr;
2238         int ret;
2239         while(1) {
2240                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2241                                             &start, &end, (unsigned int)-1);
2242                 if (ret)
2243                         break;
2244                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2245                 if (!ret)
2246                         kfree((void *)(unsigned long)ptr);
2247                 clear_extent_bits(&info->block_group_cache, start,
2248                                   end, (unsigned int)-1, GFP_NOFS);
2249         }
2250         while(1) {
2251                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2252                                             &start, &end, EXTENT_DIRTY);
2253                 if (ret)
2254                         break;
2255                 clear_extent_dirty(&info->free_space_cache, start,
2256                                    end, GFP_NOFS);
2257         }
2258         return 0;
2259 }
2260
2261 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2262                                          u64 len)
2263 {
2264         u64 page_start;
2265         u64 page_end;
2266         u64 delalloc_start;
2267         u64 existing_delalloc;
2268         unsigned long last_index;
2269         unsigned long i;
2270         struct page *page;
2271         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2272         struct file_ra_state *ra;
2273
2274         ra = kzalloc(sizeof(*ra), GFP_NOFS);
2275
2276         mutex_lock(&inode->i_mutex);
2277         i = start >> PAGE_CACHE_SHIFT;
2278         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2279
2280         file_ra_state_init(ra, inode->i_mapping);
2281         btrfs_force_ra(inode->i_mapping, ra, NULL, i, last_index);
2282         kfree(ra);
2283
2284         for (; i <= last_index; i++) {
2285                 page = grab_cache_page(inode->i_mapping, i);
2286                 if (!page)
2287                         goto out_unlock;
2288                 if (!PageUptodate(page)) {
2289                         btrfs_readpage(NULL, page);
2290                         lock_page(page);
2291                         if (!PageUptodate(page)) {
2292                                 unlock_page(page);
2293                                 page_cache_release(page);
2294                                 goto out_unlock;
2295                         }
2296                 }
2297                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2298                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2299
2300                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2301
2302                 delalloc_start = page_start;
2303                 existing_delalloc = count_range_bits(io_tree,
2304                                              &delalloc_start, page_end,
2305                                              PAGE_CACHE_SIZE, EXTENT_DELALLOC);
2306
2307                 set_extent_delalloc(io_tree, page_start,
2308                                     page_end, GFP_NOFS);
2309
2310                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2311                 set_page_dirty(page);
2312                 unlock_page(page);
2313                 page_cache_release(page);
2314         }
2315
2316 out_unlock:
2317         mutex_unlock(&inode->i_mutex);
2318         return 0;
2319 }
2320
2321 /*
2322  * note, this releases the path
2323  */
2324 static int noinline relocate_one_reference(struct btrfs_root *extent_root,
2325                                   struct btrfs_path *path,
2326                                   struct btrfs_key *extent_key)
2327 {
2328         struct inode *inode;
2329         struct btrfs_root *found_root;
2330         struct btrfs_key *root_location;
2331         struct btrfs_extent_ref *ref;
2332         u64 ref_root;
2333         u64 ref_gen;
2334         u64 ref_objectid;
2335         u64 ref_offset;
2336         int ret;
2337
2338         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
2339                              struct btrfs_extent_ref);
2340         ref_root = btrfs_ref_root(path->nodes[0], ref);
2341         ref_gen = btrfs_ref_generation(path->nodes[0], ref);
2342         ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
2343         ref_offset = btrfs_ref_offset(path->nodes[0], ref);
2344         btrfs_release_path(extent_root, path);
2345
2346         root_location = kmalloc(sizeof(*root_location), GFP_NOFS);
2347         root_location->objectid = ref_root;
2348         if (ref_gen == 0)
2349                 root_location->offset = 0;
2350         else
2351                 root_location->offset = (u64)-1;
2352         root_location->type = BTRFS_ROOT_ITEM_KEY;
2353
2354         found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
2355                                                 root_location);
2356         BUG_ON(!found_root);
2357         kfree(root_location);
2358
2359         if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2360                 mutex_unlock(&extent_root->fs_info->fs_mutex);
2361                 inode = btrfs_iget_locked(extent_root->fs_info->sb,
2362                                           ref_objectid, found_root);
2363                 if (inode->i_state & I_NEW) {
2364                         /* the inode and parent dir are two different roots */
2365                         BTRFS_I(inode)->root = found_root;
2366                         BTRFS_I(inode)->location.objectid = ref_objectid;
2367                         BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
2368                         BTRFS_I(inode)->location.offset = 0;
2369                         btrfs_read_locked_inode(inode);
2370                         unlock_new_inode(inode);
2371
2372                 }
2373                 /* this can happen if the reference is not against
2374                  * the latest version of the tree root
2375                  */
2376                 if (is_bad_inode(inode)) {
2377                         mutex_lock(&extent_root->fs_info->fs_mutex);
2378                         goto out;
2379                 }
2380                 relocate_inode_pages(inode, ref_offset, extent_key->offset);
2381                 /* FIXME, data=ordered will help get rid of this */
2382                 filemap_fdatawrite(inode->i_mapping);
2383                 iput(inode);
2384                 mutex_lock(&extent_root->fs_info->fs_mutex);
2385         } else {
2386                 struct btrfs_trans_handle *trans;
2387                 struct btrfs_key found_key;
2388                 struct extent_buffer *eb;
2389                 int level;
2390                 int i;
2391
2392                 trans = btrfs_start_transaction(found_root, 1);
2393                 eb = read_tree_block(found_root, extent_key->objectid,
2394                                      extent_key->offset);
2395                 level = btrfs_header_level(eb);
2396
2397                 if (level == 0)
2398                         btrfs_item_key_to_cpu(eb, &found_key, 0);
2399                 else
2400                         btrfs_node_key_to_cpu(eb, &found_key, 0);
2401
2402                 free_extent_buffer(eb);
2403
2404                 path->lowest_level = level;
2405                 path->reada = 2;
2406                 ret = btrfs_search_slot(trans, found_root, &found_key, path,
2407                                         0, 1);
2408                 path->lowest_level = 0;
2409                 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2410                         if (!path->nodes[i])
2411                                 break;
2412                         free_extent_buffer(path->nodes[i]);
2413                         path->nodes[i] = NULL;
2414                 }
2415                 btrfs_release_path(found_root, path);
2416                 btrfs_end_transaction(trans, found_root);
2417         }
2418
2419 out:
2420         return 0;
2421 }
2422
2423 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
2424                                         struct btrfs_path *path,
2425                                         struct btrfs_key *extent_key)
2426 {
2427         struct btrfs_key key;
2428         struct btrfs_key found_key;
2429         struct extent_buffer *leaf;
2430         u32 nritems;
2431         u32 item_size;
2432         int ret = 0;
2433
2434         key.objectid = extent_key->objectid;
2435         key.type = BTRFS_EXTENT_REF_KEY;
2436         key.offset = 0;
2437
2438         while(1) {
2439                 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2440
2441                 if (ret < 0)
2442                         goto out;
2443
2444                 ret = 0;
2445                 leaf = path->nodes[0];
2446                 nritems = btrfs_header_nritems(leaf);
2447                 if (path->slots[0] == nritems)
2448                         goto out;
2449
2450                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2451                 if (found_key.objectid != extent_key->objectid)
2452                         break;
2453
2454                 if (found_key.type != BTRFS_EXTENT_REF_KEY)
2455                         break;
2456
2457                 key.offset = found_key.offset + 1;
2458                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2459
2460                 ret = relocate_one_reference(extent_root, path, extent_key);
2461                 if (ret)
2462                         goto out;
2463         }
2464         ret = 0;
2465 out:
2466         btrfs_release_path(extent_root, path);
2467         return ret;
2468 }
2469
2470 int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
2471 {
2472         struct btrfs_trans_handle *trans;
2473         struct btrfs_root *tree_root = root->fs_info->tree_root;
2474         struct btrfs_path *path;
2475         u64 cur_byte;
2476         u64 total_found;
2477         struct btrfs_fs_info *info = root->fs_info;
2478         struct extent_io_tree *block_group_cache;
2479         struct btrfs_key key;
2480         struct btrfs_key found_key;
2481         struct extent_buffer *leaf;
2482         u32 nritems;
2483         int ret;
2484         int progress = 0;
2485
2486         btrfs_set_super_total_bytes(&info->super_copy, new_size);
2487         clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
2488                            GFP_NOFS);
2489         block_group_cache = &info->block_group_cache;
2490         path = btrfs_alloc_path();
2491         root = root->fs_info->extent_root;
2492         path->reada = 2;
2493
2494 again:
2495         total_found = 0;
2496         key.objectid = new_size;
2497         key.offset = 0;
2498         key.type = 0;
2499         cur_byte = key.objectid;
2500
2501         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2502         if (ret < 0)
2503                 goto out;
2504
2505         ret = find_previous_extent(root, path);
2506         if (ret < 0)
2507                 goto out;
2508         if (ret == 0) {
2509                 leaf = path->nodes[0];
2510                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2511                 if (found_key.objectid + found_key.offset > new_size) {
2512                         cur_byte = found_key.objectid;
2513                         key.objectid = cur_byte;
2514                 }
2515         }
2516         btrfs_release_path(root, path);
2517
2518         while(1) {
2519                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2520                 if (ret < 0)
2521                         goto out;
2522
2523                 leaf = path->nodes[0];
2524                 nritems = btrfs_header_nritems(leaf);
2525 next:
2526                 if (path->slots[0] >= nritems) {
2527                         ret = btrfs_next_leaf(root, path);
2528                         if (ret < 0)
2529                                 goto out;
2530                         if (ret == 1) {
2531                                 ret = 0;
2532                                 break;
2533                         }
2534                         leaf = path->nodes[0];
2535                         nritems = btrfs_header_nritems(leaf);
2536                 }
2537
2538                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2539
2540                 if (progress && need_resched()) {
2541                         memcpy(&key, &found_key, sizeof(key));
2542                         mutex_unlock(&root->fs_info->fs_mutex);
2543                         cond_resched();
2544                         mutex_lock(&root->fs_info->fs_mutex);
2545                         btrfs_release_path(root, path);
2546                         btrfs_search_slot(NULL, root, &key, path, 0, 0);
2547                         progress = 0;
2548                         goto next;
2549                 }
2550                 progress = 1;
2551
2552                 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
2553                     found_key.objectid + found_key.offset <= cur_byte) {
2554                         path->slots[0]++;
2555                         goto next;
2556                 }
2557
2558                 total_found++;
2559                 cur_byte = found_key.objectid + found_key.offset;
2560                 key.objectid = cur_byte;
2561                 btrfs_release_path(root, path);
2562                 ret = relocate_one_extent(root, path, &found_key);
2563         }
2564
2565         btrfs_release_path(root, path);
2566
2567         if (total_found > 0) {
2568                 trans = btrfs_start_transaction(tree_root, 1);
2569                 btrfs_commit_transaction(trans, tree_root);
2570
2571                 mutex_unlock(&root->fs_info->fs_mutex);
2572                 btrfs_clean_old_snapshots(tree_root);
2573                 mutex_lock(&root->fs_info->fs_mutex);
2574
2575                 trans = btrfs_start_transaction(tree_root, 1);
2576                 btrfs_commit_transaction(trans, tree_root);
2577                 goto again;
2578         }
2579
2580         trans = btrfs_start_transaction(root, 1);
2581         key.objectid = new_size;
2582         key.offset = 0;
2583         key.type = 0;
2584         while(1) {
2585                 u64 ptr;
2586
2587                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2588                 if (ret < 0)
2589                         goto out;
2590
2591                 leaf = path->nodes[0];
2592                 nritems = btrfs_header_nritems(leaf);
2593 bg_next:
2594                 if (path->slots[0] >= nritems) {
2595                         ret = btrfs_next_leaf(root, path);
2596                         if (ret < 0)
2597                                 break;
2598                         if (ret == 1) {
2599                                 ret = 0;
2600                                 break;
2601                         }
2602                         leaf = path->nodes[0];
2603                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2604
2605                         /*
2606                          * btrfs_next_leaf doesn't cow buffers, we have to
2607                          * do the search again
2608                          */
2609                         memcpy(&key, &found_key, sizeof(key));
2610                         btrfs_release_path(root, path);
2611                         goto resched_check;
2612                 }
2613
2614                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2615                 if (btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) {
2616                         printk("shrinker found key %Lu %u %Lu\n",
2617                                 found_key.objectid, found_key.type,
2618                                 found_key.offset);
2619                         path->slots[0]++;
2620                         goto bg_next;
2621                 }
2622                 ret = get_state_private(&info->block_group_cache,
2623                                         found_key.objectid, &ptr);
2624                 if (!ret)
2625                         kfree((void *)(unsigned long)ptr);
2626
2627                 clear_extent_bits(&info->block_group_cache, found_key.objectid,
2628                                   found_key.objectid + found_key.offset - 1,
2629                                   (unsigned int)-1, GFP_NOFS);
2630
2631                 key.objectid = found_key.objectid + 1;
2632                 btrfs_del_item(trans, root, path);
2633                 btrfs_release_path(root, path);
2634 resched_check:
2635                 if (need_resched()) {
2636                         mutex_unlock(&root->fs_info->fs_mutex);
2637                         cond_resched();
2638                         mutex_lock(&root->fs_info->fs_mutex);
2639                 }
2640         }
2641         clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
2642                            GFP_NOFS);
2643         btrfs_commit_transaction(trans, root);
2644 out:
2645         btrfs_free_path(path);
2646         return ret;
2647 }
2648
2649 int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
2650                            struct btrfs_root *root, u64 new_size)
2651 {
2652         struct btrfs_path *path;
2653         u64 nr = 0;
2654         u64 cur_byte;
2655         u64 old_size;
2656         unsigned long rem;
2657         struct btrfs_block_group_cache *cache;
2658         struct btrfs_block_group_item *item;
2659         struct btrfs_fs_info *info = root->fs_info;
2660         struct extent_io_tree *block_group_cache;
2661         struct btrfs_key key;
2662         struct extent_buffer *leaf;
2663         int ret;
2664         int bit;
2665
2666         old_size = btrfs_super_total_bytes(&info->super_copy);
2667         block_group_cache = &info->block_group_cache;
2668
2669         root = info->extent_root;
2670
2671         cache = btrfs_lookup_block_group(root->fs_info, old_size - 1);
2672
2673         cur_byte = cache->key.objectid + cache->key.offset;
2674         if (cur_byte >= new_size)
2675                 goto set_size;
2676
2677         key.offset = BTRFS_BLOCK_GROUP_SIZE;
2678         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2679
2680         path = btrfs_alloc_path();
2681         if (!path)
2682                 return -ENOMEM;
2683
2684         while(cur_byte < new_size) {
2685                 key.objectid = cur_byte;
2686                 ret = btrfs_insert_empty_item(trans, root, path, &key,
2687                                         sizeof(struct btrfs_block_group_item));
2688                 BUG_ON(ret);
2689                 leaf = path->nodes[0];
2690                 item = btrfs_item_ptr(leaf, path->slots[0],
2691                                       struct btrfs_block_group_item);
2692
2693                 btrfs_set_disk_block_group_used(leaf, item, 0);
2694                 div_long_long_rem(nr, 3, &rem);
2695                 if (rem) {
2696                         btrfs_set_disk_block_group_flags(leaf, item,
2697                                                  BTRFS_BLOCK_GROUP_DATA);
2698                 } else {
2699                         btrfs_set_disk_block_group_flags(leaf, item, 0);
2700                 }
2701                 nr++;
2702
2703                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2704                 BUG_ON(!cache);
2705
2706                 read_extent_buffer(leaf, &cache->item, (unsigned long)item,
2707                                    sizeof(cache->item));
2708
2709                 memcpy(&cache->key, &key, sizeof(key));
2710                 cache->cached = 0;
2711                 cache->pinned = 0;
2712                 cur_byte = key.objectid + key.offset;
2713                 btrfs_release_path(root, path);
2714
2715                 if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
2716                         bit = BLOCK_GROUP_DATA;
2717                         cache->data = BTRFS_BLOCK_GROUP_DATA;
2718                 } else {
2719                         bit = BLOCK_GROUP_METADATA;
2720                         cache->data = 0;
2721                 }
2722
2723                 /* use EXTENT_LOCKED to prevent merging */
2724                 set_extent_bits(block_group_cache, key.objectid,
2725                                 key.objectid + key.offset - 1,
2726                                 bit | EXTENT_LOCKED, GFP_NOFS);
2727                 set_state_private(block_group_cache, key.objectid,
2728                                   (unsigned long)cache);
2729         }
2730         btrfs_free_path(path);
2731 set_size:
2732         btrfs_set_super_total_bytes(&info->super_copy, new_size);
2733         return 0;
2734 }
2735
2736 int btrfs_read_block_groups(struct btrfs_root *root)
2737 {
2738         struct btrfs_path *path;
2739         int ret;
2740         int err = 0;
2741         int bit;
2742         struct btrfs_block_group_cache *cache;
2743         struct btrfs_fs_info *info = root->fs_info;
2744         struct extent_io_tree *block_group_cache;
2745         struct btrfs_key key;
2746         struct btrfs_key found_key;
2747         struct extent_buffer *leaf;
2748
2749         block_group_cache = &info->block_group_cache;
2750
2751         root = info->extent_root;
2752         key.objectid = 0;
2753         key.offset = BTRFS_BLOCK_GROUP_SIZE;
2754         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2755
2756         path = btrfs_alloc_path();
2757         if (!path)
2758                 return -ENOMEM;
2759
2760         while(1) {
2761                 ret = btrfs_search_slot(NULL, info->extent_root,
2762                                         &key, path, 0, 0);
2763                 if (ret != 0) {
2764                         err = ret;
2765                         break;
2766                 }
2767                 leaf = path->nodes[0];
2768                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2769                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2770                 if (!cache) {
2771                         err = -1;
2772                         break;
2773                 }
2774
2775                 read_extent_buffer(leaf, &cache->item,
2776                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
2777                                    sizeof(cache->item));
2778                 memcpy(&cache->key, &found_key, sizeof(found_key));
2779                 cache->cached = 0;
2780                 cache->pinned = 0;
2781                 key.objectid = found_key.objectid + found_key.offset;
2782                 btrfs_release_path(root, path);
2783
2784                 if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) {
2785                         bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
2786                         cache->data = BTRFS_BLOCK_GROUP_MIXED;
2787                 } else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
2788                         bit = BLOCK_GROUP_DATA;
2789                         cache->data = BTRFS_BLOCK_GROUP_DATA;
2790                 } else {
2791                         bit = BLOCK_GROUP_METADATA;
2792                         cache->data = 0;
2793                 }
2794
2795                 /* use EXTENT_LOCKED to prevent merging */
2796                 set_extent_bits(block_group_cache, found_key.objectid,
2797                                 found_key.objectid + found_key.offset - 1,
2798                                 bit | EXTENT_LOCKED, GFP_NOFS);
2799                 set_state_private(block_group_cache, found_key.objectid,
2800                                   (unsigned long)cache);
2801
2802                 if (key.objectid >=
2803                     btrfs_super_total_bytes(&info->super_copy))
2804                         break;
2805         }
2806
2807         btrfs_free_path(path);
2808         return 0;
2809 }