Btrfs: Force caching of metadata block groups on mount to avoid deadlock
[linux-2.6-block.git] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include "hash.h"
22 #include "crc32c.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 #include "volumes.h"
28 #include "locking.h"
29
30 #define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
31 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
32 #define BLOCK_GROUP_SYSTEM   EXTENT_NEW
33
34 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
35
36 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
37                                  btrfs_root *extent_root);
38 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
39                                btrfs_root *extent_root);
40 static struct btrfs_block_group_cache *
41 __btrfs_find_block_group(struct btrfs_root *root,
42                          struct btrfs_block_group_cache *hint,
43                          u64 search_start, int data, int owner);
44
45 void maybe_lock_mutex(struct btrfs_root *root)
46 {
47         if (root != root->fs_info->extent_root &&
48             root != root->fs_info->chunk_root &&
49             root != root->fs_info->dev_root) {
50                 mutex_lock(&root->fs_info->alloc_mutex);
51         }
52 }
53
54 void maybe_unlock_mutex(struct btrfs_root *root)
55 {
56         if (root != root->fs_info->extent_root &&
57             root != root->fs_info->chunk_root &&
58             root != root->fs_info->dev_root) {
59                 mutex_unlock(&root->fs_info->alloc_mutex);
60         }
61 }
62
63 static int cache_block_group(struct btrfs_root *root,
64                              struct btrfs_block_group_cache *block_group)
65 {
66         struct btrfs_path *path;
67         int ret;
68         struct btrfs_key key;
69         struct extent_buffer *leaf;
70         struct extent_io_tree *free_space_cache;
71         int slot;
72         u64 last = 0;
73         u64 hole_size;
74         u64 first_free;
75         int found = 0;
76
77         if (!block_group)
78                 return 0;
79
80         root = root->fs_info->extent_root;
81         free_space_cache = &root->fs_info->free_space_cache;
82
83         if (block_group->cached)
84                 return 0;
85
86         path = btrfs_alloc_path();
87         if (!path)
88                 return -ENOMEM;
89
90         path->reada = 2;
91         /*
92          * we get into deadlocks with paths held by callers of this function.
93          * since the alloc_mutex is protecting things right now, just
94          * skip the locking here
95          */
96         path->skip_locking = 1;
97         first_free = block_group->key.objectid;
98         key.objectid = block_group->key.objectid;
99         key.offset = 0;
100         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
101         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
102         if (ret < 0)
103                 return ret;
104         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
105         if (ret < 0)
106                 return ret;
107         if (ret == 0) {
108                 leaf = path->nodes[0];
109                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
110                 if (key.objectid + key.offset > first_free)
111                         first_free = key.objectid + key.offset;
112         }
113         while(1) {
114                 leaf = path->nodes[0];
115                 slot = path->slots[0];
116                 if (slot >= btrfs_header_nritems(leaf)) {
117                         ret = btrfs_next_leaf(root, path);
118                         if (ret < 0)
119                                 goto err;
120                         if (ret == 0) {
121                                 continue;
122                         } else {
123                                 break;
124                         }
125                 }
126                 btrfs_item_key_to_cpu(leaf, &key, slot);
127                 if (key.objectid < block_group->key.objectid) {
128                         goto next;
129                 }
130                 if (key.objectid >= block_group->key.objectid +
131                     block_group->key.offset) {
132                         break;
133                 }
134
135                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
136                         if (!found) {
137                                 last = first_free;
138                                 found = 1;
139                         }
140                         if (key.objectid > last) {
141                                 hole_size = key.objectid - last;
142                                 set_extent_dirty(free_space_cache, last,
143                                                  last + hole_size - 1,
144                                                  GFP_NOFS);
145                         }
146                         last = key.objectid + key.offset;
147                 }
148 next:
149                 path->slots[0]++;
150         }
151
152         if (!found)
153                 last = first_free;
154         if (block_group->key.objectid +
155             block_group->key.offset > last) {
156                 hole_size = block_group->key.objectid +
157                         block_group->key.offset - last;
158                 set_extent_dirty(free_space_cache, last,
159                                  last + hole_size - 1, GFP_NOFS);
160         }
161         block_group->cached = 1;
162 err:
163         btrfs_free_path(path);
164         return 0;
165 }
166
167 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
168                                                        btrfs_fs_info *info,
169                                                          u64 bytenr)
170 {
171         struct extent_io_tree *block_group_cache;
172         struct btrfs_block_group_cache *block_group = NULL;
173         u64 ptr;
174         u64 start;
175         u64 end;
176         int ret;
177
178         bytenr = max_t(u64, bytenr,
179                        BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
180         block_group_cache = &info->block_group_cache;
181         ret = find_first_extent_bit(block_group_cache,
182                                     bytenr, &start, &end,
183                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
184                                     BLOCK_GROUP_SYSTEM);
185         if (ret) {
186                 return NULL;
187         }
188         ret = get_state_private(block_group_cache, start, &ptr);
189         if (ret)
190                 return NULL;
191
192         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
193         return block_group;
194 }
195
196 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
197                                                          btrfs_fs_info *info,
198                                                          u64 bytenr)
199 {
200         struct extent_io_tree *block_group_cache;
201         struct btrfs_block_group_cache *block_group = NULL;
202         u64 ptr;
203         u64 start;
204         u64 end;
205         int ret;
206
207         bytenr = max_t(u64, bytenr,
208                        BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
209         block_group_cache = &info->block_group_cache;
210         ret = find_first_extent_bit(block_group_cache,
211                                     bytenr, &start, &end,
212                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
213                                     BLOCK_GROUP_SYSTEM);
214         if (ret) {
215                 return NULL;
216         }
217         ret = get_state_private(block_group_cache, start, &ptr);
218         if (ret)
219                 return NULL;
220
221         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
222         if (block_group->key.objectid <= bytenr && bytenr <
223             block_group->key.objectid + block_group->key.offset)
224                 return block_group;
225         return NULL;
226 }
227
228 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
229 {
230         return (cache->flags & bits) == bits;
231 }
232
233 static int noinline find_search_start(struct btrfs_root *root,
234                               struct btrfs_block_group_cache **cache_ret,
235                               u64 *start_ret, u64 num, int data)
236 {
237         int ret;
238         struct btrfs_block_group_cache *cache = *cache_ret;
239         struct extent_io_tree *free_space_cache;
240         struct extent_state *state;
241         u64 last;
242         u64 start = 0;
243         u64 cache_miss = 0;
244         u64 total_fs_bytes;
245         u64 search_start = *start_ret;
246         int wrapped = 0;
247
248         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
249         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
250         free_space_cache = &root->fs_info->free_space_cache;
251
252         if (!cache)
253                 goto out;
254
255 again:
256         ret = cache_block_group(root, cache);
257         if (ret) {
258                 goto out;
259         }
260
261         last = max(search_start, cache->key.objectid);
262         if (!block_group_bits(cache, data) || cache->ro)
263                 goto new_group;
264
265         spin_lock_irq(&free_space_cache->lock);
266         state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY);
267         while(1) {
268                 if (!state) {
269                         if (!cache_miss)
270                                 cache_miss = last;
271                         spin_unlock_irq(&free_space_cache->lock);
272                         goto new_group;
273                 }
274
275                 start = max(last, state->start);
276                 last = state->end + 1;
277                 if (last - start < num) {
278                         do {
279                                 state = extent_state_next(state);
280                         } while(state && !(state->state & EXTENT_DIRTY));
281                         continue;
282                 }
283                 spin_unlock_irq(&free_space_cache->lock);
284                 if (cache->ro) {
285                         goto new_group;
286                 }
287                 if (start + num > cache->key.objectid + cache->key.offset)
288                         goto new_group;
289                 if (!block_group_bits(cache, data)) {
290                         printk("block group bits don't match %Lu %d\n", cache->flags, data);
291                 }
292                 *start_ret = start;
293                 return 0;
294         }
295 out:
296         cache = btrfs_lookup_block_group(root->fs_info, search_start);
297         if (!cache) {
298                 printk("Unable to find block group for %Lu\n", search_start);
299                 WARN_ON(1);
300         }
301         return -ENOSPC;
302
303 new_group:
304         last = cache->key.objectid + cache->key.offset;
305 wrapped:
306         cache = btrfs_lookup_first_block_group(root->fs_info, last);
307         if (!cache || cache->key.objectid >= total_fs_bytes) {
308 no_cache:
309                 if (!wrapped) {
310                         wrapped = 1;
311                         last = search_start;
312                         goto wrapped;
313                 }
314                 goto out;
315         }
316         if (cache_miss && !cache->cached) {
317                 cache_block_group(root, cache);
318                 last = cache_miss;
319                 cache = btrfs_lookup_first_block_group(root->fs_info, last);
320         }
321         cache_miss = 0;
322         cache = __btrfs_find_block_group(root, cache, last, data, 0);
323         if (!cache)
324                 goto no_cache;
325         *cache_ret = cache;
326         goto again;
327 }
328
329 static u64 div_factor(u64 num, int factor)
330 {
331         if (factor == 10)
332                 return num;
333         num *= factor;
334         do_div(num, 10);
335         return num;
336 }
337
338 static int block_group_state_bits(u64 flags)
339 {
340         int bits = 0;
341         if (flags & BTRFS_BLOCK_GROUP_DATA)
342                 bits |= BLOCK_GROUP_DATA;
343         if (flags & BTRFS_BLOCK_GROUP_METADATA)
344                 bits |= BLOCK_GROUP_METADATA;
345         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
346                 bits |= BLOCK_GROUP_SYSTEM;
347         return bits;
348 }
349
350 static struct btrfs_block_group_cache *
351 __btrfs_find_block_group(struct btrfs_root *root,
352                          struct btrfs_block_group_cache *hint,
353                          u64 search_start, int data, int owner)
354 {
355         struct btrfs_block_group_cache *cache;
356         struct extent_io_tree *block_group_cache;
357         struct btrfs_block_group_cache *found_group = NULL;
358         struct btrfs_fs_info *info = root->fs_info;
359         u64 used;
360         u64 last = 0;
361         u64 start;
362         u64 end;
363         u64 free_check;
364         u64 ptr;
365         int bit;
366         int ret;
367         int full_search = 0;
368         int factor = 10;
369         int wrapped = 0;
370
371         block_group_cache = &info->block_group_cache;
372
373         if (data & BTRFS_BLOCK_GROUP_METADATA)
374                 factor = 9;
375
376         bit = block_group_state_bits(data);
377
378         if (search_start) {
379                 struct btrfs_block_group_cache *shint;
380                 shint = btrfs_lookup_first_block_group(info, search_start);
381                 if (shint && block_group_bits(shint, data) && !shint->ro) {
382                         used = btrfs_block_group_used(&shint->item);
383                         if (used + shint->pinned <
384                             div_factor(shint->key.offset, factor)) {
385                                 return shint;
386                         }
387                 }
388         }
389         if (hint && !hint->ro && block_group_bits(hint, data)) {
390                 used = btrfs_block_group_used(&hint->item);
391                 if (used + hint->pinned <
392                     div_factor(hint->key.offset, factor)) {
393                         return hint;
394                 }
395                 last = hint->key.objectid + hint->key.offset;
396         } else {
397                 if (hint)
398                         last = max(hint->key.objectid, search_start);
399                 else
400                         last = search_start;
401         }
402 again:
403         while(1) {
404                 ret = find_first_extent_bit(block_group_cache, last,
405                                             &start, &end, bit);
406                 if (ret)
407                         break;
408
409                 ret = get_state_private(block_group_cache, start, &ptr);
410                 if (ret) {
411                         last = end + 1;
412                         continue;
413                 }
414
415                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
416                 last = cache->key.objectid + cache->key.offset;
417                 used = btrfs_block_group_used(&cache->item);
418
419                 if (!cache->ro && block_group_bits(cache, data)) {
420                         free_check = div_factor(cache->key.offset, factor);
421                         if (used + cache->pinned < free_check) {
422                                 found_group = cache;
423                                 goto found;
424                         }
425                 }
426                 cond_resched();
427         }
428         if (!wrapped) {
429                 last = search_start;
430                 wrapped = 1;
431                 goto again;
432         }
433         if (!full_search && factor < 10) {
434                 last = search_start;
435                 full_search = 1;
436                 factor = 10;
437                 goto again;
438         }
439 found:
440         return found_group;
441 }
442
443 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
444                                                  struct btrfs_block_group_cache
445                                                  *hint, u64 search_start,
446                                                  int data, int owner)
447 {
448
449         struct btrfs_block_group_cache *ret;
450         mutex_lock(&root->fs_info->alloc_mutex);
451         ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
452         mutex_unlock(&root->fs_info->alloc_mutex);
453         return ret;
454 }
455 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
456                            u64 owner, u64 owner_offset)
457 {
458         u32 high_crc = ~(u32)0;
459         u32 low_crc = ~(u32)0;
460         __le64 lenum;
461         lenum = cpu_to_le64(root_objectid);
462         high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
463         lenum = cpu_to_le64(ref_generation);
464         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
465         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
466                 lenum = cpu_to_le64(owner);
467                 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
468                 lenum = cpu_to_le64(owner_offset);
469                 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
470         }
471         return ((u64)high_crc << 32) | (u64)low_crc;
472 }
473
474 static int match_extent_ref(struct extent_buffer *leaf,
475                             struct btrfs_extent_ref *disk_ref,
476                             struct btrfs_extent_ref *cpu_ref)
477 {
478         int ret;
479         int len;
480
481         if (cpu_ref->objectid)
482                 len = sizeof(*cpu_ref);
483         else
484                 len = 2 * sizeof(u64);
485         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
486                                    len);
487         return ret == 0;
488 }
489
490 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
491                                           struct btrfs_root *root,
492                                           struct btrfs_path *path, u64 bytenr,
493                                           u64 root_objectid,
494                                           u64 ref_generation, u64 owner,
495                                           u64 owner_offset, int del)
496 {
497         u64 hash;
498         struct btrfs_key key;
499         struct btrfs_key found_key;
500         struct btrfs_extent_ref ref;
501         struct extent_buffer *leaf;
502         struct btrfs_extent_ref *disk_ref;
503         int ret;
504         int ret2;
505
506         btrfs_set_stack_ref_root(&ref, root_objectid);
507         btrfs_set_stack_ref_generation(&ref, ref_generation);
508         btrfs_set_stack_ref_objectid(&ref, owner);
509         btrfs_set_stack_ref_offset(&ref, owner_offset);
510
511         hash = hash_extent_ref(root_objectid, ref_generation, owner,
512                                owner_offset);
513         key.offset = hash;
514         key.objectid = bytenr;
515         key.type = BTRFS_EXTENT_REF_KEY;
516
517         while (1) {
518                 ret = btrfs_search_slot(trans, root, &key, path,
519                                         del ? -1 : 0, del);
520                 if (ret < 0)
521                         goto out;
522                 leaf = path->nodes[0];
523                 if (ret != 0) {
524                         u32 nritems = btrfs_header_nritems(leaf);
525                         if (path->slots[0] >= nritems) {
526                                 ret2 = btrfs_next_leaf(root, path);
527                                 if (ret2)
528                                         goto out;
529                                 leaf = path->nodes[0];
530                         }
531                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
532                         if (found_key.objectid != bytenr ||
533                             found_key.type != BTRFS_EXTENT_REF_KEY)
534                                 goto out;
535                         key.offset = found_key.offset;
536                         if (del) {
537                                 btrfs_release_path(root, path);
538                                 continue;
539                         }
540                 }
541                 disk_ref = btrfs_item_ptr(path->nodes[0],
542                                           path->slots[0],
543                                           struct btrfs_extent_ref);
544                 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
545                         ret = 0;
546                         goto out;
547                 }
548                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
549                 key.offset = found_key.offset + 1;
550                 btrfs_release_path(root, path);
551         }
552 out:
553         return ret;
554 }
555
556 /*
557  * Back reference rules.  Back refs have three main goals:
558  *
559  * 1) differentiate between all holders of references to an extent so that
560  *    when a reference is dropped we can make sure it was a valid reference
561  *    before freeing the extent.
562  *
563  * 2) Provide enough information to quickly find the holders of an extent
564  *    if we notice a given block is corrupted or bad.
565  *
566  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
567  *    maintenance.  This is actually the same as #2, but with a slightly
568  *    different use case.
569  *
570  * File extents can be referenced by:
571  *
572  * - multiple snapshots, subvolumes, or different generations in one subvol
573  * - different files inside a single subvolume (in theory, not implemented yet)
574  * - different offsets inside a file (bookend extents in file.c)
575  *
576  * The extent ref structure has fields for:
577  *
578  * - Objectid of the subvolume root
579  * - Generation number of the tree holding the reference
580  * - objectid of the file holding the reference
581  * - offset in the file corresponding to the key holding the reference
582  *
583  * When a file extent is allocated the fields are filled in:
584  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
585  *
586  * When a leaf is cow'd new references are added for every file extent found
587  * in the leaf.  It looks the same as the create case, but trans->transid
588  * will be different when the block is cow'd.
589  *
590  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
591  *
592  * When a file extent is removed either during snapshot deletion or file
593  * truncation, the corresponding back reference is found
594  * by searching for:
595  *
596  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
597  *      inode objectid, offset in file)
598  *
599  * Btree extents can be referenced by:
600  *
601  * - Different subvolumes
602  * - Different generations of the same subvolume
603  *
604  * Storing sufficient information for a full reverse mapping of a btree
605  * block would require storing the lowest key of the block in the backref,
606  * and it would require updating that lowest key either before write out or
607  * every time it changed.  Instead, the objectid of the lowest key is stored
608  * along with the level of the tree block.  This provides a hint
609  * about where in the btree the block can be found.  Searches through the
610  * btree only need to look for a pointer to that block, so they stop one
611  * level higher than the level recorded in the backref.
612  *
613  * Some btrees do not do reference counting on their extents.  These
614  * include the extent tree and the tree of tree roots.  Backrefs for these
615  * trees always have a generation of zero.
616  *
617  * When a tree block is created, back references are inserted:
618  *
619  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
620  *
621  * When a tree block is cow'd in a reference counted root,
622  * new back references are added for all the blocks it points to.
623  * These are of the form (trans->transid will have increased since creation):
624  *
625  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
626  *
627  * Because the lowest_key_objectid and the level are just hints
628  * they are not used when backrefs are deleted.  When a backref is deleted:
629  *
630  * if backref was for a tree root:
631  *     root_objectid = root->root_key.objectid
632  * else
633  *     root_objectid = btrfs_header_owner(parent)
634  *
635  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
636  *
637  * Back Reference Key hashing:
638  *
639  * Back references have four fields, each 64 bits long.  Unfortunately,
640  * This is hashed into a single 64 bit number and placed into the key offset.
641  * The key objectid corresponds to the first byte in the extent, and the
642  * key type is set to BTRFS_EXTENT_REF_KEY
643  */
644 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
645                                  struct btrfs_root *root,
646                                  struct btrfs_path *path, u64 bytenr,
647                                  u64 root_objectid, u64 ref_generation,
648                                  u64 owner, u64 owner_offset)
649 {
650         u64 hash;
651         struct btrfs_key key;
652         struct btrfs_extent_ref ref;
653         struct btrfs_extent_ref *disk_ref;
654         int ret;
655
656         btrfs_set_stack_ref_root(&ref, root_objectid);
657         btrfs_set_stack_ref_generation(&ref, ref_generation);
658         btrfs_set_stack_ref_objectid(&ref, owner);
659         btrfs_set_stack_ref_offset(&ref, owner_offset);
660
661         hash = hash_extent_ref(root_objectid, ref_generation, owner,
662                                owner_offset);
663         key.offset = hash;
664         key.objectid = bytenr;
665         key.type = BTRFS_EXTENT_REF_KEY;
666
667         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
668         while (ret == -EEXIST) {
669                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
670                                           struct btrfs_extent_ref);
671                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
672                         goto out;
673                 key.offset++;
674                 btrfs_release_path(root, path);
675                 ret = btrfs_insert_empty_item(trans, root, path, &key,
676                                               sizeof(ref));
677         }
678         if (ret)
679                 goto out;
680         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
681                                   struct btrfs_extent_ref);
682         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
683                             sizeof(ref));
684         btrfs_mark_buffer_dirty(path->nodes[0]);
685 out:
686         btrfs_release_path(root, path);
687         return ret;
688 }
689
690 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
691                                 struct btrfs_root *root,
692                                 u64 bytenr, u64 num_bytes,
693                                 u64 root_objectid, u64 ref_generation,
694                                 u64 owner, u64 owner_offset)
695 {
696         struct btrfs_path *path;
697         int ret;
698         struct btrfs_key key;
699         struct extent_buffer *l;
700         struct btrfs_extent_item *item;
701         u32 refs;
702
703         WARN_ON(num_bytes < root->sectorsize);
704         path = btrfs_alloc_path();
705         if (!path)
706                 return -ENOMEM;
707
708         path->reada = 1;
709         key.objectid = bytenr;
710         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
711         key.offset = num_bytes;
712         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
713                                 0, 1);
714         if (ret < 0)
715                 return ret;
716         if (ret != 0) {
717                 BUG();
718         }
719         BUG_ON(ret != 0);
720         l = path->nodes[0];
721         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
722         refs = btrfs_extent_refs(l, item);
723         btrfs_set_extent_refs(l, item, refs + 1);
724         btrfs_mark_buffer_dirty(path->nodes[0]);
725
726         btrfs_release_path(root->fs_info->extent_root, path);
727
728         path->reada = 1;
729         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
730                                           path, bytenr, root_objectid,
731                                           ref_generation, owner, owner_offset);
732         BUG_ON(ret);
733         finish_current_insert(trans, root->fs_info->extent_root);
734         del_pending_extents(trans, root->fs_info->extent_root);
735
736         btrfs_free_path(path);
737         return 0;
738 }
739
740 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
741                                 struct btrfs_root *root,
742                                 u64 bytenr, u64 num_bytes,
743                                 u64 root_objectid, u64 ref_generation,
744                                 u64 owner, u64 owner_offset)
745 {
746         int ret;
747
748         mutex_lock(&root->fs_info->alloc_mutex);
749         ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
750                                      root_objectid, ref_generation,
751                                      owner, owner_offset);
752         mutex_unlock(&root->fs_info->alloc_mutex);
753         return ret;
754 }
755
756 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
757                          struct btrfs_root *root)
758 {
759         finish_current_insert(trans, root->fs_info->extent_root);
760         del_pending_extents(trans, root->fs_info->extent_root);
761         return 0;
762 }
763
764 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
765                              struct btrfs_root *root, u64 bytenr,
766                              u64 num_bytes, u32 *refs)
767 {
768         struct btrfs_path *path;
769         int ret;
770         struct btrfs_key key;
771         struct extent_buffer *l;
772         struct btrfs_extent_item *item;
773
774         WARN_ON(num_bytes < root->sectorsize);
775         path = btrfs_alloc_path();
776         path->reada = 1;
777         key.objectid = bytenr;
778         key.offset = num_bytes;
779         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
780         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
781                                 0, 0);
782         if (ret < 0)
783                 goto out;
784         if (ret != 0) {
785                 btrfs_print_leaf(root, path->nodes[0]);
786                 printk("failed to find block number %Lu\n", bytenr);
787                 BUG();
788         }
789         l = path->nodes[0];
790         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
791         *refs = btrfs_extent_refs(l, item);
792 out:
793         btrfs_free_path(path);
794         return 0;
795 }
796
797 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
798                                   struct btrfs_path *count_path,
799                                   u64 expected_owner,
800                                   u64 first_extent)
801 {
802         struct btrfs_root *extent_root = root->fs_info->extent_root;
803         struct btrfs_path *path;
804         u64 bytenr;
805         u64 found_objectid;
806         u64 found_owner;
807         u64 root_objectid = root->root_key.objectid;
808         u32 total_count = 0;
809         u32 extent_refs;
810         u32 cur_count;
811         u32 nritems;
812         int ret;
813         struct btrfs_key key;
814         struct btrfs_key found_key;
815         struct extent_buffer *l;
816         struct btrfs_extent_item *item;
817         struct btrfs_extent_ref *ref_item;
818         int level = -1;
819
820         /* FIXME, needs locking */
821         BUG();
822
823         mutex_lock(&root->fs_info->alloc_mutex);
824         path = btrfs_alloc_path();
825 again:
826         if (level == -1)
827                 bytenr = first_extent;
828         else
829                 bytenr = count_path->nodes[level]->start;
830
831         cur_count = 0;
832         key.objectid = bytenr;
833         key.offset = 0;
834
835         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
836         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
837         if (ret < 0)
838                 goto out;
839         BUG_ON(ret == 0);
840
841         l = path->nodes[0];
842         btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
843
844         if (found_key.objectid != bytenr ||
845             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
846                 goto out;
847         }
848
849         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
850         extent_refs = btrfs_extent_refs(l, item);
851         while (1) {
852                 l = path->nodes[0];
853                 nritems = btrfs_header_nritems(l);
854                 if (path->slots[0] >= nritems) {
855                         ret = btrfs_next_leaf(extent_root, path);
856                         if (ret == 0)
857                                 continue;
858                         break;
859                 }
860                 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
861                 if (found_key.objectid != bytenr)
862                         break;
863
864                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
865                         path->slots[0]++;
866                         continue;
867                 }
868
869                 cur_count++;
870                 ref_item = btrfs_item_ptr(l, path->slots[0],
871                                           struct btrfs_extent_ref);
872                 found_objectid = btrfs_ref_root(l, ref_item);
873
874                 if (found_objectid != root_objectid) {
875                         total_count = 2;
876                         goto out;
877                 }
878                 if (level == -1) {
879                         found_owner = btrfs_ref_objectid(l, ref_item);
880                         if (found_owner != expected_owner) {
881                                 total_count = 2;
882                                 goto out;
883                         }
884                         /*
885                          * nasty.  we don't count a reference held by
886                          * the running transaction.  This allows nodatacow
887                          * to avoid cow most of the time
888                          */
889                         if (found_owner >= BTRFS_FIRST_FREE_OBJECTID &&
890                             btrfs_ref_generation(l, ref_item) ==
891                             root->fs_info->generation) {
892                                 extent_refs--;
893                         }
894                 }
895                 total_count = 1;
896                 path->slots[0]++;
897         }
898         /*
899          * if there is more than one reference against a data extent,
900          * we have to assume the other ref is another snapshot
901          */
902         if (level == -1 && extent_refs > 1) {
903                 total_count = 2;
904                 goto out;
905         }
906         if (cur_count == 0) {
907                 total_count = 0;
908                 goto out;
909         }
910         if (level >= 0 && root->node == count_path->nodes[level])
911                 goto out;
912         level++;
913         btrfs_release_path(root, path);
914         goto again;
915
916 out:
917         btrfs_free_path(path);
918         mutex_unlock(&root->fs_info->alloc_mutex);
919         return total_count;
920 }
921
922 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
923                   struct extent_buffer *buf)
924 {
925         u64 bytenr;
926         u32 nritems;
927         struct btrfs_key key;
928         struct btrfs_file_extent_item *fi;
929         int i;
930         int level;
931         int ret;
932         int faili;
933
934         if (!root->ref_cows)
935                 return 0;
936
937         mutex_lock(&root->fs_info->alloc_mutex);
938         level = btrfs_header_level(buf);
939         nritems = btrfs_header_nritems(buf);
940         for (i = 0; i < nritems; i++) {
941                 if (level == 0) {
942                         u64 disk_bytenr;
943                         btrfs_item_key_to_cpu(buf, &key, i);
944                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
945                                 continue;
946                         fi = btrfs_item_ptr(buf, i,
947                                             struct btrfs_file_extent_item);
948                         if (btrfs_file_extent_type(buf, fi) ==
949                             BTRFS_FILE_EXTENT_INLINE)
950                                 continue;
951                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
952                         if (disk_bytenr == 0)
953                                 continue;
954                         ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr,
955                                     btrfs_file_extent_disk_num_bytes(buf, fi),
956                                     root->root_key.objectid, trans->transid,
957                                     key.objectid, key.offset);
958                         if (ret) {
959                                 faili = i;
960                                 goto fail;
961                         }
962                 } else {
963                         bytenr = btrfs_node_blockptr(buf, i);
964                         btrfs_node_key_to_cpu(buf, &key, i);
965                         ret = __btrfs_inc_extent_ref(trans, root, bytenr,
966                                            btrfs_level_size(root, level - 1),
967                                            root->root_key.objectid,
968                                            trans->transid,
969                                            level - 1, key.objectid);
970                         if (ret) {
971                                 faili = i;
972                                 goto fail;
973                         }
974                 }
975         }
976         mutex_unlock(&root->fs_info->alloc_mutex);
977         return 0;
978 fail:
979         WARN_ON(1);
980 #if 0
981         for (i =0; i < faili; i++) {
982                 if (level == 0) {
983                         u64 disk_bytenr;
984                         btrfs_item_key_to_cpu(buf, &key, i);
985                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
986                                 continue;
987                         fi = btrfs_item_ptr(buf, i,
988                                             struct btrfs_file_extent_item);
989                         if (btrfs_file_extent_type(buf, fi) ==
990                             BTRFS_FILE_EXTENT_INLINE)
991                                 continue;
992                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
993                         if (disk_bytenr == 0)
994                                 continue;
995                         err = btrfs_free_extent(trans, root, disk_bytenr,
996                                     btrfs_file_extent_disk_num_bytes(buf,
997                                                                       fi), 0);
998                         BUG_ON(err);
999                 } else {
1000                         bytenr = btrfs_node_blockptr(buf, i);
1001                         err = btrfs_free_extent(trans, root, bytenr,
1002                                         btrfs_level_size(root, level - 1), 0);
1003                         BUG_ON(err);
1004                 }
1005         }
1006 #endif
1007         mutex_unlock(&root->fs_info->alloc_mutex);
1008         return ret;
1009 }
1010
1011 static int write_one_cache_group(struct btrfs_trans_handle *trans,
1012                                  struct btrfs_root *root,
1013                                  struct btrfs_path *path,
1014                                  struct btrfs_block_group_cache *cache)
1015 {
1016         int ret;
1017         int pending_ret;
1018         struct btrfs_root *extent_root = root->fs_info->extent_root;
1019         unsigned long bi;
1020         struct extent_buffer *leaf;
1021
1022         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
1023         if (ret < 0)
1024                 goto fail;
1025         BUG_ON(ret);
1026
1027         leaf = path->nodes[0];
1028         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1029         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1030         btrfs_mark_buffer_dirty(leaf);
1031         btrfs_release_path(extent_root, path);
1032 fail:
1033         finish_current_insert(trans, extent_root);
1034         pending_ret = del_pending_extents(trans, extent_root);
1035         if (ret)
1036                 return ret;
1037         if (pending_ret)
1038                 return pending_ret;
1039         return 0;
1040
1041 }
1042
1043 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1044                                    struct btrfs_root *root)
1045 {
1046         struct extent_io_tree *block_group_cache;
1047         struct btrfs_block_group_cache *cache;
1048         int ret;
1049         int err = 0;
1050         int werr = 0;
1051         struct btrfs_path *path;
1052         u64 last = 0;
1053         u64 start;
1054         u64 end;
1055         u64 ptr;
1056
1057         block_group_cache = &root->fs_info->block_group_cache;
1058         path = btrfs_alloc_path();
1059         if (!path)
1060                 return -ENOMEM;
1061
1062         mutex_lock(&root->fs_info->alloc_mutex);
1063         while(1) {
1064                 ret = find_first_extent_bit(block_group_cache, last,
1065                                             &start, &end, BLOCK_GROUP_DIRTY);
1066                 if (ret)
1067                         break;
1068
1069                 last = end + 1;
1070                 ret = get_state_private(block_group_cache, start, &ptr);
1071                 if (ret)
1072                         break;
1073                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
1074                 err = write_one_cache_group(trans, root,
1075                                             path, cache);
1076                 /*
1077                  * if we fail to write the cache group, we want
1078                  * to keep it marked dirty in hopes that a later
1079                  * write will work
1080                  */
1081                 if (err) {
1082                         werr = err;
1083                         continue;
1084                 }
1085                 clear_extent_bits(block_group_cache, start, end,
1086                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
1087         }
1088         btrfs_free_path(path);
1089         mutex_unlock(&root->fs_info->alloc_mutex);
1090         return werr;
1091 }
1092
1093 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
1094                                                   u64 flags)
1095 {
1096         struct list_head *head = &info->space_info;
1097         struct list_head *cur;
1098         struct btrfs_space_info *found;
1099         list_for_each(cur, head) {
1100                 found = list_entry(cur, struct btrfs_space_info, list);
1101                 if (found->flags == flags)
1102                         return found;
1103         }
1104         return NULL;
1105
1106 }
1107
1108 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1109                              u64 total_bytes, u64 bytes_used,
1110                              struct btrfs_space_info **space_info)
1111 {
1112         struct btrfs_space_info *found;
1113
1114         found = __find_space_info(info, flags);
1115         if (found) {
1116                 found->total_bytes += total_bytes;
1117                 found->bytes_used += bytes_used;
1118                 found->full = 0;
1119                 WARN_ON(found->total_bytes < found->bytes_used);
1120                 *space_info = found;
1121                 return 0;
1122         }
1123         found = kmalloc(sizeof(*found), GFP_NOFS);
1124         if (!found)
1125                 return -ENOMEM;
1126
1127         list_add(&found->list, &info->space_info);
1128         found->flags = flags;
1129         found->total_bytes = total_bytes;
1130         found->bytes_used = bytes_used;
1131         found->bytes_pinned = 0;
1132         found->full = 0;
1133         found->force_alloc = 0;
1134         *space_info = found;
1135         return 0;
1136 }
1137
1138 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1139 {
1140         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1141                                    BTRFS_BLOCK_GROUP_RAID1 |
1142                                    BTRFS_BLOCK_GROUP_RAID10 |
1143                                    BTRFS_BLOCK_GROUP_DUP);
1144         if (extra_flags) {
1145                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1146                         fs_info->avail_data_alloc_bits |= extra_flags;
1147                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1148                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1149                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1150                         fs_info->avail_system_alloc_bits |= extra_flags;
1151         }
1152 }
1153
1154 static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1155 {
1156         u64 num_devices = root->fs_info->fs_devices->num_devices;
1157
1158         if (num_devices == 1)
1159                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1160         if (num_devices < 4)
1161                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1162
1163         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1164             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1165                       BTRFS_BLOCK_GROUP_RAID10))) {
1166                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1167         }
1168
1169         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1170             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
1171                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1172         }
1173
1174         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1175             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1176              (flags & BTRFS_BLOCK_GROUP_RAID10) |
1177              (flags & BTRFS_BLOCK_GROUP_DUP)))
1178                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1179         return flags;
1180 }
1181
1182 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1183                           struct btrfs_root *extent_root, u64 alloc_bytes,
1184                           u64 flags, int force)
1185 {
1186         struct btrfs_space_info *space_info;
1187         u64 thresh;
1188         u64 start;
1189         u64 num_bytes;
1190         int ret;
1191
1192         flags = reduce_alloc_profile(extent_root, flags);
1193
1194         space_info = __find_space_info(extent_root->fs_info, flags);
1195         if (!space_info) {
1196                 ret = update_space_info(extent_root->fs_info, flags,
1197                                         0, 0, &space_info);
1198                 BUG_ON(ret);
1199         }
1200         BUG_ON(!space_info);
1201
1202         if (space_info->force_alloc) {
1203                 force = 1;
1204                 space_info->force_alloc = 0;
1205         }
1206         if (space_info->full)
1207                 goto out;
1208
1209         thresh = div_factor(space_info->total_bytes, 6);
1210         if (!force &&
1211            (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1212             thresh)
1213                 goto out;
1214
1215         mutex_lock(&extent_root->fs_info->chunk_mutex);
1216         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1217         if (ret == -ENOSPC) {
1218 printk("space info full %Lu\n", flags);
1219                 space_info->full = 1;
1220                 goto out_unlock;
1221         }
1222         BUG_ON(ret);
1223
1224         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1225                      BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
1226         BUG_ON(ret);
1227 out_unlock:
1228         mutex_unlock(&extent_root->fs_info->chunk_mutex);
1229 out:
1230         return 0;
1231 }
1232
1233 static int update_block_group(struct btrfs_trans_handle *trans,
1234                               struct btrfs_root *root,
1235                               u64 bytenr, u64 num_bytes, int alloc,
1236                               int mark_free)
1237 {
1238         struct btrfs_block_group_cache *cache;
1239         struct btrfs_fs_info *info = root->fs_info;
1240         u64 total = num_bytes;
1241         u64 old_val;
1242         u64 byte_in_group;
1243         u64 start;
1244         u64 end;
1245
1246         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1247         while(total) {
1248                 cache = btrfs_lookup_block_group(info, bytenr);
1249                 if (!cache) {
1250                         return -1;
1251                 }
1252                 byte_in_group = bytenr - cache->key.objectid;
1253                 WARN_ON(byte_in_group > cache->key.offset);
1254                 start = cache->key.objectid;
1255                 end = start + cache->key.offset - 1;
1256                 set_extent_bits(&info->block_group_cache, start, end,
1257                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
1258
1259                 old_val = btrfs_block_group_used(&cache->item);
1260                 num_bytes = min(total, cache->key.offset - byte_in_group);
1261                 if (alloc) {
1262                         old_val += num_bytes;
1263                         cache->space_info->bytes_used += num_bytes;
1264                 } else {
1265                         old_val -= num_bytes;
1266                         cache->space_info->bytes_used -= num_bytes;
1267                         if (mark_free) {
1268                                 set_extent_dirty(&info->free_space_cache,
1269                                                  bytenr, bytenr + num_bytes - 1,
1270                                                  GFP_NOFS);
1271                         }
1272                 }
1273                 btrfs_set_block_group_used(&cache->item, old_val);
1274                 total -= num_bytes;
1275                 bytenr += num_bytes;
1276         }
1277         return 0;
1278 }
1279
1280 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
1281 {
1282         u64 start;
1283         u64 end;
1284         int ret;
1285         ret = find_first_extent_bit(&root->fs_info->block_group_cache,
1286                                     search_start, &start, &end,
1287                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
1288                                     BLOCK_GROUP_SYSTEM);
1289         if (ret)
1290                 return 0;
1291         return start;
1292 }
1293
1294
1295 static int update_pinned_extents(struct btrfs_root *root,
1296                                 u64 bytenr, u64 num, int pin)
1297 {
1298         u64 len;
1299         struct btrfs_block_group_cache *cache;
1300         struct btrfs_fs_info *fs_info = root->fs_info;
1301
1302         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1303         if (pin) {
1304                 set_extent_dirty(&fs_info->pinned_extents,
1305                                 bytenr, bytenr + num - 1, GFP_NOFS);
1306         } else {
1307                 clear_extent_dirty(&fs_info->pinned_extents,
1308                                 bytenr, bytenr + num - 1, GFP_NOFS);
1309         }
1310         while (num > 0) {
1311                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1312                 if (!cache) {
1313                         u64 first = first_logical_byte(root, bytenr);
1314                         WARN_ON(first < bytenr);
1315                         len = min(first - bytenr, num);
1316                 } else {
1317                         len = min(num, cache->key.offset -
1318                                   (bytenr - cache->key.objectid));
1319                 }
1320                 if (pin) {
1321                         if (cache) {
1322                                 cache->pinned += len;
1323                                 cache->space_info->bytes_pinned += len;
1324                         }
1325                         fs_info->total_pinned += len;
1326                 } else {
1327                         if (cache) {
1328                                 cache->pinned -= len;
1329                                 cache->space_info->bytes_pinned -= len;
1330                         }
1331                         fs_info->total_pinned -= len;
1332                 }
1333                 bytenr += len;
1334                 num -= len;
1335         }
1336         return 0;
1337 }
1338
1339 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1340 {
1341         u64 last = 0;
1342         u64 start;
1343         u64 end;
1344         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1345         int ret;
1346
1347         while(1) {
1348                 ret = find_first_extent_bit(pinned_extents, last,
1349                                             &start, &end, EXTENT_DIRTY);
1350                 if (ret)
1351                         break;
1352                 set_extent_dirty(copy, start, end, GFP_NOFS);
1353                 last = end + 1;
1354         }
1355         return 0;
1356 }
1357
1358 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1359                                struct btrfs_root *root,
1360                                struct extent_io_tree *unpin)
1361 {
1362         u64 start;
1363         u64 end;
1364         int ret;
1365         struct extent_io_tree *free_space_cache;
1366         free_space_cache = &root->fs_info->free_space_cache;
1367
1368         mutex_lock(&root->fs_info->alloc_mutex);
1369         while(1) {
1370                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1371                                             EXTENT_DIRTY);
1372                 if (ret)
1373                         break;
1374                 update_pinned_extents(root, start, end + 1 - start, 0);
1375                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1376                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1377         }
1378         mutex_unlock(&root->fs_info->alloc_mutex);
1379         return 0;
1380 }
1381
1382 static int finish_current_insert(struct btrfs_trans_handle *trans,
1383                                  struct btrfs_root *extent_root)
1384 {
1385         u64 start;
1386         u64 end;
1387         struct btrfs_fs_info *info = extent_root->fs_info;
1388         struct extent_buffer *eb;
1389         struct btrfs_path *path;
1390         struct btrfs_key ins;
1391         struct btrfs_disk_key first;
1392         struct btrfs_extent_item extent_item;
1393         int ret;
1394         int level;
1395         int err = 0;
1396
1397         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
1398         btrfs_set_stack_extent_refs(&extent_item, 1);
1399         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1400         path = btrfs_alloc_path();
1401
1402         while(1) {
1403                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1404                                             &end, EXTENT_LOCKED);
1405                 if (ret)
1406                         break;
1407
1408                 ins.objectid = start;
1409                 ins.offset = end + 1 - start;
1410                 err = btrfs_insert_item(trans, extent_root, &ins,
1411                                         &extent_item, sizeof(extent_item));
1412                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1413                                   GFP_NOFS);
1414                 eb = read_tree_block(extent_root, ins.objectid, ins.offset,
1415                                      trans->transid);
1416                 btrfs_tree_lock(eb);
1417                 level = btrfs_header_level(eb);
1418                 if (level == 0) {
1419                         btrfs_item_key(eb, &first, 0);
1420                 } else {
1421                         btrfs_node_key(eb, &first, 0);
1422                 }
1423                 btrfs_tree_unlock(eb);
1424                 free_extent_buffer(eb);
1425                 /*
1426                  * the first key is just a hint, so the race we've created
1427                  * against reading it is fine
1428                  */
1429                 err = btrfs_insert_extent_backref(trans, extent_root, path,
1430                                           start, extent_root->root_key.objectid,
1431                                           0, level,
1432                                           btrfs_disk_key_objectid(&first));
1433                 BUG_ON(err);
1434         }
1435         btrfs_free_path(path);
1436         return 0;
1437 }
1438
1439 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1440                           int pending)
1441 {
1442         int err = 0;
1443
1444         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1445         if (!pending) {
1446                 struct extent_buffer *buf;
1447                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1448                 if (buf) {
1449                         if (!btrfs_try_tree_lock(buf) &&
1450                             btrfs_buffer_uptodate(buf, 0)) {
1451                                 u64 transid =
1452                                     root->fs_info->running_transaction->transid;
1453                                 u64 header_transid =
1454                                         btrfs_header_generation(buf);
1455                                 if (header_transid == transid &&
1456                                     !btrfs_header_flag(buf,
1457                                                BTRFS_HEADER_FLAG_WRITTEN)) {
1458                                         clean_tree_block(NULL, root, buf);
1459                                         btrfs_tree_unlock(buf);
1460                                         free_extent_buffer(buf);
1461                                         return 1;
1462                                 }
1463                                 btrfs_tree_unlock(buf);
1464                         }
1465                         free_extent_buffer(buf);
1466                 }
1467                 update_pinned_extents(root, bytenr, num_bytes, 1);
1468         } else {
1469                 set_extent_bits(&root->fs_info->pending_del,
1470                                 bytenr, bytenr + num_bytes - 1,
1471                                 EXTENT_LOCKED, GFP_NOFS);
1472         }
1473         BUG_ON(err < 0);
1474         return 0;
1475 }
1476
1477 /*
1478  * remove an extent from the root, returns 0 on success
1479  */
1480 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1481                          *root, u64 bytenr, u64 num_bytes,
1482                          u64 root_objectid, u64 ref_generation,
1483                          u64 owner_objectid, u64 owner_offset, int pin,
1484                          int mark_free)
1485 {
1486         struct btrfs_path *path;
1487         struct btrfs_key key;
1488         struct btrfs_fs_info *info = root->fs_info;
1489         struct btrfs_root *extent_root = info->extent_root;
1490         struct extent_buffer *leaf;
1491         int ret;
1492         int extent_slot = 0;
1493         int found_extent = 0;
1494         int num_to_del = 1;
1495         struct btrfs_extent_item *ei;
1496         u32 refs;
1497
1498         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1499         key.objectid = bytenr;
1500         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1501         key.offset = num_bytes;
1502         path = btrfs_alloc_path();
1503         if (!path)
1504                 return -ENOMEM;
1505
1506         path->reada = 1;
1507         ret = lookup_extent_backref(trans, extent_root, path,
1508                                     bytenr, root_objectid,
1509                                     ref_generation,
1510                                     owner_objectid, owner_offset, 1);
1511         if (ret == 0) {
1512                 struct btrfs_key found_key;
1513                 extent_slot = path->slots[0];
1514                 while(extent_slot > 0) {
1515                         extent_slot--;
1516                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1517                                               extent_slot);
1518                         if (found_key.objectid != bytenr)
1519                                 break;
1520                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1521                             found_key.offset == num_bytes) {
1522                                 found_extent = 1;
1523                                 break;
1524                         }
1525                         if (path->slots[0] - extent_slot > 5)
1526                                 break;
1527                 }
1528                 if (!found_extent)
1529                         ret = btrfs_del_item(trans, extent_root, path);
1530         } else {
1531                 btrfs_print_leaf(extent_root, path->nodes[0]);
1532                 WARN_ON(1);
1533                 printk("Unable to find ref byte nr %Lu root %Lu "
1534                        " gen %Lu owner %Lu offset %Lu\n", bytenr,
1535                        root_objectid, ref_generation, owner_objectid,
1536                        owner_offset);
1537         }
1538         if (!found_extent) {
1539                 btrfs_release_path(extent_root, path);
1540                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1541                 if (ret < 0)
1542                         return ret;
1543                 BUG_ON(ret);
1544                 extent_slot = path->slots[0];
1545         }
1546
1547         leaf = path->nodes[0];
1548         ei = btrfs_item_ptr(leaf, extent_slot,
1549                             struct btrfs_extent_item);
1550         refs = btrfs_extent_refs(leaf, ei);
1551         BUG_ON(refs == 0);
1552         refs -= 1;
1553         btrfs_set_extent_refs(leaf, ei, refs);
1554
1555         btrfs_mark_buffer_dirty(leaf);
1556
1557         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1558                 /* if the back ref and the extent are next to each other
1559                  * they get deleted below in one shot
1560                  */
1561                 path->slots[0] = extent_slot;
1562                 num_to_del = 2;
1563         } else if (found_extent) {
1564                 /* otherwise delete the extent back ref */
1565                 ret = btrfs_del_item(trans, extent_root, path);
1566                 BUG_ON(ret);
1567                 /* if refs are 0, we need to setup the path for deletion */
1568                 if (refs == 0) {
1569                         btrfs_release_path(extent_root, path);
1570                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1571                                                 -1, 1);
1572                         if (ret < 0)
1573                                 return ret;
1574                         BUG_ON(ret);
1575                 }
1576         }
1577
1578         if (refs == 0) {
1579                 u64 super_used;
1580                 u64 root_used;
1581
1582                 if (pin) {
1583                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1584                         if (ret > 0)
1585                                 mark_free = 1;
1586                         BUG_ON(ret < 0);
1587                 }
1588
1589                 /* block accounting for super block */
1590                 spin_lock_irq(&info->delalloc_lock);
1591                 super_used = btrfs_super_bytes_used(&info->super_copy);
1592                 btrfs_set_super_bytes_used(&info->super_copy,
1593                                            super_used - num_bytes);
1594                 spin_unlock_irq(&info->delalloc_lock);
1595
1596                 /* block accounting for root item */
1597                 root_used = btrfs_root_used(&root->root_item);
1598                 btrfs_set_root_used(&root->root_item,
1599                                            root_used - num_bytes);
1600                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1601                                       num_to_del);
1602                 if (ret) {
1603                         return ret;
1604                 }
1605                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1606                                          mark_free);
1607                 BUG_ON(ret);
1608         }
1609         btrfs_free_path(path);
1610         finish_current_insert(trans, extent_root);
1611         return ret;
1612 }
1613
1614 /*
1615  * find all the blocks marked as pending in the radix tree and remove
1616  * them from the extent map
1617  */
1618 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1619                                btrfs_root *extent_root)
1620 {
1621         int ret;
1622         int err = 0;
1623         u64 start;
1624         u64 end;
1625         struct extent_io_tree *pending_del;
1626         struct extent_io_tree *pinned_extents;
1627
1628         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
1629         pending_del = &extent_root->fs_info->pending_del;
1630         pinned_extents = &extent_root->fs_info->pinned_extents;
1631
1632         while(1) {
1633                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1634                                             EXTENT_LOCKED);
1635                 if (ret)
1636                         break;
1637                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1638                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1639                                   GFP_NOFS);
1640                 ret = __free_extent(trans, extent_root,
1641                                      start, end + 1 - start,
1642                                      extent_root->root_key.objectid,
1643                                      0, 0, 0, 0, 0);
1644                 if (ret)
1645                         err = ret;
1646         }
1647         return err;
1648 }
1649
1650 /*
1651  * remove an extent from the root, returns 0 on success
1652  */
1653 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
1654                                struct btrfs_root *root, u64 bytenr,
1655                                u64 num_bytes, u64 root_objectid,
1656                                u64 ref_generation, u64 owner_objectid,
1657                                u64 owner_offset, int pin)
1658 {
1659         struct btrfs_root *extent_root = root->fs_info->extent_root;
1660         int pending_ret;
1661         int ret;
1662
1663         WARN_ON(num_bytes < root->sectorsize);
1664         if (!root->ref_cows)
1665                 ref_generation = 0;
1666
1667         if (root == extent_root) {
1668                 pin_down_bytes(root, bytenr, num_bytes, 1);
1669                 return 0;
1670         }
1671         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1672                             ref_generation, owner_objectid, owner_offset,
1673                             pin, pin == 0);
1674
1675         finish_current_insert(trans, root->fs_info->extent_root);
1676         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1677         return ret ? ret : pending_ret;
1678 }
1679
1680 int btrfs_free_extent(struct btrfs_trans_handle *trans,
1681                       struct btrfs_root *root, u64 bytenr,
1682                       u64 num_bytes, u64 root_objectid,
1683                       u64 ref_generation, u64 owner_objectid,
1684                       u64 owner_offset, int pin)
1685 {
1686         int ret;
1687
1688         maybe_lock_mutex(root);
1689         ret = __btrfs_free_extent(trans, root, bytenr, num_bytes,
1690                                   root_objectid, ref_generation,
1691                                   owner_objectid, owner_offset, pin);
1692         maybe_unlock_mutex(root);
1693         return ret;
1694 }
1695
1696 static u64 stripe_align(struct btrfs_root *root, u64 val)
1697 {
1698         u64 mask = ((u64)root->stripesize - 1);
1699         u64 ret = (val + mask) & ~mask;
1700         return ret;
1701 }
1702
1703 /*
1704  * walks the btree of allocated extents and find a hole of a given size.
1705  * The key ins is changed to record the hole:
1706  * ins->objectid == block start
1707  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1708  * ins->offset == number of blocks
1709  * Any available blocks before search_start are skipped.
1710  */
1711 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1712                                      struct btrfs_root *orig_root,
1713                                      u64 num_bytes, u64 empty_size,
1714                                      u64 search_start, u64 search_end,
1715                                      u64 hint_byte, struct btrfs_key *ins,
1716                                      u64 exclude_start, u64 exclude_nr,
1717                                      int data)
1718 {
1719         int ret;
1720         u64 orig_search_start;
1721         struct btrfs_root * root = orig_root->fs_info->extent_root;
1722         struct btrfs_fs_info *info = root->fs_info;
1723         u64 total_needed = num_bytes;
1724         u64 *last_ptr = NULL;
1725         struct btrfs_block_group_cache *block_group;
1726         int full_scan = 0;
1727         int wrapped = 0;
1728         int chunk_alloc_done = 0;
1729         int empty_cluster = 2 * 1024 * 1024;
1730         int allowed_chunk_alloc = 0;
1731
1732         WARN_ON(num_bytes < root->sectorsize);
1733         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1734
1735         if (orig_root->ref_cows || empty_size)
1736                 allowed_chunk_alloc = 1;
1737
1738         if (data & BTRFS_BLOCK_GROUP_METADATA) {
1739                 last_ptr = &root->fs_info->last_alloc;
1740                 empty_cluster = 256 * 1024;
1741         }
1742
1743         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
1744                 last_ptr = &root->fs_info->last_data_alloc;
1745         }
1746
1747         if (last_ptr) {
1748                 if (*last_ptr)
1749                         hint_byte = *last_ptr;
1750                 else {
1751                         empty_size += empty_cluster;
1752                 }
1753         }
1754
1755         search_start = max(search_start, first_logical_byte(root, 0));
1756         orig_search_start = search_start;
1757
1758         if (search_end == (u64)-1)
1759                 search_end = btrfs_super_total_bytes(&info->super_copy);
1760
1761         if (hint_byte) {
1762                 block_group = btrfs_lookup_first_block_group(info, hint_byte);
1763                 if (!block_group)
1764                         hint_byte = search_start;
1765                 block_group = __btrfs_find_block_group(root, block_group,
1766                                                      hint_byte, data, 1);
1767                 if (last_ptr && *last_ptr == 0 && block_group)
1768                         hint_byte = block_group->key.objectid;
1769         } else {
1770                 block_group = __btrfs_find_block_group(root,
1771                                                      trans->block_group,
1772                                                      search_start, data, 1);
1773         }
1774         search_start = max(search_start, hint_byte);
1775
1776         total_needed += empty_size;
1777
1778 check_failed:
1779         if (!block_group) {
1780                 block_group = btrfs_lookup_first_block_group(info,
1781                                                              search_start);
1782                 if (!block_group)
1783                         block_group = btrfs_lookup_first_block_group(info,
1784                                                        orig_search_start);
1785         }
1786         if (full_scan && !chunk_alloc_done) {
1787                 if (allowed_chunk_alloc) {
1788                         do_chunk_alloc(trans, root,
1789                                      num_bytes + 2 * 1024 * 1024, data, 1);
1790                         allowed_chunk_alloc = 0;
1791                 } else if (block_group && block_group_bits(block_group, data)) {
1792                         block_group->space_info->force_alloc = 1;
1793                 }
1794                 chunk_alloc_done = 1;
1795         }
1796         ret = find_search_start(root, &block_group, &search_start,
1797                                 total_needed, data);
1798         if (ret == -ENOSPC && last_ptr && *last_ptr) {
1799                 *last_ptr = 0;
1800                 block_group = btrfs_lookup_first_block_group(info,
1801                                                              orig_search_start);
1802                 search_start = orig_search_start;
1803                 ret = find_search_start(root, &block_group, &search_start,
1804                                         total_needed, data);
1805         }
1806         if (ret == -ENOSPC)
1807                 goto enospc;
1808         if (ret)
1809                 goto error;
1810
1811         if (last_ptr && *last_ptr && search_start != *last_ptr) {
1812                 *last_ptr = 0;
1813                 if (!empty_size) {
1814                         empty_size += empty_cluster;
1815                         total_needed += empty_size;
1816                 }
1817                 block_group = btrfs_lookup_first_block_group(info,
1818                                                        orig_search_start);
1819                 search_start = orig_search_start;
1820                 ret = find_search_start(root, &block_group,
1821                                         &search_start, total_needed, data);
1822                 if (ret == -ENOSPC)
1823                         goto enospc;
1824                 if (ret)
1825                         goto error;
1826         }
1827
1828         search_start = stripe_align(root, search_start);
1829         ins->objectid = search_start;
1830         ins->offset = num_bytes;
1831
1832         if (ins->objectid + num_bytes >= search_end)
1833                 goto enospc;
1834
1835         if (ins->objectid + num_bytes >
1836             block_group->key.objectid + block_group->key.offset) {
1837                 search_start = block_group->key.objectid +
1838                         block_group->key.offset;
1839                 goto new_group;
1840         }
1841
1842         if (test_range_bit(&info->extent_ins, ins->objectid,
1843                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1844                 search_start = ins->objectid + num_bytes;
1845                 goto new_group;
1846         }
1847
1848         if (test_range_bit(&info->pinned_extents, ins->objectid,
1849                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1850                 search_start = ins->objectid + num_bytes;
1851                 goto new_group;
1852         }
1853
1854         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1855             ins->objectid < exclude_start + exclude_nr)) {
1856                 search_start = exclude_start + exclude_nr;
1857                 goto new_group;
1858         }
1859
1860         if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
1861                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1862                 if (block_group)
1863                         trans->block_group = block_group;
1864         }
1865         ins->offset = num_bytes;
1866         if (last_ptr) {
1867                 *last_ptr = ins->objectid + ins->offset;
1868                 if (*last_ptr ==
1869                     btrfs_super_total_bytes(&root->fs_info->super_copy)) {
1870                         *last_ptr = 0;
1871                 }
1872         }
1873         return 0;
1874
1875 new_group:
1876         if (search_start + num_bytes >= search_end) {
1877 enospc:
1878                 search_start = orig_search_start;
1879                 if (full_scan) {
1880                         ret = -ENOSPC;
1881                         goto error;
1882                 }
1883                 if (wrapped) {
1884                         if (!full_scan)
1885                                 total_needed -= empty_size;
1886                         full_scan = 1;
1887                 } else
1888                         wrapped = 1;
1889         }
1890         block_group = btrfs_lookup_first_block_group(info, search_start);
1891         cond_resched();
1892         block_group = __btrfs_find_block_group(root, block_group,
1893                                              search_start, data, 0);
1894         goto check_failed;
1895
1896 error:
1897         return ret;
1898 }
1899
1900 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
1901                                   struct btrfs_root *root,
1902                                   u64 num_bytes, u64 min_alloc_size,
1903                                   u64 empty_size, u64 hint_byte,
1904                                   u64 search_end, struct btrfs_key *ins,
1905                                   u64 data)
1906 {
1907         int ret;
1908         u64 search_start = 0;
1909         u64 alloc_profile;
1910         struct btrfs_fs_info *info = root->fs_info;
1911
1912         if (data) {
1913                 alloc_profile = info->avail_data_alloc_bits &
1914                                 info->data_alloc_profile;
1915                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
1916         } else if (root == root->fs_info->chunk_root) {
1917                 alloc_profile = info->avail_system_alloc_bits &
1918                                 info->system_alloc_profile;
1919                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
1920         } else {
1921                 alloc_profile = info->avail_metadata_alloc_bits &
1922                                 info->metadata_alloc_profile;
1923                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
1924         }
1925 again:
1926         data = reduce_alloc_profile(root, data);
1927         /*
1928          * the only place that sets empty_size is btrfs_realloc_node, which
1929          * is not called recursively on allocations
1930          */
1931         if (empty_size || root->ref_cows) {
1932                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
1933                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1934                                      2 * 1024 * 1024,
1935                                      BTRFS_BLOCK_GROUP_METADATA |
1936                                      (info->metadata_alloc_profile &
1937                                       info->avail_metadata_alloc_bits), 0);
1938                         BUG_ON(ret);
1939                 }
1940                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1941                                      num_bytes + 2 * 1024 * 1024, data, 0);
1942                 BUG_ON(ret);
1943         }
1944
1945         WARN_ON(num_bytes < root->sectorsize);
1946         ret = find_free_extent(trans, root, num_bytes, empty_size,
1947                                search_start, search_end, hint_byte, ins,
1948                                trans->alloc_exclude_start,
1949                                trans->alloc_exclude_nr, data);
1950
1951         if (ret == -ENOSPC && num_bytes > min_alloc_size) {
1952                 num_bytes = num_bytes >> 1;
1953                 num_bytes = max(num_bytes, min_alloc_size);
1954                 do_chunk_alloc(trans, root->fs_info->extent_root,
1955                                num_bytes, data, 1);
1956                 goto again;
1957         }
1958         if (ret) {
1959                 printk("allocation failed flags %Lu\n", data);
1960                 BUG();
1961         }
1962         clear_extent_dirty(&root->fs_info->free_space_cache,
1963                            ins->objectid, ins->objectid + ins->offset - 1,
1964                            GFP_NOFS);
1965         return 0;
1966 }
1967
1968 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
1969                                   struct btrfs_root *root,
1970                                   u64 num_bytes, u64 min_alloc_size,
1971                                   u64 empty_size, u64 hint_byte,
1972                                   u64 search_end, struct btrfs_key *ins,
1973                                   u64 data)
1974 {
1975         int ret;
1976         maybe_lock_mutex(root);
1977         ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
1978                                      empty_size, hint_byte, search_end, ins,
1979                                      data);
1980         maybe_unlock_mutex(root);
1981         return ret;
1982 }
1983
1984 static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
1985                                          struct btrfs_root *root,
1986                                          u64 root_objectid, u64 ref_generation,
1987                                          u64 owner, u64 owner_offset,
1988                                          struct btrfs_key *ins)
1989 {
1990         int ret;
1991         int pending_ret;
1992         u64 super_used;
1993         u64 root_used;
1994         u64 num_bytes = ins->offset;
1995         u32 sizes[2];
1996         struct btrfs_fs_info *info = root->fs_info;
1997         struct btrfs_root *extent_root = info->extent_root;
1998         struct btrfs_extent_item *extent_item;
1999         struct btrfs_extent_ref *ref;
2000         struct btrfs_path *path;
2001         struct btrfs_key keys[2];
2002
2003         /* block accounting for super block */
2004         spin_lock_irq(&info->delalloc_lock);
2005         super_used = btrfs_super_bytes_used(&info->super_copy);
2006         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
2007         spin_unlock_irq(&info->delalloc_lock);
2008
2009         /* block accounting for root item */
2010         root_used = btrfs_root_used(&root->root_item);
2011         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
2012
2013         if (root == extent_root) {
2014                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
2015                                 ins->objectid + ins->offset - 1,
2016                                 EXTENT_LOCKED, GFP_NOFS);
2017                 goto update_block;
2018         }
2019
2020         memcpy(&keys[0], ins, sizeof(*ins));
2021         keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
2022                                          owner, owner_offset);
2023         keys[1].objectid = ins->objectid;
2024         keys[1].type = BTRFS_EXTENT_REF_KEY;
2025         sizes[0] = sizeof(*extent_item);
2026         sizes[1] = sizeof(*ref);
2027
2028         path = btrfs_alloc_path();
2029         BUG_ON(!path);
2030
2031         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
2032                                        sizes, 2);
2033
2034         BUG_ON(ret);
2035         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2036                                      struct btrfs_extent_item);
2037         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
2038         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
2039                              struct btrfs_extent_ref);
2040
2041         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
2042         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
2043         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
2044         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
2045
2046         btrfs_mark_buffer_dirty(path->nodes[0]);
2047
2048         trans->alloc_exclude_start = 0;
2049         trans->alloc_exclude_nr = 0;
2050         btrfs_free_path(path);
2051         finish_current_insert(trans, extent_root);
2052         pending_ret = del_pending_extents(trans, extent_root);
2053
2054         if (ret)
2055                 goto out;
2056         if (pending_ret) {
2057                 ret = pending_ret;
2058                 goto out;
2059         }
2060
2061 update_block:
2062         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
2063         if (ret) {
2064                 printk("update block group failed for %Lu %Lu\n",
2065                        ins->objectid, ins->offset);
2066                 BUG();
2067         }
2068 out:
2069         return ret;
2070 }
2071
2072 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2073                                 struct btrfs_root *root,
2074                                 u64 root_objectid, u64 ref_generation,
2075                                 u64 owner, u64 owner_offset,
2076                                 struct btrfs_key *ins)
2077 {
2078         int ret;
2079         maybe_lock_mutex(root);
2080         ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
2081                                             ref_generation, owner,
2082                                             owner_offset, ins);
2083         maybe_unlock_mutex(root);
2084         return ret;
2085 }
2086 /*
2087  * finds a free extent and does all the dirty work required for allocation
2088  * returns the key for the extent through ins, and a tree buffer for
2089  * the first block of the extent through buf.
2090  *
2091  * returns 0 if everything worked, non-zero otherwise.
2092  */
2093 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
2094                        struct btrfs_root *root,
2095                        u64 num_bytes, u64 min_alloc_size,
2096                        u64 root_objectid, u64 ref_generation,
2097                        u64 owner, u64 owner_offset,
2098                        u64 empty_size, u64 hint_byte,
2099                        u64 search_end, struct btrfs_key *ins, u64 data)
2100 {
2101         int ret;
2102
2103         maybe_lock_mutex(root);
2104
2105         ret = __btrfs_reserve_extent(trans, root, num_bytes,
2106                                      min_alloc_size, empty_size, hint_byte,
2107                                      search_end, ins, data);
2108         BUG_ON(ret);
2109         ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
2110                                             ref_generation, owner,
2111                                             owner_offset, ins);
2112         BUG_ON(ret);
2113
2114         maybe_unlock_mutex(root);
2115         return ret;
2116 }
2117 /*
2118  * helper function to allocate a block for a given tree
2119  * returns the tree buffer or NULL.
2120  */
2121 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
2122                                              struct btrfs_root *root,
2123                                              u32 blocksize,
2124                                              u64 root_objectid,
2125                                              u64 ref_generation,
2126                                              u64 first_objectid,
2127                                              int level,
2128                                              u64 hint,
2129                                              u64 empty_size)
2130 {
2131         struct btrfs_key ins;
2132         int ret;
2133         struct extent_buffer *buf;
2134
2135         ret = btrfs_alloc_extent(trans, root, blocksize, blocksize,
2136                                  root_objectid, ref_generation,
2137                                  level, first_objectid, empty_size, hint,
2138                                  (u64)-1, &ins, 0);
2139         if (ret) {
2140                 BUG_ON(ret > 0);
2141                 return ERR_PTR(ret);
2142         }
2143         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
2144         if (!buf) {
2145                 btrfs_free_extent(trans, root, ins.objectid, blocksize,
2146                                   root->root_key.objectid, ref_generation,
2147                                   0, 0, 0);
2148                 return ERR_PTR(-ENOMEM);
2149         }
2150         btrfs_set_header_generation(buf, trans->transid);
2151         btrfs_tree_lock(buf);
2152         clean_tree_block(trans, root, buf);
2153         btrfs_set_buffer_uptodate(buf);
2154
2155         if (PageDirty(buf->first_page)) {
2156                 printk("page %lu dirty\n", buf->first_page->index);
2157                 WARN_ON(1);
2158         }
2159
2160         set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
2161                          buf->start + buf->len - 1, GFP_NOFS);
2162         trans->blocks_used++;
2163         return buf;
2164 }
2165
2166 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
2167                                   struct btrfs_root *root,
2168                                   struct extent_buffer *leaf)
2169 {
2170         u64 leaf_owner;
2171         u64 leaf_generation;
2172         struct btrfs_key key;
2173         struct btrfs_file_extent_item *fi;
2174         int i;
2175         int nritems;
2176         int ret;
2177
2178         BUG_ON(!btrfs_is_leaf(leaf));
2179         nritems = btrfs_header_nritems(leaf);
2180         leaf_owner = btrfs_header_owner(leaf);
2181         leaf_generation = btrfs_header_generation(leaf);
2182
2183         for (i = 0; i < nritems; i++) {
2184                 u64 disk_bytenr;
2185
2186                 btrfs_item_key_to_cpu(leaf, &key, i);
2187                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2188                         continue;
2189                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
2190                 if (btrfs_file_extent_type(leaf, fi) ==
2191                     BTRFS_FILE_EXTENT_INLINE)
2192                         continue;
2193                 /*
2194                  * FIXME make sure to insert a trans record that
2195                  * repeats the snapshot del on crash
2196                  */
2197                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2198                 if (disk_bytenr == 0)
2199                         continue;
2200                 ret = __btrfs_free_extent(trans, root, disk_bytenr,
2201                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
2202                                 leaf_owner, leaf_generation,
2203                                 key.objectid, key.offset, 0);
2204                 BUG_ON(ret);
2205         }
2206         return 0;
2207 }
2208
2209 static void noinline reada_walk_down(struct btrfs_root *root,
2210                                      struct extent_buffer *node,
2211                                      int slot)
2212 {
2213         u64 bytenr;
2214         u64 last = 0;
2215         u32 nritems;
2216         u32 refs;
2217         u32 blocksize;
2218         int ret;
2219         int i;
2220         int level;
2221         int skipped = 0;
2222
2223         nritems = btrfs_header_nritems(node);
2224         level = btrfs_header_level(node);
2225         if (level)
2226                 return;
2227
2228         for (i = slot; i < nritems && skipped < 32; i++) {
2229                 bytenr = btrfs_node_blockptr(node, i);
2230                 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
2231                              (last > bytenr && last - bytenr > 32 * 1024))) {
2232                         skipped++;
2233                         continue;
2234                 }
2235                 blocksize = btrfs_level_size(root, level - 1);
2236                 if (i != slot) {
2237                         ret = lookup_extent_ref(NULL, root, bytenr,
2238                                                 blocksize, &refs);
2239                         BUG_ON(ret);
2240                         if (refs != 1) {
2241                                 skipped++;
2242                                 continue;
2243                         }
2244                 }
2245                 ret = readahead_tree_block(root, bytenr, blocksize,
2246                                            btrfs_node_ptr_generation(node, i));
2247                 last = bytenr + blocksize;
2248                 cond_resched();
2249                 if (ret)
2250                         break;
2251         }
2252 }
2253
2254 /*
2255  * we want to avoid as much random IO as we can with the alloc mutex
2256  * held, so drop the lock and do the lookup, then do it again with the
2257  * lock held.
2258  */
2259 int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
2260                               u32 *refs)
2261 {
2262         mutex_unlock(&root->fs_info->alloc_mutex);
2263         lookup_extent_ref(NULL, root, start, len, refs);
2264         cond_resched();
2265         mutex_lock(&root->fs_info->alloc_mutex);
2266         return lookup_extent_ref(NULL, root, start, len, refs);
2267 }
2268
2269 /*
2270  * helper function for drop_snapshot, this walks down the tree dropping ref
2271  * counts as it goes.
2272  */
2273 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2274                                    struct btrfs_root *root,
2275                                    struct btrfs_path *path, int *level)
2276 {
2277         u64 root_owner;
2278         u64 root_gen;
2279         u64 bytenr;
2280         u64 ptr_gen;
2281         struct extent_buffer *next;
2282         struct extent_buffer *cur;
2283         struct extent_buffer *parent;
2284         u32 blocksize;
2285         int ret;
2286         u32 refs;
2287
2288         mutex_lock(&root->fs_info->alloc_mutex);
2289
2290         WARN_ON(*level < 0);
2291         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2292         ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
2293                                 path->nodes[*level]->len, &refs);
2294         BUG_ON(ret);
2295         if (refs > 1)
2296                 goto out;
2297
2298         /*
2299          * walk down to the last node level and free all the leaves
2300          */
2301         while(*level >= 0) {
2302                 WARN_ON(*level < 0);
2303                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2304                 cur = path->nodes[*level];
2305
2306                 if (btrfs_header_level(cur) != *level)
2307                         WARN_ON(1);
2308
2309                 if (path->slots[*level] >=
2310                     btrfs_header_nritems(cur))
2311                         break;
2312                 if (*level == 0) {
2313                         ret = drop_leaf_ref(trans, root, cur);
2314                         BUG_ON(ret);
2315                         break;
2316                 }
2317                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2318                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2319                 blocksize = btrfs_level_size(root, *level - 1);
2320
2321                 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
2322                 BUG_ON(ret);
2323                 if (refs != 1) {
2324                         parent = path->nodes[*level];
2325                         root_owner = btrfs_header_owner(parent);
2326                         root_gen = btrfs_header_generation(parent);
2327                         path->slots[*level]++;
2328                         ret = __btrfs_free_extent(trans, root, bytenr,
2329                                                 blocksize, root_owner,
2330                                                 root_gen, 0, 0, 1);
2331                         BUG_ON(ret);
2332                         continue;
2333                 }
2334                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2335                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2336                         free_extent_buffer(next);
2337                         mutex_unlock(&root->fs_info->alloc_mutex);
2338
2339                         if (path->slots[*level] == 0)
2340                                 reada_walk_down(root, cur, path->slots[*level]);
2341
2342                         next = read_tree_block(root, bytenr, blocksize,
2343                                                ptr_gen);
2344                         cond_resched();
2345                         mutex_lock(&root->fs_info->alloc_mutex);
2346
2347                         /* we've dropped the lock, double check */
2348                         ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
2349                                                 &refs);
2350                         BUG_ON(ret);
2351                         if (refs != 1) {
2352                                 parent = path->nodes[*level];
2353                                 root_owner = btrfs_header_owner(parent);
2354                                 root_gen = btrfs_header_generation(parent);
2355
2356                                 path->slots[*level]++;
2357                                 free_extent_buffer(next);
2358                                 ret = __btrfs_free_extent(trans, root, bytenr,
2359                                                         blocksize,
2360                                                         root_owner,
2361                                                         root_gen, 0, 0, 1);
2362                                 BUG_ON(ret);
2363                                 continue;
2364                         }
2365                 }
2366                 WARN_ON(*level <= 0);
2367                 if (path->nodes[*level-1])
2368                         free_extent_buffer(path->nodes[*level-1]);
2369                 path->nodes[*level-1] = next;
2370                 *level = btrfs_header_level(next);
2371                 path->slots[*level] = 0;
2372         }
2373 out:
2374         WARN_ON(*level < 0);
2375         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2376
2377         if (path->nodes[*level] == root->node) {
2378                 root_owner = root->root_key.objectid;
2379                 parent = path->nodes[*level];
2380         } else {
2381                 parent = path->nodes[*level + 1];
2382                 root_owner = btrfs_header_owner(parent);
2383         }
2384
2385         root_gen = btrfs_header_generation(parent);
2386         ret = __btrfs_free_extent(trans, root, path->nodes[*level]->start,
2387                                 path->nodes[*level]->len,
2388                                 root_owner, root_gen, 0, 0, 1);
2389         free_extent_buffer(path->nodes[*level]);
2390         path->nodes[*level] = NULL;
2391         *level += 1;
2392         BUG_ON(ret);
2393         mutex_unlock(&root->fs_info->alloc_mutex);
2394         cond_resched();
2395         return 0;
2396 }
2397
2398 /*
2399  * helper for dropping snapshots.  This walks back up the tree in the path
2400  * to find the first node higher up where we haven't yet gone through
2401  * all the slots
2402  */
2403 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2404                                  struct btrfs_root *root,
2405                                  struct btrfs_path *path, int *level)
2406 {
2407         u64 root_owner;
2408         u64 root_gen;
2409         struct btrfs_root_item *root_item = &root->root_item;
2410         int i;
2411         int slot;
2412         int ret;
2413
2414         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2415                 slot = path->slots[i];
2416                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2417                         struct extent_buffer *node;
2418                         struct btrfs_disk_key disk_key;
2419                         node = path->nodes[i];
2420                         path->slots[i]++;
2421                         *level = i;
2422                         WARN_ON(*level == 0);
2423                         btrfs_node_key(node, &disk_key, path->slots[i]);
2424                         memcpy(&root_item->drop_progress,
2425                                &disk_key, sizeof(disk_key));
2426                         root_item->drop_level = i;
2427                         return 0;
2428                 } else {
2429                         if (path->nodes[*level] == root->node) {
2430                                 root_owner = root->root_key.objectid;
2431                                 root_gen =
2432                                    btrfs_header_generation(path->nodes[*level]);
2433                         } else {
2434                                 struct extent_buffer *node;
2435                                 node = path->nodes[*level + 1];
2436                                 root_owner = btrfs_header_owner(node);
2437                                 root_gen = btrfs_header_generation(node);
2438                         }
2439                         ret = btrfs_free_extent(trans, root,
2440                                                 path->nodes[*level]->start,
2441                                                 path->nodes[*level]->len,
2442                                                 root_owner, root_gen, 0, 0, 1);
2443                         BUG_ON(ret);
2444                         free_extent_buffer(path->nodes[*level]);
2445                         path->nodes[*level] = NULL;
2446                         *level = i + 1;
2447                 }
2448         }
2449         return 1;
2450 }
2451
2452 /*
2453  * drop the reference count on the tree rooted at 'snap'.  This traverses
2454  * the tree freeing any blocks that have a ref count of zero after being
2455  * decremented.
2456  */
2457 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2458                         *root)
2459 {
2460         int ret = 0;
2461         int wret;
2462         int level;
2463         struct btrfs_path *path;
2464         int i;
2465         int orig_level;
2466         struct btrfs_root_item *root_item = &root->root_item;
2467
2468         WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
2469         path = btrfs_alloc_path();
2470         BUG_ON(!path);
2471
2472         level = btrfs_header_level(root->node);
2473         orig_level = level;
2474         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2475                 path->nodes[level] = root->node;
2476                 extent_buffer_get(root->node);
2477                 path->slots[level] = 0;
2478         } else {
2479                 struct btrfs_key key;
2480                 struct btrfs_disk_key found_key;
2481                 struct extent_buffer *node;
2482
2483                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2484                 level = root_item->drop_level;
2485                 path->lowest_level = level;
2486                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2487                 if (wret < 0) {
2488                         ret = wret;
2489                         goto out;
2490                 }
2491                 node = path->nodes[level];
2492                 btrfs_node_key(node, &found_key, path->slots[level]);
2493                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2494                                sizeof(found_key)));
2495                 /*
2496                  * unlock our path, this is safe because only this
2497                  * function is allowed to delete this snapshot
2498                  */
2499                 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
2500                         if (path->nodes[i] && path->locks[i]) {
2501                                 path->locks[i] = 0;
2502                                 btrfs_tree_unlock(path->nodes[i]);
2503                         }
2504                 }
2505         }
2506         while(1) {
2507                 wret = walk_down_tree(trans, root, path, &level);
2508                 if (wret > 0)
2509                         break;
2510                 if (wret < 0)
2511                         ret = wret;
2512
2513                 wret = walk_up_tree(trans, root, path, &level);
2514                 if (wret > 0)
2515                         break;
2516                 if (wret < 0)
2517                         ret = wret;
2518                 if (trans->transaction->in_commit) {
2519                         ret = -EAGAIN;
2520                         break;
2521                 }
2522         }
2523         for (i = 0; i <= orig_level; i++) {
2524                 if (path->nodes[i]) {
2525                         free_extent_buffer(path->nodes[i]);
2526                         path->nodes[i] = NULL;
2527                 }
2528         }
2529 out:
2530         btrfs_free_path(path);
2531         return ret;
2532 }
2533
2534 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2535 {
2536         u64 start;
2537         u64 end;
2538         u64 ptr;
2539         int ret;
2540
2541         mutex_lock(&info->alloc_mutex);
2542         while(1) {
2543                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2544                                             &start, &end, (unsigned int)-1);
2545                 if (ret)
2546                         break;
2547                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2548                 if (!ret)
2549                         kfree((void *)(unsigned long)ptr);
2550                 clear_extent_bits(&info->block_group_cache, start,
2551                                   end, (unsigned int)-1, GFP_NOFS);
2552         }
2553         while(1) {
2554                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2555                                             &start, &end, EXTENT_DIRTY);
2556                 if (ret)
2557                         break;
2558                 clear_extent_dirty(&info->free_space_cache, start,
2559                                    end, GFP_NOFS);
2560         }
2561         mutex_unlock(&info->alloc_mutex);
2562         return 0;
2563 }
2564
2565 static unsigned long calc_ra(unsigned long start, unsigned long last,
2566                              unsigned long nr)
2567 {
2568         return min(last, start + nr - 1);
2569 }
2570
2571 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2572                                          u64 len)
2573 {
2574         u64 page_start;
2575         u64 page_end;
2576         unsigned long last_index;
2577         unsigned long i;
2578         struct page *page;
2579         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2580         struct file_ra_state *ra;
2581         unsigned long total_read = 0;
2582         unsigned long ra_pages;
2583         struct btrfs_trans_handle *trans;
2584
2585         ra = kzalloc(sizeof(*ra), GFP_NOFS);
2586
2587         mutex_lock(&inode->i_mutex);
2588         i = start >> PAGE_CACHE_SHIFT;
2589         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2590
2591         ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages;
2592
2593         file_ra_state_init(ra, inode->i_mapping);
2594
2595         for (; i <= last_index; i++) {
2596                 if (total_read % ra_pages == 0) {
2597                         btrfs_force_ra(inode->i_mapping, ra, NULL, i,
2598                                        calc_ra(i, last_index, ra_pages));
2599                 }
2600                 total_read++;
2601                 if (((u64)i << PAGE_CACHE_SHIFT) > inode->i_size)
2602                         goto truncate_racing;
2603
2604                 page = grab_cache_page(inode->i_mapping, i);
2605                 if (!page) {
2606                         goto out_unlock;
2607                 }
2608                 if (!PageUptodate(page)) {
2609                         btrfs_readpage(NULL, page);
2610                         lock_page(page);
2611                         if (!PageUptodate(page)) {
2612                                 unlock_page(page);
2613                                 page_cache_release(page);
2614                                 goto out_unlock;
2615                         }
2616                 }
2617 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
2618                 ClearPageDirty(page);
2619 #else
2620                 cancel_dirty_page(page, PAGE_CACHE_SIZE);
2621 #endif
2622                 wait_on_page_writeback(page);
2623                 set_page_extent_mapped(page);
2624                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2625                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2626
2627                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2628
2629                 set_extent_delalloc(io_tree, page_start,
2630                                     page_end, GFP_NOFS);
2631                 set_page_dirty(page);
2632
2633                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2634                 unlock_page(page);
2635                 page_cache_release(page);
2636         }
2637         balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2638                                            total_read);
2639
2640 out_unlock:
2641         kfree(ra);
2642         trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
2643         if (trans) {
2644                 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
2645                 mark_inode_dirty(inode);
2646         }
2647         mutex_unlock(&inode->i_mutex);
2648         return 0;
2649
2650 truncate_racing:
2651         vmtruncate(inode, inode->i_size);
2652         balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2653                                            total_read);
2654         goto out_unlock;
2655 }
2656
2657 /*
2658  * The back references tell us which tree holds a ref on a block,
2659  * but it is possible for the tree root field in the reference to
2660  * reflect the original root before a snapshot was made.  In this
2661  * case we should search through all the children of a given root
2662  * to find potential holders of references on a block.
2663  *
2664  * Instead, we do something a little less fancy and just search
2665  * all the roots for a given key/block combination.
2666  */
2667 static int find_root_for_ref(struct btrfs_root *root,
2668                              struct btrfs_path *path,
2669                              struct btrfs_key *key0,
2670                              int level,
2671                              int file_key,
2672                              struct btrfs_root **found_root,
2673                              u64 bytenr)
2674 {
2675         struct btrfs_key root_location;
2676         struct btrfs_root *cur_root = *found_root;
2677         struct btrfs_file_extent_item *file_extent;
2678         u64 root_search_start = BTRFS_FS_TREE_OBJECTID;
2679         u64 found_bytenr;
2680         int ret;
2681
2682         root_location.offset = (u64)-1;
2683         root_location.type = BTRFS_ROOT_ITEM_KEY;
2684         path->lowest_level = level;
2685         path->reada = 0;
2686         while(1) {
2687                 ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0);
2688                 found_bytenr = 0;
2689                 if (ret == 0 && file_key) {
2690                         struct extent_buffer *leaf = path->nodes[0];
2691                         file_extent = btrfs_item_ptr(leaf, path->slots[0],
2692                                              struct btrfs_file_extent_item);
2693                         if (btrfs_file_extent_type(leaf, file_extent) ==
2694                             BTRFS_FILE_EXTENT_REG) {
2695                                 found_bytenr =
2696                                         btrfs_file_extent_disk_bytenr(leaf,
2697                                                                file_extent);
2698                        }
2699                 } else if (!file_key) {
2700                         if (path->nodes[level])
2701                                 found_bytenr = path->nodes[level]->start;
2702                 }
2703
2704                 btrfs_release_path(cur_root, path);
2705
2706                 if (found_bytenr == bytenr) {
2707                         *found_root = cur_root;
2708                         ret = 0;
2709                         goto out;
2710                 }
2711                 ret = btrfs_search_root(root->fs_info->tree_root,
2712                                         root_search_start, &root_search_start);
2713                 if (ret)
2714                         break;
2715
2716                 root_location.objectid = root_search_start;
2717                 cur_root = btrfs_read_fs_root_no_name(root->fs_info,
2718                                                       &root_location);
2719                 if (!cur_root) {
2720                         ret = 1;
2721                         break;
2722                 }
2723         }
2724 out:
2725         path->lowest_level = 0;
2726         return ret;
2727 }
2728
2729 /*
2730  * note, this releases the path
2731  */
2732 static int noinline relocate_one_reference(struct btrfs_root *extent_root,
2733                                   struct btrfs_path *path,
2734                                   struct btrfs_key *extent_key,
2735                                   u64 *last_file_objectid,
2736                                   u64 *last_file_offset,
2737                                   u64 *last_file_root,
2738                                   u64 last_extent)
2739 {
2740         struct inode *inode;
2741         struct btrfs_root *found_root;
2742         struct btrfs_key root_location;
2743         struct btrfs_key found_key;
2744         struct btrfs_extent_ref *ref;
2745         u64 ref_root;
2746         u64 ref_gen;
2747         u64 ref_objectid;
2748         u64 ref_offset;
2749         int ret;
2750         int level;
2751
2752         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
2753
2754         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
2755                              struct btrfs_extent_ref);
2756         ref_root = btrfs_ref_root(path->nodes[0], ref);
2757         ref_gen = btrfs_ref_generation(path->nodes[0], ref);
2758         ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
2759         ref_offset = btrfs_ref_offset(path->nodes[0], ref);
2760         btrfs_release_path(extent_root, path);
2761
2762         root_location.objectid = ref_root;
2763         if (ref_gen == 0)
2764                 root_location.offset = 0;
2765         else
2766                 root_location.offset = (u64)-1;
2767         root_location.type = BTRFS_ROOT_ITEM_KEY;
2768
2769         found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
2770                                                 &root_location);
2771         BUG_ON(!found_root);
2772         mutex_unlock(&extent_root->fs_info->alloc_mutex);
2773
2774         if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2775                 found_key.objectid = ref_objectid;
2776                 found_key.type = BTRFS_EXTENT_DATA_KEY;
2777                 found_key.offset = ref_offset;
2778                 level = 0;
2779
2780                 if (last_extent == extent_key->objectid &&
2781                     *last_file_objectid == ref_objectid &&
2782                     *last_file_offset == ref_offset &&
2783                     *last_file_root == ref_root)
2784                         goto out;
2785
2786                 ret = find_root_for_ref(extent_root, path, &found_key,
2787                                         level, 1, &found_root,
2788                                         extent_key->objectid);
2789
2790                 if (ret)
2791                         goto out;
2792
2793                 if (last_extent == extent_key->objectid &&
2794                     *last_file_objectid == ref_objectid &&
2795                     *last_file_offset == ref_offset &&
2796                     *last_file_root == ref_root)
2797                         goto out;
2798
2799                 inode = btrfs_iget_locked(extent_root->fs_info->sb,
2800                                           ref_objectid, found_root);
2801                 if (inode->i_state & I_NEW) {
2802                         /* the inode and parent dir are two different roots */
2803                         BTRFS_I(inode)->root = found_root;
2804                         BTRFS_I(inode)->location.objectid = ref_objectid;
2805                         BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
2806                         BTRFS_I(inode)->location.offset = 0;
2807                         btrfs_read_locked_inode(inode);
2808                         unlock_new_inode(inode);
2809
2810                 }
2811                 /* this can happen if the reference is not against
2812                  * the latest version of the tree root
2813                  */
2814                 if (is_bad_inode(inode))
2815                         goto out;
2816
2817                 *last_file_objectid = inode->i_ino;
2818                 *last_file_root = found_root->root_key.objectid;
2819                 *last_file_offset = ref_offset;
2820
2821                 relocate_inode_pages(inode, ref_offset, extent_key->offset);
2822                 iput(inode);
2823         } else {
2824                 struct btrfs_trans_handle *trans;
2825                 struct extent_buffer *eb;
2826                 int needs_lock = 0;
2827
2828                 eb = read_tree_block(found_root, extent_key->objectid,
2829                                      extent_key->offset, 0);
2830                 btrfs_tree_lock(eb);
2831                 level = btrfs_header_level(eb);
2832
2833                 if (level == 0)
2834                         btrfs_item_key_to_cpu(eb, &found_key, 0);
2835                 else
2836                         btrfs_node_key_to_cpu(eb, &found_key, 0);
2837
2838                 btrfs_tree_unlock(eb);
2839                 free_extent_buffer(eb);
2840
2841                 ret = find_root_for_ref(extent_root, path, &found_key,
2842                                         level, 0, &found_root,
2843                                         extent_key->objectid);
2844
2845                 if (ret)
2846                         goto out;
2847
2848                 /*
2849                  * right here almost anything could happen to our key,
2850                  * but that's ok.  The cow below will either relocate it
2851                  * or someone else will have relocated it.  Either way,
2852                  * it is in a different spot than it was before and
2853                  * we're happy.
2854                  */
2855
2856                 trans = btrfs_start_transaction(found_root, 1);
2857
2858                 if (found_root == extent_root->fs_info->extent_root ||
2859                     found_root == extent_root->fs_info->chunk_root ||
2860                     found_root == extent_root->fs_info->dev_root) {
2861                         needs_lock = 1;
2862                         mutex_lock(&extent_root->fs_info->alloc_mutex);
2863                 }
2864
2865                 path->lowest_level = level;
2866                 path->reada = 2;
2867                 ret = btrfs_search_slot(trans, found_root, &found_key, path,
2868                                         0, 1);
2869                 path->lowest_level = 0;
2870                 btrfs_release_path(found_root, path);
2871
2872                 if (found_root == found_root->fs_info->extent_root)
2873                         btrfs_extent_post_op(trans, found_root);
2874                 if (needs_lock)
2875                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
2876
2877                 btrfs_end_transaction(trans, found_root);
2878
2879         }
2880 out:
2881         mutex_lock(&extent_root->fs_info->alloc_mutex);
2882         return 0;
2883 }
2884
2885 static int noinline del_extent_zero(struct btrfs_root *extent_root,
2886                                     struct btrfs_path *path,
2887                                     struct btrfs_key *extent_key)
2888 {
2889         int ret;
2890         struct btrfs_trans_handle *trans;
2891
2892         trans = btrfs_start_transaction(extent_root, 1);
2893         ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
2894         if (ret > 0) {
2895                 ret = -EIO;
2896                 goto out;
2897         }
2898         if (ret < 0)
2899                 goto out;
2900         ret = btrfs_del_item(trans, extent_root, path);
2901 out:
2902         btrfs_end_transaction(trans, extent_root);
2903         return ret;
2904 }
2905
2906 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
2907                                         struct btrfs_path *path,
2908                                         struct btrfs_key *extent_key)
2909 {
2910         struct btrfs_key key;
2911         struct btrfs_key found_key;
2912         struct extent_buffer *leaf;
2913         u64 last_file_objectid = 0;
2914         u64 last_file_root = 0;
2915         u64 last_file_offset = (u64)-1;
2916         u64 last_extent = 0;
2917         u32 nritems;
2918         u32 item_size;
2919         int ret = 0;
2920
2921         if (extent_key->objectid == 0) {
2922                 ret = del_extent_zero(extent_root, path, extent_key);
2923                 goto out;
2924         }
2925         key.objectid = extent_key->objectid;
2926         key.type = BTRFS_EXTENT_REF_KEY;
2927         key.offset = 0;
2928
2929         while(1) {
2930                 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2931
2932                 if (ret < 0)
2933                         goto out;
2934
2935                 ret = 0;
2936                 leaf = path->nodes[0];
2937                 nritems = btrfs_header_nritems(leaf);
2938                 if (path->slots[0] == nritems) {
2939                         ret = btrfs_next_leaf(extent_root, path);
2940                         if (ret > 0) {
2941                                 ret = 0;
2942                                 goto out;
2943                         }
2944                         if (ret < 0)
2945                                 goto out;
2946                         leaf = path->nodes[0];
2947                 }
2948
2949                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2950                 if (found_key.objectid != extent_key->objectid) {
2951                         break;
2952                 }
2953
2954                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
2955                         break;
2956                 }
2957
2958                 key.offset = found_key.offset + 1;
2959                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2960
2961                 ret = relocate_one_reference(extent_root, path, extent_key,
2962                                              &last_file_objectid,
2963                                              &last_file_offset,
2964                                              &last_file_root, last_extent);
2965                 if (ret)
2966                         goto out;
2967                 last_extent = extent_key->objectid;
2968         }
2969         ret = 0;
2970 out:
2971         btrfs_release_path(extent_root, path);
2972         return ret;
2973 }
2974
2975 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
2976 {
2977         u64 num_devices;
2978         u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
2979                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
2980
2981         num_devices = root->fs_info->fs_devices->num_devices;
2982         if (num_devices == 1) {
2983                 stripped |= BTRFS_BLOCK_GROUP_DUP;
2984                 stripped = flags & ~stripped;
2985
2986                 /* turn raid0 into single device chunks */
2987                 if (flags & BTRFS_BLOCK_GROUP_RAID0)
2988                         return stripped;
2989
2990                 /* turn mirroring into duplication */
2991                 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
2992                              BTRFS_BLOCK_GROUP_RAID10))
2993                         return stripped | BTRFS_BLOCK_GROUP_DUP;
2994                 return flags;
2995         } else {
2996                 /* they already had raid on here, just return */
2997                 if (flags & stripped)
2998                         return flags;
2999
3000                 stripped |= BTRFS_BLOCK_GROUP_DUP;
3001                 stripped = flags & ~stripped;
3002
3003                 /* switch duplicated blocks with raid1 */
3004                 if (flags & BTRFS_BLOCK_GROUP_DUP)
3005                         return stripped | BTRFS_BLOCK_GROUP_RAID1;
3006
3007                 /* turn single device chunks into raid0 */
3008                 return stripped | BTRFS_BLOCK_GROUP_RAID0;
3009         }
3010         return flags;
3011 }
3012
3013 int __alloc_chunk_for_shrink(struct btrfs_root *root,
3014                      struct btrfs_block_group_cache *shrink_block_group,
3015                      int force)
3016 {
3017         struct btrfs_trans_handle *trans;
3018         u64 new_alloc_flags;
3019         u64 calc;
3020
3021         if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
3022
3023                 mutex_unlock(&root->fs_info->alloc_mutex);
3024                 trans = btrfs_start_transaction(root, 1);
3025                 mutex_lock(&root->fs_info->alloc_mutex);
3026
3027                 new_alloc_flags = update_block_group_flags(root,
3028                                                    shrink_block_group->flags);
3029                 if (new_alloc_flags != shrink_block_group->flags) {
3030                         calc =
3031                              btrfs_block_group_used(&shrink_block_group->item);
3032                 } else {
3033                         calc = shrink_block_group->key.offset;
3034                 }
3035                 do_chunk_alloc(trans, root->fs_info->extent_root,
3036                                calc + 2 * 1024 * 1024, new_alloc_flags, force);
3037
3038                 mutex_unlock(&root->fs_info->alloc_mutex);
3039                 btrfs_end_transaction(trans, root);
3040                 mutex_lock(&root->fs_info->alloc_mutex);
3041         }
3042         return 0;
3043 }
3044
3045 int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
3046 {
3047         struct btrfs_trans_handle *trans;
3048         struct btrfs_root *tree_root = root->fs_info->tree_root;
3049         struct btrfs_path *path;
3050         u64 cur_byte;
3051         u64 total_found;
3052         u64 shrink_last_byte;
3053         struct btrfs_block_group_cache *shrink_block_group;
3054         struct btrfs_fs_info *info = root->fs_info;
3055         struct btrfs_key key;
3056         struct btrfs_key found_key;
3057         struct extent_buffer *leaf;
3058         u32 nritems;
3059         int ret;
3060         int progress;
3061
3062         mutex_lock(&root->fs_info->alloc_mutex);
3063         shrink_block_group = btrfs_lookup_block_group(root->fs_info,
3064                                                       shrink_start);
3065         BUG_ON(!shrink_block_group);
3066
3067         shrink_last_byte = shrink_block_group->key.objectid +
3068                 shrink_block_group->key.offset;
3069
3070         shrink_block_group->space_info->total_bytes -=
3071                 shrink_block_group->key.offset;
3072         path = btrfs_alloc_path();
3073         root = root->fs_info->extent_root;
3074         path->reada = 2;
3075
3076         printk("btrfs relocating block group %llu flags %llu\n",
3077                (unsigned long long)shrink_start,
3078                (unsigned long long)shrink_block_group->flags);
3079
3080         __alloc_chunk_for_shrink(root, shrink_block_group, 1);
3081
3082 again:
3083
3084         shrink_block_group->ro = 1;
3085
3086         total_found = 0;
3087         progress = 0;
3088         key.objectid = shrink_start;
3089         key.offset = 0;
3090         key.type = 0;
3091         cur_byte = key.objectid;
3092
3093         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3094         if (ret < 0)
3095                 goto out;
3096
3097         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
3098         if (ret < 0)
3099                 goto out;
3100
3101         if (ret == 0) {
3102                 leaf = path->nodes[0];
3103                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3104                 if (found_key.objectid + found_key.offset > shrink_start &&
3105                     found_key.objectid < shrink_last_byte) {
3106                         cur_byte = found_key.objectid;
3107                         key.objectid = cur_byte;
3108                 }
3109         }
3110         btrfs_release_path(root, path);
3111
3112         while(1) {
3113                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3114                 if (ret < 0)
3115                         goto out;
3116
3117 next:
3118                 leaf = path->nodes[0];
3119                 nritems = btrfs_header_nritems(leaf);
3120                 if (path->slots[0] >= nritems) {
3121                         ret = btrfs_next_leaf(root, path);
3122                         if (ret < 0)
3123                                 goto out;
3124                         if (ret == 1) {
3125                                 ret = 0;
3126                                 break;
3127                         }
3128                         leaf = path->nodes[0];
3129                         nritems = btrfs_header_nritems(leaf);
3130                 }
3131
3132                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3133
3134                 if (found_key.objectid >= shrink_last_byte)
3135                         break;
3136
3137                 if (progress && need_resched()) {
3138                         memcpy(&key, &found_key, sizeof(key));
3139                         cond_resched();
3140                         btrfs_release_path(root, path);
3141                         btrfs_search_slot(NULL, root, &key, path, 0, 0);
3142                         progress = 0;
3143                         goto next;
3144                 }
3145                 progress = 1;
3146
3147                 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
3148                     found_key.objectid + found_key.offset <= cur_byte) {
3149                         memcpy(&key, &found_key, sizeof(key));
3150                         key.offset++;
3151                         path->slots[0]++;
3152                         goto next;
3153                 }
3154
3155                 total_found++;
3156                 cur_byte = found_key.objectid + found_key.offset;
3157                 key.objectid = cur_byte;
3158                 btrfs_release_path(root, path);
3159                 ret = relocate_one_extent(root, path, &found_key);
3160                 __alloc_chunk_for_shrink(root, shrink_block_group, 0);
3161         }
3162
3163         btrfs_release_path(root, path);
3164
3165         if (total_found > 0) {
3166                 printk("btrfs relocate found %llu last extent was %llu\n",
3167                        (unsigned long long)total_found,
3168                        (unsigned long long)found_key.objectid);
3169                 mutex_unlock(&root->fs_info->alloc_mutex);
3170                 trans = btrfs_start_transaction(tree_root, 1);
3171                 btrfs_commit_transaction(trans, tree_root);
3172
3173                 btrfs_clean_old_snapshots(tree_root);
3174
3175                 trans = btrfs_start_transaction(tree_root, 1);
3176                 btrfs_commit_transaction(trans, tree_root);
3177                 mutex_lock(&root->fs_info->alloc_mutex);
3178                 goto again;
3179         }
3180
3181         /*
3182          * we've freed all the extents, now remove the block
3183          * group item from the tree
3184          */
3185         mutex_unlock(&root->fs_info->alloc_mutex);
3186
3187         trans = btrfs_start_transaction(root, 1);
3188         mutex_lock(&root->fs_info->alloc_mutex);
3189         memcpy(&key, &shrink_block_group->key, sizeof(key));
3190
3191         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3192         if (ret > 0)
3193                 ret = -EIO;
3194         if (ret < 0)
3195                 goto out;
3196
3197         clear_extent_bits(&info->block_group_cache, key.objectid,
3198                           key.objectid + key.offset - 1,
3199                           (unsigned int)-1, GFP_NOFS);
3200
3201
3202         clear_extent_bits(&info->free_space_cache,
3203                            key.objectid, key.objectid + key.offset - 1,
3204                            (unsigned int)-1, GFP_NOFS);
3205
3206         memset(shrink_block_group, 0, sizeof(*shrink_block_group));
3207         kfree(shrink_block_group);
3208
3209         btrfs_del_item(trans, root, path);
3210         btrfs_release_path(root, path);
3211         mutex_unlock(&root->fs_info->alloc_mutex);
3212         btrfs_commit_transaction(trans, root);
3213
3214         mutex_lock(&root->fs_info->alloc_mutex);
3215
3216         /* the code to unpin extents might set a few bits in the free
3217          * space cache for this range again
3218          */
3219         clear_extent_bits(&info->free_space_cache,
3220                            key.objectid, key.objectid + key.offset - 1,
3221                            (unsigned int)-1, GFP_NOFS);
3222 out:
3223         btrfs_free_path(path);
3224         mutex_unlock(&root->fs_info->alloc_mutex);
3225         return ret;
3226 }
3227
3228 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
3229                            struct btrfs_key *key)
3230 {
3231         int ret = 0;
3232         struct btrfs_key found_key;
3233         struct extent_buffer *leaf;
3234         int slot;
3235
3236         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
3237         if (ret < 0)
3238                 goto out;
3239
3240         while(1) {
3241                 slot = path->slots[0];
3242                 leaf = path->nodes[0];
3243                 if (slot >= btrfs_header_nritems(leaf)) {
3244                         ret = btrfs_next_leaf(root, path);
3245                         if (ret == 0)
3246                                 continue;
3247                         if (ret < 0)
3248                                 goto out;
3249                         break;
3250                 }
3251                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3252
3253                 if (found_key.objectid >= key->objectid &&
3254                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3255                         ret = 0;
3256                         goto out;
3257                 }
3258                 path->slots[0]++;
3259         }
3260         ret = -ENOENT;
3261 out:
3262         return ret;
3263 }
3264
3265 int btrfs_read_block_groups(struct btrfs_root *root)
3266 {
3267         struct btrfs_path *path;
3268         int ret;
3269         int bit;
3270         struct btrfs_block_group_cache *cache;
3271         struct btrfs_fs_info *info = root->fs_info;
3272         struct btrfs_space_info *space_info;
3273         struct extent_io_tree *block_group_cache;
3274         struct btrfs_key key;
3275         struct btrfs_key found_key;
3276         struct extent_buffer *leaf;
3277
3278         block_group_cache = &info->block_group_cache;
3279         root = info->extent_root;
3280         key.objectid = 0;
3281         key.offset = 0;
3282         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3283         path = btrfs_alloc_path();
3284         if (!path)
3285                 return -ENOMEM;
3286
3287         mutex_lock(&root->fs_info->alloc_mutex);
3288         while(1) {
3289                 ret = find_first_block_group(root, path, &key);
3290                 if (ret > 0) {
3291                         ret = 0;
3292                         goto error;
3293                 }
3294                 if (ret != 0)
3295                         goto error;
3296
3297                 leaf = path->nodes[0];
3298                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3299                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3300                 if (!cache) {
3301                         ret = -ENOMEM;
3302                         break;
3303                 }
3304
3305                 read_extent_buffer(leaf, &cache->item,
3306                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
3307                                    sizeof(cache->item));
3308                 memcpy(&cache->key, &found_key, sizeof(found_key));
3309
3310                 key.objectid = found_key.objectid + found_key.offset;
3311                 btrfs_release_path(root, path);
3312                 cache->flags = btrfs_block_group_flags(&cache->item);
3313                 bit = 0;
3314                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
3315                         bit = BLOCK_GROUP_DATA;
3316                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
3317                         bit = BLOCK_GROUP_SYSTEM;
3318                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
3319                         bit = BLOCK_GROUP_METADATA;
3320                 }
3321                 set_avail_alloc_bits(info, cache->flags);
3322
3323                 ret = update_space_info(info, cache->flags, found_key.offset,
3324                                         btrfs_block_group_used(&cache->item),
3325                                         &space_info);
3326                 BUG_ON(ret);
3327                 cache->space_info = space_info;
3328
3329                 /* use EXTENT_LOCKED to prevent merging */
3330                 set_extent_bits(block_group_cache, found_key.objectid,
3331                                 found_key.objectid + found_key.offset - 1,
3332                                 bit | EXTENT_LOCKED, GFP_NOFS);
3333                 set_state_private(block_group_cache, found_key.objectid,
3334                                   (unsigned long)cache);
3335
3336                 /* hack for now */
3337                 if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
3338                         cache_block_group(root->fs_info->extent_root,
3339                                           cache);
3340                 }
3341                 if (key.objectid >=
3342                     btrfs_super_total_bytes(&info->super_copy))
3343                         break;
3344         }
3345         ret = 0;
3346 error:
3347         btrfs_free_path(path);
3348         mutex_unlock(&root->fs_info->alloc_mutex);
3349         return ret;
3350 }
3351
3352 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3353                            struct btrfs_root *root, u64 bytes_used,
3354                            u64 type, u64 chunk_objectid, u64 chunk_offset,
3355                            u64 size)
3356 {
3357         int ret;
3358         int bit = 0;
3359         struct btrfs_root *extent_root;
3360         struct btrfs_block_group_cache *cache;
3361         struct extent_io_tree *block_group_cache;
3362
3363         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
3364         extent_root = root->fs_info->extent_root;
3365         block_group_cache = &root->fs_info->block_group_cache;
3366
3367         cache = kzalloc(sizeof(*cache), GFP_NOFS);
3368         BUG_ON(!cache);
3369         cache->key.objectid = chunk_offset;
3370         cache->key.offset = size;
3371         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3372
3373         btrfs_set_block_group_used(&cache->item, bytes_used);
3374         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
3375         cache->flags = type;
3376         btrfs_set_block_group_flags(&cache->item, type);
3377
3378         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
3379                                 &cache->space_info);
3380         BUG_ON(ret);
3381
3382         bit = block_group_state_bits(type);
3383         set_extent_bits(block_group_cache, chunk_offset,
3384                         chunk_offset + size - 1,
3385                         bit | EXTENT_LOCKED, GFP_NOFS);
3386
3387         set_state_private(block_group_cache, chunk_offset,
3388                           (unsigned long)cache);
3389         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3390                                 sizeof(cache->item));
3391         BUG_ON(ret);
3392
3393         finish_current_insert(trans, extent_root);
3394         ret = del_pending_extents(trans, extent_root);
3395         BUG_ON(ret);
3396         set_avail_alloc_bits(extent_root->fs_info, type);
3397
3398         return 0;
3399 }