Btrfs: New data=ordered implementation
[linux-2.6-block.git] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include "hash.h"
22 #include "crc32c.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 #include "volumes.h"
28 #include "locking.h"
29
30 #define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
31 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
32 #define BLOCK_GROUP_SYSTEM   EXTENT_NEW
33
34 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
35
36 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
37                                  btrfs_root *extent_root);
38 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
39                                btrfs_root *extent_root);
40 static struct btrfs_block_group_cache *
41 __btrfs_find_block_group(struct btrfs_root *root,
42                          struct btrfs_block_group_cache *hint,
43                          u64 search_start, int data, int owner);
44
45 void maybe_lock_mutex(struct btrfs_root *root)
46 {
47         if (root != root->fs_info->extent_root &&
48             root != root->fs_info->chunk_root &&
49             root != root->fs_info->dev_root) {
50                 mutex_lock(&root->fs_info->alloc_mutex);
51         }
52 }
53
54 void maybe_unlock_mutex(struct btrfs_root *root)
55 {
56         if (root != root->fs_info->extent_root &&
57             root != root->fs_info->chunk_root &&
58             root != root->fs_info->dev_root) {
59                 mutex_unlock(&root->fs_info->alloc_mutex);
60         }
61 }
62
63 static int cache_block_group(struct btrfs_root *root,
64                              struct btrfs_block_group_cache *block_group)
65 {
66         struct btrfs_path *path;
67         int ret;
68         struct btrfs_key key;
69         struct extent_buffer *leaf;
70         struct extent_io_tree *free_space_cache;
71         int slot;
72         u64 last = 0;
73         u64 hole_size;
74         u64 first_free;
75         int found = 0;
76
77         if (!block_group)
78                 return 0;
79
80         root = root->fs_info->extent_root;
81         free_space_cache = &root->fs_info->free_space_cache;
82
83         if (block_group->cached)
84                 return 0;
85
86         path = btrfs_alloc_path();
87         if (!path)
88                 return -ENOMEM;
89
90         path->reada = 2;
91         /*
92          * we get into deadlocks with paths held by callers of this function.
93          * since the alloc_mutex is protecting things right now, just
94          * skip the locking here
95          */
96         path->skip_locking = 1;
97         first_free = block_group->key.objectid;
98         key.objectid = block_group->key.objectid;
99         key.offset = 0;
100         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
101         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
102         if (ret < 0)
103                 return ret;
104         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
105         if (ret < 0)
106                 return ret;
107         if (ret == 0) {
108                 leaf = path->nodes[0];
109                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
110                 if (key.objectid + key.offset > first_free)
111                         first_free = key.objectid + key.offset;
112         }
113         while(1) {
114                 leaf = path->nodes[0];
115                 slot = path->slots[0];
116                 if (slot >= btrfs_header_nritems(leaf)) {
117                         ret = btrfs_next_leaf(root, path);
118                         if (ret < 0)
119                                 goto err;
120                         if (ret == 0) {
121                                 continue;
122                         } else {
123                                 break;
124                         }
125                 }
126                 btrfs_item_key_to_cpu(leaf, &key, slot);
127                 if (key.objectid < block_group->key.objectid) {
128                         goto next;
129                 }
130                 if (key.objectid >= block_group->key.objectid +
131                     block_group->key.offset) {
132                         break;
133                 }
134
135                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
136                         if (!found) {
137                                 last = first_free;
138                                 found = 1;
139                         }
140                         if (key.objectid > last) {
141                                 hole_size = key.objectid - last;
142                                 set_extent_dirty(free_space_cache, last,
143                                                  last + hole_size - 1,
144                                                  GFP_NOFS);
145                         }
146                         last = key.objectid + key.offset;
147                 }
148 next:
149                 path->slots[0]++;
150         }
151
152         if (!found)
153                 last = first_free;
154         if (block_group->key.objectid +
155             block_group->key.offset > last) {
156                 hole_size = block_group->key.objectid +
157                         block_group->key.offset - last;
158                 set_extent_dirty(free_space_cache, last,
159                                  last + hole_size - 1, GFP_NOFS);
160         }
161         block_group->cached = 1;
162 err:
163         btrfs_free_path(path);
164         return 0;
165 }
166
167 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
168                                                        btrfs_fs_info *info,
169                                                          u64 bytenr)
170 {
171         struct extent_io_tree *block_group_cache;
172         struct btrfs_block_group_cache *block_group = NULL;
173         u64 ptr;
174         u64 start;
175         u64 end;
176         int ret;
177
178         bytenr = max_t(u64, bytenr,
179                        BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
180         block_group_cache = &info->block_group_cache;
181         ret = find_first_extent_bit(block_group_cache,
182                                     bytenr, &start, &end,
183                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
184                                     BLOCK_GROUP_SYSTEM);
185         if (ret) {
186                 return NULL;
187         }
188         ret = get_state_private(block_group_cache, start, &ptr);
189         if (ret)
190                 return NULL;
191
192         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
193         return block_group;
194 }
195
196 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
197                                                          btrfs_fs_info *info,
198                                                          u64 bytenr)
199 {
200         struct extent_io_tree *block_group_cache;
201         struct btrfs_block_group_cache *block_group = NULL;
202         u64 ptr;
203         u64 start;
204         u64 end;
205         int ret;
206
207         bytenr = max_t(u64, bytenr,
208                        BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
209         block_group_cache = &info->block_group_cache;
210         ret = find_first_extent_bit(block_group_cache,
211                                     bytenr, &start, &end,
212                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
213                                     BLOCK_GROUP_SYSTEM);
214         if (ret) {
215                 return NULL;
216         }
217         ret = get_state_private(block_group_cache, start, &ptr);
218         if (ret)
219                 return NULL;
220
221         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
222         if (block_group->key.objectid <= bytenr && bytenr <
223             block_group->key.objectid + block_group->key.offset)
224                 return block_group;
225         return NULL;
226 }
227
228 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
229 {
230         return (cache->flags & bits) == bits;
231 }
232
233 static int noinline find_search_start(struct btrfs_root *root,
234                               struct btrfs_block_group_cache **cache_ret,
235                               u64 *start_ret, u64 num, int data)
236 {
237         int ret;
238         struct btrfs_block_group_cache *cache = *cache_ret;
239         struct extent_io_tree *free_space_cache;
240         struct extent_state *state;
241         u64 last;
242         u64 start = 0;
243         u64 cache_miss = 0;
244         u64 total_fs_bytes;
245         u64 search_start = *start_ret;
246         int wrapped = 0;
247
248         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
249         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
250         free_space_cache = &root->fs_info->free_space_cache;
251
252         if (!cache)
253                 goto out;
254
255 again:
256         ret = cache_block_group(root, cache);
257         if (ret) {
258                 goto out;
259         }
260
261         last = max(search_start, cache->key.objectid);
262         if (!block_group_bits(cache, data) || cache->ro)
263                 goto new_group;
264
265         spin_lock_irq(&free_space_cache->lock);
266         state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY);
267         while(1) {
268                 if (!state) {
269                         if (!cache_miss)
270                                 cache_miss = last;
271                         spin_unlock_irq(&free_space_cache->lock);
272                         goto new_group;
273                 }
274
275                 start = max(last, state->start);
276                 last = state->end + 1;
277                 if (last - start < num) {
278                         do {
279                                 state = extent_state_next(state);
280                         } while(state && !(state->state & EXTENT_DIRTY));
281                         continue;
282                 }
283                 spin_unlock_irq(&free_space_cache->lock);
284                 if (cache->ro) {
285                         goto new_group;
286                 }
287                 if (start + num > cache->key.objectid + cache->key.offset)
288                         goto new_group;
289                 if (!block_group_bits(cache, data)) {
290                         printk("block group bits don't match %Lu %d\n", cache->flags, data);
291                 }
292                 *start_ret = start;
293                 return 0;
294         }
295 out:
296         cache = btrfs_lookup_block_group(root->fs_info, search_start);
297         if (!cache) {
298                 printk("Unable to find block group for %Lu\n", search_start);
299                 WARN_ON(1);
300         }
301         return -ENOSPC;
302
303 new_group:
304         last = cache->key.objectid + cache->key.offset;
305 wrapped:
306         cache = btrfs_lookup_first_block_group(root->fs_info, last);
307         if (!cache || cache->key.objectid >= total_fs_bytes) {
308 no_cache:
309                 if (!wrapped) {
310                         wrapped = 1;
311                         last = search_start;
312                         goto wrapped;
313                 }
314                 goto out;
315         }
316         if (cache_miss && !cache->cached) {
317                 cache_block_group(root, cache);
318                 last = cache_miss;
319                 cache = btrfs_lookup_first_block_group(root->fs_info, last);
320         }
321         cache_miss = 0;
322         cache = __btrfs_find_block_group(root, cache, last, data, 0);
323         if (!cache)
324                 goto no_cache;
325         *cache_ret = cache;
326         goto again;
327 }
328
329 static u64 div_factor(u64 num, int factor)
330 {
331         if (factor == 10)
332                 return num;
333         num *= factor;
334         do_div(num, 10);
335         return num;
336 }
337
338 static int block_group_state_bits(u64 flags)
339 {
340         int bits = 0;
341         if (flags & BTRFS_BLOCK_GROUP_DATA)
342                 bits |= BLOCK_GROUP_DATA;
343         if (flags & BTRFS_BLOCK_GROUP_METADATA)
344                 bits |= BLOCK_GROUP_METADATA;
345         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
346                 bits |= BLOCK_GROUP_SYSTEM;
347         return bits;
348 }
349
350 static struct btrfs_block_group_cache *
351 __btrfs_find_block_group(struct btrfs_root *root,
352                          struct btrfs_block_group_cache *hint,
353                          u64 search_start, int data, int owner)
354 {
355         struct btrfs_block_group_cache *cache;
356         struct extent_io_tree *block_group_cache;
357         struct btrfs_block_group_cache *found_group = NULL;
358         struct btrfs_fs_info *info = root->fs_info;
359         u64 used;
360         u64 last = 0;
361         u64 start;
362         u64 end;
363         u64 free_check;
364         u64 ptr;
365         int bit;
366         int ret;
367         int full_search = 0;
368         int factor = 10;
369         int wrapped = 0;
370
371         block_group_cache = &info->block_group_cache;
372
373         if (data & BTRFS_BLOCK_GROUP_METADATA)
374                 factor = 9;
375
376         bit = block_group_state_bits(data);
377
378         if (search_start) {
379                 struct btrfs_block_group_cache *shint;
380                 shint = btrfs_lookup_first_block_group(info, search_start);
381                 if (shint && block_group_bits(shint, data) && !shint->ro) {
382                         used = btrfs_block_group_used(&shint->item);
383                         if (used + shint->pinned <
384                             div_factor(shint->key.offset, factor)) {
385                                 return shint;
386                         }
387                 }
388         }
389         if (hint && !hint->ro && block_group_bits(hint, data)) {
390                 used = btrfs_block_group_used(&hint->item);
391                 if (used + hint->pinned <
392                     div_factor(hint->key.offset, factor)) {
393                         return hint;
394                 }
395                 last = hint->key.objectid + hint->key.offset;
396         } else {
397                 if (hint)
398                         last = max(hint->key.objectid, search_start);
399                 else
400                         last = search_start;
401         }
402 again:
403         while(1) {
404                 ret = find_first_extent_bit(block_group_cache, last,
405                                             &start, &end, bit);
406                 if (ret)
407                         break;
408
409                 ret = get_state_private(block_group_cache, start, &ptr);
410                 if (ret) {
411                         last = end + 1;
412                         continue;
413                 }
414
415                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
416                 last = cache->key.objectid + cache->key.offset;
417                 used = btrfs_block_group_used(&cache->item);
418
419                 if (!cache->ro && block_group_bits(cache, data)) {
420                         free_check = div_factor(cache->key.offset, factor);
421                         if (used + cache->pinned < free_check) {
422                                 found_group = cache;
423                                 goto found;
424                         }
425                 }
426                 cond_resched();
427         }
428         if (!wrapped) {
429                 last = search_start;
430                 wrapped = 1;
431                 goto again;
432         }
433         if (!full_search && factor < 10) {
434                 last = search_start;
435                 full_search = 1;
436                 factor = 10;
437                 goto again;
438         }
439 found:
440         return found_group;
441 }
442
443 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
444                                                  struct btrfs_block_group_cache
445                                                  *hint, u64 search_start,
446                                                  int data, int owner)
447 {
448
449         struct btrfs_block_group_cache *ret;
450         mutex_lock(&root->fs_info->alloc_mutex);
451         ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
452         mutex_unlock(&root->fs_info->alloc_mutex);
453         return ret;
454 }
455 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
456                            u64 owner, u64 owner_offset)
457 {
458         u32 high_crc = ~(u32)0;
459         u32 low_crc = ~(u32)0;
460         __le64 lenum;
461         lenum = cpu_to_le64(root_objectid);
462         high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
463         lenum = cpu_to_le64(ref_generation);
464         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
465         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
466                 lenum = cpu_to_le64(owner);
467                 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
468                 lenum = cpu_to_le64(owner_offset);
469                 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
470         }
471         return ((u64)high_crc << 32) | (u64)low_crc;
472 }
473
474 static int match_extent_ref(struct extent_buffer *leaf,
475                             struct btrfs_extent_ref *disk_ref,
476                             struct btrfs_extent_ref *cpu_ref)
477 {
478         int ret;
479         int len;
480
481         if (cpu_ref->objectid)
482                 len = sizeof(*cpu_ref);
483         else
484                 len = 2 * sizeof(u64);
485         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
486                                    len);
487         return ret == 0;
488 }
489
490 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
491                                           struct btrfs_root *root,
492                                           struct btrfs_path *path, u64 bytenr,
493                                           u64 root_objectid,
494                                           u64 ref_generation, u64 owner,
495                                           u64 owner_offset, int del)
496 {
497         u64 hash;
498         struct btrfs_key key;
499         struct btrfs_key found_key;
500         struct btrfs_extent_ref ref;
501         struct extent_buffer *leaf;
502         struct btrfs_extent_ref *disk_ref;
503         int ret;
504         int ret2;
505
506         btrfs_set_stack_ref_root(&ref, root_objectid);
507         btrfs_set_stack_ref_generation(&ref, ref_generation);
508         btrfs_set_stack_ref_objectid(&ref, owner);
509         btrfs_set_stack_ref_offset(&ref, owner_offset);
510
511         hash = hash_extent_ref(root_objectid, ref_generation, owner,
512                                owner_offset);
513         key.offset = hash;
514         key.objectid = bytenr;
515         key.type = BTRFS_EXTENT_REF_KEY;
516
517         while (1) {
518                 ret = btrfs_search_slot(trans, root, &key, path,
519                                         del ? -1 : 0, del);
520                 if (ret < 0)
521                         goto out;
522                 leaf = path->nodes[0];
523                 if (ret != 0) {
524                         u32 nritems = btrfs_header_nritems(leaf);
525                         if (path->slots[0] >= nritems) {
526                                 ret2 = btrfs_next_leaf(root, path);
527                                 if (ret2)
528                                         goto out;
529                                 leaf = path->nodes[0];
530                         }
531                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
532                         if (found_key.objectid != bytenr ||
533                             found_key.type != BTRFS_EXTENT_REF_KEY)
534                                 goto out;
535                         key.offset = found_key.offset;
536                         if (del) {
537                                 btrfs_release_path(root, path);
538                                 continue;
539                         }
540                 }
541                 disk_ref = btrfs_item_ptr(path->nodes[0],
542                                           path->slots[0],
543                                           struct btrfs_extent_ref);
544                 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
545                         ret = 0;
546                         goto out;
547                 }
548                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
549                 key.offset = found_key.offset + 1;
550                 btrfs_release_path(root, path);
551         }
552 out:
553         return ret;
554 }
555
556 /*
557  * Back reference rules.  Back refs have three main goals:
558  *
559  * 1) differentiate between all holders of references to an extent so that
560  *    when a reference is dropped we can make sure it was a valid reference
561  *    before freeing the extent.
562  *
563  * 2) Provide enough information to quickly find the holders of an extent
564  *    if we notice a given block is corrupted or bad.
565  *
566  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
567  *    maintenance.  This is actually the same as #2, but with a slightly
568  *    different use case.
569  *
570  * File extents can be referenced by:
571  *
572  * - multiple snapshots, subvolumes, or different generations in one subvol
573  * - different files inside a single subvolume (in theory, not implemented yet)
574  * - different offsets inside a file (bookend extents in file.c)
575  *
576  * The extent ref structure has fields for:
577  *
578  * - Objectid of the subvolume root
579  * - Generation number of the tree holding the reference
580  * - objectid of the file holding the reference
581  * - offset in the file corresponding to the key holding the reference
582  *
583  * When a file extent is allocated the fields are filled in:
584  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
585  *
586  * When a leaf is cow'd new references are added for every file extent found
587  * in the leaf.  It looks the same as the create case, but trans->transid
588  * will be different when the block is cow'd.
589  *
590  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
591  *
592  * When a file extent is removed either during snapshot deletion or file
593  * truncation, the corresponding back reference is found
594  * by searching for:
595  *
596  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
597  *      inode objectid, offset in file)
598  *
599  * Btree extents can be referenced by:
600  *
601  * - Different subvolumes
602  * - Different generations of the same subvolume
603  *
604  * Storing sufficient information for a full reverse mapping of a btree
605  * block would require storing the lowest key of the block in the backref,
606  * and it would require updating that lowest key either before write out or
607  * every time it changed.  Instead, the objectid of the lowest key is stored
608  * along with the level of the tree block.  This provides a hint
609  * about where in the btree the block can be found.  Searches through the
610  * btree only need to look for a pointer to that block, so they stop one
611  * level higher than the level recorded in the backref.
612  *
613  * Some btrees do not do reference counting on their extents.  These
614  * include the extent tree and the tree of tree roots.  Backrefs for these
615  * trees always have a generation of zero.
616  *
617  * When a tree block is created, back references are inserted:
618  *
619  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
620  *
621  * When a tree block is cow'd in a reference counted root,
622  * new back references are added for all the blocks it points to.
623  * These are of the form (trans->transid will have increased since creation):
624  *
625  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
626  *
627  * Because the lowest_key_objectid and the level are just hints
628  * they are not used when backrefs are deleted.  When a backref is deleted:
629  *
630  * if backref was for a tree root:
631  *     root_objectid = root->root_key.objectid
632  * else
633  *     root_objectid = btrfs_header_owner(parent)
634  *
635  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
636  *
637  * Back Reference Key hashing:
638  *
639  * Back references have four fields, each 64 bits long.  Unfortunately,
640  * This is hashed into a single 64 bit number and placed into the key offset.
641  * The key objectid corresponds to the first byte in the extent, and the
642  * key type is set to BTRFS_EXTENT_REF_KEY
643  */
644 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
645                                  struct btrfs_root *root,
646                                  struct btrfs_path *path, u64 bytenr,
647                                  u64 root_objectid, u64 ref_generation,
648                                  u64 owner, u64 owner_offset)
649 {
650         u64 hash;
651         struct btrfs_key key;
652         struct btrfs_extent_ref ref;
653         struct btrfs_extent_ref *disk_ref;
654         int ret;
655
656         btrfs_set_stack_ref_root(&ref, root_objectid);
657         btrfs_set_stack_ref_generation(&ref, ref_generation);
658         btrfs_set_stack_ref_objectid(&ref, owner);
659         btrfs_set_stack_ref_offset(&ref, owner_offset);
660
661         hash = hash_extent_ref(root_objectid, ref_generation, owner,
662                                owner_offset);
663         key.offset = hash;
664         key.objectid = bytenr;
665         key.type = BTRFS_EXTENT_REF_KEY;
666
667         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
668         while (ret == -EEXIST) {
669                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
670                                           struct btrfs_extent_ref);
671                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
672                         goto out;
673                 key.offset++;
674                 btrfs_release_path(root, path);
675                 ret = btrfs_insert_empty_item(trans, root, path, &key,
676                                               sizeof(ref));
677         }
678         if (ret)
679                 goto out;
680         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
681                                   struct btrfs_extent_ref);
682         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
683                             sizeof(ref));
684         btrfs_mark_buffer_dirty(path->nodes[0]);
685 out:
686         btrfs_release_path(root, path);
687         return ret;
688 }
689
690 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
691                                 struct btrfs_root *root,
692                                 u64 bytenr, u64 num_bytes,
693                                 u64 root_objectid, u64 ref_generation,
694                                 u64 owner, u64 owner_offset)
695 {
696         struct btrfs_path *path;
697         int ret;
698         struct btrfs_key key;
699         struct extent_buffer *l;
700         struct btrfs_extent_item *item;
701         u32 refs;
702
703         WARN_ON(num_bytes < root->sectorsize);
704         path = btrfs_alloc_path();
705         if (!path)
706                 return -ENOMEM;
707
708         path->reada = 1;
709         key.objectid = bytenr;
710         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
711         key.offset = num_bytes;
712         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
713                                 0, 1);
714         if (ret < 0)
715                 return ret;
716         if (ret != 0) {
717                 BUG();
718         }
719         BUG_ON(ret != 0);
720         l = path->nodes[0];
721         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
722         refs = btrfs_extent_refs(l, item);
723         btrfs_set_extent_refs(l, item, refs + 1);
724         btrfs_mark_buffer_dirty(path->nodes[0]);
725
726         btrfs_release_path(root->fs_info->extent_root, path);
727
728         path->reada = 1;
729         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
730                                           path, bytenr, root_objectid,
731                                           ref_generation, owner, owner_offset);
732         BUG_ON(ret);
733         finish_current_insert(trans, root->fs_info->extent_root);
734         del_pending_extents(trans, root->fs_info->extent_root);
735
736         btrfs_free_path(path);
737         return 0;
738 }
739
740 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
741                                 struct btrfs_root *root,
742                                 u64 bytenr, u64 num_bytes,
743                                 u64 root_objectid, u64 ref_generation,
744                                 u64 owner, u64 owner_offset)
745 {
746         int ret;
747
748         mutex_lock(&root->fs_info->alloc_mutex);
749         ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
750                                      root_objectid, ref_generation,
751                                      owner, owner_offset);
752         mutex_unlock(&root->fs_info->alloc_mutex);
753         return ret;
754 }
755
756 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
757                          struct btrfs_root *root)
758 {
759         finish_current_insert(trans, root->fs_info->extent_root);
760         del_pending_extents(trans, root->fs_info->extent_root);
761         return 0;
762 }
763
764 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
765                              struct btrfs_root *root, u64 bytenr,
766                              u64 num_bytes, u32 *refs)
767 {
768         struct btrfs_path *path;
769         int ret;
770         struct btrfs_key key;
771         struct extent_buffer *l;
772         struct btrfs_extent_item *item;
773
774         WARN_ON(num_bytes < root->sectorsize);
775         path = btrfs_alloc_path();
776         path->reada = 1;
777         key.objectid = bytenr;
778         key.offset = num_bytes;
779         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
780         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
781                                 0, 0);
782         if (ret < 0)
783                 goto out;
784         if (ret != 0) {
785                 btrfs_print_leaf(root, path->nodes[0]);
786                 printk("failed to find block number %Lu\n", bytenr);
787                 BUG();
788         }
789         l = path->nodes[0];
790         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
791         *refs = btrfs_extent_refs(l, item);
792 out:
793         btrfs_free_path(path);
794         return 0;
795 }
796
797 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
798                                   struct btrfs_path *count_path,
799                                   u64 expected_owner,
800                                   u64 first_extent)
801 {
802         struct btrfs_root *extent_root = root->fs_info->extent_root;
803         struct btrfs_path *path;
804         u64 bytenr;
805         u64 found_objectid;
806         u64 found_owner;
807         u64 root_objectid = root->root_key.objectid;
808         u32 total_count = 0;
809         u32 extent_refs;
810         u32 cur_count;
811         u32 nritems;
812         int ret;
813         struct btrfs_key key;
814         struct btrfs_key found_key;
815         struct extent_buffer *l;
816         struct btrfs_extent_item *item;
817         struct btrfs_extent_ref *ref_item;
818         int level = -1;
819
820         /* FIXME, needs locking */
821         BUG();
822
823         mutex_lock(&root->fs_info->alloc_mutex);
824         path = btrfs_alloc_path();
825 again:
826         if (level == -1)
827                 bytenr = first_extent;
828         else
829                 bytenr = count_path->nodes[level]->start;
830
831         cur_count = 0;
832         key.objectid = bytenr;
833         key.offset = 0;
834
835         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
836         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
837         if (ret < 0)
838                 goto out;
839         BUG_ON(ret == 0);
840
841         l = path->nodes[0];
842         btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
843
844         if (found_key.objectid != bytenr ||
845             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
846                 goto out;
847         }
848
849         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
850         extent_refs = btrfs_extent_refs(l, item);
851         while (1) {
852                 l = path->nodes[0];
853                 nritems = btrfs_header_nritems(l);
854                 if (path->slots[0] >= nritems) {
855                         ret = btrfs_next_leaf(extent_root, path);
856                         if (ret == 0)
857                                 continue;
858                         break;
859                 }
860                 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
861                 if (found_key.objectid != bytenr)
862                         break;
863
864                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
865                         path->slots[0]++;
866                         continue;
867                 }
868
869                 cur_count++;
870                 ref_item = btrfs_item_ptr(l, path->slots[0],
871                                           struct btrfs_extent_ref);
872                 found_objectid = btrfs_ref_root(l, ref_item);
873
874                 if (found_objectid != root_objectid) {
875                         total_count = 2;
876                         goto out;
877                 }
878                 if (level == -1) {
879                         found_owner = btrfs_ref_objectid(l, ref_item);
880                         if (found_owner != expected_owner) {
881                                 total_count = 2;
882                                 goto out;
883                         }
884                         /*
885                          * nasty.  we don't count a reference held by
886                          * the running transaction.  This allows nodatacow
887                          * to avoid cow most of the time
888                          */
889                         if (found_owner >= BTRFS_FIRST_FREE_OBJECTID &&
890                             btrfs_ref_generation(l, ref_item) ==
891                             root->fs_info->generation) {
892                                 extent_refs--;
893                         }
894                 }
895                 total_count = 1;
896                 path->slots[0]++;
897         }
898         /*
899          * if there is more than one reference against a data extent,
900          * we have to assume the other ref is another snapshot
901          */
902         if (level == -1 && extent_refs > 1) {
903                 total_count = 2;
904                 goto out;
905         }
906         if (cur_count == 0) {
907                 total_count = 0;
908                 goto out;
909         }
910         if (level >= 0 && root->node == count_path->nodes[level])
911                 goto out;
912         level++;
913         btrfs_release_path(root, path);
914         goto again;
915
916 out:
917         btrfs_free_path(path);
918         mutex_unlock(&root->fs_info->alloc_mutex);
919         return total_count;
920 }
921
922 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
923                   struct extent_buffer *buf)
924 {
925         u64 bytenr;
926         u32 nritems;
927         struct btrfs_key key;
928         struct btrfs_file_extent_item *fi;
929         int i;
930         int level;
931         int ret;
932         int faili;
933
934         if (!root->ref_cows)
935                 return 0;
936
937         mutex_lock(&root->fs_info->alloc_mutex);
938         level = btrfs_header_level(buf);
939         nritems = btrfs_header_nritems(buf);
940         for (i = 0; i < nritems; i++) {
941                 if (level == 0) {
942                         u64 disk_bytenr;
943                         btrfs_item_key_to_cpu(buf, &key, i);
944                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
945                                 continue;
946                         fi = btrfs_item_ptr(buf, i,
947                                             struct btrfs_file_extent_item);
948                         if (btrfs_file_extent_type(buf, fi) ==
949                             BTRFS_FILE_EXTENT_INLINE)
950                                 continue;
951                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
952                         if (disk_bytenr == 0)
953                                 continue;
954                         ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr,
955                                     btrfs_file_extent_disk_num_bytes(buf, fi),
956                                     root->root_key.objectid, trans->transid,
957                                     key.objectid, key.offset);
958                         if (ret) {
959                                 faili = i;
960                                 goto fail;
961                         }
962                 } else {
963                         bytenr = btrfs_node_blockptr(buf, i);
964                         btrfs_node_key_to_cpu(buf, &key, i);
965                         ret = __btrfs_inc_extent_ref(trans, root, bytenr,
966                                            btrfs_level_size(root, level - 1),
967                                            root->root_key.objectid,
968                                            trans->transid,
969                                            level - 1, key.objectid);
970                         if (ret) {
971                                 faili = i;
972                                 goto fail;
973                         }
974                 }
975         }
976         mutex_unlock(&root->fs_info->alloc_mutex);
977         return 0;
978 fail:
979         WARN_ON(1);
980 #if 0
981         for (i =0; i < faili; i++) {
982                 if (level == 0) {
983                         u64 disk_bytenr;
984                         btrfs_item_key_to_cpu(buf, &key, i);
985                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
986                                 continue;
987                         fi = btrfs_item_ptr(buf, i,
988                                             struct btrfs_file_extent_item);
989                         if (btrfs_file_extent_type(buf, fi) ==
990                             BTRFS_FILE_EXTENT_INLINE)
991                                 continue;
992                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
993                         if (disk_bytenr == 0)
994                                 continue;
995                         err = btrfs_free_extent(trans, root, disk_bytenr,
996                                     btrfs_file_extent_disk_num_bytes(buf,
997                                                                       fi), 0);
998                         BUG_ON(err);
999                 } else {
1000                         bytenr = btrfs_node_blockptr(buf, i);
1001                         err = btrfs_free_extent(trans, root, bytenr,
1002                                         btrfs_level_size(root, level - 1), 0);
1003                         BUG_ON(err);
1004                 }
1005         }
1006 #endif
1007         mutex_unlock(&root->fs_info->alloc_mutex);
1008         return ret;
1009 }
1010
1011 static int write_one_cache_group(struct btrfs_trans_handle *trans,
1012                                  struct btrfs_root *root,
1013                                  struct btrfs_path *path,
1014                                  struct btrfs_block_group_cache *cache)
1015 {
1016         int ret;
1017         int pending_ret;
1018         struct btrfs_root *extent_root = root->fs_info->extent_root;
1019         unsigned long bi;
1020         struct extent_buffer *leaf;
1021
1022         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
1023         if (ret < 0)
1024                 goto fail;
1025         BUG_ON(ret);
1026
1027         leaf = path->nodes[0];
1028         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1029         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1030         btrfs_mark_buffer_dirty(leaf);
1031         btrfs_release_path(extent_root, path);
1032 fail:
1033         finish_current_insert(trans, extent_root);
1034         pending_ret = del_pending_extents(trans, extent_root);
1035         if (ret)
1036                 return ret;
1037         if (pending_ret)
1038                 return pending_ret;
1039         return 0;
1040
1041 }
1042
1043 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1044                                    struct btrfs_root *root)
1045 {
1046         struct extent_io_tree *block_group_cache;
1047         struct btrfs_block_group_cache *cache;
1048         int ret;
1049         int err = 0;
1050         int werr = 0;
1051         struct btrfs_path *path;
1052         u64 last = 0;
1053         u64 start;
1054         u64 end;
1055         u64 ptr;
1056
1057         block_group_cache = &root->fs_info->block_group_cache;
1058         path = btrfs_alloc_path();
1059         if (!path)
1060                 return -ENOMEM;
1061
1062         mutex_lock(&root->fs_info->alloc_mutex);
1063         while(1) {
1064                 ret = find_first_extent_bit(block_group_cache, last,
1065                                             &start, &end, BLOCK_GROUP_DIRTY);
1066                 if (ret)
1067                         break;
1068
1069                 last = end + 1;
1070                 ret = get_state_private(block_group_cache, start, &ptr);
1071                 if (ret)
1072                         break;
1073                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
1074                 err = write_one_cache_group(trans, root,
1075                                             path, cache);
1076                 /*
1077                  * if we fail to write the cache group, we want
1078                  * to keep it marked dirty in hopes that a later
1079                  * write will work
1080                  */
1081                 if (err) {
1082                         werr = err;
1083                         continue;
1084                 }
1085                 clear_extent_bits(block_group_cache, start, end,
1086                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
1087         }
1088         btrfs_free_path(path);
1089         mutex_unlock(&root->fs_info->alloc_mutex);
1090         return werr;
1091 }
1092
1093 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
1094                                                   u64 flags)
1095 {
1096         struct list_head *head = &info->space_info;
1097         struct list_head *cur;
1098         struct btrfs_space_info *found;
1099         list_for_each(cur, head) {
1100                 found = list_entry(cur, struct btrfs_space_info, list);
1101                 if (found->flags == flags)
1102                         return found;
1103         }
1104         return NULL;
1105
1106 }
1107
1108 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1109                              u64 total_bytes, u64 bytes_used,
1110                              struct btrfs_space_info **space_info)
1111 {
1112         struct btrfs_space_info *found;
1113
1114         found = __find_space_info(info, flags);
1115         if (found) {
1116                 found->total_bytes += total_bytes;
1117                 found->bytes_used += bytes_used;
1118                 found->full = 0;
1119                 WARN_ON(found->total_bytes < found->bytes_used);
1120                 *space_info = found;
1121                 return 0;
1122         }
1123         found = kmalloc(sizeof(*found), GFP_NOFS);
1124         if (!found)
1125                 return -ENOMEM;
1126
1127         list_add(&found->list, &info->space_info);
1128         found->flags = flags;
1129         found->total_bytes = total_bytes;
1130         found->bytes_used = bytes_used;
1131         found->bytes_pinned = 0;
1132         found->full = 0;
1133         found->force_alloc = 0;
1134         *space_info = found;
1135         return 0;
1136 }
1137
1138 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1139 {
1140         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1141                                    BTRFS_BLOCK_GROUP_RAID1 |
1142                                    BTRFS_BLOCK_GROUP_RAID10 |
1143                                    BTRFS_BLOCK_GROUP_DUP);
1144         if (extra_flags) {
1145                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1146                         fs_info->avail_data_alloc_bits |= extra_flags;
1147                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1148                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1149                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1150                         fs_info->avail_system_alloc_bits |= extra_flags;
1151         }
1152 }
1153
1154 static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1155 {
1156         u64 num_devices = root->fs_info->fs_devices->num_devices;
1157
1158         if (num_devices == 1)
1159                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1160         if (num_devices < 4)
1161                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1162
1163         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1164             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1165                       BTRFS_BLOCK_GROUP_RAID10))) {
1166                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1167         }
1168
1169         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1170             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
1171                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1172         }
1173
1174         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1175             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1176              (flags & BTRFS_BLOCK_GROUP_RAID10) |
1177              (flags & BTRFS_BLOCK_GROUP_DUP)))
1178                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1179         return flags;
1180 }
1181
1182 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1183                           struct btrfs_root *extent_root, u64 alloc_bytes,
1184                           u64 flags, int force)
1185 {
1186         struct btrfs_space_info *space_info;
1187         u64 thresh;
1188         u64 start;
1189         u64 num_bytes;
1190         int ret;
1191
1192         flags = reduce_alloc_profile(extent_root, flags);
1193
1194         space_info = __find_space_info(extent_root->fs_info, flags);
1195         if (!space_info) {
1196                 ret = update_space_info(extent_root->fs_info, flags,
1197                                         0, 0, &space_info);
1198                 BUG_ON(ret);
1199         }
1200         BUG_ON(!space_info);
1201
1202         if (space_info->force_alloc) {
1203                 force = 1;
1204                 space_info->force_alloc = 0;
1205         }
1206         if (space_info->full)
1207                 goto out;
1208
1209         thresh = div_factor(space_info->total_bytes, 6);
1210         if (!force &&
1211            (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1212             thresh)
1213                 goto out;
1214
1215         mutex_lock(&extent_root->fs_info->chunk_mutex);
1216         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1217         if (ret == -ENOSPC) {
1218 printk("space info full %Lu\n", flags);
1219                 space_info->full = 1;
1220                 goto out_unlock;
1221         }
1222         BUG_ON(ret);
1223
1224         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1225                      BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
1226         BUG_ON(ret);
1227 out_unlock:
1228         mutex_unlock(&extent_root->fs_info->chunk_mutex);
1229 out:
1230         return 0;
1231 }
1232
1233 static int update_block_group(struct btrfs_trans_handle *trans,
1234                               struct btrfs_root *root,
1235                               u64 bytenr, u64 num_bytes, int alloc,
1236                               int mark_free)
1237 {
1238         struct btrfs_block_group_cache *cache;
1239         struct btrfs_fs_info *info = root->fs_info;
1240         u64 total = num_bytes;
1241         u64 old_val;
1242         u64 byte_in_group;
1243         u64 start;
1244         u64 end;
1245
1246         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1247         while(total) {
1248                 cache = btrfs_lookup_block_group(info, bytenr);
1249                 if (!cache) {
1250                         return -1;
1251                 }
1252                 byte_in_group = bytenr - cache->key.objectid;
1253                 WARN_ON(byte_in_group > cache->key.offset);
1254                 start = cache->key.objectid;
1255                 end = start + cache->key.offset - 1;
1256                 set_extent_bits(&info->block_group_cache, start, end,
1257                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
1258
1259                 old_val = btrfs_block_group_used(&cache->item);
1260                 num_bytes = min(total, cache->key.offset - byte_in_group);
1261                 if (alloc) {
1262                         old_val += num_bytes;
1263                         cache->space_info->bytes_used += num_bytes;
1264                 } else {
1265                         old_val -= num_bytes;
1266                         cache->space_info->bytes_used -= num_bytes;
1267                         if (mark_free) {
1268                                 set_extent_dirty(&info->free_space_cache,
1269                                                  bytenr, bytenr + num_bytes - 1,
1270                                                  GFP_NOFS);
1271                         }
1272                 }
1273                 btrfs_set_block_group_used(&cache->item, old_val);
1274                 total -= num_bytes;
1275                 bytenr += num_bytes;
1276         }
1277         return 0;
1278 }
1279
1280 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
1281 {
1282         u64 start;
1283         u64 end;
1284         int ret;
1285         ret = find_first_extent_bit(&root->fs_info->block_group_cache,
1286                                     search_start, &start, &end,
1287                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
1288                                     BLOCK_GROUP_SYSTEM);
1289         if (ret)
1290                 return 0;
1291         return start;
1292 }
1293
1294
1295 static int update_pinned_extents(struct btrfs_root *root,
1296                                 u64 bytenr, u64 num, int pin)
1297 {
1298         u64 len;
1299         struct btrfs_block_group_cache *cache;
1300         struct btrfs_fs_info *fs_info = root->fs_info;
1301
1302         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1303         if (pin) {
1304                 set_extent_dirty(&fs_info->pinned_extents,
1305                                 bytenr, bytenr + num - 1, GFP_NOFS);
1306         } else {
1307                 clear_extent_dirty(&fs_info->pinned_extents,
1308                                 bytenr, bytenr + num - 1, GFP_NOFS);
1309         }
1310         while (num > 0) {
1311                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1312                 if (!cache) {
1313                         u64 first = first_logical_byte(root, bytenr);
1314                         WARN_ON(first < bytenr);
1315                         len = min(first - bytenr, num);
1316                 } else {
1317                         len = min(num, cache->key.offset -
1318                                   (bytenr - cache->key.objectid));
1319                 }
1320                 if (pin) {
1321                         if (cache) {
1322                                 cache->pinned += len;
1323                                 cache->space_info->bytes_pinned += len;
1324                         }
1325                         fs_info->total_pinned += len;
1326                 } else {
1327                         if (cache) {
1328                                 cache->pinned -= len;
1329                                 cache->space_info->bytes_pinned -= len;
1330                         }
1331                         fs_info->total_pinned -= len;
1332                 }
1333                 bytenr += len;
1334                 num -= len;
1335         }
1336         return 0;
1337 }
1338
1339 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1340 {
1341         u64 last = 0;
1342         u64 start;
1343         u64 end;
1344         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1345         int ret;
1346
1347         while(1) {
1348                 ret = find_first_extent_bit(pinned_extents, last,
1349                                             &start, &end, EXTENT_DIRTY);
1350                 if (ret)
1351                         break;
1352                 set_extent_dirty(copy, start, end, GFP_NOFS);
1353                 last = end + 1;
1354         }
1355         return 0;
1356 }
1357
1358 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1359                                struct btrfs_root *root,
1360                                struct extent_io_tree *unpin)
1361 {
1362         u64 start;
1363         u64 end;
1364         int ret;
1365         struct extent_io_tree *free_space_cache;
1366         free_space_cache = &root->fs_info->free_space_cache;
1367
1368         mutex_lock(&root->fs_info->alloc_mutex);
1369         while(1) {
1370                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1371                                             EXTENT_DIRTY);
1372                 if (ret)
1373                         break;
1374                 update_pinned_extents(root, start, end + 1 - start, 0);
1375                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1376                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1377         }
1378         mutex_unlock(&root->fs_info->alloc_mutex);
1379         return 0;
1380 }
1381
1382 static int finish_current_insert(struct btrfs_trans_handle *trans,
1383                                  struct btrfs_root *extent_root)
1384 {
1385         u64 start;
1386         u64 end;
1387         struct btrfs_fs_info *info = extent_root->fs_info;
1388         struct extent_buffer *eb;
1389         struct btrfs_path *path;
1390         struct btrfs_key ins;
1391         struct btrfs_disk_key first;
1392         struct btrfs_extent_item extent_item;
1393         int ret;
1394         int level;
1395         int err = 0;
1396
1397         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
1398         btrfs_set_stack_extent_refs(&extent_item, 1);
1399         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1400         path = btrfs_alloc_path();
1401
1402         while(1) {
1403                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1404                                             &end, EXTENT_LOCKED);
1405                 if (ret)
1406                         break;
1407
1408                 ins.objectid = start;
1409                 ins.offset = end + 1 - start;
1410                 err = btrfs_insert_item(trans, extent_root, &ins,
1411                                         &extent_item, sizeof(extent_item));
1412                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1413                                   GFP_NOFS);
1414                 eb = read_tree_block(extent_root, ins.objectid, ins.offset,
1415                                      trans->transid);
1416                 btrfs_tree_lock(eb);
1417                 level = btrfs_header_level(eb);
1418                 if (level == 0) {
1419                         btrfs_item_key(eb, &first, 0);
1420                 } else {
1421                         btrfs_node_key(eb, &first, 0);
1422                 }
1423                 btrfs_tree_unlock(eb);
1424                 free_extent_buffer(eb);
1425                 /*
1426                  * the first key is just a hint, so the race we've created
1427                  * against reading it is fine
1428                  */
1429                 err = btrfs_insert_extent_backref(trans, extent_root, path,
1430                                           start, extent_root->root_key.objectid,
1431                                           0, level,
1432                                           btrfs_disk_key_objectid(&first));
1433                 BUG_ON(err);
1434         }
1435         btrfs_free_path(path);
1436         return 0;
1437 }
1438
1439 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1440                           int pending)
1441 {
1442         int err = 0;
1443
1444         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1445         if (!pending) {
1446                 struct extent_buffer *buf;
1447                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1448                 if (buf) {
1449                         if (!btrfs_try_tree_lock(buf) &&
1450                             btrfs_buffer_uptodate(buf, 0)) {
1451                                 u64 transid =
1452                                     root->fs_info->running_transaction->transid;
1453                                 u64 header_transid =
1454                                         btrfs_header_generation(buf);
1455                                 if (header_transid == transid &&
1456                                     !btrfs_header_flag(buf,
1457                                                BTRFS_HEADER_FLAG_WRITTEN)) {
1458                                         clean_tree_block(NULL, root, buf);
1459                                         btrfs_tree_unlock(buf);
1460                                         free_extent_buffer(buf);
1461                                         return 1;
1462                                 }
1463                                 btrfs_tree_unlock(buf);
1464                         }
1465                         free_extent_buffer(buf);
1466                 }
1467                 update_pinned_extents(root, bytenr, num_bytes, 1);
1468         } else {
1469                 set_extent_bits(&root->fs_info->pending_del,
1470                                 bytenr, bytenr + num_bytes - 1,
1471                                 EXTENT_LOCKED, GFP_NOFS);
1472         }
1473         BUG_ON(err < 0);
1474         return 0;
1475 }
1476
1477 /*
1478  * remove an extent from the root, returns 0 on success
1479  */
1480 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1481                          *root, u64 bytenr, u64 num_bytes,
1482                          u64 root_objectid, u64 ref_generation,
1483                          u64 owner_objectid, u64 owner_offset, int pin,
1484                          int mark_free)
1485 {
1486         struct btrfs_path *path;
1487         struct btrfs_key key;
1488         struct btrfs_fs_info *info = root->fs_info;
1489         struct btrfs_root *extent_root = info->extent_root;
1490         struct extent_buffer *leaf;
1491         int ret;
1492         int extent_slot = 0;
1493         int found_extent = 0;
1494         int num_to_del = 1;
1495         struct btrfs_extent_item *ei;
1496         u32 refs;
1497
1498         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1499         key.objectid = bytenr;
1500         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1501         key.offset = num_bytes;
1502         path = btrfs_alloc_path();
1503         if (!path)
1504                 return -ENOMEM;
1505
1506         path->reada = 1;
1507         ret = lookup_extent_backref(trans, extent_root, path,
1508                                     bytenr, root_objectid,
1509                                     ref_generation,
1510                                     owner_objectid, owner_offset, 1);
1511         if (ret == 0) {
1512                 struct btrfs_key found_key;
1513                 extent_slot = path->slots[0];
1514                 while(extent_slot > 0) {
1515                         extent_slot--;
1516                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1517                                               extent_slot);
1518                         if (found_key.objectid != bytenr)
1519                                 break;
1520                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1521                             found_key.offset == num_bytes) {
1522                                 found_extent = 1;
1523                                 break;
1524                         }
1525                         if (path->slots[0] - extent_slot > 5)
1526                                 break;
1527                 }
1528                 if (!found_extent)
1529                         ret = btrfs_del_item(trans, extent_root, path);
1530         } else {
1531                 btrfs_print_leaf(extent_root, path->nodes[0]);
1532                 WARN_ON(1);
1533                 printk("Unable to find ref byte nr %Lu root %Lu "
1534                        " gen %Lu owner %Lu offset %Lu\n", bytenr,
1535                        root_objectid, ref_generation, owner_objectid,
1536                        owner_offset);
1537         }
1538         if (!found_extent) {
1539                 btrfs_release_path(extent_root, path);
1540                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1541                 if (ret < 0)
1542                         return ret;
1543                 BUG_ON(ret);
1544                 extent_slot = path->slots[0];
1545         }
1546
1547         leaf = path->nodes[0];
1548         ei = btrfs_item_ptr(leaf, extent_slot,
1549                             struct btrfs_extent_item);
1550         refs = btrfs_extent_refs(leaf, ei);
1551         BUG_ON(refs == 0);
1552         refs -= 1;
1553         btrfs_set_extent_refs(leaf, ei, refs);
1554
1555         btrfs_mark_buffer_dirty(leaf);
1556
1557         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1558                 /* if the back ref and the extent are next to each other
1559                  * they get deleted below in one shot
1560                  */
1561                 path->slots[0] = extent_slot;
1562                 num_to_del = 2;
1563         } else if (found_extent) {
1564                 /* otherwise delete the extent back ref */
1565                 ret = btrfs_del_item(trans, extent_root, path);
1566                 BUG_ON(ret);
1567                 /* if refs are 0, we need to setup the path for deletion */
1568                 if (refs == 0) {
1569                         btrfs_release_path(extent_root, path);
1570                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1571                                                 -1, 1);
1572                         if (ret < 0)
1573                                 return ret;
1574                         BUG_ON(ret);
1575                 }
1576         }
1577
1578         if (refs == 0) {
1579                 u64 super_used;
1580                 u64 root_used;
1581
1582                 if (pin) {
1583                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1584                         if (ret > 0)
1585                                 mark_free = 1;
1586                         BUG_ON(ret < 0);
1587                 }
1588
1589                 /* block accounting for super block */
1590                 spin_lock_irq(&info->delalloc_lock);
1591                 super_used = btrfs_super_bytes_used(&info->super_copy);
1592                 btrfs_set_super_bytes_used(&info->super_copy,
1593                                            super_used - num_bytes);
1594                 spin_unlock_irq(&info->delalloc_lock);
1595
1596                 /* block accounting for root item */
1597                 root_used = btrfs_root_used(&root->root_item);
1598                 btrfs_set_root_used(&root->root_item,
1599                                            root_used - num_bytes);
1600                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1601                                       num_to_del);
1602                 if (ret) {
1603                         return ret;
1604                 }
1605                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1606                                          mark_free);
1607                 BUG_ON(ret);
1608         }
1609         btrfs_free_path(path);
1610         finish_current_insert(trans, extent_root);
1611         return ret;
1612 }
1613
1614 /*
1615  * find all the blocks marked as pending in the radix tree and remove
1616  * them from the extent map
1617  */
1618 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1619                                btrfs_root *extent_root)
1620 {
1621         int ret;
1622         int err = 0;
1623         u64 start;
1624         u64 end;
1625         struct extent_io_tree *pending_del;
1626         struct extent_io_tree *pinned_extents;
1627
1628         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
1629         pending_del = &extent_root->fs_info->pending_del;
1630         pinned_extents = &extent_root->fs_info->pinned_extents;
1631
1632         while(1) {
1633                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1634                                             EXTENT_LOCKED);
1635                 if (ret)
1636                         break;
1637                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1638                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1639                                   GFP_NOFS);
1640                 ret = __free_extent(trans, extent_root,
1641                                      start, end + 1 - start,
1642                                      extent_root->root_key.objectid,
1643                                      0, 0, 0, 0, 0);
1644                 if (ret)
1645                         err = ret;
1646         }
1647         return err;
1648 }
1649
1650 /*
1651  * remove an extent from the root, returns 0 on success
1652  */
1653 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
1654                                struct btrfs_root *root, u64 bytenr,
1655                                u64 num_bytes, u64 root_objectid,
1656                                u64 ref_generation, u64 owner_objectid,
1657                                u64 owner_offset, int pin)
1658 {
1659         struct btrfs_root *extent_root = root->fs_info->extent_root;
1660         int pending_ret;
1661         int ret;
1662
1663         WARN_ON(num_bytes < root->sectorsize);
1664         if (!root->ref_cows)
1665                 ref_generation = 0;
1666
1667         if (root == extent_root) {
1668                 pin_down_bytes(root, bytenr, num_bytes, 1);
1669                 return 0;
1670         }
1671         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1672                             ref_generation, owner_objectid, owner_offset,
1673                             pin, pin == 0);
1674         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1675         return ret ? ret : pending_ret;
1676 }
1677
1678 int btrfs_free_extent(struct btrfs_trans_handle *trans,
1679                       struct btrfs_root *root, u64 bytenr,
1680                       u64 num_bytes, u64 root_objectid,
1681                       u64 ref_generation, u64 owner_objectid,
1682                       u64 owner_offset, int pin)
1683 {
1684         int ret;
1685
1686         maybe_lock_mutex(root);
1687         ret = __btrfs_free_extent(trans, root, bytenr, num_bytes,
1688                                   root_objectid, ref_generation,
1689                                   owner_objectid, owner_offset, pin);
1690         maybe_unlock_mutex(root);
1691         return ret;
1692 }
1693
1694 static u64 stripe_align(struct btrfs_root *root, u64 val)
1695 {
1696         u64 mask = ((u64)root->stripesize - 1);
1697         u64 ret = (val + mask) & ~mask;
1698         return ret;
1699 }
1700
1701 /*
1702  * walks the btree of allocated extents and find a hole of a given size.
1703  * The key ins is changed to record the hole:
1704  * ins->objectid == block start
1705  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1706  * ins->offset == number of blocks
1707  * Any available blocks before search_start are skipped.
1708  */
1709 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1710                                      struct btrfs_root *orig_root,
1711                                      u64 num_bytes, u64 empty_size,
1712                                      u64 search_start, u64 search_end,
1713                                      u64 hint_byte, struct btrfs_key *ins,
1714                                      u64 exclude_start, u64 exclude_nr,
1715                                      int data)
1716 {
1717         int ret;
1718         u64 orig_search_start;
1719         struct btrfs_root * root = orig_root->fs_info->extent_root;
1720         struct btrfs_fs_info *info = root->fs_info;
1721         u64 total_needed = num_bytes;
1722         u64 *last_ptr = NULL;
1723         struct btrfs_block_group_cache *block_group;
1724         int full_scan = 0;
1725         int wrapped = 0;
1726         int chunk_alloc_done = 0;
1727         int empty_cluster = 2 * 1024 * 1024;
1728         int allowed_chunk_alloc = 0;
1729
1730         WARN_ON(num_bytes < root->sectorsize);
1731         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1732
1733         if (orig_root->ref_cows || empty_size)
1734                 allowed_chunk_alloc = 1;
1735
1736         if (data & BTRFS_BLOCK_GROUP_METADATA) {
1737                 last_ptr = &root->fs_info->last_alloc;
1738                 empty_cluster = 256 * 1024;
1739         }
1740
1741         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
1742                 last_ptr = &root->fs_info->last_data_alloc;
1743         }
1744
1745         if (last_ptr) {
1746                 if (*last_ptr)
1747                         hint_byte = *last_ptr;
1748                 else {
1749                         empty_size += empty_cluster;
1750                 }
1751         }
1752
1753         search_start = max(search_start, first_logical_byte(root, 0));
1754         orig_search_start = search_start;
1755
1756         if (search_end == (u64)-1)
1757                 search_end = btrfs_super_total_bytes(&info->super_copy);
1758
1759         if (hint_byte) {
1760                 block_group = btrfs_lookup_first_block_group(info, hint_byte);
1761                 if (!block_group)
1762                         hint_byte = search_start;
1763                 block_group = __btrfs_find_block_group(root, block_group,
1764                                                      hint_byte, data, 1);
1765                 if (last_ptr && *last_ptr == 0 && block_group)
1766                         hint_byte = block_group->key.objectid;
1767         } else {
1768                 block_group = __btrfs_find_block_group(root,
1769                                                      trans->block_group,
1770                                                      search_start, data, 1);
1771         }
1772         search_start = max(search_start, hint_byte);
1773
1774         total_needed += empty_size;
1775
1776 check_failed:
1777         if (!block_group) {
1778                 block_group = btrfs_lookup_first_block_group(info,
1779                                                              search_start);
1780                 if (!block_group)
1781                         block_group = btrfs_lookup_first_block_group(info,
1782                                                        orig_search_start);
1783         }
1784         if (full_scan && !chunk_alloc_done) {
1785                 if (allowed_chunk_alloc) {
1786                         do_chunk_alloc(trans, root,
1787                                      num_bytes + 2 * 1024 * 1024, data, 1);
1788                         allowed_chunk_alloc = 0;
1789                 } else if (block_group && block_group_bits(block_group, data)) {
1790                         block_group->space_info->force_alloc = 1;
1791                 }
1792                 chunk_alloc_done = 1;
1793         }
1794         ret = find_search_start(root, &block_group, &search_start,
1795                                 total_needed, data);
1796         if (ret == -ENOSPC && last_ptr && *last_ptr) {
1797                 *last_ptr = 0;
1798                 block_group = btrfs_lookup_first_block_group(info,
1799                                                              orig_search_start);
1800                 search_start = orig_search_start;
1801                 ret = find_search_start(root, &block_group, &search_start,
1802                                         total_needed, data);
1803         }
1804         if (ret == -ENOSPC)
1805                 goto enospc;
1806         if (ret)
1807                 goto error;
1808
1809         if (last_ptr && *last_ptr && search_start != *last_ptr) {
1810                 *last_ptr = 0;
1811                 if (!empty_size) {
1812                         empty_size += empty_cluster;
1813                         total_needed += empty_size;
1814                 }
1815                 block_group = btrfs_lookup_first_block_group(info,
1816                                                        orig_search_start);
1817                 search_start = orig_search_start;
1818                 ret = find_search_start(root, &block_group,
1819                                         &search_start, total_needed, data);
1820                 if (ret == -ENOSPC)
1821                         goto enospc;
1822                 if (ret)
1823                         goto error;
1824         }
1825
1826         search_start = stripe_align(root, search_start);
1827         ins->objectid = search_start;
1828         ins->offset = num_bytes;
1829
1830         if (ins->objectid + num_bytes >= search_end)
1831                 goto enospc;
1832
1833         if (ins->objectid + num_bytes >
1834             block_group->key.objectid + block_group->key.offset) {
1835                 search_start = block_group->key.objectid +
1836                         block_group->key.offset;
1837                 goto new_group;
1838         }
1839
1840         if (test_range_bit(&info->extent_ins, ins->objectid,
1841                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1842                 search_start = ins->objectid + num_bytes;
1843                 goto new_group;
1844         }
1845
1846         if (test_range_bit(&info->pinned_extents, ins->objectid,
1847                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1848                 search_start = ins->objectid + num_bytes;
1849                 goto new_group;
1850         }
1851
1852         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1853             ins->objectid < exclude_start + exclude_nr)) {
1854                 search_start = exclude_start + exclude_nr;
1855                 goto new_group;
1856         }
1857
1858         if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
1859                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1860                 if (block_group)
1861                         trans->block_group = block_group;
1862         }
1863         ins->offset = num_bytes;
1864         if (last_ptr) {
1865                 *last_ptr = ins->objectid + ins->offset;
1866                 if (*last_ptr ==
1867                     btrfs_super_total_bytes(&root->fs_info->super_copy)) {
1868                         *last_ptr = 0;
1869                 }
1870         }
1871         return 0;
1872
1873 new_group:
1874         if (search_start + num_bytes >= search_end) {
1875 enospc:
1876                 search_start = orig_search_start;
1877                 if (full_scan) {
1878                         ret = -ENOSPC;
1879                         goto error;
1880                 }
1881                 if (wrapped) {
1882                         if (!full_scan)
1883                                 total_needed -= empty_size;
1884                         full_scan = 1;
1885                 } else
1886                         wrapped = 1;
1887         }
1888         block_group = btrfs_lookup_first_block_group(info, search_start);
1889         cond_resched();
1890         block_group = __btrfs_find_block_group(root, block_group,
1891                                              search_start, data, 0);
1892         goto check_failed;
1893
1894 error:
1895         return ret;
1896 }
1897
1898 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
1899                                   struct btrfs_root *root,
1900                                   u64 num_bytes, u64 min_alloc_size,
1901                                   u64 empty_size, u64 hint_byte,
1902                                   u64 search_end, struct btrfs_key *ins,
1903                                   u64 data)
1904 {
1905         int ret;
1906         u64 search_start = 0;
1907         u64 alloc_profile;
1908         struct btrfs_fs_info *info = root->fs_info;
1909
1910         if (data) {
1911                 alloc_profile = info->avail_data_alloc_bits &
1912                                 info->data_alloc_profile;
1913                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
1914         } else if (root == root->fs_info->chunk_root) {
1915                 alloc_profile = info->avail_system_alloc_bits &
1916                                 info->system_alloc_profile;
1917                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
1918         } else {
1919                 alloc_profile = info->avail_metadata_alloc_bits &
1920                                 info->metadata_alloc_profile;
1921                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
1922         }
1923 again:
1924         data = reduce_alloc_profile(root, data);
1925         /*
1926          * the only place that sets empty_size is btrfs_realloc_node, which
1927          * is not called recursively on allocations
1928          */
1929         if (empty_size || root->ref_cows) {
1930                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
1931                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1932                                      2 * 1024 * 1024,
1933                                      BTRFS_BLOCK_GROUP_METADATA |
1934                                      (info->metadata_alloc_profile &
1935                                       info->avail_metadata_alloc_bits), 0);
1936                         BUG_ON(ret);
1937                 }
1938                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1939                                      num_bytes + 2 * 1024 * 1024, data, 0);
1940                 BUG_ON(ret);
1941         }
1942
1943         WARN_ON(num_bytes < root->sectorsize);
1944         ret = find_free_extent(trans, root, num_bytes, empty_size,
1945                                search_start, search_end, hint_byte, ins,
1946                                trans->alloc_exclude_start,
1947                                trans->alloc_exclude_nr, data);
1948
1949         if (ret == -ENOSPC && num_bytes > min_alloc_size) {
1950                 num_bytes = num_bytes >> 1;
1951                 num_bytes = max(num_bytes, min_alloc_size);
1952                 do_chunk_alloc(trans, root->fs_info->extent_root,
1953                                num_bytes, data, 1);
1954                 goto again;
1955         }
1956         if (ret) {
1957                 printk("allocation failed flags %Lu\n", data);
1958                 BUG();
1959         }
1960         clear_extent_dirty(&root->fs_info->free_space_cache,
1961                            ins->objectid, ins->objectid + ins->offset - 1,
1962                            GFP_NOFS);
1963         return 0;
1964 }
1965
1966 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
1967                                   struct btrfs_root *root,
1968                                   u64 num_bytes, u64 min_alloc_size,
1969                                   u64 empty_size, u64 hint_byte,
1970                                   u64 search_end, struct btrfs_key *ins,
1971                                   u64 data)
1972 {
1973         int ret;
1974         maybe_lock_mutex(root);
1975         ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
1976                                      empty_size, hint_byte, search_end, ins,
1977                                      data);
1978         maybe_unlock_mutex(root);
1979         return ret;
1980 }
1981
1982 static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
1983                                          struct btrfs_root *root,
1984                                          u64 root_objectid, u64 ref_generation,
1985                                          u64 owner, u64 owner_offset,
1986                                          struct btrfs_key *ins)
1987 {
1988         int ret;
1989         int pending_ret;
1990         u64 super_used;
1991         u64 root_used;
1992         u64 num_bytes = ins->offset;
1993         u32 sizes[2];
1994         struct btrfs_fs_info *info = root->fs_info;
1995         struct btrfs_root *extent_root = info->extent_root;
1996         struct btrfs_extent_item *extent_item;
1997         struct btrfs_extent_ref *ref;
1998         struct btrfs_path *path;
1999         struct btrfs_key keys[2];
2000
2001         /* block accounting for super block */
2002         spin_lock_irq(&info->delalloc_lock);
2003         super_used = btrfs_super_bytes_used(&info->super_copy);
2004         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
2005         spin_unlock_irq(&info->delalloc_lock);
2006
2007         /* block accounting for root item */
2008         root_used = btrfs_root_used(&root->root_item);
2009         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
2010
2011         if (root == extent_root) {
2012                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
2013                                 ins->objectid + ins->offset - 1,
2014                                 EXTENT_LOCKED, GFP_NOFS);
2015                 goto update_block;
2016         }
2017
2018         memcpy(&keys[0], ins, sizeof(*ins));
2019         keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
2020                                          owner, owner_offset);
2021         keys[1].objectid = ins->objectid;
2022         keys[1].type = BTRFS_EXTENT_REF_KEY;
2023         sizes[0] = sizeof(*extent_item);
2024         sizes[1] = sizeof(*ref);
2025
2026         path = btrfs_alloc_path();
2027         BUG_ON(!path);
2028
2029         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
2030                                        sizes, 2);
2031
2032         BUG_ON(ret);
2033         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2034                                      struct btrfs_extent_item);
2035         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
2036         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
2037                              struct btrfs_extent_ref);
2038
2039         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
2040         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
2041         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
2042         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
2043
2044         btrfs_mark_buffer_dirty(path->nodes[0]);
2045
2046         trans->alloc_exclude_start = 0;
2047         trans->alloc_exclude_nr = 0;
2048         btrfs_free_path(path);
2049         finish_current_insert(trans, extent_root);
2050         pending_ret = del_pending_extents(trans, extent_root);
2051
2052         if (ret)
2053                 goto out;
2054         if (pending_ret) {
2055                 ret = pending_ret;
2056                 goto out;
2057         }
2058
2059 update_block:
2060         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
2061         if (ret) {
2062                 printk("update block group failed for %Lu %Lu\n",
2063                        ins->objectid, ins->offset);
2064                 BUG();
2065         }
2066 out:
2067         return ret;
2068 }
2069
2070 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2071                                 struct btrfs_root *root,
2072                                 u64 root_objectid, u64 ref_generation,
2073                                 u64 owner, u64 owner_offset,
2074                                 struct btrfs_key *ins)
2075 {
2076         int ret;
2077         maybe_lock_mutex(root);
2078         ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
2079                                             ref_generation, owner,
2080                                             owner_offset, ins);
2081         maybe_unlock_mutex(root);
2082         return ret;
2083 }
2084 /*
2085  * finds a free extent and does all the dirty work required for allocation
2086  * returns the key for the extent through ins, and a tree buffer for
2087  * the first block of the extent through buf.
2088  *
2089  * returns 0 if everything worked, non-zero otherwise.
2090  */
2091 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
2092                        struct btrfs_root *root,
2093                        u64 num_bytes, u64 min_alloc_size,
2094                        u64 root_objectid, u64 ref_generation,
2095                        u64 owner, u64 owner_offset,
2096                        u64 empty_size, u64 hint_byte,
2097                        u64 search_end, struct btrfs_key *ins, u64 data)
2098 {
2099         int ret;
2100
2101         maybe_lock_mutex(root);
2102
2103         ret = __btrfs_reserve_extent(trans, root, num_bytes,
2104                                      min_alloc_size, empty_size, hint_byte,
2105                                      search_end, ins, data);
2106         BUG_ON(ret);
2107         ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
2108                                             ref_generation, owner,
2109                                             owner_offset, ins);
2110         BUG_ON(ret);
2111
2112         maybe_unlock_mutex(root);
2113         return ret;
2114 }
2115 /*
2116  * helper function to allocate a block for a given tree
2117  * returns the tree buffer or NULL.
2118  */
2119 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
2120                                              struct btrfs_root *root,
2121                                              u32 blocksize,
2122                                              u64 root_objectid,
2123                                              u64 ref_generation,
2124                                              u64 first_objectid,
2125                                              int level,
2126                                              u64 hint,
2127                                              u64 empty_size)
2128 {
2129         struct btrfs_key ins;
2130         int ret;
2131         struct extent_buffer *buf;
2132
2133         ret = btrfs_alloc_extent(trans, root, blocksize, blocksize,
2134                                  root_objectid, ref_generation,
2135                                  level, first_objectid, empty_size, hint,
2136                                  (u64)-1, &ins, 0);
2137         if (ret) {
2138                 BUG_ON(ret > 0);
2139                 return ERR_PTR(ret);
2140         }
2141         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
2142         if (!buf) {
2143                 btrfs_free_extent(trans, root, ins.objectid, blocksize,
2144                                   root->root_key.objectid, ref_generation,
2145                                   0, 0, 0);
2146                 return ERR_PTR(-ENOMEM);
2147         }
2148         btrfs_set_header_generation(buf, trans->transid);
2149         btrfs_tree_lock(buf);
2150         clean_tree_block(trans, root, buf);
2151         btrfs_set_buffer_uptodate(buf);
2152
2153         if (PageDirty(buf->first_page)) {
2154                 printk("page %lu dirty\n", buf->first_page->index);
2155                 WARN_ON(1);
2156         }
2157
2158         set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
2159                          buf->start + buf->len - 1, GFP_NOFS);
2160         trans->blocks_used++;
2161         return buf;
2162 }
2163
2164 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
2165                                   struct btrfs_root *root,
2166                                   struct extent_buffer *leaf)
2167 {
2168         u64 leaf_owner;
2169         u64 leaf_generation;
2170         struct btrfs_key key;
2171         struct btrfs_file_extent_item *fi;
2172         int i;
2173         int nritems;
2174         int ret;
2175
2176         BUG_ON(!btrfs_is_leaf(leaf));
2177         nritems = btrfs_header_nritems(leaf);
2178         leaf_owner = btrfs_header_owner(leaf);
2179         leaf_generation = btrfs_header_generation(leaf);
2180
2181         for (i = 0; i < nritems; i++) {
2182                 u64 disk_bytenr;
2183
2184                 btrfs_item_key_to_cpu(leaf, &key, i);
2185                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2186                         continue;
2187                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
2188                 if (btrfs_file_extent_type(leaf, fi) ==
2189                     BTRFS_FILE_EXTENT_INLINE)
2190                         continue;
2191                 /*
2192                  * FIXME make sure to insert a trans record that
2193                  * repeats the snapshot del on crash
2194                  */
2195                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2196                 if (disk_bytenr == 0)
2197                         continue;
2198                 ret = __btrfs_free_extent(trans, root, disk_bytenr,
2199                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
2200                                 leaf_owner, leaf_generation,
2201                                 key.objectid, key.offset, 0);
2202                 BUG_ON(ret);
2203         }
2204         return 0;
2205 }
2206
2207 static void noinline reada_walk_down(struct btrfs_root *root,
2208                                      struct extent_buffer *node,
2209                                      int slot)
2210 {
2211         u64 bytenr;
2212         u64 last = 0;
2213         u32 nritems;
2214         u32 refs;
2215         u32 blocksize;
2216         int ret;
2217         int i;
2218         int level;
2219         int skipped = 0;
2220
2221         nritems = btrfs_header_nritems(node);
2222         level = btrfs_header_level(node);
2223         if (level)
2224                 return;
2225
2226         for (i = slot; i < nritems && skipped < 32; i++) {
2227                 bytenr = btrfs_node_blockptr(node, i);
2228                 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
2229                              (last > bytenr && last - bytenr > 32 * 1024))) {
2230                         skipped++;
2231                         continue;
2232                 }
2233                 blocksize = btrfs_level_size(root, level - 1);
2234                 if (i != slot) {
2235                         ret = lookup_extent_ref(NULL, root, bytenr,
2236                                                 blocksize, &refs);
2237                         BUG_ON(ret);
2238                         if (refs != 1) {
2239                                 skipped++;
2240                                 continue;
2241                         }
2242                 }
2243                 ret = readahead_tree_block(root, bytenr, blocksize,
2244                                            btrfs_node_ptr_generation(node, i));
2245                 last = bytenr + blocksize;
2246                 cond_resched();
2247                 if (ret)
2248                         break;
2249         }
2250 }
2251
2252 /*
2253  * we want to avoid as much random IO as we can with the alloc mutex
2254  * held, so drop the lock and do the lookup, then do it again with the
2255  * lock held.
2256  */
2257 int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
2258                               u32 *refs)
2259 {
2260         mutex_unlock(&root->fs_info->alloc_mutex);
2261         lookup_extent_ref(NULL, root, start, len, refs);
2262         cond_resched();
2263         mutex_lock(&root->fs_info->alloc_mutex);
2264         return lookup_extent_ref(NULL, root, start, len, refs);
2265 }
2266
2267 /*
2268  * helper function for drop_snapshot, this walks down the tree dropping ref
2269  * counts as it goes.
2270  */
2271 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2272                                    struct btrfs_root *root,
2273                                    struct btrfs_path *path, int *level)
2274 {
2275         u64 root_owner;
2276         u64 root_gen;
2277         u64 bytenr;
2278         u64 ptr_gen;
2279         struct extent_buffer *next;
2280         struct extent_buffer *cur;
2281         struct extent_buffer *parent;
2282         u32 blocksize;
2283         int ret;
2284         u32 refs;
2285
2286         mutex_lock(&root->fs_info->alloc_mutex);
2287
2288         WARN_ON(*level < 0);
2289         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2290         ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
2291                                 path->nodes[*level]->len, &refs);
2292         BUG_ON(ret);
2293         if (refs > 1)
2294                 goto out;
2295
2296         /*
2297          * walk down to the last node level and free all the leaves
2298          */
2299         while(*level >= 0) {
2300                 WARN_ON(*level < 0);
2301                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2302                 cur = path->nodes[*level];
2303
2304                 if (btrfs_header_level(cur) != *level)
2305                         WARN_ON(1);
2306
2307                 if (path->slots[*level] >=
2308                     btrfs_header_nritems(cur))
2309                         break;
2310                 if (*level == 0) {
2311                         ret = drop_leaf_ref(trans, root, cur);
2312                         BUG_ON(ret);
2313                         break;
2314                 }
2315                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2316                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2317                 blocksize = btrfs_level_size(root, *level - 1);
2318
2319                 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
2320                 BUG_ON(ret);
2321                 if (refs != 1) {
2322                         parent = path->nodes[*level];
2323                         root_owner = btrfs_header_owner(parent);
2324                         root_gen = btrfs_header_generation(parent);
2325                         path->slots[*level]++;
2326                         ret = __btrfs_free_extent(trans, root, bytenr,
2327                                                 blocksize, root_owner,
2328                                                 root_gen, 0, 0, 1);
2329                         BUG_ON(ret);
2330                         continue;
2331                 }
2332                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2333                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2334                         free_extent_buffer(next);
2335                         mutex_unlock(&root->fs_info->alloc_mutex);
2336
2337                         if (path->slots[*level] == 0)
2338                                 reada_walk_down(root, cur, path->slots[*level]);
2339
2340                         next = read_tree_block(root, bytenr, blocksize,
2341                                                ptr_gen);
2342                         cond_resched();
2343                         mutex_lock(&root->fs_info->alloc_mutex);
2344
2345                         /* we've dropped the lock, double check */
2346                         ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
2347                                                 &refs);
2348                         BUG_ON(ret);
2349                         if (refs != 1) {
2350                                 parent = path->nodes[*level];
2351                                 root_owner = btrfs_header_owner(parent);
2352                                 root_gen = btrfs_header_generation(parent);
2353
2354                                 path->slots[*level]++;
2355                                 free_extent_buffer(next);
2356                                 ret = __btrfs_free_extent(trans, root, bytenr,
2357                                                         blocksize,
2358                                                         root_owner,
2359                                                         root_gen, 0, 0, 1);
2360                                 BUG_ON(ret);
2361                                 continue;
2362                         }
2363                 }
2364                 WARN_ON(*level <= 0);
2365                 if (path->nodes[*level-1])
2366                         free_extent_buffer(path->nodes[*level-1]);
2367                 path->nodes[*level-1] = next;
2368                 *level = btrfs_header_level(next);
2369                 path->slots[*level] = 0;
2370         }
2371 out:
2372         WARN_ON(*level < 0);
2373         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2374
2375         if (path->nodes[*level] == root->node) {
2376                 root_owner = root->root_key.objectid;
2377                 parent = path->nodes[*level];
2378         } else {
2379                 parent = path->nodes[*level + 1];
2380                 root_owner = btrfs_header_owner(parent);
2381         }
2382
2383         root_gen = btrfs_header_generation(parent);
2384         ret = __btrfs_free_extent(trans, root, path->nodes[*level]->start,
2385                                 path->nodes[*level]->len,
2386                                 root_owner, root_gen, 0, 0, 1);
2387         free_extent_buffer(path->nodes[*level]);
2388         path->nodes[*level] = NULL;
2389         *level += 1;
2390         BUG_ON(ret);
2391         mutex_unlock(&root->fs_info->alloc_mutex);
2392         cond_resched();
2393         return 0;
2394 }
2395
2396 /*
2397  * helper for dropping snapshots.  This walks back up the tree in the path
2398  * to find the first node higher up where we haven't yet gone through
2399  * all the slots
2400  */
2401 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2402                                  struct btrfs_root *root,
2403                                  struct btrfs_path *path, int *level)
2404 {
2405         u64 root_owner;
2406         u64 root_gen;
2407         struct btrfs_root_item *root_item = &root->root_item;
2408         int i;
2409         int slot;
2410         int ret;
2411
2412         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2413                 slot = path->slots[i];
2414                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2415                         struct extent_buffer *node;
2416                         struct btrfs_disk_key disk_key;
2417                         node = path->nodes[i];
2418                         path->slots[i]++;
2419                         *level = i;
2420                         WARN_ON(*level == 0);
2421                         btrfs_node_key(node, &disk_key, path->slots[i]);
2422                         memcpy(&root_item->drop_progress,
2423                                &disk_key, sizeof(disk_key));
2424                         root_item->drop_level = i;
2425                         return 0;
2426                 } else {
2427                         if (path->nodes[*level] == root->node) {
2428                                 root_owner = root->root_key.objectid;
2429                                 root_gen =
2430                                    btrfs_header_generation(path->nodes[*level]);
2431                         } else {
2432                                 struct extent_buffer *node;
2433                                 node = path->nodes[*level + 1];
2434                                 root_owner = btrfs_header_owner(node);
2435                                 root_gen = btrfs_header_generation(node);
2436                         }
2437                         ret = btrfs_free_extent(trans, root,
2438                                                 path->nodes[*level]->start,
2439                                                 path->nodes[*level]->len,
2440                                                 root_owner, root_gen, 0, 0, 1);
2441                         BUG_ON(ret);
2442                         free_extent_buffer(path->nodes[*level]);
2443                         path->nodes[*level] = NULL;
2444                         *level = i + 1;
2445                 }
2446         }
2447         return 1;
2448 }
2449
2450 /*
2451  * drop the reference count on the tree rooted at 'snap'.  This traverses
2452  * the tree freeing any blocks that have a ref count of zero after being
2453  * decremented.
2454  */
2455 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2456                         *root)
2457 {
2458         int ret = 0;
2459         int wret;
2460         int level;
2461         struct btrfs_path *path;
2462         int i;
2463         int orig_level;
2464         struct btrfs_root_item *root_item = &root->root_item;
2465
2466         WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
2467         path = btrfs_alloc_path();
2468         BUG_ON(!path);
2469
2470         level = btrfs_header_level(root->node);
2471         orig_level = level;
2472         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2473                 path->nodes[level] = root->node;
2474                 extent_buffer_get(root->node);
2475                 path->slots[level] = 0;
2476         } else {
2477                 struct btrfs_key key;
2478                 struct btrfs_disk_key found_key;
2479                 struct extent_buffer *node;
2480
2481                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2482                 level = root_item->drop_level;
2483                 path->lowest_level = level;
2484                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2485                 if (wret < 0) {
2486                         ret = wret;
2487                         goto out;
2488                 }
2489                 node = path->nodes[level];
2490                 btrfs_node_key(node, &found_key, path->slots[level]);
2491                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2492                                sizeof(found_key)));
2493                 /*
2494                  * unlock our path, this is safe because only this
2495                  * function is allowed to delete this snapshot
2496                  */
2497                 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
2498                         if (path->nodes[i] && path->locks[i]) {
2499                                 path->locks[i] = 0;
2500                                 btrfs_tree_unlock(path->nodes[i]);
2501                         }
2502                 }
2503         }
2504         while(1) {
2505                 wret = walk_down_tree(trans, root, path, &level);
2506                 if (wret > 0)
2507                         break;
2508                 if (wret < 0)
2509                         ret = wret;
2510
2511                 wret = walk_up_tree(trans, root, path, &level);
2512                 if (wret > 0)
2513                         break;
2514                 if (wret < 0)
2515                         ret = wret;
2516                 if (trans->transaction->in_commit) {
2517                         ret = -EAGAIN;
2518                         break;
2519                 }
2520         }
2521         for (i = 0; i <= orig_level; i++) {
2522                 if (path->nodes[i]) {
2523                         free_extent_buffer(path->nodes[i]);
2524                         path->nodes[i] = NULL;
2525                 }
2526         }
2527 out:
2528         btrfs_free_path(path);
2529         return ret;
2530 }
2531
2532 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2533 {
2534         u64 start;
2535         u64 end;
2536         u64 ptr;
2537         int ret;
2538
2539         mutex_lock(&info->alloc_mutex);
2540         while(1) {
2541                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2542                                             &start, &end, (unsigned int)-1);
2543                 if (ret)
2544                         break;
2545                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2546                 if (!ret)
2547                         kfree((void *)(unsigned long)ptr);
2548                 clear_extent_bits(&info->block_group_cache, start,
2549                                   end, (unsigned int)-1, GFP_NOFS);
2550         }
2551         while(1) {
2552                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2553                                             &start, &end, EXTENT_DIRTY);
2554                 if (ret)
2555                         break;
2556                 clear_extent_dirty(&info->free_space_cache, start,
2557                                    end, GFP_NOFS);
2558         }
2559         mutex_unlock(&info->alloc_mutex);
2560         return 0;
2561 }
2562
2563 static unsigned long calc_ra(unsigned long start, unsigned long last,
2564                              unsigned long nr)
2565 {
2566         return min(last, start + nr - 1);
2567 }
2568
2569 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2570                                          u64 len)
2571 {
2572         u64 page_start;
2573         u64 page_end;
2574         unsigned long last_index;
2575         unsigned long i;
2576         struct page *page;
2577         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2578         struct file_ra_state *ra;
2579         unsigned long total_read = 0;
2580         unsigned long ra_pages;
2581         struct btrfs_trans_handle *trans;
2582
2583         ra = kzalloc(sizeof(*ra), GFP_NOFS);
2584
2585         mutex_lock(&inode->i_mutex);
2586         i = start >> PAGE_CACHE_SHIFT;
2587         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2588
2589         ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages;
2590
2591         file_ra_state_init(ra, inode->i_mapping);
2592
2593         for (; i <= last_index; i++) {
2594                 if (total_read % ra_pages == 0) {
2595                         btrfs_force_ra(inode->i_mapping, ra, NULL, i,
2596                                        calc_ra(i, last_index, ra_pages));
2597                 }
2598                 total_read++;
2599                 if (((u64)i << PAGE_CACHE_SHIFT) > inode->i_size)
2600                         goto truncate_racing;
2601
2602                 page = grab_cache_page(inode->i_mapping, i);
2603                 if (!page) {
2604                         goto out_unlock;
2605                 }
2606                 if (!PageUptodate(page)) {
2607                         btrfs_readpage(NULL, page);
2608                         lock_page(page);
2609                         if (!PageUptodate(page)) {
2610                                 unlock_page(page);
2611                                 page_cache_release(page);
2612                                 goto out_unlock;
2613                         }
2614                 }
2615 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
2616                 ClearPageDirty(page);
2617 #else
2618                 cancel_dirty_page(page, PAGE_CACHE_SIZE);
2619 #endif
2620                 wait_on_page_writeback(page);
2621                 set_page_extent_mapped(page);
2622                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2623                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2624
2625                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2626
2627                 set_extent_delalloc(io_tree, page_start,
2628                                     page_end, GFP_NOFS);
2629                 set_page_dirty(page);
2630
2631                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2632                 unlock_page(page);
2633                 page_cache_release(page);
2634         }
2635         balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2636                                            total_read);
2637
2638 out_unlock:
2639         kfree(ra);
2640         trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
2641         if (trans) {
2642                 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
2643                 mark_inode_dirty(inode);
2644         }
2645         mutex_unlock(&inode->i_mutex);
2646         return 0;
2647
2648 truncate_racing:
2649         vmtruncate(inode, inode->i_size);
2650         balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2651                                            total_read);
2652         goto out_unlock;
2653 }
2654
2655 /*
2656  * The back references tell us which tree holds a ref on a block,
2657  * but it is possible for the tree root field in the reference to
2658  * reflect the original root before a snapshot was made.  In this
2659  * case we should search through all the children of a given root
2660  * to find potential holders of references on a block.
2661  *
2662  * Instead, we do something a little less fancy and just search
2663  * all the roots for a given key/block combination.
2664  */
2665 static int find_root_for_ref(struct btrfs_root *root,
2666                              struct btrfs_path *path,
2667                              struct btrfs_key *key0,
2668                              int level,
2669                              int file_key,
2670                              struct btrfs_root **found_root,
2671                              u64 bytenr)
2672 {
2673         struct btrfs_key root_location;
2674         struct btrfs_root *cur_root = *found_root;
2675         struct btrfs_file_extent_item *file_extent;
2676         u64 root_search_start = BTRFS_FS_TREE_OBJECTID;
2677         u64 found_bytenr;
2678         int ret;
2679
2680         root_location.offset = (u64)-1;
2681         root_location.type = BTRFS_ROOT_ITEM_KEY;
2682         path->lowest_level = level;
2683         path->reada = 0;
2684         while(1) {
2685                 ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0);
2686                 found_bytenr = 0;
2687                 if (ret == 0 && file_key) {
2688                         struct extent_buffer *leaf = path->nodes[0];
2689                         file_extent = btrfs_item_ptr(leaf, path->slots[0],
2690                                              struct btrfs_file_extent_item);
2691                         if (btrfs_file_extent_type(leaf, file_extent) ==
2692                             BTRFS_FILE_EXTENT_REG) {
2693                                 found_bytenr =
2694                                         btrfs_file_extent_disk_bytenr(leaf,
2695                                                                file_extent);
2696                        }
2697                 } else if (!file_key) {
2698                         if (path->nodes[level])
2699                                 found_bytenr = path->nodes[level]->start;
2700                 }
2701
2702                 btrfs_release_path(cur_root, path);
2703
2704                 if (found_bytenr == bytenr) {
2705                         *found_root = cur_root;
2706                         ret = 0;
2707                         goto out;
2708                 }
2709                 ret = btrfs_search_root(root->fs_info->tree_root,
2710                                         root_search_start, &root_search_start);
2711                 if (ret)
2712                         break;
2713
2714                 root_location.objectid = root_search_start;
2715                 cur_root = btrfs_read_fs_root_no_name(root->fs_info,
2716                                                       &root_location);
2717                 if (!cur_root) {
2718                         ret = 1;
2719                         break;
2720                 }
2721         }
2722 out:
2723         path->lowest_level = 0;
2724         return ret;
2725 }
2726
2727 /*
2728  * note, this releases the path
2729  */
2730 static int noinline relocate_one_reference(struct btrfs_root *extent_root,
2731                                   struct btrfs_path *path,
2732                                   struct btrfs_key *extent_key,
2733                                   u64 *last_file_objectid,
2734                                   u64 *last_file_offset,
2735                                   u64 *last_file_root,
2736                                   u64 last_extent)
2737 {
2738         struct inode *inode;
2739         struct btrfs_root *found_root;
2740         struct btrfs_key root_location;
2741         struct btrfs_key found_key;
2742         struct btrfs_extent_ref *ref;
2743         u64 ref_root;
2744         u64 ref_gen;
2745         u64 ref_objectid;
2746         u64 ref_offset;
2747         int ret;
2748         int level;
2749
2750         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
2751
2752         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
2753                              struct btrfs_extent_ref);
2754         ref_root = btrfs_ref_root(path->nodes[0], ref);
2755         ref_gen = btrfs_ref_generation(path->nodes[0], ref);
2756         ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
2757         ref_offset = btrfs_ref_offset(path->nodes[0], ref);
2758         btrfs_release_path(extent_root, path);
2759
2760         root_location.objectid = ref_root;
2761         if (ref_gen == 0)
2762                 root_location.offset = 0;
2763         else
2764                 root_location.offset = (u64)-1;
2765         root_location.type = BTRFS_ROOT_ITEM_KEY;
2766
2767         found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
2768                                                 &root_location);
2769         BUG_ON(!found_root);
2770         mutex_unlock(&extent_root->fs_info->alloc_mutex);
2771
2772         if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2773                 found_key.objectid = ref_objectid;
2774                 found_key.type = BTRFS_EXTENT_DATA_KEY;
2775                 found_key.offset = ref_offset;
2776                 level = 0;
2777
2778                 if (last_extent == extent_key->objectid &&
2779                     *last_file_objectid == ref_objectid &&
2780                     *last_file_offset == ref_offset &&
2781                     *last_file_root == ref_root)
2782                         goto out;
2783
2784                 ret = find_root_for_ref(extent_root, path, &found_key,
2785                                         level, 1, &found_root,
2786                                         extent_key->objectid);
2787
2788                 if (ret)
2789                         goto out;
2790
2791                 if (last_extent == extent_key->objectid &&
2792                     *last_file_objectid == ref_objectid &&
2793                     *last_file_offset == ref_offset &&
2794                     *last_file_root == ref_root)
2795                         goto out;
2796
2797                 inode = btrfs_iget_locked(extent_root->fs_info->sb,
2798                                           ref_objectid, found_root);
2799                 if (inode->i_state & I_NEW) {
2800                         /* the inode and parent dir are two different roots */
2801                         BTRFS_I(inode)->root = found_root;
2802                         BTRFS_I(inode)->location.objectid = ref_objectid;
2803                         BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
2804                         BTRFS_I(inode)->location.offset = 0;
2805                         btrfs_read_locked_inode(inode);
2806                         unlock_new_inode(inode);
2807
2808                 }
2809                 /* this can happen if the reference is not against
2810                  * the latest version of the tree root
2811                  */
2812                 if (is_bad_inode(inode))
2813                         goto out;
2814
2815                 *last_file_objectid = inode->i_ino;
2816                 *last_file_root = found_root->root_key.objectid;
2817                 *last_file_offset = ref_offset;
2818
2819                 relocate_inode_pages(inode, ref_offset, extent_key->offset);
2820                 iput(inode);
2821         } else {
2822                 struct btrfs_trans_handle *trans;
2823                 struct extent_buffer *eb;
2824                 int needs_lock = 0;
2825
2826                 eb = read_tree_block(found_root, extent_key->objectid,
2827                                      extent_key->offset, 0);
2828                 btrfs_tree_lock(eb);
2829                 level = btrfs_header_level(eb);
2830
2831                 if (level == 0)
2832                         btrfs_item_key_to_cpu(eb, &found_key, 0);
2833                 else
2834                         btrfs_node_key_to_cpu(eb, &found_key, 0);
2835
2836                 btrfs_tree_unlock(eb);
2837                 free_extent_buffer(eb);
2838
2839                 ret = find_root_for_ref(extent_root, path, &found_key,
2840                                         level, 0, &found_root,
2841                                         extent_key->objectid);
2842
2843                 if (ret)
2844                         goto out;
2845
2846                 /*
2847                  * right here almost anything could happen to our key,
2848                  * but that's ok.  The cow below will either relocate it
2849                  * or someone else will have relocated it.  Either way,
2850                  * it is in a different spot than it was before and
2851                  * we're happy.
2852                  */
2853
2854                 trans = btrfs_start_transaction(found_root, 1);
2855
2856                 if (found_root == extent_root->fs_info->extent_root ||
2857                     found_root == extent_root->fs_info->chunk_root ||
2858                     found_root == extent_root->fs_info->dev_root) {
2859                         needs_lock = 1;
2860                         mutex_lock(&extent_root->fs_info->alloc_mutex);
2861                 }
2862
2863                 path->lowest_level = level;
2864                 path->reada = 2;
2865                 ret = btrfs_search_slot(trans, found_root, &found_key, path,
2866                                         0, 1);
2867                 path->lowest_level = 0;
2868                 btrfs_release_path(found_root, path);
2869
2870                 if (found_root == found_root->fs_info->extent_root)
2871                         btrfs_extent_post_op(trans, found_root);
2872                 if (needs_lock)
2873                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
2874
2875                 btrfs_end_transaction(trans, found_root);
2876
2877         }
2878 out:
2879         mutex_lock(&extent_root->fs_info->alloc_mutex);
2880         return 0;
2881 }
2882
2883 static int noinline del_extent_zero(struct btrfs_root *extent_root,
2884                                     struct btrfs_path *path,
2885                                     struct btrfs_key *extent_key)
2886 {
2887         int ret;
2888         struct btrfs_trans_handle *trans;
2889
2890         trans = btrfs_start_transaction(extent_root, 1);
2891         ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
2892         if (ret > 0) {
2893                 ret = -EIO;
2894                 goto out;
2895         }
2896         if (ret < 0)
2897                 goto out;
2898         ret = btrfs_del_item(trans, extent_root, path);
2899 out:
2900         btrfs_end_transaction(trans, extent_root);
2901         return ret;
2902 }
2903
2904 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
2905                                         struct btrfs_path *path,
2906                                         struct btrfs_key *extent_key)
2907 {
2908         struct btrfs_key key;
2909         struct btrfs_key found_key;
2910         struct extent_buffer *leaf;
2911         u64 last_file_objectid = 0;
2912         u64 last_file_root = 0;
2913         u64 last_file_offset = (u64)-1;
2914         u64 last_extent = 0;
2915         u32 nritems;
2916         u32 item_size;
2917         int ret = 0;
2918
2919         if (extent_key->objectid == 0) {
2920                 ret = del_extent_zero(extent_root, path, extent_key);
2921                 goto out;
2922         }
2923         key.objectid = extent_key->objectid;
2924         key.type = BTRFS_EXTENT_REF_KEY;
2925         key.offset = 0;
2926
2927         while(1) {
2928                 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2929
2930                 if (ret < 0)
2931                         goto out;
2932
2933                 ret = 0;
2934                 leaf = path->nodes[0];
2935                 nritems = btrfs_header_nritems(leaf);
2936                 if (path->slots[0] == nritems) {
2937                         ret = btrfs_next_leaf(extent_root, path);
2938                         if (ret > 0) {
2939                                 ret = 0;
2940                                 goto out;
2941                         }
2942                         if (ret < 0)
2943                                 goto out;
2944                         leaf = path->nodes[0];
2945                 }
2946
2947                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2948                 if (found_key.objectid != extent_key->objectid) {
2949                         break;
2950                 }
2951
2952                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
2953                         break;
2954                 }
2955
2956                 key.offset = found_key.offset + 1;
2957                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2958
2959                 ret = relocate_one_reference(extent_root, path, extent_key,
2960                                              &last_file_objectid,
2961                                              &last_file_offset,
2962                                              &last_file_root, last_extent);
2963                 if (ret)
2964                         goto out;
2965                 last_extent = extent_key->objectid;
2966         }
2967         ret = 0;
2968 out:
2969         btrfs_release_path(extent_root, path);
2970         return ret;
2971 }
2972
2973 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
2974 {
2975         u64 num_devices;
2976         u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
2977                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
2978
2979         num_devices = root->fs_info->fs_devices->num_devices;
2980         if (num_devices == 1) {
2981                 stripped |= BTRFS_BLOCK_GROUP_DUP;
2982                 stripped = flags & ~stripped;
2983
2984                 /* turn raid0 into single device chunks */
2985                 if (flags & BTRFS_BLOCK_GROUP_RAID0)
2986                         return stripped;
2987
2988                 /* turn mirroring into duplication */
2989                 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
2990                              BTRFS_BLOCK_GROUP_RAID10))
2991                         return stripped | BTRFS_BLOCK_GROUP_DUP;
2992                 return flags;
2993         } else {
2994                 /* they already had raid on here, just return */
2995                 if (flags & stripped)
2996                         return flags;
2997
2998                 stripped |= BTRFS_BLOCK_GROUP_DUP;
2999                 stripped = flags & ~stripped;
3000
3001                 /* switch duplicated blocks with raid1 */
3002                 if (flags & BTRFS_BLOCK_GROUP_DUP)
3003                         return stripped | BTRFS_BLOCK_GROUP_RAID1;
3004
3005                 /* turn single device chunks into raid0 */
3006                 return stripped | BTRFS_BLOCK_GROUP_RAID0;
3007         }
3008         return flags;
3009 }
3010
3011 int __alloc_chunk_for_shrink(struct btrfs_root *root,
3012                      struct btrfs_block_group_cache *shrink_block_group,
3013                      int force)
3014 {
3015         struct btrfs_trans_handle *trans;
3016         u64 new_alloc_flags;
3017         u64 calc;
3018
3019         if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
3020
3021                 mutex_unlock(&root->fs_info->alloc_mutex);
3022                 trans = btrfs_start_transaction(root, 1);
3023                 mutex_lock(&root->fs_info->alloc_mutex);
3024
3025                 new_alloc_flags = update_block_group_flags(root,
3026                                                    shrink_block_group->flags);
3027                 if (new_alloc_flags != shrink_block_group->flags) {
3028                         calc =
3029                              btrfs_block_group_used(&shrink_block_group->item);
3030                 } else {
3031                         calc = shrink_block_group->key.offset;
3032                 }
3033                 do_chunk_alloc(trans, root->fs_info->extent_root,
3034                                calc + 2 * 1024 * 1024, new_alloc_flags, force);
3035
3036                 mutex_unlock(&root->fs_info->alloc_mutex);
3037                 btrfs_end_transaction(trans, root);
3038                 mutex_lock(&root->fs_info->alloc_mutex);
3039         }
3040         return 0;
3041 }
3042
3043 int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
3044 {
3045         struct btrfs_trans_handle *trans;
3046         struct btrfs_root *tree_root = root->fs_info->tree_root;
3047         struct btrfs_path *path;
3048         u64 cur_byte;
3049         u64 total_found;
3050         u64 shrink_last_byte;
3051         struct btrfs_block_group_cache *shrink_block_group;
3052         struct btrfs_fs_info *info = root->fs_info;
3053         struct btrfs_key key;
3054         struct btrfs_key found_key;
3055         struct extent_buffer *leaf;
3056         u32 nritems;
3057         int ret;
3058         int progress;
3059
3060         mutex_lock(&root->fs_info->alloc_mutex);
3061         shrink_block_group = btrfs_lookup_block_group(root->fs_info,
3062                                                       shrink_start);
3063         BUG_ON(!shrink_block_group);
3064
3065         shrink_last_byte = shrink_block_group->key.objectid +
3066                 shrink_block_group->key.offset;
3067
3068         shrink_block_group->space_info->total_bytes -=
3069                 shrink_block_group->key.offset;
3070         path = btrfs_alloc_path();
3071         root = root->fs_info->extent_root;
3072         path->reada = 2;
3073
3074         printk("btrfs relocating block group %llu flags %llu\n",
3075                (unsigned long long)shrink_start,
3076                (unsigned long long)shrink_block_group->flags);
3077
3078         __alloc_chunk_for_shrink(root, shrink_block_group, 1);
3079
3080 again:
3081
3082         shrink_block_group->ro = 1;
3083
3084         total_found = 0;
3085         progress = 0;
3086         key.objectid = shrink_start;
3087         key.offset = 0;
3088         key.type = 0;
3089         cur_byte = key.objectid;
3090
3091         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3092         if (ret < 0)
3093                 goto out;
3094
3095         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
3096         if (ret < 0)
3097                 goto out;
3098
3099         if (ret == 0) {
3100                 leaf = path->nodes[0];
3101                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3102                 if (found_key.objectid + found_key.offset > shrink_start &&
3103                     found_key.objectid < shrink_last_byte) {
3104                         cur_byte = found_key.objectid;
3105                         key.objectid = cur_byte;
3106                 }
3107         }
3108         btrfs_release_path(root, path);
3109
3110         while(1) {
3111                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3112                 if (ret < 0)
3113                         goto out;
3114
3115 next:
3116                 leaf = path->nodes[0];
3117                 nritems = btrfs_header_nritems(leaf);
3118                 if (path->slots[0] >= nritems) {
3119                         ret = btrfs_next_leaf(root, path);
3120                         if (ret < 0)
3121                                 goto out;
3122                         if (ret == 1) {
3123                                 ret = 0;
3124                                 break;
3125                         }
3126                         leaf = path->nodes[0];
3127                         nritems = btrfs_header_nritems(leaf);
3128                 }
3129
3130                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3131
3132                 if (found_key.objectid >= shrink_last_byte)
3133                         break;
3134
3135                 if (progress && need_resched()) {
3136                         memcpy(&key, &found_key, sizeof(key));
3137                         cond_resched();
3138                         btrfs_release_path(root, path);
3139                         btrfs_search_slot(NULL, root, &key, path, 0, 0);
3140                         progress = 0;
3141                         goto next;
3142                 }
3143                 progress = 1;
3144
3145                 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
3146                     found_key.objectid + found_key.offset <= cur_byte) {
3147                         memcpy(&key, &found_key, sizeof(key));
3148                         key.offset++;
3149                         path->slots[0]++;
3150                         goto next;
3151                 }
3152
3153                 total_found++;
3154                 cur_byte = found_key.objectid + found_key.offset;
3155                 key.objectid = cur_byte;
3156                 btrfs_release_path(root, path);
3157                 ret = relocate_one_extent(root, path, &found_key);
3158                 __alloc_chunk_for_shrink(root, shrink_block_group, 0);
3159         }
3160
3161         btrfs_release_path(root, path);
3162
3163         if (total_found > 0) {
3164                 printk("btrfs relocate found %llu last extent was %llu\n",
3165                        (unsigned long long)total_found,
3166                        (unsigned long long)found_key.objectid);
3167                 mutex_unlock(&root->fs_info->alloc_mutex);
3168                 trans = btrfs_start_transaction(tree_root, 1);
3169                 btrfs_commit_transaction(trans, tree_root);
3170
3171                 btrfs_clean_old_snapshots(tree_root);
3172
3173                 trans = btrfs_start_transaction(tree_root, 1);
3174                 btrfs_commit_transaction(trans, tree_root);
3175                 mutex_lock(&root->fs_info->alloc_mutex);
3176                 goto again;
3177         }
3178
3179         /*
3180          * we've freed all the extents, now remove the block
3181          * group item from the tree
3182          */
3183         mutex_unlock(&root->fs_info->alloc_mutex);
3184
3185         trans = btrfs_start_transaction(root, 1);
3186         mutex_lock(&root->fs_info->alloc_mutex);
3187         memcpy(&key, &shrink_block_group->key, sizeof(key));
3188
3189         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3190         if (ret > 0)
3191                 ret = -EIO;
3192         if (ret < 0)
3193                 goto out;
3194
3195         clear_extent_bits(&info->block_group_cache, key.objectid,
3196                           key.objectid + key.offset - 1,
3197                           (unsigned int)-1, GFP_NOFS);
3198
3199
3200         clear_extent_bits(&info->free_space_cache,
3201                            key.objectid, key.objectid + key.offset - 1,
3202                            (unsigned int)-1, GFP_NOFS);
3203
3204         memset(shrink_block_group, 0, sizeof(*shrink_block_group));
3205         kfree(shrink_block_group);
3206
3207         btrfs_del_item(trans, root, path);
3208         btrfs_release_path(root, path);
3209         mutex_unlock(&root->fs_info->alloc_mutex);
3210         btrfs_commit_transaction(trans, root);
3211
3212         mutex_lock(&root->fs_info->alloc_mutex);
3213
3214         /* the code to unpin extents might set a few bits in the free
3215          * space cache for this range again
3216          */
3217         clear_extent_bits(&info->free_space_cache,
3218                            key.objectid, key.objectid + key.offset - 1,
3219                            (unsigned int)-1, GFP_NOFS);
3220 out:
3221         btrfs_free_path(path);
3222         mutex_unlock(&root->fs_info->alloc_mutex);
3223         return ret;
3224 }
3225
3226 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
3227                            struct btrfs_key *key)
3228 {
3229         int ret = 0;
3230         struct btrfs_key found_key;
3231         struct extent_buffer *leaf;
3232         int slot;
3233
3234         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
3235         if (ret < 0)
3236                 goto out;
3237
3238         while(1) {
3239                 slot = path->slots[0];
3240                 leaf = path->nodes[0];
3241                 if (slot >= btrfs_header_nritems(leaf)) {
3242                         ret = btrfs_next_leaf(root, path);
3243                         if (ret == 0)
3244                                 continue;
3245                         if (ret < 0)
3246                                 goto out;
3247                         break;
3248                 }
3249                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3250
3251                 if (found_key.objectid >= key->objectid &&
3252                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3253                         ret = 0;
3254                         goto out;
3255                 }
3256                 path->slots[0]++;
3257         }
3258         ret = -ENOENT;
3259 out:
3260         return ret;
3261 }
3262
3263 int btrfs_read_block_groups(struct btrfs_root *root)
3264 {
3265         struct btrfs_path *path;
3266         int ret;
3267         int bit;
3268         struct btrfs_block_group_cache *cache;
3269         struct btrfs_fs_info *info = root->fs_info;
3270         struct btrfs_space_info *space_info;
3271         struct extent_io_tree *block_group_cache;
3272         struct btrfs_key key;
3273         struct btrfs_key found_key;
3274         struct extent_buffer *leaf;
3275
3276         block_group_cache = &info->block_group_cache;
3277         root = info->extent_root;
3278         key.objectid = 0;
3279         key.offset = 0;
3280         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3281         path = btrfs_alloc_path();
3282         if (!path)
3283                 return -ENOMEM;
3284
3285         mutex_lock(&root->fs_info->alloc_mutex);
3286         while(1) {
3287                 ret = find_first_block_group(root, path, &key);
3288                 if (ret > 0) {
3289                         ret = 0;
3290                         goto error;
3291                 }
3292                 if (ret != 0)
3293                         goto error;
3294
3295                 leaf = path->nodes[0];
3296                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3297                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3298                 if (!cache) {
3299                         ret = -ENOMEM;
3300                         break;
3301                 }
3302
3303                 read_extent_buffer(leaf, &cache->item,
3304                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
3305                                    sizeof(cache->item));
3306                 memcpy(&cache->key, &found_key, sizeof(found_key));
3307
3308                 key.objectid = found_key.objectid + found_key.offset;
3309                 btrfs_release_path(root, path);
3310                 cache->flags = btrfs_block_group_flags(&cache->item);
3311                 bit = 0;
3312                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
3313                         bit = BLOCK_GROUP_DATA;
3314                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
3315                         bit = BLOCK_GROUP_SYSTEM;
3316                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
3317                         bit = BLOCK_GROUP_METADATA;
3318                 }
3319                 set_avail_alloc_bits(info, cache->flags);
3320
3321                 ret = update_space_info(info, cache->flags, found_key.offset,
3322                                         btrfs_block_group_used(&cache->item),
3323                                         &space_info);
3324                 BUG_ON(ret);
3325                 cache->space_info = space_info;
3326
3327                 /* use EXTENT_LOCKED to prevent merging */
3328                 set_extent_bits(block_group_cache, found_key.objectid,
3329                                 found_key.objectid + found_key.offset - 1,
3330                                 bit | EXTENT_LOCKED, GFP_NOFS);
3331                 set_state_private(block_group_cache, found_key.objectid,
3332                                   (unsigned long)cache);
3333
3334                 if (key.objectid >=
3335                     btrfs_super_total_bytes(&info->super_copy))
3336                         break;
3337         }
3338         ret = 0;
3339 error:
3340         btrfs_free_path(path);
3341         mutex_unlock(&root->fs_info->alloc_mutex);
3342         return ret;
3343 }
3344
3345 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3346                            struct btrfs_root *root, u64 bytes_used,
3347                            u64 type, u64 chunk_objectid, u64 chunk_offset,
3348                            u64 size)
3349 {
3350         int ret;
3351         int bit = 0;
3352         struct btrfs_root *extent_root;
3353         struct btrfs_block_group_cache *cache;
3354         struct extent_io_tree *block_group_cache;
3355
3356         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
3357         extent_root = root->fs_info->extent_root;
3358         block_group_cache = &root->fs_info->block_group_cache;
3359
3360         cache = kzalloc(sizeof(*cache), GFP_NOFS);
3361         BUG_ON(!cache);
3362         cache->key.objectid = chunk_offset;
3363         cache->key.offset = size;
3364         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3365
3366         btrfs_set_block_group_used(&cache->item, bytes_used);
3367         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
3368         cache->flags = type;
3369         btrfs_set_block_group_flags(&cache->item, type);
3370
3371         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
3372                                 &cache->space_info);
3373         BUG_ON(ret);
3374
3375         bit = block_group_state_bits(type);
3376         set_extent_bits(block_group_cache, chunk_offset,
3377                         chunk_offset + size - 1,
3378                         bit | EXTENT_LOCKED, GFP_NOFS);
3379
3380         set_state_private(block_group_cache, chunk_offset,
3381                           (unsigned long)cache);
3382         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3383                                 sizeof(cache->item));
3384         BUG_ON(ret);
3385
3386         finish_current_insert(trans, extent_root);
3387         ret = del_pending_extents(trans, extent_root);
3388         BUG_ON(ret);
3389         set_avail_alloc_bits(extent_root->fs_info, type);
3390
3391         return 0;
3392 }