Btrfs: Let some locks go during defrag and snapshot dropping
[linux-block.git] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "print-tree.h"
23 #include "transaction.h"
24
25 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
26                             *orig_root, u64 num_blocks, u64 empty_size,
27                             u64 search_start,
28                             u64 search_end, u64 hint_block,
29                             struct btrfs_key *ins, u64 exclude_start,
30                             u64 exclude_nr, int data);
31 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
32                                  btrfs_root *extent_root);
33 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
34                                btrfs_root *extent_root);
35
36 static int cache_block_group(struct btrfs_root *root,
37                              struct btrfs_block_group_cache *block_group)
38 {
39         struct btrfs_path *path;
40         int ret;
41         struct btrfs_key key;
42         struct btrfs_leaf *leaf;
43         struct radix_tree_root *extent_radix;
44         int slot;
45         u64 i;
46         u64 last = 0;
47         u64 hole_size;
48         u64 limit;
49         int found = 0;
50
51         root = root->fs_info->extent_root;
52         extent_radix = &root->fs_info->extent_map_radix;
53
54         if (block_group->cached)
55                 return 0;
56         if (block_group->data)
57                 return 0;
58         path = btrfs_alloc_path();
59         if (!path)
60                 return -ENOMEM;
61         path->reada = 1;
62         key.objectid = block_group->key.objectid;
63         key.flags = 0;
64         key.offset = 0;
65         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
66         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
67         if (ret < 0)
68                 return ret;
69         if (ret && path->slots[0] > 0)
70                 path->slots[0]--;
71         limit = block_group->key.objectid + block_group->key.offset;
72         while(1) {
73                 leaf = btrfs_buffer_leaf(path->nodes[0]);
74                 slot = path->slots[0];
75                 if (slot >= btrfs_header_nritems(&leaf->header)) {
76                         ret = btrfs_next_leaf(root, path);
77                         if (ret < 0)
78                                 goto err;
79                         if (ret == 0) {
80                                 continue;
81                         } else {
82                                 if (found) {
83                                         hole_size = block_group->key.objectid +
84                                                 block_group->key.offset - last;
85                                 } else {
86                                         last = block_group->key.objectid;
87                                         hole_size = block_group->key.offset;
88                                 }
89                                 for (i = 0; i < hole_size; i++) {
90                                         set_radix_bit(extent_radix,
91                                                       last + i);
92                                 }
93                                 break;
94                         }
95                 }
96                 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
97                 if (key.objectid >= block_group->key.objectid +
98                     block_group->key.offset) {
99                         if (found) {
100                                 hole_size = block_group->key.objectid +
101                                         block_group->key.offset - last;
102                         } else {
103                                 last = block_group->key.objectid;
104                                 hole_size = block_group->key.offset;
105                         }
106                         for (i = 0; i < hole_size; i++) {
107                                 set_radix_bit(extent_radix, last + i);
108                         }
109                         break;
110                 }
111                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
112                         if (!found) {
113                                 last = key.objectid + key.offset;
114                                 found = 1;
115                         } else {
116                                 hole_size = key.objectid - last;
117                                 for (i = 0; i < hole_size; i++) {
118                                         set_radix_bit(extent_radix, last + i);
119                                 }
120                                 last = key.objectid + key.offset;
121                         }
122                 }
123                 path->slots[0]++;
124         }
125
126         block_group->cached = 1;
127 err:
128         btrfs_free_path(path);
129         return 0;
130 }
131
132 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
133                                                          btrfs_fs_info *info,
134                                                          u64 blocknr)
135 {
136         struct btrfs_block_group_cache *block_group;
137         int ret;
138
139         ret = radix_tree_gang_lookup(&info->block_group_radix,
140                                      (void **)&block_group,
141                                      blocknr, 1);
142         if (ret) {
143                 if (block_group->key.objectid <= blocknr && blocknr <=
144                     block_group->key.objectid + block_group->key.offset)
145                         return block_group;
146         }
147         ret = radix_tree_gang_lookup(&info->block_group_data_radix,
148                                      (void **)&block_group,
149                                      blocknr, 1);
150         if (ret) {
151                 if (block_group->key.objectid <= blocknr && blocknr <=
152                     block_group->key.objectid + block_group->key.offset)
153                         return block_group;
154         }
155         return NULL;
156 }
157
158 static u64 leaf_range(struct btrfs_root *root)
159 {
160         u64 size = BTRFS_LEAF_DATA_SIZE(root);
161         do_div(size, sizeof(struct btrfs_extent_item) +
162                 sizeof(struct btrfs_item));
163         return size;
164 }
165
166 static u64 find_search_start(struct btrfs_root *root,
167                              struct btrfs_block_group_cache **cache_ret,
168                              u64 search_start, int num)
169 {
170         unsigned long gang[8];
171         int ret;
172         struct btrfs_block_group_cache *cache = *cache_ret;
173         u64 last = max(search_start, cache->key.objectid);
174
175         if (cache->data)
176                 goto out;
177         if (num > 1) {
178                 last = max(last, cache->last_prealloc);
179         }
180 again:
181         ret = cache_block_group(root, cache);
182         if (ret)
183                 goto out;
184         while(1) {
185                 ret = find_first_radix_bit(&root->fs_info->extent_map_radix,
186                                            gang, last, ARRAY_SIZE(gang));
187                 if (!ret)
188                         goto out;
189                 last = gang[ret-1] + 1;
190                 if (num > 1) {
191                         if (ret != ARRAY_SIZE(gang)) {
192                                 goto new_group;
193                         }
194                         if (gang[ret-1] - gang[0] > leaf_range(root)) {
195                                 continue;
196                         }
197                 }
198                 if (gang[0] >= cache->key.objectid + cache->key.offset) {
199                         goto new_group;
200                 }
201                 return gang[0];
202         }
203 out:
204         return max(cache->last_alloc, search_start);
205
206 new_group:
207         cache = btrfs_lookup_block_group(root->fs_info,
208                                          last + cache->key.offset - 1);
209         if (!cache) {
210                 return max((*cache_ret)->last_alloc, search_start);
211         }
212         cache = btrfs_find_block_group(root, cache,
213                                        last + cache->key.offset - 1, 0, 0);
214         *cache_ret = cache;
215         goto again;
216 }
217
218 static u64 div_factor(u64 num, int factor)
219 {
220         num *= factor;
221         do_div(num, 10);
222         return num;
223 }
224
225 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
226                                                  struct btrfs_block_group_cache
227                                                  *hint, u64 search_start,
228                                                  int data, int owner)
229 {
230         struct btrfs_block_group_cache *cache[8];
231         struct btrfs_block_group_cache *found_group = NULL;
232         struct btrfs_fs_info *info = root->fs_info;
233         struct radix_tree_root *radix;
234         struct radix_tree_root *swap_radix;
235         u64 used;
236         u64 last = 0;
237         u64 hint_last;
238         int i;
239         int ret;
240         int full_search = 0;
241         int factor = 8;
242         int data_swap = 0;
243
244         if (!owner)
245                 factor = 5;
246
247         if (data) {
248                 radix = &info->block_group_data_radix;
249                 swap_radix = &info->block_group_radix;
250         } else {
251                 radix = &info->block_group_radix;
252                 swap_radix = &info->block_group_data_radix;
253         }
254
255         if (search_start) {
256                 struct btrfs_block_group_cache *shint;
257                 shint = btrfs_lookup_block_group(info, search_start);
258                 if (shint->data == data) {
259                         used = btrfs_block_group_used(&shint->item);
260                         if (used + shint->pinned <
261                             div_factor(shint->key.offset, factor)) {
262                                 return shint;
263                         }
264                 }
265         }
266         if (hint && hint->data == data) {
267                 used = btrfs_block_group_used(&hint->item);
268                 if (used + hint->pinned <
269                     div_factor(hint->key.offset, factor)) {
270                         return hint;
271                 }
272                 if (used >= div_factor(hint->key.offset, 8)) {
273                         radix_tree_tag_clear(radix,
274                                              hint->key.objectid +
275                                              hint->key.offset - 1,
276                                              BTRFS_BLOCK_GROUP_AVAIL);
277                 }
278                 last = hint->key.offset * 3;
279                 if (hint->key.objectid >= last)
280                         last = max(search_start + hint->key.offset - 1,
281                                    hint->key.objectid - last);
282                 else
283                         last = hint->key.objectid + hint->key.offset;
284                 hint_last = last;
285         } else {
286                 if (hint)
287                         hint_last = max(hint->key.objectid, search_start);
288                 else
289                         hint_last = search_start;
290
291                 last = hint_last;
292         }
293         while(1) {
294                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
295                                                  last, ARRAY_SIZE(cache),
296                                                  BTRFS_BLOCK_GROUP_AVAIL);
297                 if (!ret)
298                         break;
299                 for (i = 0; i < ret; i++) {
300                         last = cache[i]->key.objectid +
301                                 cache[i]->key.offset;
302                         used = btrfs_block_group_used(&cache[i]->item);
303                         if (used + cache[i]->pinned <
304                             div_factor(cache[i]->key.offset, factor)) {
305                                 found_group = cache[i];
306                                 goto found;
307                         }
308                         if (used >= div_factor(cache[i]->key.offset, 8)) {
309                                 radix_tree_tag_clear(radix,
310                                                      cache[i]->key.objectid +
311                                                      cache[i]->key.offset - 1,
312                                                      BTRFS_BLOCK_GROUP_AVAIL);
313                         }
314                 }
315                 cond_resched();
316         }
317         last = hint_last;
318 again:
319         while(1) {
320                 ret = radix_tree_gang_lookup(radix, (void **)cache,
321                                              last, ARRAY_SIZE(cache));
322                 if (!ret)
323                         break;
324                 for (i = 0; i < ret; i++) {
325                         last = cache[i]->key.objectid +
326                                 cache[i]->key.offset;
327                         used = btrfs_block_group_used(&cache[i]->item);
328                         if (used + cache[i]->pinned < cache[i]->key.offset) {
329                                 found_group = cache[i];
330                                 goto found;
331                         }
332                         if (used >= cache[i]->key.offset) {
333                                 radix_tree_tag_clear(radix,
334                                                      cache[i]->key.objectid +
335                                                      cache[i]->key.offset - 1,
336                                                      BTRFS_BLOCK_GROUP_AVAIL);
337                         }
338                 }
339                 cond_resched();
340         }
341         if (!full_search) {
342                 last = search_start;
343                 full_search = 1;
344                 goto again;
345         }
346         if (!data_swap) {
347                 struct radix_tree_root *tmp = radix;
348                 data_swap = 1;
349                 radix = swap_radix;
350                 swap_radix = tmp;
351                 last = search_start;
352                 goto again;
353         }
354         if (!found_group) {
355                 ret = radix_tree_gang_lookup(radix,
356                                              (void **)&found_group, 0, 1);
357                 if (ret == 0) {
358                         ret = radix_tree_gang_lookup(swap_radix,
359                                                      (void **)&found_group,
360                                                      0, 1);
361                 }
362                 BUG_ON(ret != 1);
363         }
364 found:
365         return found_group;
366 }
367
368 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
369                                 struct btrfs_root *root,
370                                 u64 blocknr, u64 num_blocks)
371 {
372         struct btrfs_path *path;
373         int ret;
374         struct btrfs_key key;
375         struct btrfs_leaf *l;
376         struct btrfs_extent_item *item;
377         struct btrfs_key ins;
378         u32 refs;
379
380         path = btrfs_alloc_path();
381         if (!path)
382                 return -ENOMEM;
383         ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, 0,
384                                (u64)-1, 0, &ins, 0, 0, 0);
385         if (ret) {
386                 btrfs_free_path(path);
387                 return ret;
388         }
389         key.objectid = blocknr;
390         key.flags = 0;
391         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
392         key.offset = num_blocks;
393         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
394                                 0, 1);
395         if (ret < 0)
396                 return ret;
397         if (ret != 0) {
398                 BUG();
399         }
400         BUG_ON(ret != 0);
401         l = btrfs_buffer_leaf(path->nodes[0]);
402         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
403         refs = btrfs_extent_refs(item);
404         btrfs_set_extent_refs(item, refs + 1);
405         btrfs_mark_buffer_dirty(path->nodes[0]);
406
407         btrfs_release_path(root->fs_info->extent_root, path);
408         btrfs_free_path(path);
409         finish_current_insert(trans, root->fs_info->extent_root);
410         del_pending_extents(trans, root->fs_info->extent_root);
411         return 0;
412 }
413
414 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
415                              struct btrfs_root *root, u64 blocknr,
416                              u64 num_blocks, u32 *refs)
417 {
418         struct btrfs_path *path;
419         int ret;
420         struct btrfs_key key;
421         struct btrfs_leaf *l;
422         struct btrfs_extent_item *item;
423
424         path = btrfs_alloc_path();
425         key.objectid = blocknr;
426         key.offset = num_blocks;
427         key.flags = 0;
428         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
429         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
430                                 0, 0);
431         if (ret < 0)
432                 goto out;
433         if (ret != 0)
434                 BUG();
435         l = btrfs_buffer_leaf(path->nodes[0]);
436         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
437         *refs = btrfs_extent_refs(item);
438 out:
439         btrfs_free_path(path);
440         return 0;
441 }
442
443 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
444                        struct btrfs_root *root)
445 {
446         return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1);
447 }
448
449 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
450                   struct buffer_head *buf)
451 {
452         u64 blocknr;
453         struct btrfs_node *buf_node;
454         struct btrfs_leaf *buf_leaf;
455         struct btrfs_disk_key *key;
456         struct btrfs_file_extent_item *fi;
457         int i;
458         int leaf;
459         int ret;
460         int faili;
461         int err;
462
463         if (!root->ref_cows)
464                 return 0;
465         buf_node = btrfs_buffer_node(buf);
466         leaf = btrfs_is_leaf(buf_node);
467         buf_leaf = btrfs_buffer_leaf(buf);
468         for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
469                 if (leaf) {
470                         u64 disk_blocknr;
471                         key = &buf_leaf->items[i].key;
472                         if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
473                                 continue;
474                         fi = btrfs_item_ptr(buf_leaf, i,
475                                             struct btrfs_file_extent_item);
476                         if (btrfs_file_extent_type(fi) ==
477                             BTRFS_FILE_EXTENT_INLINE)
478                                 continue;
479                         disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
480                         if (disk_blocknr == 0)
481                                 continue;
482                         ret = btrfs_inc_extent_ref(trans, root, disk_blocknr,
483                                     btrfs_file_extent_disk_num_blocks(fi));
484                         if (ret) {
485                                 faili = i;
486                                 goto fail;
487                         }
488                 } else {
489                         blocknr = btrfs_node_blockptr(buf_node, i);
490                         ret = btrfs_inc_extent_ref(trans, root, blocknr, 1);
491                         if (ret) {
492                                 faili = i;
493                                 goto fail;
494                         }
495                 }
496         }
497         return 0;
498 fail:
499         WARN_ON(1);
500         for (i =0; i < faili; i++) {
501                 if (leaf) {
502                         u64 disk_blocknr;
503                         key = &buf_leaf->items[i].key;
504                         if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
505                                 continue;
506                         fi = btrfs_item_ptr(buf_leaf, i,
507                                             struct btrfs_file_extent_item);
508                         if (btrfs_file_extent_type(fi) ==
509                             BTRFS_FILE_EXTENT_INLINE)
510                                 continue;
511                         disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
512                         if (disk_blocknr == 0)
513                                 continue;
514                         err = btrfs_free_extent(trans, root, disk_blocknr,
515                                     btrfs_file_extent_disk_num_blocks(fi), 0);
516                         BUG_ON(err);
517                 } else {
518                         blocknr = btrfs_node_blockptr(buf_node, i);
519                         err = btrfs_free_extent(trans, root, blocknr, 1, 0);
520                         BUG_ON(err);
521                 }
522         }
523         return ret;
524 }
525
526 static int write_one_cache_group(struct btrfs_trans_handle *trans,
527                                  struct btrfs_root *root,
528                                  struct btrfs_path *path,
529                                  struct btrfs_block_group_cache *cache)
530 {
531         int ret;
532         int pending_ret;
533         struct btrfs_root *extent_root = root->fs_info->extent_root;
534         struct btrfs_block_group_item *bi;
535         struct btrfs_key ins;
536
537         ret = find_free_extent(trans, extent_root, 0, 0, 0, (u64)-1, 0, &ins,
538                                0, 0, 0);
539         /* FIXME, set bit to recalc cache groups on next mount */
540         if (ret)
541                 return ret;
542         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
543         if (ret < 0)
544                 goto fail;
545         BUG_ON(ret);
546         bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
547                             struct btrfs_block_group_item);
548         memcpy(bi, &cache->item, sizeof(*bi));
549         btrfs_mark_buffer_dirty(path->nodes[0]);
550         btrfs_release_path(extent_root, path);
551 fail:
552         finish_current_insert(trans, extent_root);
553         pending_ret = del_pending_extents(trans, extent_root);
554         if (ret)
555                 return ret;
556         if (pending_ret)
557                 return pending_ret;
558         if (cache->data)
559                 cache->last_alloc = cache->first_free;
560         return 0;
561
562 }
563
564 static int write_dirty_block_radix(struct btrfs_trans_handle *trans,
565                                    struct btrfs_root *root,
566                                    struct radix_tree_root *radix)
567 {
568         struct btrfs_block_group_cache *cache[8];
569         int ret;
570         int err = 0;
571         int werr = 0;
572         int i;
573         struct btrfs_path *path;
574         unsigned long off = 0;
575
576         path = btrfs_alloc_path();
577         if (!path)
578                 return -ENOMEM;
579
580         while(1) {
581                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
582                                                  off, ARRAY_SIZE(cache),
583                                                  BTRFS_BLOCK_GROUP_DIRTY);
584                 if (!ret)
585                         break;
586                 for (i = 0; i < ret; i++) {
587                         err = write_one_cache_group(trans, root,
588                                                     path, cache[i]);
589                         /*
590                          * if we fail to write the cache group, we want
591                          * to keep it marked dirty in hopes that a later
592                          * write will work
593                          */
594                         if (err) {
595                                 werr = err;
596                                 off = cache[i]->key.objectid +
597                                         cache[i]->key.offset;
598                                 continue;
599                         }
600
601                         radix_tree_tag_clear(radix, cache[i]->key.objectid +
602                                              cache[i]->key.offset - 1,
603                                              BTRFS_BLOCK_GROUP_DIRTY);
604                 }
605         }
606         btrfs_free_path(path);
607         return werr;
608 }
609
610 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
611                                    struct btrfs_root *root)
612 {
613         int ret;
614         int ret2;
615         ret = write_dirty_block_radix(trans, root,
616                                       &root->fs_info->block_group_radix);
617         ret2 = write_dirty_block_radix(trans, root,
618                                       &root->fs_info->block_group_data_radix);
619         if (ret)
620                 return ret;
621         if (ret2)
622                 return ret2;
623         return 0;
624 }
625
626 static int update_block_group(struct btrfs_trans_handle *trans,
627                               struct btrfs_root *root,
628                               u64 blocknr, u64 num, int alloc, int mark_free,
629                               int data)
630 {
631         struct btrfs_block_group_cache *cache;
632         struct btrfs_fs_info *info = root->fs_info;
633         u64 total = num;
634         u64 old_val;
635         u64 block_in_group;
636         u64 i;
637         int ret;
638
639         while(total) {
640                 cache = btrfs_lookup_block_group(info, blocknr);
641                 if (!cache) {
642                         return -1;
643                 }
644                 block_in_group = blocknr - cache->key.objectid;
645                 WARN_ON(block_in_group > cache->key.offset);
646                 radix_tree_tag_set(cache->radix, cache->key.objectid +
647                                    cache->key.offset - 1,
648                                    BTRFS_BLOCK_GROUP_DIRTY);
649
650                 old_val = btrfs_block_group_used(&cache->item);
651                 num = min(total, cache->key.offset - block_in_group);
652                 if (alloc) {
653                         if (blocknr > cache->last_alloc)
654                                 cache->last_alloc = blocknr;
655                         if (!cache->data) {
656                                 for (i = 0; i < num; i++) {
657                                         clear_radix_bit(&info->extent_map_radix,
658                                                         blocknr + i);
659                                 }
660                         }
661                         if (cache->data != data &&
662                             old_val < (cache->key.offset >> 1)) {
663                                 cache->data = data;
664                                 radix_tree_delete(cache->radix,
665                                                   cache->key.objectid +
666                                                   cache->key.offset - 1);
667
668                                 if (data) {
669                                         cache->radix =
670                                                 &info->block_group_data_radix;
671                                         cache->item.flags |=
672                                                 BTRFS_BLOCK_GROUP_DATA;
673                                 } else {
674                                         cache->radix = &info->block_group_radix;
675                                         cache->item.flags &=
676                                                 ~BTRFS_BLOCK_GROUP_DATA;
677                                 }
678                                 ret = radix_tree_insert(cache->radix,
679                                                         cache->key.objectid +
680                                                         cache->key.offset - 1,
681                                                         (void *)cache);
682                         }
683                         old_val += num;
684                 } else {
685                         old_val -= num;
686                         if (blocknr < cache->first_free)
687                                 cache->first_free = blocknr;
688                         if (!cache->data && mark_free) {
689                                 for (i = 0; i < num; i++) {
690                                         set_radix_bit(&info->extent_map_radix,
691                                                       blocknr + i);
692                                 }
693                         }
694                         if (old_val < (cache->key.offset >> 1) &&
695                             old_val + num >= (cache->key.offset >> 1)) {
696                                 radix_tree_tag_set(cache->radix,
697                                                    cache->key.objectid +
698                                                    cache->key.offset - 1,
699                                                    BTRFS_BLOCK_GROUP_AVAIL);
700                         }
701                 }
702                 btrfs_set_block_group_used(&cache->item, old_val);
703                 total -= num;
704                 blocknr += num;
705         }
706         return 0;
707 }
708
/*
 * Page removal is currently switched off: this always returns 0 and
 * touches neither @mapping nor @index.  The call that used to follow the
 * early return, invalidate_mapping_pages(mapping, index, index), was
 * unreachable.
 */
static int try_remove_page(struct address_space *mapping, unsigned long index)
{
	return 0;
}
716
717 int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy)
718 {
719         unsigned long gang[8];
720         u64 last = 0;
721         struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
722         int ret;
723         int i;
724
725         while(1) {
726                 ret = find_first_radix_bit(pinned_radix, gang, last,
727                                            ARRAY_SIZE(gang));
728                 if (!ret)
729                         break;
730                 for (i = 0 ; i < ret; i++) {
731                         set_radix_bit(copy, gang[i]);
732                         last = gang[i] + 1;
733                 }
734         }
735         return 0;
736 }
737
738 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
739                                struct btrfs_root *root,
740                                struct radix_tree_root *unpin_radix)
741 {
742         unsigned long gang[8];
743         struct inode *btree_inode = root->fs_info->btree_inode;
744         struct btrfs_block_group_cache *block_group;
745         u64 first = 0;
746         int ret;
747         int i;
748         struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
749         struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix;
750
751         while(1) {
752                 ret = find_first_radix_bit(unpin_radix, gang, 0,
753                                            ARRAY_SIZE(gang));
754                 if (!ret)
755                         break;
756                 if (!first)
757                         first = gang[0];
758                 for (i = 0; i < ret; i++) {
759                         clear_radix_bit(pinned_radix, gang[i]);
760                         clear_radix_bit(unpin_radix, gang[i]);
761                         block_group = btrfs_lookup_block_group(root->fs_info,
762                                                                gang[i]);
763                         if (block_group) {
764                                 WARN_ON(block_group->pinned == 0);
765                                 block_group->pinned--;
766                                 if (gang[i] < block_group->last_alloc)
767                                         block_group->last_alloc = gang[i];
768                                 if (gang[i] < block_group->last_prealloc)
769                                         block_group->last_prealloc = gang[i];
770                                 if (!block_group->data)
771                                         set_radix_bit(extent_radix, gang[i]);
772                         }
773                         try_remove_page(btree_inode->i_mapping,
774                                         gang[i] << (PAGE_CACHE_SHIFT -
775                                                     btree_inode->i_blkbits));
776                 }
777         }
778         return 0;
779 }
780
781 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
782                                  btrfs_root *extent_root)
783 {
784         struct btrfs_key ins;
785         struct btrfs_extent_item extent_item;
786         int i;
787         int ret;
788         u64 super_blocks_used;
789         struct btrfs_fs_info *info = extent_root->fs_info;
790
791         btrfs_set_extent_refs(&extent_item, 1);
792         ins.offset = 1;
793         ins.flags = 0;
794         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
795         btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid);
796
797         for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) {
798                 ins.objectid = extent_root->fs_info->extent_tree_insert[i];
799                 super_blocks_used = btrfs_super_blocks_used(&info->super_copy);
800                 btrfs_set_super_blocks_used(&info->super_copy,
801                                             super_blocks_used + 1);
802                 ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
803                                         sizeof(extent_item));
804                 BUG_ON(ret);
805         }
806         extent_root->fs_info->extent_tree_insert_nr = 0;
807         return 0;
808 }
809
810 static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
811 {
812         int err;
813         struct btrfs_header *header;
814         struct buffer_head *bh;
815
816         if (!pending) {
817                 bh = btrfs_find_tree_block(root, blocknr);
818                 if (bh) {
819                         if (buffer_uptodate(bh)) {
820                                 u64 transid =
821                                     root->fs_info->running_transaction->transid;
822                                 header = btrfs_buffer_header(bh);
823                                 if (btrfs_header_generation(header) ==
824                                     transid) {
825                                         btrfs_block_release(root, bh);
826                                         return 0;
827                                 }
828                         }
829                         btrfs_block_release(root, bh);
830                 }
831                 err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
832                 if (!err) {
833                         struct btrfs_block_group_cache *cache;
834                         cache = btrfs_lookup_block_group(root->fs_info,
835                                                          blocknr);
836                         if (cache)
837                                 cache->pinned++;
838                 }
839         } else {
840                 err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
841         }
842         BUG_ON(err < 0);
843         return 0;
844 }
845
846 /*
847  * remove an extent from the root, returns 0 on success
848  */
849 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
850                          *root, u64 blocknr, u64 num_blocks, int pin,
851                          int mark_free)
852 {
853         struct btrfs_path *path;
854         struct btrfs_key key;
855         struct btrfs_fs_info *info = root->fs_info;
856         struct btrfs_root *extent_root = info->extent_root;
857         int ret;
858         struct btrfs_extent_item *ei;
859         struct btrfs_key ins;
860         u32 refs;
861
862         key.objectid = blocknr;
863         key.flags = 0;
864         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
865         key.offset = num_blocks;
866
867         path = btrfs_alloc_path();
868         if (!path)
869                 return -ENOMEM;
870
871         ret = find_free_extent(trans, root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0);
872         if (ret) {
873                 btrfs_free_path(path);
874                 return ret;
875         }
876
877         ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
878         if (ret < 0)
879                 return ret;
880         BUG_ON(ret);
881         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
882                             struct btrfs_extent_item);
883         BUG_ON(ei->refs == 0);
884         refs = btrfs_extent_refs(ei) - 1;
885         btrfs_set_extent_refs(ei, refs);
886         btrfs_mark_buffer_dirty(path->nodes[0]);
887         if (refs == 0) {
888                 u64 super_blocks_used;
889
890                 if (pin) {
891                         ret = pin_down_block(root, blocknr, 0);
892                         BUG_ON(ret);
893                 }
894
895                 super_blocks_used = btrfs_super_blocks_used(&info->super_copy);
896                 btrfs_set_super_blocks_used(&info->super_copy,
897                                             super_blocks_used - num_blocks);
898                 ret = btrfs_del_item(trans, extent_root, path);
899                 if (ret) {
900                         return ret;
901                 }
902                 ret = update_block_group(trans, root, blocknr, num_blocks, 0,
903                                          mark_free, 0);
904                 BUG_ON(ret);
905         }
906         btrfs_free_path(path);
907         finish_current_insert(trans, extent_root);
908         return ret;
909 }
910
911 /*
912  * find all the blocks marked as pending in the radix tree and remove
913  * them from the extent map
914  */
915 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
916                                btrfs_root *extent_root)
917 {
918         int ret;
919         int wret;
920         int err = 0;
921         unsigned long gang[4];
922         int i;
923         struct radix_tree_root *pending_radix;
924         struct radix_tree_root *pinned_radix;
925         struct btrfs_block_group_cache *cache;
926
927         pending_radix = &extent_root->fs_info->pending_del_radix;
928         pinned_radix = &extent_root->fs_info->pinned_radix;
929
930         while(1) {
931                 ret = find_first_radix_bit(pending_radix, gang, 0,
932                                            ARRAY_SIZE(gang));
933                 if (!ret)
934                         break;
935                 for (i = 0; i < ret; i++) {
936                         wret = set_radix_bit(pinned_radix, gang[i]);
937                         if (wret == 0) {
938                                 cache =
939                                   btrfs_lookup_block_group(extent_root->fs_info,
940                                                            gang[i]);
941                                 if (cache)
942                                         cache->pinned++;
943                         }
944                         if (wret < 0) {
945                                 printk(KERN_CRIT "set_radix_bit, err %d\n",
946                                        wret);
947                                 BUG_ON(wret < 0);
948                         }
949                         wret = clear_radix_bit(pending_radix, gang[i]);
950                         BUG_ON(wret);
951                         wret = __free_extent(trans, extent_root,
952                                              gang[i], 1, 0, 0);
953                         if (wret)
954                                 err = wret;
955                 }
956         }
957         return err;
958 }
959
960 /*
961  * remove an extent from the root, returns 0 on success
962  */
963 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
964                       *root, u64 blocknr, u64 num_blocks, int pin)
965 {
966         struct btrfs_root *extent_root = root->fs_info->extent_root;
967         int pending_ret;
968         int ret;
969
970         if (root == extent_root) {
971                 pin_down_block(root, blocknr, 1);
972                 return 0;
973         }
974         ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0);
975         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
976         return ret ? ret : pending_ret;
977 }
978
979 /*
980  * walks the btree of allocated extents and find a hole of a given size.
981  * The key ins is changed to record the hole:
982  * ins->objectid == block start
983  * ins->flags = BTRFS_EXTENT_ITEM_KEY
984  * ins->offset == number of blocks
985  * Any available blocks before search_start are skipped.
986  */
987 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
988                             *orig_root, u64 num_blocks, u64 empty_size,
989                             u64 search_start, u64 search_end, u64 hint_block,
990                             struct btrfs_key *ins, u64 exclude_start,
991                             u64 exclude_nr, int data)
992 {
993         struct btrfs_path *path;
994         struct btrfs_key key;
995         int ret;
996         u64 hole_size = 0;
997         int slot = 0;
998         u64 last_block = 0;
999         u64 test_block;
1000         u64 orig_search_start = search_start;
1001         int start_found;
1002         struct btrfs_leaf *l;
1003         struct btrfs_root * root = orig_root->fs_info->extent_root;
1004         struct btrfs_fs_info *info = root->fs_info;
1005         int total_needed = num_blocks;
1006         int total_found = 0;
1007         int fill_prealloc = 0;
1008         int level;
1009         struct btrfs_block_group_cache *block_group;
1010         int full_scan = 0;
1011         int wrapped = 0;
1012         u64 limit;
1013
1014         ins->flags = 0;
1015         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1016
1017         level = btrfs_header_level(btrfs_buffer_header(root->node));
1018         if (num_blocks == 0) {
1019                 fill_prealloc = 1;
1020                 num_blocks = 1;
1021                 total_needed = (min(level + 1, BTRFS_MAX_LEVEL)) * 6;
1022         }
1023         if (fill_prealloc) {
1024                 u64 first;
1025                 int nr = info->extent_tree_prealloc_nr;
1026                 first = info->extent_tree_prealloc[nr - 1];
1027                 if (info->extent_tree_prealloc_nr >= total_needed &&
1028                     first >= search_start) {
1029                         ins->objectid = info->extent_tree_prealloc[0];
1030                         ins->offset = 1;
1031                         return 0;
1032                 }
1033                 info->extent_tree_prealloc_nr = 0;
1034         }
1035         if (search_end == (u64)-1)
1036                 search_end = btrfs_super_total_blocks(&info->super_copy);
1037         if (hint_block) {
1038                 block_group = btrfs_lookup_block_group(info, hint_block);
1039                 block_group = btrfs_find_block_group(root, block_group,
1040                                                      hint_block, data, 1);
1041         } else {
1042                 block_group = btrfs_find_block_group(root,
1043                                                      trans->block_group, 0,
1044                                                      data, 1);
1045         }
1046
1047         total_needed += empty_size;
1048         path = btrfs_alloc_path();
1049
1050 check_failed:
1051         if (!block_group->data)
1052                 search_start = find_search_start(root, &block_group,
1053                                                  search_start, total_needed);
1054         else if (!full_scan)
1055                 search_start = max(block_group->last_alloc, search_start);
1056
1057         btrfs_init_path(path);
1058         ins->objectid = search_start;
1059         ins->offset = 0;
1060         start_found = 0;
1061
1062         ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1063         if (ret < 0)
1064                 goto error;
1065
1066         if (path->slots[0] > 0) {
1067                 path->slots[0]--;
1068         }
1069
1070         l = btrfs_buffer_leaf(path->nodes[0]);
1071         btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key);
1072         /*
1073          * a rare case, go back one key if we hit a block group item
1074          * instead of an extent item
1075          */
1076         if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
1077             key.objectid + key.offset >= search_start) {
1078                 ins->objectid = key.objectid;
1079                 ins->offset = key.offset - 1;
1080                 btrfs_release_path(root, path);
1081                 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1082                 if (ret < 0)
1083                         goto error;
1084
1085                 if (path->slots[0] > 0) {
1086                         path->slots[0]--;
1087                 }
1088         }
1089
1090         while (1) {
1091                 l = btrfs_buffer_leaf(path->nodes[0]);
1092                 slot = path->slots[0];
1093                 if (slot >= btrfs_header_nritems(&l->header)) {
1094                         if (fill_prealloc) {
1095                                 info->extent_tree_prealloc_nr = 0;
1096                                 total_found = 0;
1097                         }
1098                         if (start_found)
1099                                 limit = last_block +
1100                                         (block_group->key.offset >> 1);
1101                         else
1102                                 limit = search_start +
1103                                         (block_group->key.offset >> 1);
1104                         ret = btrfs_next_leaf(root, path);
1105                         if (ret == 0)
1106                                 continue;
1107                         if (ret < 0)
1108                                 goto error;
1109                         if (!start_found) {
1110                                 ins->objectid = search_start;
1111                                 ins->offset = search_end - search_start;
1112                                 start_found = 1;
1113                                 goto check_pending;
1114                         }
1115                         ins->objectid = last_block > search_start ?
1116                                         last_block : search_start;
1117                         ins->offset = search_end - ins->objectid;
1118                         goto check_pending;
1119                 }
1120
1121                 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
1122                 if (key.objectid >= search_start && key.objectid > last_block &&
1123                     start_found) {
1124                         if (last_block < search_start)
1125                                 last_block = search_start;
1126                         hole_size = key.objectid - last_block;
1127                         if (hole_size >= num_blocks) {
1128                                 ins->objectid = last_block;
1129                                 ins->offset = hole_size;
1130                                 goto check_pending;
1131                         }
1132                 }
1133
1134                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
1135                         goto next;
1136
1137                 start_found = 1;
1138                 last_block = key.objectid + key.offset;
1139                 if (!full_scan && last_block >= block_group->key.objectid +
1140                     block_group->key.offset) {
1141                         btrfs_release_path(root, path);
1142                         search_start = block_group->key.objectid +
1143                                 block_group->key.offset * 2;
1144                         goto new_group;
1145                 }
1146 next:
1147                 path->slots[0]++;
1148                 cond_resched();
1149         }
1150 check_pending:
1151         /* we have to make sure we didn't find an extent that has already
1152          * been allocated by the map tree or the original allocation
1153          */
1154         btrfs_release_path(root, path);
1155         BUG_ON(ins->objectid < search_start);
1156
1157         if (ins->objectid + num_blocks >= search_end) {
1158                 if (full_scan) {
1159                         ret = -ENOSPC;
1160                         goto error;
1161                 }
1162                 search_start = orig_search_start;
1163                 if (wrapped) {
1164                         if (!full_scan)
1165                                 total_needed -= empty_size;
1166                         full_scan = 1;
1167                 } else
1168                         wrapped = 1;
1169                 goto new_group;
1170         }
1171         for (test_block = ins->objectid;
1172              test_block < ins->objectid + num_blocks; test_block++) {
1173                 if (test_radix_bit(&info->pinned_radix, test_block)) {
1174                         search_start = test_block + 1;
1175                         goto new_group;
1176                 }
1177         }
1178         if (!fill_prealloc && info->extent_tree_insert_nr) {
1179                 u64 last =
1180                   info->extent_tree_insert[info->extent_tree_insert_nr - 1];
1181                 if (ins->objectid + num_blocks >
1182                     info->extent_tree_insert[0] &&
1183                     ins->objectid <= last) {
1184                         search_start = last + 1;
1185                         WARN_ON(!full_scan);
1186                         goto new_group;
1187                 }
1188         }
1189         if (!fill_prealloc && info->extent_tree_prealloc_nr) {
1190                 u64 first =
1191                   info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1];
1192                 if (ins->objectid + num_blocks > first &&
1193                     ins->objectid <= info->extent_tree_prealloc[0]) {
1194                         search_start = info->extent_tree_prealloc[0] + 1;
1195                         goto new_group;
1196                 }
1197         }
1198         if (exclude_nr > 0 && (ins->objectid + num_blocks > exclude_start &&
1199             ins->objectid < exclude_start + exclude_nr)) {
1200                 search_start = exclude_start + exclude_nr;
1201                 goto new_group;
1202         }
1203         if (fill_prealloc) {
1204                 int nr;
1205                 test_block = ins->objectid;
1206                 if (test_block - info->extent_tree_prealloc[total_needed - 1] >=
1207                     leaf_range(root)) {
1208                         total_found = 0;
1209                         info->extent_tree_prealloc_nr = total_found;
1210                 }
1211                 while(test_block < ins->objectid + ins->offset &&
1212                       total_found < total_needed) {
1213                         nr = total_needed - total_found - 1;
1214                         BUG_ON(nr < 0);
1215                         info->extent_tree_prealloc[nr] = test_block;
1216                         total_found++;
1217                         test_block++;
1218                 }
1219                 if (total_found < total_needed) {
1220                         search_start = test_block;
1221                         goto new_group;
1222                 }
1223                 info->extent_tree_prealloc_nr = total_found;
1224         }
1225         if (!data) {
1226                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1227                 if (block_group) {
1228                         if (fill_prealloc)
1229                                 block_group->last_prealloc =
1230                                      info->extent_tree_prealloc[total_needed-1];
1231                         else
1232                                 trans->block_group = block_group;
1233                 }
1234         }
1235         ins->offset = num_blocks;
1236         btrfs_free_path(path);
1237         return 0;
1238
1239 new_group:
1240         if (search_start + num_blocks >= search_end) {
1241                 search_start = orig_search_start;
1242                 if (full_scan) {
1243                         ret = -ENOSPC;
1244                         goto error;
1245                 }
1246                 if (wrapped) {
1247                         if (!full_scan)
1248                                 total_needed -= empty_size;
1249                         full_scan = 1;
1250                 } else
1251                         wrapped = 1;
1252         }
1253         block_group = btrfs_lookup_block_group(info, search_start);
1254         cond_resched();
1255         if (!full_scan)
1256                 block_group = btrfs_find_block_group(root, block_group,
1257                                                      search_start, data, 0);
1258         goto check_failed;
1259
1260 error:
1261         btrfs_release_path(root, path);
1262         btrfs_free_path(path);
1263         return ret;
1264 }
1265 /*
1266  * finds a free extent and does all the dirty work required for allocation
1267  * returns the key for the extent through ins, and a tree buffer for
1268  * the first block of the extent through buf.
1269  *
1270  * returns 0 if everything worked, non-zero otherwise.
1271  */
1272 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1273                        struct btrfs_root *root, u64 owner,
1274                        u64 num_blocks, u64 empty_size, u64 hint_block,
1275                        u64 search_end, struct btrfs_key *ins, int data)
1276 {
1277         int ret;
1278         int pending_ret;
1279         u64 super_blocks_used;
1280         u64 search_start = 0;
1281         u64 exclude_start = 0;
1282         u64 exclude_nr = 0;
1283         struct btrfs_fs_info *info = root->fs_info;
1284         struct btrfs_root *extent_root = info->extent_root;
1285         struct btrfs_extent_item extent_item;
1286         struct btrfs_key prealloc_key;
1287
1288         btrfs_set_extent_refs(&extent_item, 1);
1289         btrfs_set_extent_owner(&extent_item, owner);
1290
1291         if (root == extent_root) {
1292                 int nr;
1293                 BUG_ON(info->extent_tree_prealloc_nr == 0);
1294                 BUG_ON(num_blocks != 1);
1295                 ins->offset = 1;
1296                 info->extent_tree_prealloc_nr--;
1297                 nr = info->extent_tree_prealloc_nr;
1298                 ins->objectid = info->extent_tree_prealloc[nr];
1299                 info->extent_tree_insert[info->extent_tree_insert_nr++] =
1300                         ins->objectid;
1301                 ret = update_block_group(trans, root,
1302                                          ins->objectid, ins->offset, 1, 0, 0);
1303                 WARN_ON(info->extent_tree_insert_nr >
1304                         ARRAY_SIZE(info->extent_tree_insert));
1305                 BUG_ON(ret);
1306                 return 0;
1307         }
1308
1309         /*
1310          * if we're doing a data allocation, preallocate room in the
1311          * extent tree first.  This way the extent tree blocks end up
1312          * in the correct block group.
1313          */
1314         if (data) {
1315                 ret = find_free_extent(trans, root, 0, 0, 0,
1316                                        search_end, 0, &prealloc_key, 0, 0, 0);
1317                 BUG_ON(ret);
1318                 if (ret)
1319                         return ret;
1320                 exclude_nr = info->extent_tree_prealloc_nr;
1321                 exclude_start = info->extent_tree_prealloc[exclude_nr - 1];
1322         }
1323
1324         /* do the real allocation */
1325         ret = find_free_extent(trans, root, num_blocks, empty_size,
1326                                search_start, search_end, hint_block, ins,
1327                                exclude_start, exclude_nr, data);
1328         BUG_ON(ret);
1329         if (ret)
1330                 return ret;
1331
1332         /*
1333          * if we're doing a metadata allocation, preallocate space in the
1334          * extent tree second.  This way, we don't create a tiny hole
1335          * in the allocation map between any unused preallocation blocks
1336          * and the metadata block we're actually allocating.  On disk,
1337          * it'll go:
1338          * [block we've allocated], [used prealloc 1], [ unused prealloc ]
1339          * The unused prealloc will get reused the next time around.
1340          */
1341         if (!data) {
1342                 exclude_start = ins->objectid;
1343                 exclude_nr = ins->offset;
1344                 hint_block = exclude_start + exclude_nr;
1345                 ret = find_free_extent(trans, root, 0, 0, search_start,
1346                                        search_end, hint_block,
1347                                        &prealloc_key, exclude_start,
1348                                        exclude_nr, 0);
1349                 BUG_ON(ret);
1350                 if (ret)
1351                         return ret;
1352         }
1353
1354         super_blocks_used = btrfs_super_blocks_used(&info->super_copy);
1355         btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used +
1356                                     num_blocks);
1357         ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1358                                 sizeof(extent_item));
1359
1360         BUG_ON(ret);
1361         finish_current_insert(trans, extent_root);
1362         pending_ret = del_pending_extents(trans, extent_root);
1363         if (ret) {
1364                 return ret;
1365         }
1366         if (pending_ret) {
1367                 return pending_ret;
1368         }
1369         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1370                                  data);
1371         BUG_ON(ret);
1372         return 0;
1373 }
1374
1375 /*
1376  * helper function to allocate a block for a given tree
1377  * returns the tree buffer or NULL.
1378  */
1379 struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1380                                            struct btrfs_root *root, u64 hint,
1381                                            u64 empty_size)
1382 {
1383         struct btrfs_key ins;
1384         int ret;
1385         struct buffer_head *buf;
1386
1387         ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1388                                  1, empty_size, hint,
1389                                  (unsigned long)-1, &ins, 0);
1390         if (ret) {
1391                 BUG_ON(ret > 0);
1392                 return ERR_PTR(ret);
1393         }
1394         buf = btrfs_find_create_tree_block(root, ins.objectid);
1395         if (!buf) {
1396                 btrfs_free_extent(trans, root, ins.objectid, 1, 0);
1397                 return ERR_PTR(-ENOMEM);
1398         }
1399         WARN_ON(buffer_dirty(buf));
1400         set_buffer_uptodate(buf);
1401         set_buffer_checked(buf);
1402         set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
1403         return buf;
1404 }
1405
1406 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1407                          struct btrfs_root *root, struct buffer_head *cur)
1408 {
1409         struct btrfs_disk_key *key;
1410         struct btrfs_leaf *leaf;
1411         struct btrfs_file_extent_item *fi;
1412         int i;
1413         int nritems;
1414         int ret;
1415
1416         BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
1417         leaf = btrfs_buffer_leaf(cur);
1418         nritems = btrfs_header_nritems(&leaf->header);
1419         for (i = 0; i < nritems; i++) {
1420                 u64 disk_blocknr;
1421                 key = &leaf->items[i].key;
1422                 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
1423                         continue;
1424                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1425                 if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE)
1426                         continue;
1427                 /*
1428                  * FIXME make sure to insert a trans record that
1429                  * repeats the snapshot del on crash
1430                  */
1431                 disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
1432                 if (disk_blocknr == 0)
1433                         continue;
1434                 ret = btrfs_free_extent(trans, root, disk_blocknr,
1435                                         btrfs_file_extent_disk_num_blocks(fi),
1436                                         0);
1437                 BUG_ON(ret);
1438         }
1439         return 0;
1440 }
1441
1442 static void reada_walk_down(struct btrfs_root *root,
1443                             struct btrfs_node *node)
1444 {
1445         int i;
1446         u32 nritems;
1447         u64 blocknr;
1448         int ret;
1449         u32 refs;
1450
1451         nritems = btrfs_header_nritems(&node->header);
1452         for (i = 0; i < nritems; i++) {
1453                 blocknr = btrfs_node_blockptr(node, i);
1454                 ret = lookup_extent_ref(NULL, root, blocknr, 1, &refs);
1455                 BUG_ON(ret);
1456                 if (refs != 1)
1457                         continue;
1458                 ret = readahead_tree_block(root, blocknr);
1459                 if (ret)
1460                         break;
1461         }
1462 }
1463
1464 /*
1465  * helper function for drop_snapshot, this walks down the tree dropping ref
1466  * counts as it goes.
1467  */
1468 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1469                           *root, struct btrfs_path *path, int *level)
1470 {
1471         struct buffer_head *next;
1472         struct buffer_head *cur;
1473         u64 blocknr;
1474         int ret;
1475         u32 refs;
1476
1477         WARN_ON(*level < 0);
1478         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1479         ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]),
1480                                1, &refs);
1481         BUG_ON(ret);
1482         if (refs > 1)
1483                 goto out;
1484
1485         /*
1486          * walk down to the last node level and free all the leaves
1487          */
1488         while(*level >= 0) {
1489                 WARN_ON(*level < 0);
1490                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1491                 cur = path->nodes[*level];
1492
1493                 if (*level > 0 && path->slots[*level] == 0)
1494                         reada_walk_down(root, btrfs_buffer_node(cur));
1495
1496                 if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
1497                         WARN_ON(1);
1498
1499                 if (path->slots[*level] >=
1500                     btrfs_header_nritems(btrfs_buffer_header(cur)))
1501                         break;
1502                 if (*level == 0) {
1503                         ret = drop_leaf_ref(trans, root, cur);
1504                         BUG_ON(ret);
1505                         break;
1506                 }
1507                 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
1508                                               path->slots[*level]);
1509                 ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
1510                 BUG_ON(ret);
1511                 if (refs != 1) {
1512                         path->slots[*level]++;
1513                         ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
1514                         BUG_ON(ret);
1515                         continue;
1516                 }
1517                 next = read_tree_block(root, blocknr);
1518                 WARN_ON(*level <= 0);
1519                 if (path->nodes[*level-1])
1520                         btrfs_block_release(root, path->nodes[*level-1]);
1521                 path->nodes[*level-1] = next;
1522                 *level = btrfs_header_level(btrfs_buffer_header(next));
1523                 path->slots[*level] = 0;
1524         }
1525 out:
1526         WARN_ON(*level < 0);
1527         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1528         ret = btrfs_free_extent(trans, root,
1529                                 bh_blocknr(path->nodes[*level]), 1, 1);
1530         btrfs_block_release(root, path->nodes[*level]);
1531         path->nodes[*level] = NULL;
1532         *level += 1;
1533         BUG_ON(ret);
1534         return 0;
1535 }
1536
1537 /*
1538  * helper for dropping snapshots.  This walks back up the tree in the path
1539  * to find the first node higher up where we haven't yet gone through
1540  * all the slots
1541  */
1542 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1543                         *root, struct btrfs_path *path, int *level)
1544 {
1545         int i;
1546         int slot;
1547         int ret;
1548         struct btrfs_root_item *root_item = &root->root_item;
1549
1550         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1551                 slot = path->slots[i];
1552                 if (slot < btrfs_header_nritems(
1553                     btrfs_buffer_header(path->nodes[i])) - 1) {
1554                         struct btrfs_node *node;
1555                         node = btrfs_buffer_node(path->nodes[i]);
1556                         path->slots[i]++;
1557                         *level = i;
1558                         WARN_ON(*level == 0);
1559                         memcpy(&root_item->drop_progress,
1560                                &node->ptrs[path->slots[i]].key,
1561                                sizeof(root_item->drop_progress));
1562                         root_item->drop_level = i;
1563                         return 0;
1564                 } else {
1565                         ret = btrfs_free_extent(trans, root,
1566                                                 bh_blocknr(path->nodes[*level]),
1567                                                 1, 1);
1568                         BUG_ON(ret);
1569                         btrfs_block_release(root, path->nodes[*level]);
1570                         path->nodes[*level] = NULL;
1571                         *level = i + 1;
1572                 }
1573         }
1574         return 1;
1575 }
1576
1577 /*
1578  * drop the reference count on the tree rooted at 'snap'.  This traverses
1579  * the tree freeing any blocks that have a ref count of zero after being
1580  * decremented.
1581  */
1582 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1583                         *root)
1584 {
1585         int ret = 0;
1586         int wret;
1587         int level;
1588         struct btrfs_path *path;
1589         int i;
1590         int orig_level;
1591         int num_walks = 0;
1592         struct btrfs_root_item *root_item = &root->root_item;
1593
1594         path = btrfs_alloc_path();
1595         BUG_ON(!path);
1596
1597         level = btrfs_header_level(btrfs_buffer_header(root->node));
1598         orig_level = level;
1599         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1600                 path->nodes[level] = root->node;
1601                 path->slots[level] = 0;
1602         } else {
1603                 struct btrfs_key key;
1604                 struct btrfs_disk_key *found_key;
1605                 struct btrfs_node *node;
1606
1607                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1608                 level = root_item->drop_level;
1609                 path->lowest_level = level;
1610                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1611                 if (wret < 0) {
1612                         ret = wret;
1613                         goto out;
1614                 }
1615                 node = btrfs_buffer_node(path->nodes[level]);
1616                 found_key = &node->ptrs[path->slots[level]].key;
1617                 WARN_ON(memcmp(found_key, &root_item->drop_progress,
1618                                sizeof(*found_key)));
1619         }
1620         while(1) {
1621                 wret = walk_down_tree(trans, root, path, &level);
1622                 if (wret > 0)
1623                         break;
1624                 if (wret < 0)
1625                         ret = wret;
1626
1627                 wret = walk_up_tree(trans, root, path, &level);
1628                 if (wret > 0)
1629                         break;
1630                 if (wret < 0)
1631                         ret = wret;
1632                 num_walks++;
1633                 if (num_walks > 2) {
1634                         ret = -EAGAIN;
1635                         get_bh(root->node);
1636                         break;
1637                 }
1638         }
1639         for (i = 0; i <= orig_level; i++) {
1640                 if (path->nodes[i]) {
1641                         btrfs_block_release(root, path->nodes[i]);
1642                         path->nodes[i] = 0;
1643                 }
1644         }
1645 out:
1646         btrfs_free_path(path);
1647         return ret;
1648 }
1649
1650 static int free_block_group_radix(struct radix_tree_root *radix)
1651 {
1652         int ret;
1653         struct btrfs_block_group_cache *cache[8];
1654         int i;
1655
1656         while(1) {
1657                 ret = radix_tree_gang_lookup(radix, (void **)cache, 0,
1658                                              ARRAY_SIZE(cache));
1659                 if (!ret)
1660                         break;
1661                 for (i = 0; i < ret; i++) {
1662                         radix_tree_delete(radix, cache[i]->key.objectid +
1663                                           cache[i]->key.offset - 1);
1664                         kfree(cache[i]);
1665                 }
1666         }
1667         return 0;
1668 }
1669
1670 int btrfs_free_block_groups(struct btrfs_fs_info *info)
1671 {
1672         int ret;
1673         int ret2;
1674         unsigned long gang[16];
1675         int i;
1676
1677         ret = free_block_group_radix(&info->block_group_radix);
1678         ret2 = free_block_group_radix(&info->block_group_data_radix);
1679         if (ret)
1680                 return ret;
1681         if (ret2)
1682                 return ret2;
1683
1684         while(1) {
1685                 ret = find_first_radix_bit(&info->extent_map_radix,
1686                                            gang, 0, ARRAY_SIZE(gang));
1687                 if (!ret)
1688                         break;
1689                 for (i = 0; i < ret; i++) {
1690                         clear_radix_bit(&info->extent_map_radix, gang[i]);
1691                 }
1692         }
1693         return 0;
1694 }
1695
1696 int btrfs_read_block_groups(struct btrfs_root *root)
1697 {
1698         struct btrfs_path *path;
1699         int ret;
1700         int err = 0;
1701         struct btrfs_block_group_item *bi;
1702         struct btrfs_block_group_cache *cache;
1703         struct btrfs_fs_info *info = root->fs_info;
1704         struct radix_tree_root *radix;
1705         struct btrfs_key key;
1706         struct btrfs_key found_key;
1707         struct btrfs_leaf *leaf;
1708         u64 group_size_blocks;
1709         u64 used;
1710
1711         group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >>
1712                 root->fs_info->sb->s_blocksize_bits;
1713         root = info->extent_root;
1714         key.objectid = 0;
1715         key.offset = group_size_blocks;
1716         key.flags = 0;
1717         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1718
1719         path = btrfs_alloc_path();
1720         if (!path)
1721                 return -ENOMEM;
1722
1723         while(1) {
1724                 ret = btrfs_search_slot(NULL, info->extent_root,
1725                                         &key, path, 0, 0);
1726                 if (ret != 0) {
1727                         err = ret;
1728                         break;
1729                 }
1730                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1731                 btrfs_disk_key_to_cpu(&found_key,
1732                                       &leaf->items[path->slots[0]].key);
1733                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1734                 if (!cache) {
1735                         err = -1;
1736                         break;
1737                 }
1738
1739                 bi = btrfs_item_ptr(leaf, path->slots[0],
1740                                     struct btrfs_block_group_item);
1741                 if (bi->flags & BTRFS_BLOCK_GROUP_DATA) {
1742                         radix = &info->block_group_data_radix;
1743                         cache->data = 1;
1744                 } else {
1745                         radix = &info->block_group_radix;
1746                         cache->data = 0;
1747                 }
1748
1749                 memcpy(&cache->item, bi, sizeof(*bi));
1750                 memcpy(&cache->key, &found_key, sizeof(found_key));
1751                 cache->last_alloc = cache->key.objectid;
1752                 cache->first_free = cache->key.objectid;
1753                 cache->last_prealloc = cache->key.objectid;
1754                 cache->pinned = 0;
1755                 cache->cached = 0;
1756
1757                 cache->radix = radix;
1758
1759                 key.objectid = found_key.objectid + found_key.offset;
1760                 btrfs_release_path(root, path);
1761                 ret = radix_tree_insert(radix, found_key.objectid +
1762                                         found_key.offset - 1,
1763                                         (void *)cache);
1764                 BUG_ON(ret);
1765                 used = btrfs_block_group_used(bi);
1766                 if (used < div_factor(key.offset, 8)) {
1767                         radix_tree_tag_set(radix, found_key.objectid +
1768                                            found_key.offset - 1,
1769                                            BTRFS_BLOCK_GROUP_AVAIL);
1770                 }
1771                 if (key.objectid >=
1772                     btrfs_super_total_blocks(&info->super_copy))
1773                         break;
1774         }
1775
1776         btrfs_free_path(path);
1777         return 0;
1778 }