Btrfs: Add efficient dirty accounting to the extent_map tree
[linux-2.6-block.git] / fs / btrfs / extent-tree.c
CommitLineData
6cbd5570
CM
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
ec6b910f 19#include <linux/sched.h>
fec577fb
CM
20#include "ctree.h"
21#include "disk-io.h"
22#include "print-tree.h"
e089f05c 23#include "transaction.h"
fec577fb 24
96b5179d
CM
/*
 * Block group state is kept as bits on ranges of the block_group_cache
 * extent map tree.  These names alias the generic extent state bits that
 * are used for that purpose in this file.
 */
#define BLOCK_GROUP_DATA	EXTENT_WRITEBACK
#define BLOCK_GROUP_METADATA	EXTENT_UPTODATE
#define BLOCK_GROUP_DIRTY	EXTENT_DIRTY
28
e089f05c
CM
/* forward declarations for helpers that are defined further down */
static int finish_current_insert(struct btrfs_trans_handle *trans,
				 struct btrfs_root *extent_root);
static int del_pending_extents(struct btrfs_trans_handle *trans,
			       struct btrfs_root *extent_root);
fec577fb 33
e37c9e69
CM
34static int cache_block_group(struct btrfs_root *root,
35 struct btrfs_block_group_cache *block_group)
36{
37 struct btrfs_path *path;
38 int ret;
39 struct btrfs_key key;
5f39d397 40 struct extent_buffer *leaf;
f510cfec 41 struct extent_map_tree *free_space_cache;
e37c9e69 42 int slot;
e37c9e69
CM
43 u64 last = 0;
44 u64 hole_size;
7d7d6068 45 u64 first_free;
e37c9e69
CM
46 int found = 0;
47
48 root = root->fs_info->extent_root;
f510cfec 49 free_space_cache = &root->fs_info->free_space_cache;
e37c9e69
CM
50
51 if (block_group->cached)
52 return 0;
f510cfec 53
e37c9e69
CM
54 path = btrfs_alloc_path();
55 if (!path)
56 return -ENOMEM;
7d7d6068 57
2cc58cf2 58 path->reada = 2;
7d7d6068 59 first_free = block_group->key.objectid;
e37c9e69 60 key.objectid = block_group->key.objectid;
e37c9e69 61 key.offset = 0;
7d7d6068 62
e37c9e69
CM
63 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
64 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7d7d6068 65
e37c9e69
CM
66 if (ret < 0)
67 return ret;
7d7d6068 68
e37c9e69
CM
69 if (ret && path->slots[0] > 0)
70 path->slots[0]--;
7d7d6068 71
e37c9e69 72 while(1) {
5f39d397 73 leaf = path->nodes[0];
e37c9e69 74 slot = path->slots[0];
5f39d397 75 if (slot >= btrfs_header_nritems(leaf)) {
e37c9e69 76 ret = btrfs_next_leaf(root, path);
54aa1f4d
CM
77 if (ret < 0)
78 goto err;
de428b63 79 if (ret == 0) {
e37c9e69 80 continue;
de428b63 81 } else {
e37c9e69
CM
82 break;
83 }
84 }
7d7d6068 85
5f39d397 86 btrfs_item_key_to_cpu(leaf, &key, slot);
7d7d6068
Y
87 if (key.objectid < block_group->key.objectid) {
88 if (key.objectid + key.offset > first_free)
89 first_free = key.objectid + key.offset;
90 goto next;
91 }
92
e37c9e69
CM
93 if (key.objectid >= block_group->key.objectid +
94 block_group->key.offset) {
e37c9e69
CM
95 break;
96 }
7d7d6068 97
e37c9e69
CM
98 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
99 if (!found) {
7d7d6068 100 last = first_free;
e37c9e69 101 found = 1;
e37c9e69 102 }
f510cfec
CM
103 if (key.objectid > last) {
104 hole_size = key.objectid - last;
105 set_extent_dirty(free_space_cache, last,
106 last + hole_size - 1,
107 GFP_NOFS);
7d7d6068
Y
108 }
109 last = key.objectid + key.offset;
e37c9e69 110 }
7d7d6068 111next:
e37c9e69
CM
112 path->slots[0]++;
113 }
114
7d7d6068
Y
115 if (!found)
116 last = first_free;
117 if (block_group->key.objectid +
118 block_group->key.offset > last) {
119 hole_size = block_group->key.objectid +
120 block_group->key.offset - last;
f510cfec
CM
121 set_extent_dirty(free_space_cache, last,
122 last + hole_size - 1, GFP_NOFS);
7d7d6068 123 }
e37c9e69 124 block_group->cached = 1;
54aa1f4d 125err:
e37c9e69
CM
126 btrfs_free_path(path);
127 return 0;
128}
129
5276aeda
CM
130struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
131 btrfs_fs_info *info,
db94535d 132 u64 bytenr)
be744175 133{
96b5179d
CM
134 struct extent_map_tree *block_group_cache;
135 struct btrfs_block_group_cache *block_group = NULL;
136 u64 ptr;
137 u64 start;
138 u64 end;
be744175
CM
139 int ret;
140
96b5179d
CM
141 block_group_cache = &info->block_group_cache;
142 ret = find_first_extent_bit(block_group_cache,
db94535d 143 bytenr, &start, &end,
96b5179d 144 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
be744175 145 if (ret) {
96b5179d 146 return NULL;
be744175 147 }
96b5179d
CM
148 ret = get_state_private(block_group_cache, start, &ptr);
149 if (ret)
150 return NULL;
151
ae2f5411 152 block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
5cf66426 153 if (block_group->key.objectid <= bytenr && bytenr <
96b5179d
CM
154 block_group->key.objectid + block_group->key.offset)
155 return block_group;
be744175
CM
156 return NULL;
157}
e37c9e69
CM
158static u64 find_search_start(struct btrfs_root *root,
159 struct btrfs_block_group_cache **cache_ret,
5e5745dc
Y
160 u64 search_start, int num,
161 int data, int full_scan)
e37c9e69 162{
e37c9e69
CM
163 int ret;
164 struct btrfs_block_group_cache *cache = *cache_ret;
e19caa5f 165 u64 last;
f510cfec
CM
166 u64 start = 0;
167 u64 end = 0;
257d0ce3 168 u64 cache_miss = 0;
f84a8b36 169 int wrapped = 0;
e37c9e69 170
e37c9e69 171again:
54aa1f4d
CM
172 ret = cache_block_group(root, cache);
173 if (ret)
174 goto out;
f84a8b36 175
e19caa5f
CM
176 last = max(search_start, cache->key.objectid);
177
e37c9e69 178 while(1) {
f510cfec
CM
179 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
180 last, &start, &end, EXTENT_DIRTY);
e19caa5f 181 if (ret) {
257d0ce3
CM
182 if (!cache_miss)
183 cache_miss = last;
e19caa5f
CM
184 goto new_group;
185 }
f510cfec
CM
186
187 start = max(last, start);
188 last = end + 1;
257d0ce3
CM
189 if (last - start < num) {
190 if (last == cache->key.objectid + cache->key.offset)
191 cache_miss = start;
f510cfec 192 continue;
257d0ce3
CM
193 }
194 if (data != BTRFS_BLOCK_GROUP_MIXED &&
5cf66426 195 start + num > cache->key.objectid + cache->key.offset)
e37c9e69 196 goto new_group;
f510cfec 197 return start;
e37c9e69
CM
198 }
199out:
1a5bc167 200 return search_start;
e37c9e69
CM
201
202new_group:
e19caa5f 203 last = cache->key.objectid + cache->key.offset;
f84a8b36 204wrapped:
e19caa5f 205 cache = btrfs_lookup_block_group(root->fs_info, last);
e37c9e69 206 if (!cache) {
0e4de584 207no_cache:
f84a8b36
CM
208 if (!wrapped) {
209 wrapped = 1;
210 last = search_start;
211 data = BTRFS_BLOCK_GROUP_MIXED;
212 goto wrapped;
213 }
1a5bc167 214 return search_start;
e37c9e69 215 }
257d0ce3
CM
216 if (cache_miss && !cache->cached) {
217 cache_block_group(root, cache);
218 last = cache_miss;
257d0ce3
CM
219 cache = btrfs_lookup_block_group(root->fs_info, last);
220 }
5e5745dc
Y
221 if (!full_scan)
222 cache = btrfs_find_block_group(root, cache, last, data, 0);
0e4de584
CM
223 if (!cache)
224 goto no_cache;
e37c9e69 225 *cache_ret = cache;
257d0ce3 226 cache_miss = 0;
e37c9e69
CM
227 goto again;
228}
229
84f54cfa
CM
230static u64 div_factor(u64 num, int factor)
231{
257d0ce3
CM
232 if (factor == 10)
233 return num;
84f54cfa
CM
234 num *= factor;
235 do_div(num, 10);
236 return num;
237}
238
31f3c99b
CM
239struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
240 struct btrfs_block_group_cache
be744175 241 *hint, u64 search_start,
de428b63 242 int data, int owner)
cd1bc465 243{
96b5179d
CM
244 struct btrfs_block_group_cache *cache;
245 struct extent_map_tree *block_group_cache;
31f3c99b 246 struct btrfs_block_group_cache *found_group = NULL;
cd1bc465
CM
247 struct btrfs_fs_info *info = root->fs_info;
248 u64 used;
31f3c99b
CM
249 u64 last = 0;
250 u64 hint_last;
96b5179d
CM
251 u64 start;
252 u64 end;
253 u64 free_check;
254 u64 ptr;
255 int bit;
cd1bc465 256 int ret;
31f3c99b 257 int full_search = 0;
de428b63 258 int factor = 8;
1e2677e0 259 int data_swap = 0;
de428b63 260
96b5179d
CM
261 block_group_cache = &info->block_group_cache;
262
de428b63 263 if (!owner)
f84a8b36 264 factor = 8;
be744175 265
257d0ce3 266 if (data == BTRFS_BLOCK_GROUP_MIXED) {
f84a8b36 267 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
257d0ce3
CM
268 factor = 10;
269 } else if (data)
96b5179d
CM
270 bit = BLOCK_GROUP_DATA;
271 else
272 bit = BLOCK_GROUP_METADATA;
be744175
CM
273
274 if (search_start) {
275 struct btrfs_block_group_cache *shint;
5276aeda 276 shint = btrfs_lookup_block_group(info, search_start);
f84a8b36
CM
277 if (shint && (shint->data == data ||
278 shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
be744175 279 used = btrfs_block_group_used(&shint->item);
324ae4df
Y
280 if (used + shint->pinned <
281 div_factor(shint->key.offset, factor)) {
be744175
CM
282 return shint;
283 }
284 }
285 }
f84a8b36
CM
286 if (hint && (hint->data == data ||
287 hint->data == BTRFS_BLOCK_GROUP_MIXED)) {
31f3c99b 288 used = btrfs_block_group_used(&hint->item);
324ae4df
Y
289 if (used + hint->pinned <
290 div_factor(hint->key.offset, factor)) {
31f3c99b
CM
291 return hint;
292 }
e19caa5f 293 last = hint->key.objectid + hint->key.offset;
31f3c99b
CM
294 hint_last = last;
295 } else {
e37c9e69
CM
296 if (hint)
297 hint_last = max(hint->key.objectid, search_start);
298 else
299 hint_last = search_start;
300
301 last = hint_last;
31f3c99b 302 }
31f3c99b 303again:
cd1bc465 304 while(1) {
96b5179d
CM
305 ret = find_first_extent_bit(block_group_cache, last,
306 &start, &end, bit);
307 if (ret)
cd1bc465 308 break;
96b5179d
CM
309
310 ret = get_state_private(block_group_cache, start, &ptr);
311 if (ret)
312 break;
313
ae2f5411 314 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
96b5179d
CM
315 last = cache->key.objectid + cache->key.offset;
316 used = btrfs_block_group_used(&cache->item);
317
318 if (full_search)
319 free_check = cache->key.offset;
320 else
321 free_check = div_factor(cache->key.offset, factor);
324ae4df 322 if (used + cache->pinned < free_check) {
96b5179d
CM
323 found_group = cache;
324 goto found;
cd1bc465 325 }
de428b63 326 cond_resched();
cd1bc465 327 }
31f3c99b 328 if (!full_search) {
be744175 329 last = search_start;
31f3c99b
CM
330 full_search = 1;
331 goto again;
332 }
1e2677e0 333 if (!data_swap) {
1e2677e0 334 data_swap = 1;
96b5179d 335 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
1e2677e0
CM
336 last = search_start;
337 goto again;
338 }
be744175 339found:
31f3c99b 340 return found_group;
cd1bc465
CM
341}
342
b18c6685
CM
343int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
344 struct btrfs_root *root,
db94535d 345 u64 bytenr, u64 num_bytes)
02217ed2 346{
5caf2a00 347 struct btrfs_path *path;
02217ed2 348 int ret;
e2fa7227 349 struct btrfs_key key;
5f39d397 350 struct extent_buffer *l;
234b63a0 351 struct btrfs_extent_item *item;
cf27e1ee 352 u32 refs;
037e6390 353
db94535d 354 WARN_ON(num_bytes < root->sectorsize);
5caf2a00 355 path = btrfs_alloc_path();
54aa1f4d
CM
356 if (!path)
357 return -ENOMEM;
26b8003f 358
db94535d 359 key.objectid = bytenr;
62e2749e 360 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
db94535d 361 key.offset = num_bytes;
5caf2a00 362 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
9f5fae2f 363 0, 1);
54aa1f4d
CM
364 if (ret < 0)
365 return ret;
a429e513 366 if (ret != 0) {
a28ec197 367 BUG();
a429e513 368 }
02217ed2 369 BUG_ON(ret != 0);
5f39d397 370 l = path->nodes[0];
5caf2a00 371 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
5f39d397
CM
372 refs = btrfs_extent_refs(l, item);
373 btrfs_set_extent_refs(l, item, refs + 1);
5caf2a00 374 btrfs_mark_buffer_dirty(path->nodes[0]);
a28ec197 375
5caf2a00
CM
376 btrfs_release_path(root->fs_info->extent_root, path);
377 btrfs_free_path(path);
9f5fae2f 378 finish_current_insert(trans, root->fs_info->extent_root);
e20d96d6 379 del_pending_extents(trans, root->fs_info->extent_root);
02217ed2
CM
380 return 0;
381}
382
e9d0b13b
CM
383int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
384 struct btrfs_root *root)
385{
386 finish_current_insert(trans, root->fs_info->extent_root);
387 del_pending_extents(trans, root->fs_info->extent_root);
388 return 0;
389}
390
b18c6685 391static int lookup_extent_ref(struct btrfs_trans_handle *trans,
db94535d
CM
392 struct btrfs_root *root, u64 bytenr,
393 u64 num_bytes, u32 *refs)
a28ec197 394{
5caf2a00 395 struct btrfs_path *path;
a28ec197 396 int ret;
e2fa7227 397 struct btrfs_key key;
5f39d397 398 struct extent_buffer *l;
234b63a0 399 struct btrfs_extent_item *item;
5caf2a00 400
db94535d 401 WARN_ON(num_bytes < root->sectorsize);
5caf2a00 402 path = btrfs_alloc_path();
db94535d
CM
403 key.objectid = bytenr;
404 key.offset = num_bytes;
62e2749e 405 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
5caf2a00 406 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
9f5fae2f 407 0, 0);
54aa1f4d
CM
408 if (ret < 0)
409 goto out;
5f39d397
CM
410 if (ret != 0) {
411 btrfs_print_leaf(root, path->nodes[0]);
db94535d 412 printk("failed to find block number %Lu\n", bytenr);
a28ec197 413 BUG();
5f39d397
CM
414 }
415 l = path->nodes[0];
5caf2a00 416 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
5f39d397 417 *refs = btrfs_extent_refs(l, item);
54aa1f4d 418out:
5caf2a00 419 btrfs_free_path(path);
a28ec197
CM
420 return 0;
421}
422
c5739bba
CM
423int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
424 struct btrfs_root *root)
425{
db94535d
CM
426 return btrfs_inc_extent_ref(trans, root, root->node->start,
427 root->node->len);
c5739bba
CM
428}
429
e089f05c 430int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
5f39d397 431 struct extent_buffer *buf)
02217ed2 432{
db94535d 433 u64 bytenr;
5f39d397
CM
434 u32 nritems;
435 struct btrfs_key key;
6407bf6d 436 struct btrfs_file_extent_item *fi;
02217ed2 437 int i;
db94535d 438 int level;
6407bf6d 439 int ret;
54aa1f4d
CM
440 int faili;
441 int err;
a28ec197 442
3768f368 443 if (!root->ref_cows)
a28ec197 444 return 0;
5f39d397 445
db94535d 446 level = btrfs_header_level(buf);
5f39d397
CM
447 nritems = btrfs_header_nritems(buf);
448 for (i = 0; i < nritems; i++) {
db94535d
CM
449 if (level == 0) {
450 u64 disk_bytenr;
5f39d397
CM
451 btrfs_item_key_to_cpu(buf, &key, i);
452 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
6407bf6d 453 continue;
5f39d397 454 fi = btrfs_item_ptr(buf, i,
6407bf6d 455 struct btrfs_file_extent_item);
5f39d397 456 if (btrfs_file_extent_type(buf, fi) ==
236454df
CM
457 BTRFS_FILE_EXTENT_INLINE)
458 continue;
db94535d
CM
459 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
460 if (disk_bytenr == 0)
3a686375 461 continue;
db94535d
CM
462 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
463 btrfs_file_extent_disk_num_bytes(buf, fi));
54aa1f4d
CM
464 if (ret) {
465 faili = i;
466 goto fail;
467 }
6407bf6d 468 } else {
db94535d
CM
469 bytenr = btrfs_node_blockptr(buf, i);
470 ret = btrfs_inc_extent_ref(trans, root, bytenr,
471 btrfs_level_size(root, level - 1));
54aa1f4d
CM
472 if (ret) {
473 faili = i;
474 goto fail;
475 }
6407bf6d 476 }
02217ed2
CM
477 }
478 return 0;
54aa1f4d 479fail:
ccd467d6 480 WARN_ON(1);
54aa1f4d 481 for (i =0; i < faili; i++) {
db94535d
CM
482 if (level == 0) {
483 u64 disk_bytenr;
5f39d397
CM
484 btrfs_item_key_to_cpu(buf, &key, i);
485 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
54aa1f4d 486 continue;
5f39d397 487 fi = btrfs_item_ptr(buf, i,
54aa1f4d 488 struct btrfs_file_extent_item);
5f39d397 489 if (btrfs_file_extent_type(buf, fi) ==
54aa1f4d
CM
490 BTRFS_FILE_EXTENT_INLINE)
491 continue;
db94535d
CM
492 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
493 if (disk_bytenr == 0)
54aa1f4d 494 continue;
db94535d
CM
495 err = btrfs_free_extent(trans, root, disk_bytenr,
496 btrfs_file_extent_disk_num_bytes(buf,
5f39d397 497 fi), 0);
54aa1f4d
CM
498 BUG_ON(err);
499 } else {
db94535d
CM
500 bytenr = btrfs_node_blockptr(buf, i);
501 err = btrfs_free_extent(trans, root, bytenr,
502 btrfs_level_size(root, level - 1), 0);
54aa1f4d
CM
503 BUG_ON(err);
504 }
505 }
506 return ret;
02217ed2
CM
507}
508
9078a3e1
CM
509static int write_one_cache_group(struct btrfs_trans_handle *trans,
510 struct btrfs_root *root,
511 struct btrfs_path *path,
512 struct btrfs_block_group_cache *cache)
513{
514 int ret;
515 int pending_ret;
516 struct btrfs_root *extent_root = root->fs_info->extent_root;
5f39d397
CM
517 unsigned long bi;
518 struct extent_buffer *leaf;
9078a3e1 519
9078a3e1 520 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
54aa1f4d
CM
521 if (ret < 0)
522 goto fail;
9078a3e1 523 BUG_ON(ret);
5f39d397
CM
524
525 leaf = path->nodes[0];
526 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
527 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
528 btrfs_mark_buffer_dirty(leaf);
9078a3e1 529 btrfs_release_path(extent_root, path);
54aa1f4d 530fail:
9078a3e1
CM
531 finish_current_insert(trans, extent_root);
532 pending_ret = del_pending_extents(trans, extent_root);
533 if (ret)
534 return ret;
535 if (pending_ret)
536 return pending_ret;
537 return 0;
538
539}
540
96b5179d
CM
541int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
542 struct btrfs_root *root)
9078a3e1 543{
96b5179d
CM
544 struct extent_map_tree *block_group_cache;
545 struct btrfs_block_group_cache *cache;
9078a3e1
CM
546 int ret;
547 int err = 0;
548 int werr = 0;
9078a3e1 549 struct btrfs_path *path;
96b5179d
CM
550 u64 last = 0;
551 u64 start;
552 u64 end;
553 u64 ptr;
9078a3e1 554
96b5179d 555 block_group_cache = &root->fs_info->block_group_cache;
9078a3e1
CM
556 path = btrfs_alloc_path();
557 if (!path)
558 return -ENOMEM;
559
560 while(1) {
96b5179d
CM
561 ret = find_first_extent_bit(block_group_cache, last,
562 &start, &end, BLOCK_GROUP_DIRTY);
563 if (ret)
9078a3e1 564 break;
54aa1f4d 565
96b5179d
CM
566 last = end + 1;
567 ret = get_state_private(block_group_cache, start, &ptr);
568 if (ret)
569 break;
570
ae2f5411 571 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
96b5179d
CM
572 err = write_one_cache_group(trans, root,
573 path, cache);
574 /*
575 * if we fail to write the cache group, we want
576 * to keep it marked dirty in hopes that a later
577 * write will work
578 */
579 if (err) {
580 werr = err;
581 continue;
9078a3e1 582 }
96b5179d
CM
583 clear_extent_bits(block_group_cache, start, end,
584 BLOCK_GROUP_DIRTY, GFP_NOFS);
9078a3e1
CM
585 }
586 btrfs_free_path(path);
587 return werr;
588}
589
590static int update_block_group(struct btrfs_trans_handle *trans,
591 struct btrfs_root *root,
db94535d
CM
592 u64 bytenr, u64 num_bytes, int alloc,
593 int mark_free, int data)
9078a3e1
CM
594{
595 struct btrfs_block_group_cache *cache;
596 struct btrfs_fs_info *info = root->fs_info;
db94535d 597 u64 total = num_bytes;
9078a3e1 598 u64 old_val;
db94535d 599 u64 byte_in_group;
96b5179d
CM
600 u64 start;
601 u64 end;
3e1ad54f 602
9078a3e1 603 while(total) {
db94535d 604 cache = btrfs_lookup_block_group(info, bytenr);
3e1ad54f 605 if (!cache) {
9078a3e1 606 return -1;
cd1bc465 607 }
db94535d
CM
608 byte_in_group = bytenr - cache->key.objectid;
609 WARN_ON(byte_in_group > cache->key.offset);
96b5179d
CM
610 start = cache->key.objectid;
611 end = start + cache->key.offset - 1;
612 set_extent_bits(&info->block_group_cache, start, end,
613 BLOCK_GROUP_DIRTY, GFP_NOFS);
9078a3e1
CM
614
615 old_val = btrfs_block_group_used(&cache->item);
db94535d 616 num_bytes = min(total, cache->key.offset - byte_in_group);
cd1bc465 617 if (alloc) {
1e2677e0 618 if (cache->data != data &&
84f54cfa 619 old_val < (cache->key.offset >> 1)) {
96b5179d
CM
620 int bit_to_clear;
621 int bit_to_set;
96b5179d 622 cache->data = data;
1e2677e0 623 if (data) {
b97f9203
Y
624 bit_to_clear = BLOCK_GROUP_METADATA;
625 bit_to_set = BLOCK_GROUP_DATA;
f84a8b36
CM
626 cache->item.flags &=
627 ~BTRFS_BLOCK_GROUP_MIXED;
1e2677e0
CM
628 cache->item.flags |=
629 BTRFS_BLOCK_GROUP_DATA;
630 } else {
b97f9203
Y
631 bit_to_clear = BLOCK_GROUP_DATA;
632 bit_to_set = BLOCK_GROUP_METADATA;
f84a8b36
CM
633 cache->item.flags &=
634 ~BTRFS_BLOCK_GROUP_MIXED;
1e2677e0
CM
635 cache->item.flags &=
636 ~BTRFS_BLOCK_GROUP_DATA;
637 }
96b5179d
CM
638 clear_extent_bits(&info->block_group_cache,
639 start, end, bit_to_clear,
640 GFP_NOFS);
641 set_extent_bits(&info->block_group_cache,
642 start, end, bit_to_set,
643 GFP_NOFS);
f84a8b36
CM
644 } else if (cache->data != data &&
645 cache->data != BTRFS_BLOCK_GROUP_MIXED) {
646 cache->data = BTRFS_BLOCK_GROUP_MIXED;
647 set_extent_bits(&info->block_group_cache,
648 start, end,
649 BLOCK_GROUP_DATA |
650 BLOCK_GROUP_METADATA,
651 GFP_NOFS);
1e2677e0 652 }
db94535d 653 old_val += num_bytes;
cd1bc465 654 } else {
db94535d 655 old_val -= num_bytes;
f510cfec
CM
656 if (mark_free) {
657 set_extent_dirty(&info->free_space_cache,
db94535d 658 bytenr, bytenr + num_bytes - 1,
f510cfec 659 GFP_NOFS);
e37c9e69 660 }
cd1bc465 661 }
9078a3e1 662 btrfs_set_block_group_used(&cache->item, old_val);
db94535d
CM
663 total -= num_bytes;
664 bytenr += num_bytes;
9078a3e1
CM
665 }
666 return 0;
667}
324ae4df
Y
668static int update_pinned_extents(struct btrfs_root *root,
669 u64 bytenr, u64 num, int pin)
670{
671 u64 len;
672 struct btrfs_block_group_cache *cache;
673 struct btrfs_fs_info *fs_info = root->fs_info;
674
675 if (pin) {
676 set_extent_dirty(&fs_info->pinned_extents,
677 bytenr, bytenr + num - 1, GFP_NOFS);
678 } else {
679 clear_extent_dirty(&fs_info->pinned_extents,
680 bytenr, bytenr + num - 1, GFP_NOFS);
681 }
682 while (num > 0) {
683 cache = btrfs_lookup_block_group(fs_info, bytenr);
684 WARN_ON(!cache);
685 len = min(num, cache->key.offset -
686 (bytenr - cache->key.objectid));
687 if (pin) {
688 cache->pinned += len;
689 fs_info->total_pinned += len;
690 } else {
691 cache->pinned -= len;
692 fs_info->total_pinned -= len;
693 }
694 bytenr += len;
695 num -= len;
696 }
697 return 0;
698}
9078a3e1 699
1a5bc167 700int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
ccd467d6 701{
ccd467d6 702 u64 last = 0;
1a5bc167
CM
703 u64 start;
704 u64 end;
705 struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
ccd467d6 706 int ret;
ccd467d6
CM
707
708 while(1) {
1a5bc167
CM
709 ret = find_first_extent_bit(pinned_extents, last,
710 &start, &end, EXTENT_DIRTY);
711 if (ret)
ccd467d6 712 break;
1a5bc167
CM
713 set_extent_dirty(copy, start, end, GFP_NOFS);
714 last = end + 1;
ccd467d6
CM
715 }
716 return 0;
717}
718
719int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
720 struct btrfs_root *root,
1a5bc167 721 struct extent_map_tree *unpin)
a28ec197 722{
1a5bc167
CM
723 u64 start;
724 u64 end;
a28ec197 725 int ret;
f510cfec 726 struct extent_map_tree *free_space_cache;
f510cfec 727 free_space_cache = &root->fs_info->free_space_cache;
a28ec197
CM
728
729 while(1) {
1a5bc167
CM
730 ret = find_first_extent_bit(unpin, 0, &start, &end,
731 EXTENT_DIRTY);
732 if (ret)
a28ec197 733 break;
324ae4df 734 update_pinned_extents(root, start, end + 1 - start, 0);
1a5bc167
CM
735 clear_extent_dirty(unpin, start, end, GFP_NOFS);
736 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
a28ec197
CM
737 }
738 return 0;
739}
740
e089f05c
CM
741static int finish_current_insert(struct btrfs_trans_handle *trans, struct
742 btrfs_root *extent_root)
037e6390 743{
e2fa7227 744 struct btrfs_key ins;
234b63a0 745 struct btrfs_extent_item extent_item;
037e6390 746 int ret;
1a5bc167
CM
747 int err = 0;
748 u64 start;
749 u64 end;
1261ec42 750 struct btrfs_fs_info *info = extent_root->fs_info;
037e6390 751
5f39d397 752 btrfs_set_stack_extent_refs(&extent_item, 1);
62e2749e 753 btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
5f39d397
CM
754 btrfs_set_stack_extent_owner(&extent_item,
755 extent_root->root_key.objectid);
037e6390 756
26b8003f 757 while(1) {
1a5bc167
CM
758 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
759 &end, EXTENT_LOCKED);
760 if (ret)
26b8003f
CM
761 break;
762
1a5bc167
CM
763 ins.objectid = start;
764 ins.offset = end + 1 - start;
765 err = btrfs_insert_item(trans, extent_root, &ins,
766 &extent_item, sizeof(extent_item));
767 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
768 GFP_NOFS);
037e6390 769 }
037e6390
CM
770 return 0;
771}
772
db94535d
CM
773static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
774 int pending)
e20d96d6 775{
1a5bc167 776 int err = 0;
5f39d397 777 struct extent_buffer *buf;
8ef97622 778
f4b9aa8d 779 if (!pending) {
db94535d 780 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
5f39d397
CM
781 if (buf) {
782 if (btrfs_buffer_uptodate(buf)) {
2c90e5d6
CM
783 u64 transid =
784 root->fs_info->running_transaction->transid;
5f39d397
CM
785 if (btrfs_header_generation(buf) == transid) {
786 free_extent_buffer(buf);
c549228f 787 return 1;
2c90e5d6 788 }
f4b9aa8d 789 }
5f39d397 790 free_extent_buffer(buf);
8ef97622 791 }
324ae4df 792 update_pinned_extents(root, bytenr, num_bytes, 1);
f4b9aa8d 793 } else {
1a5bc167 794 set_extent_bits(&root->fs_info->pending_del,
db94535d
CM
795 bytenr, bytenr + num_bytes - 1,
796 EXTENT_LOCKED, GFP_NOFS);
f4b9aa8d 797 }
be744175 798 BUG_ON(err < 0);
e20d96d6
CM
799 return 0;
800}
801
fec577fb 802/*
a28ec197 803 * remove an extent from the root, returns 0 on success
fec577fb 804 */
e089f05c 805static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
db94535d 806 *root, u64 bytenr, u64 num_bytes, int pin,
e37c9e69 807 int mark_free)
a28ec197 808{
5caf2a00 809 struct btrfs_path *path;
e2fa7227 810 struct btrfs_key key;
1261ec42
CM
811 struct btrfs_fs_info *info = root->fs_info;
812 struct btrfs_root *extent_root = info->extent_root;
5f39d397 813 struct extent_buffer *leaf;
a28ec197 814 int ret;
234b63a0 815 struct btrfs_extent_item *ei;
cf27e1ee 816 u32 refs;
037e6390 817
db94535d 818 key.objectid = bytenr;
62e2749e 819 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
db94535d 820 key.offset = num_bytes;
a28ec197 821
5caf2a00 822 path = btrfs_alloc_path();
54aa1f4d
CM
823 if (!path)
824 return -ENOMEM;
5f26f772 825
54aa1f4d
CM
826 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
827 if (ret < 0)
828 return ret;
829 BUG_ON(ret);
5f39d397
CM
830
831 leaf = path->nodes[0];
832 ei = btrfs_item_ptr(leaf, path->slots[0],
123abc88 833 struct btrfs_extent_item);
5f39d397
CM
834 refs = btrfs_extent_refs(leaf, ei);
835 BUG_ON(refs == 0);
836 refs -= 1;
837 btrfs_set_extent_refs(leaf, ei, refs);
838 btrfs_mark_buffer_dirty(leaf);
839
cf27e1ee 840 if (refs == 0) {
db94535d
CM
841 u64 super_used;
842 u64 root_used;
78fae27e
CM
843
844 if (pin) {
db94535d 845 ret = pin_down_bytes(root, bytenr, num_bytes, 0);
c549228f
Y
846 if (ret > 0)
847 mark_free = 1;
848 BUG_ON(ret < 0);
78fae27e
CM
849 }
850
58176a96 851 /* block accounting for super block */
db94535d
CM
852 super_used = btrfs_super_bytes_used(&info->super_copy);
853 btrfs_set_super_bytes_used(&info->super_copy,
854 super_used - num_bytes);
58176a96
JB
855
856 /* block accounting for root item */
db94535d 857 root_used = btrfs_root_used(&root->root_item);
5f39d397 858 btrfs_set_root_used(&root->root_item,
db94535d 859 root_used - num_bytes);
58176a96 860
5caf2a00 861 ret = btrfs_del_item(trans, extent_root, path);
54aa1f4d
CM
862 if (ret) {
863 return ret;
864 }
db94535d 865 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1e2677e0 866 mark_free, 0);
9078a3e1 867 BUG_ON(ret);
a28ec197 868 }
5caf2a00 869 btrfs_free_path(path);
e089f05c 870 finish_current_insert(trans, extent_root);
a28ec197
CM
871 return ret;
872}
873
a28ec197
CM
874/*
875 * find all the blocks marked as pending in the radix tree and remove
876 * them from the extent map
877 */
e089f05c
CM
878static int del_pending_extents(struct btrfs_trans_handle *trans, struct
879 btrfs_root *extent_root)
a28ec197
CM
880{
881 int ret;
e20d96d6 882 int err = 0;
1a5bc167
CM
883 u64 start;
884 u64 end;
885 struct extent_map_tree *pending_del;
886 struct extent_map_tree *pinned_extents;
8ef97622 887
1a5bc167
CM
888 pending_del = &extent_root->fs_info->pending_del;
889 pinned_extents = &extent_root->fs_info->pinned_extents;
a28ec197
CM
890
891 while(1) {
1a5bc167
CM
892 ret = find_first_extent_bit(pending_del, 0, &start, &end,
893 EXTENT_LOCKED);
894 if (ret)
a28ec197 895 break;
324ae4df 896 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1a5bc167
CM
897 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
898 GFP_NOFS);
899 ret = __free_extent(trans, extent_root,
900 start, end + 1 - start, 0, 0);
901 if (ret)
902 err = ret;
fec577fb 903 }
e20d96d6 904 return err;
fec577fb
CM
905}
906
907/*
908 * remove an extent from the root, returns 0 on success
909 */
e089f05c 910int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
db94535d 911 *root, u64 bytenr, u64 num_bytes, int pin)
fec577fb 912{
9f5fae2f 913 struct btrfs_root *extent_root = root->fs_info->extent_root;
fec577fb
CM
914 int pending_ret;
915 int ret;
a28ec197 916
db94535d 917 WARN_ON(num_bytes < root->sectorsize);
fec577fb 918 if (root == extent_root) {
db94535d 919 pin_down_bytes(root, bytenr, num_bytes, 1);
fec577fb
CM
920 return 0;
921 }
db94535d 922 ret = __free_extent(trans, root, bytenr, num_bytes, pin, pin == 0);
e20d96d6 923 pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
fec577fb
CM
924 return ret ? ret : pending_ret;
925}
926
927/*
928 * walks the btree of allocated extents and find a hole of a given size.
929 * The key ins is changed to record the hole:
930 * ins->objectid == block start
62e2749e 931 * ins->flags = BTRFS_EXTENT_ITEM_KEY
fec577fb
CM
932 * ins->offset == number of blocks
933 * Any available blocks before search_start are skipped.
934 */
e089f05c 935static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
db94535d
CM
936 *orig_root, u64 num_bytes, u64 empty_size,
937 u64 search_start, u64 search_end, u64 hint_byte,
f2654de4
CM
938 struct btrfs_key *ins, u64 exclude_start,
939 u64 exclude_nr, int data)
fec577fb 940{
5caf2a00 941 struct btrfs_path *path;
e2fa7227 942 struct btrfs_key key;
fec577fb
CM
943 int ret;
944 u64 hole_size = 0;
945 int slot = 0;
db94535d 946 u64 last_byte = 0;
be744175 947 u64 orig_search_start = search_start;
fec577fb 948 int start_found;
5f39d397 949 struct extent_buffer *l;
9f5fae2f 950 struct btrfs_root * root = orig_root->fs_info->extent_root;
f2458e1d 951 struct btrfs_fs_info *info = root->fs_info;
db94535d 952 u64 total_needed = num_bytes;
e20d96d6 953 int level;
be08c1b9 954 struct btrfs_block_group_cache *block_group;
be744175 955 int full_scan = 0;
fbdc762b 956 int wrapped = 0;
f84a8b36 957 u64 cached_start;
fec577fb 958
db94535d 959 WARN_ON(num_bytes < root->sectorsize);
b1a4d965
CM
960 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
961
5f39d397
CM
962 level = btrfs_header_level(root->node);
963
015a739c 964 if (num_bytes >= 32 * 1024 * 1024 && hint_byte) {
257d0ce3
CM
965 data = BTRFS_BLOCK_GROUP_MIXED;
966 }
967
3e1ad54f 968 if (search_end == (u64)-1)
db94535d
CM
969 search_end = btrfs_super_total_bytes(&info->super_copy);
970 if (hint_byte) {
971 block_group = btrfs_lookup_block_group(info, hint_byte);
be744175 972 block_group = btrfs_find_block_group(root, block_group,
db94535d 973 hint_byte, data, 1);
be744175
CM
974 } else {
975 block_group = btrfs_find_block_group(root,
976 trans->block_group, 0,
de428b63 977 data, 1);
be744175
CM
978 }
979
6702ed49 980 total_needed += empty_size;
e011599b 981 path = btrfs_alloc_path();
be744175 982check_failed:
5e5745dc
Y
983 search_start = find_search_start(root, &block_group, search_start,
984 total_needed, data, full_scan);
f84a8b36 985 cached_start = search_start;
5caf2a00 986 btrfs_init_path(path);
fec577fb
CM
987 ins->objectid = search_start;
988 ins->offset = 0;
fec577fb 989 start_found = 0;
2cc58cf2 990 path->reada = 2;
e37c9e69 991
5caf2a00 992 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
0f70abe2
CM
993 if (ret < 0)
994 goto error;
aa5d6bed 995
e37c9e69 996 if (path->slots[0] > 0) {
5caf2a00 997 path->slots[0]--;
e37c9e69
CM
998 }
999
5f39d397
CM
1000 l = path->nodes[0];
1001 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1002
e37c9e69
CM
1003 /*
1004 * a rare case, go back one key if we hit a block group item
1005 * instead of an extent item
1006 */
1007 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
1008 key.objectid + key.offset >= search_start) {
1009 ins->objectid = key.objectid;
1010 ins->offset = key.offset - 1;
1011 btrfs_release_path(root, path);
1012 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1013 if (ret < 0)
1014 goto error;
1015
1016 if (path->slots[0] > 0) {
1017 path->slots[0]--;
1018 }
1019 }
0579da42 1020
fec577fb 1021 while (1) {
5f39d397 1022 l = path->nodes[0];
5caf2a00 1023 slot = path->slots[0];
5f39d397 1024 if (slot >= btrfs_header_nritems(l)) {
5caf2a00 1025 ret = btrfs_next_leaf(root, path);
fec577fb
CM
1026 if (ret == 0)
1027 continue;
0f70abe2
CM
1028 if (ret < 0)
1029 goto error;
e19caa5f
CM
1030
1031 search_start = max(search_start,
1032 block_group->key.objectid);
fec577fb
CM
1033 if (!start_found) {
1034 ins->objectid = search_start;
3e1ad54f 1035 ins->offset = search_end - search_start;
fec577fb
CM
1036 start_found = 1;
1037 goto check_pending;
1038 }
db94535d
CM
1039 ins->objectid = last_byte > search_start ?
1040 last_byte : search_start;
3e1ad54f 1041 ins->offset = search_end - ins->objectid;
e19caa5f 1042 BUG_ON(ins->objectid >= search_end);
fec577fb
CM
1043 goto check_pending;
1044 }
5f39d397 1045 btrfs_item_key_to_cpu(l, &key, slot);
96b5179d 1046
db94535d 1047 if (key.objectid >= search_start && key.objectid > last_byte &&
e37c9e69 1048 start_found) {
db94535d
CM
1049 if (last_byte < search_start)
1050 last_byte = search_start;
1051 hole_size = key.objectid - last_byte;
1052 if (hole_size >= num_bytes) {
1053 ins->objectid = last_byte;
e37c9e69
CM
1054 ins->offset = hole_size;
1055 goto check_pending;
0579da42 1056 }
fec577fb 1057 }
96b5179d
CM
1058 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
1059 if (!start_found) {
db94535d 1060 last_byte = key.objectid;
96b5179d
CM
1061 start_found = 1;
1062 }
e37c9e69 1063 goto next;
96b5179d
CM
1064 }
1065
e37c9e69 1066
0579da42 1067 start_found = 1;
db94535d 1068 last_byte = key.objectid + key.offset;
f510cfec 1069
257d0ce3
CM
1070 if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
1071 last_byte >= block_group->key.objectid +
be744175
CM
1072 block_group->key.offset) {
1073 btrfs_release_path(root, path);
1074 search_start = block_group->key.objectid +
e19caa5f 1075 block_group->key.offset;
be744175
CM
1076 goto new_group;
1077 }
9078a3e1 1078next:
5caf2a00 1079 path->slots[0]++;
de428b63 1080 cond_resched();
fec577fb 1081 }
fec577fb
CM
1082check_pending:
1083 /* we have to make sure we didn't find an extent that has already
1084 * been allocated by the map tree or the original allocation
1085 */
5caf2a00 1086 btrfs_release_path(root, path);
fec577fb 1087 BUG_ON(ins->objectid < search_start);
e37c9e69 1088
db94535d 1089 if (ins->objectid + num_bytes >= search_end)
cf67582b 1090 goto enospc;
257d0ce3 1091 if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
5cf66426 1092 ins->objectid + num_bytes > block_group->
e19caa5f
CM
1093 key.objectid + block_group->key.offset) {
1094 search_start = block_group->key.objectid +
1095 block_group->key.offset;
1096 goto new_group;
1097 }
1a5bc167 1098 if (test_range_bit(&info->extent_ins, ins->objectid,
db94535d
CM
1099 ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1100 search_start = ins->objectid + num_bytes;
1a5bc167
CM
1101 goto new_group;
1102 }
1103 if (test_range_bit(&info->pinned_extents, ins->objectid,
db94535d
CM
1104 ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1105 search_start = ins->objectid + num_bytes;
1a5bc167 1106 goto new_group;
fec577fb 1107 }
db94535d 1108 if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
f2654de4
CM
1109 ins->objectid < exclude_start + exclude_nr)) {
1110 search_start = exclude_start + exclude_nr;
1111 goto new_group;
1112 }
e37c9e69 1113 if (!data) {
5276aeda 1114 block_group = btrfs_lookup_block_group(info, ins->objectid);
26b8003f
CM
1115 if (block_group)
1116 trans->block_group = block_group;
f2458e1d 1117 }
db94535d 1118 ins->offset = num_bytes;
5caf2a00 1119 btrfs_free_path(path);
fec577fb 1120 return 0;
be744175
CM
1121
1122new_group:
db94535d 1123 if (search_start + num_bytes >= search_end) {
cf67582b 1124enospc:
be744175 1125 search_start = orig_search_start;
fbdc762b
CM
1126 if (full_scan) {
1127 ret = -ENOSPC;
1128 goto error;
1129 }
6702ed49
CM
1130 if (wrapped) {
1131 if (!full_scan)
1132 total_needed -= empty_size;
fbdc762b 1133 full_scan = 1;
6702ed49 1134 } else
fbdc762b 1135 wrapped = 1;
be744175 1136 }
5276aeda 1137 block_group = btrfs_lookup_block_group(info, search_start);
fbdc762b 1138 cond_resched();
be744175
CM
1139 if (!full_scan)
1140 block_group = btrfs_find_block_group(root, block_group,
de428b63 1141 search_start, data, 0);
be744175
CM
1142 goto check_failed;
1143
0f70abe2 1144error:
5caf2a00
CM
1145 btrfs_release_path(root, path);
1146 btrfs_free_path(path);
0f70abe2 1147 return ret;
fec577fb 1148}
fec577fb
CM
1149/*
1150 * finds a free extent and does all the dirty work required for allocation
1151 * returns the key for the extent through ins, and a tree buffer for
1152 * the first block of the extent through buf.
1153 *
1154 * returns 0 if everything worked, non-zero otherwise.
1155 */
4d775673
CM
1156int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1157 struct btrfs_root *root, u64 owner,
db94535d 1158 u64 num_bytes, u64 empty_size, u64 hint_byte,
be08c1b9 1159 u64 search_end, struct btrfs_key *ins, int data)
fec577fb
CM
1160{
1161 int ret;
1162 int pending_ret;
db94535d 1163 u64 super_used, root_used;
fbdc762b 1164 u64 search_start = 0;
1261ec42
CM
1165 struct btrfs_fs_info *info = root->fs_info;
1166 struct btrfs_root *extent_root = info->extent_root;
234b63a0 1167 struct btrfs_extent_item extent_item;
037e6390 1168
5f39d397
CM
1169 btrfs_set_stack_extent_refs(&extent_item, 1);
1170 btrfs_set_stack_extent_owner(&extent_item, owner);
fec577fb 1171
db94535d
CM
1172 WARN_ON(num_bytes < root->sectorsize);
1173 ret = find_free_extent(trans, root, num_bytes, empty_size,
1174 search_start, search_end, hint_byte, ins,
26b8003f
CM
1175 trans->alloc_exclude_start,
1176 trans->alloc_exclude_nr, data);
ccd467d6 1177 BUG_ON(ret);
f2654de4
CM
1178 if (ret)
1179 return ret;
fec577fb 1180
58176a96 1181 /* block accounting for super block */
db94535d
CM
1182 super_used = btrfs_super_bytes_used(&info->super_copy);
1183 btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
26b8003f 1184
58176a96 1185 /* block accounting for root item */
db94535d
CM
1186 root_used = btrfs_root_used(&root->root_item);
1187 btrfs_set_root_used(&root->root_item, root_used + num_bytes);
58176a96 1188
f510cfec
CM
1189 clear_extent_dirty(&root->fs_info->free_space_cache,
1190 ins->objectid, ins->objectid + ins->offset - 1,
1191 GFP_NOFS);
1192
26b8003f 1193 if (root == extent_root) {
1a5bc167
CM
1194 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1195 ins->objectid + ins->offset - 1,
1196 EXTENT_LOCKED, GFP_NOFS);
e19caa5f 1197 WARN_ON(data == 1);
26b8003f
CM
1198 goto update_block;
1199 }
1200
1201 WARN_ON(trans->alloc_exclude_nr);
1202 trans->alloc_exclude_start = ins->objectid;
1203 trans->alloc_exclude_nr = ins->offset;
e089f05c
CM
1204 ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1205 sizeof(extent_item));
037e6390 1206
26b8003f
CM
1207 trans->alloc_exclude_start = 0;
1208 trans->alloc_exclude_nr = 0;
1209
ccd467d6 1210 BUG_ON(ret);
e089f05c 1211 finish_current_insert(trans, extent_root);
e20d96d6 1212 pending_ret = del_pending_extents(trans, extent_root);
f510cfec 1213
e37c9e69 1214 if (ret) {
037e6390 1215 return ret;
e37c9e69
CM
1216 }
1217 if (pending_ret) {
037e6390 1218 return pending_ret;
e37c9e69 1219 }
26b8003f
CM
1220
1221update_block:
1e2677e0
CM
1222 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1223 data);
fabb5681 1224 BUG_ON(ret);
037e6390 1225 return 0;
fec577fb
CM
1226}
1227
1228/*
1229 * helper function to allocate a block for a given tree
1230 * returns the tree buffer or NULL.
1231 */
5f39d397 1232struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
db94535d
CM
1233 struct btrfs_root *root,
1234 u32 blocksize, u64 hint,
5f39d397 1235 u64 empty_size)
fec577fb 1236{
e2fa7227 1237 struct btrfs_key ins;
fec577fb 1238 int ret;
5f39d397 1239 struct extent_buffer *buf;
fec577fb 1240
4d775673 1241 ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
db94535d
CM
1242 blocksize, empty_size, hint,
1243 (u64)-1, &ins, 0);
fec577fb 1244 if (ret) {
54aa1f4d
CM
1245 BUG_ON(ret > 0);
1246 return ERR_PTR(ret);
fec577fb 1247 }
db94535d 1248 buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
54aa1f4d 1249 if (!buf) {
db94535d 1250 btrfs_free_extent(trans, root, ins.objectid, blocksize, 0);
54aa1f4d
CM
1251 return ERR_PTR(-ENOMEM);
1252 }
5f39d397
CM
1253 btrfs_set_buffer_uptodate(buf);
1254 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
1255 buf->start + buf->len - 1, GFP_NOFS);
19c00ddc
CM
1256 set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
1257 buf->start, buf->start + buf->len - 1,
1258 EXTENT_CSUM, GFP_NOFS);
1259 buf->flags |= EXTENT_CSUM;
6b80053d 1260 btrfs_set_buffer_defrag(buf);
d3c2fdcf 1261 trans->blocks_used++;
fec577fb
CM
1262 return buf;
1263}
a28ec197 1264
6407bf6d 1265static int drop_leaf_ref(struct btrfs_trans_handle *trans,
5f39d397 1266 struct btrfs_root *root, struct extent_buffer *leaf)
6407bf6d 1267{
5f39d397 1268 struct btrfs_key key;
6407bf6d
CM
1269 struct btrfs_file_extent_item *fi;
1270 int i;
1271 int nritems;
1272 int ret;
1273
5f39d397
CM
1274 BUG_ON(!btrfs_is_leaf(leaf));
1275 nritems = btrfs_header_nritems(leaf);
6407bf6d 1276 for (i = 0; i < nritems; i++) {
db94535d 1277 u64 disk_bytenr;
5f39d397
CM
1278
1279 btrfs_item_key_to_cpu(leaf, &key, i);
1280 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
6407bf6d
CM
1281 continue;
1282 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
5f39d397
CM
1283 if (btrfs_file_extent_type(leaf, fi) ==
1284 BTRFS_FILE_EXTENT_INLINE)
236454df 1285 continue;
6407bf6d
CM
1286 /*
1287 * FIXME make sure to insert a trans record that
1288 * repeats the snapshot del on crash
1289 */
db94535d
CM
1290 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1291 if (disk_bytenr == 0)
3a686375 1292 continue;
db94535d
CM
1293 ret = btrfs_free_extent(trans, root, disk_bytenr,
1294 btrfs_file_extent_disk_num_bytes(leaf, fi), 0);
6407bf6d
CM
1295 BUG_ON(ret);
1296 }
1297 return 0;
1298}
1299
e011599b 1300static void reada_walk_down(struct btrfs_root *root,
5f39d397 1301 struct extent_buffer *node)
e011599b
CM
1302{
1303 int i;
1304 u32 nritems;
db94535d 1305 u64 bytenr;
e011599b
CM
1306 int ret;
1307 u32 refs;
db94535d
CM
1308 int level;
1309 u32 blocksize;
e011599b 1310
5f39d397 1311 nritems = btrfs_header_nritems(node);
db94535d 1312 level = btrfs_header_level(node);
e011599b 1313 for (i = 0; i < nritems; i++) {
db94535d
CM
1314 bytenr = btrfs_node_blockptr(node, i);
1315 blocksize = btrfs_level_size(root, level - 1);
1316 ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs);
e011599b
CM
1317 BUG_ON(ret);
1318 if (refs != 1)
1319 continue;
409eb95d 1320 mutex_unlock(&root->fs_info->fs_mutex);
db94535d 1321 ret = readahead_tree_block(root, bytenr, blocksize);
409eb95d
CM
1322 cond_resched();
1323 mutex_lock(&root->fs_info->fs_mutex);
e011599b
CM
1324 if (ret)
1325 break;
1326 }
1327}
1328
9aca1d51
CM
1329/*
1330 * helper function for drop_snapshot, this walks down the tree dropping ref
1331 * counts as it goes.
1332 */
e089f05c
CM
1333static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1334 *root, struct btrfs_path *path, int *level)
20524f02 1335{
5f39d397
CM
1336 struct extent_buffer *next;
1337 struct extent_buffer *cur;
db94535d
CM
1338 u64 bytenr;
1339 u32 blocksize;
20524f02
CM
1340 int ret;
1341 u32 refs;
1342
5caf2a00
CM
1343 WARN_ON(*level < 0);
1344 WARN_ON(*level >= BTRFS_MAX_LEVEL);
5f39d397 1345 ret = lookup_extent_ref(trans, root,
db94535d
CM
1346 path->nodes[*level]->start,
1347 path->nodes[*level]->len, &refs);
20524f02
CM
1348 BUG_ON(ret);
1349 if (refs > 1)
1350 goto out;
e011599b 1351
9aca1d51
CM
1352 /*
1353 * walk down to the last node level and free all the leaves
1354 */
6407bf6d 1355 while(*level >= 0) {
5caf2a00
CM
1356 WARN_ON(*level < 0);
1357 WARN_ON(*level >= BTRFS_MAX_LEVEL);
20524f02 1358 cur = path->nodes[*level];
e011599b
CM
1359
1360 if (*level > 0 && path->slots[*level] == 0)
5f39d397 1361 reada_walk_down(root, cur);
e011599b 1362
5f39d397 1363 if (btrfs_header_level(cur) != *level)
2c90e5d6 1364 WARN_ON(1);
e011599b 1365
7518a238 1366 if (path->slots[*level] >=
5f39d397 1367 btrfs_header_nritems(cur))
20524f02 1368 break;
6407bf6d
CM
1369 if (*level == 0) {
1370 ret = drop_leaf_ref(trans, root, cur);
1371 BUG_ON(ret);
1372 break;
1373 }
db94535d
CM
1374 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1375 blocksize = btrfs_level_size(root, *level - 1);
1376 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
6407bf6d
CM
1377 BUG_ON(ret);
1378 if (refs != 1) {
20524f02 1379 path->slots[*level]++;
db94535d
CM
1380 ret = btrfs_free_extent(trans, root, bytenr,
1381 blocksize, 1);
20524f02
CM
1382 BUG_ON(ret);
1383 continue;
1384 }
db94535d 1385 next = btrfs_find_tree_block(root, bytenr, blocksize);
5f39d397
CM
1386 if (!next || !btrfs_buffer_uptodate(next)) {
1387 free_extent_buffer(next);
e9d0b13b 1388 mutex_unlock(&root->fs_info->fs_mutex);
db94535d 1389 next = read_tree_block(root, bytenr, blocksize);
e9d0b13b
CM
1390 mutex_lock(&root->fs_info->fs_mutex);
1391
1392 /* we dropped the lock, check one more time */
db94535d
CM
1393 ret = lookup_extent_ref(trans, root, bytenr,
1394 blocksize, &refs);
e9d0b13b
CM
1395 BUG_ON(ret);
1396 if (refs != 1) {
1397 path->slots[*level]++;
5f39d397 1398 free_extent_buffer(next);
e9d0b13b 1399 ret = btrfs_free_extent(trans, root,
db94535d 1400 bytenr, blocksize, 1);
e9d0b13b
CM
1401 BUG_ON(ret);
1402 continue;
1403 }
1404 }
5caf2a00 1405 WARN_ON(*level <= 0);
83e15a28 1406 if (path->nodes[*level-1])
5f39d397 1407 free_extent_buffer(path->nodes[*level-1]);
20524f02 1408 path->nodes[*level-1] = next;
5f39d397 1409 *level = btrfs_header_level(next);
20524f02
CM
1410 path->slots[*level] = 0;
1411 }
1412out:
5caf2a00
CM
1413 WARN_ON(*level < 0);
1414 WARN_ON(*level >= BTRFS_MAX_LEVEL);
db94535d
CM
1415 ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
1416 path->nodes[*level]->len, 1);
5f39d397 1417 free_extent_buffer(path->nodes[*level]);
20524f02
CM
1418 path->nodes[*level] = NULL;
1419 *level += 1;
1420 BUG_ON(ret);
1421 return 0;
1422}
1423
9aca1d51
CM
1424/*
1425 * helper for dropping snapshots. This walks back up the tree in the path
1426 * to find the first node higher up where we haven't yet gone through
1427 * all the slots
1428 */
e089f05c
CM
1429static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1430 *root, struct btrfs_path *path, int *level)
20524f02
CM
1431{
1432 int i;
1433 int slot;
1434 int ret;
9f3a7427
CM
1435 struct btrfs_root_item *root_item = &root->root_item;
1436
234b63a0 1437 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
20524f02 1438 slot = path->slots[i];
5f39d397
CM
1439 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
1440 struct extent_buffer *node;
1441 struct btrfs_disk_key disk_key;
1442 node = path->nodes[i];
20524f02
CM
1443 path->slots[i]++;
1444 *level = i;
9f3a7427 1445 WARN_ON(*level == 0);
5f39d397 1446 btrfs_node_key(node, &disk_key, path->slots[i]);
9f3a7427 1447 memcpy(&root_item->drop_progress,
5f39d397 1448 &disk_key, sizeof(disk_key));
9f3a7427 1449 root_item->drop_level = i;
20524f02
CM
1450 return 0;
1451 } else {
e089f05c 1452 ret = btrfs_free_extent(trans, root,
db94535d
CM
1453 path->nodes[*level]->start,
1454 path->nodes[*level]->len, 1);
6407bf6d 1455 BUG_ON(ret);
5f39d397 1456 free_extent_buffer(path->nodes[*level]);
83e15a28 1457 path->nodes[*level] = NULL;
20524f02 1458 *level = i + 1;
20524f02
CM
1459 }
1460 }
1461 return 1;
1462}
1463
9aca1d51
CM
1464/*
1465 * drop the reference count on the tree rooted at 'snap'. This traverses
1466 * the tree freeing any blocks that have a ref count of zero after being
1467 * decremented.
1468 */
e089f05c 1469int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
9f3a7427 1470 *root)
20524f02 1471{
3768f368 1472 int ret = 0;
9aca1d51 1473 int wret;
20524f02 1474 int level;
5caf2a00 1475 struct btrfs_path *path;
20524f02
CM
1476 int i;
1477 int orig_level;
9f3a7427 1478 struct btrfs_root_item *root_item = &root->root_item;
20524f02 1479
5caf2a00
CM
1480 path = btrfs_alloc_path();
1481 BUG_ON(!path);
20524f02 1482
5f39d397 1483 level = btrfs_header_level(root->node);
20524f02 1484 orig_level = level;
9f3a7427
CM
1485 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1486 path->nodes[level] = root->node;
f510cfec 1487 extent_buffer_get(root->node);
9f3a7427
CM
1488 path->slots[level] = 0;
1489 } else {
1490 struct btrfs_key key;
5f39d397
CM
1491 struct btrfs_disk_key found_key;
1492 struct extent_buffer *node;
6702ed49 1493
9f3a7427 1494 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6702ed49
CM
1495 level = root_item->drop_level;
1496 path->lowest_level = level;
9f3a7427 1497 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6702ed49 1498 if (wret < 0) {
9f3a7427
CM
1499 ret = wret;
1500 goto out;
1501 }
5f39d397
CM
1502 node = path->nodes[level];
1503 btrfs_node_key(node, &found_key, path->slots[level]);
1504 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
1505 sizeof(found_key)));
9f3a7427 1506 }
20524f02 1507 while(1) {
5caf2a00 1508 wret = walk_down_tree(trans, root, path, &level);
9aca1d51 1509 if (wret > 0)
20524f02 1510 break;
9aca1d51
CM
1511 if (wret < 0)
1512 ret = wret;
1513
5caf2a00 1514 wret = walk_up_tree(trans, root, path, &level);
9aca1d51 1515 if (wret > 0)
20524f02 1516 break;
9aca1d51
CM
1517 if (wret < 0)
1518 ret = wret;
409eb95d 1519 ret = -EAGAIN;
409eb95d 1520 break;
20524f02 1521 }
83e15a28 1522 for (i = 0; i <= orig_level; i++) {
5caf2a00 1523 if (path->nodes[i]) {
5f39d397 1524 free_extent_buffer(path->nodes[i]);
0f82731f 1525 path->nodes[i] = NULL;
83e15a28 1526 }
20524f02 1527 }
9f3a7427 1528out:
5caf2a00 1529 btrfs_free_path(path);
9aca1d51 1530 return ret;
20524f02 1531}
9078a3e1 1532
96b5179d 1533int btrfs_free_block_groups(struct btrfs_fs_info *info)
9078a3e1 1534{
96b5179d
CM
1535 u64 start;
1536 u64 end;
b97f9203 1537 u64 ptr;
9078a3e1 1538 int ret;
9078a3e1 1539 while(1) {
96b5179d
CM
1540 ret = find_first_extent_bit(&info->block_group_cache, 0,
1541 &start, &end, (unsigned int)-1);
1542 if (ret)
9078a3e1 1543 break;
b97f9203
Y
1544 ret = get_state_private(&info->block_group_cache, start, &ptr);
1545 if (!ret)
1546 kfree((void *)(unsigned long)ptr);
96b5179d
CM
1547 clear_extent_bits(&info->block_group_cache, start,
1548 end, (unsigned int)-1, GFP_NOFS);
9078a3e1 1549 }
e37c9e69 1550 while(1) {
f510cfec
CM
1551 ret = find_first_extent_bit(&info->free_space_cache, 0,
1552 &start, &end, EXTENT_DIRTY);
1553 if (ret)
e37c9e69 1554 break;
f510cfec
CM
1555 clear_extent_dirty(&info->free_space_cache, start,
1556 end, GFP_NOFS);
e37c9e69 1557 }
be744175
CM
1558 return 0;
1559}
1560
9078a3e1
CM
1561int btrfs_read_block_groups(struct btrfs_root *root)
1562{
1563 struct btrfs_path *path;
1564 int ret;
1565 int err = 0;
96b5179d 1566 int bit;
9078a3e1 1567 struct btrfs_block_group_cache *cache;
be744175 1568 struct btrfs_fs_info *info = root->fs_info;
96b5179d 1569 struct extent_map_tree *block_group_cache;
9078a3e1
CM
1570 struct btrfs_key key;
1571 struct btrfs_key found_key;
5f39d397 1572 struct extent_buffer *leaf;
96b5179d
CM
1573
1574 block_group_cache = &info->block_group_cache;
9078a3e1 1575
be744175 1576 root = info->extent_root;
9078a3e1 1577 key.objectid = 0;
db94535d 1578 key.offset = BTRFS_BLOCK_GROUP_SIZE;
9078a3e1
CM
1579 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1580
1581 path = btrfs_alloc_path();
1582 if (!path)
1583 return -ENOMEM;
1584
1585 while(1) {
be744175 1586 ret = btrfs_search_slot(NULL, info->extent_root,
9078a3e1
CM
1587 &key, path, 0, 0);
1588 if (ret != 0) {
1589 err = ret;
1590 break;
1591 }
5f39d397
CM
1592 leaf = path->nodes[0];
1593 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9078a3e1
CM
1594 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1595 if (!cache) {
1596 err = -1;
1597 break;
1598 }
3e1ad54f 1599
5f39d397
CM
1600 read_extent_buffer(leaf, &cache->item,
1601 btrfs_item_ptr_offset(leaf, path->slots[0]),
1602 sizeof(cache->item));
9078a3e1 1603 memcpy(&cache->key, &found_key, sizeof(found_key));
e37c9e69 1604 cache->cached = 0;
324ae4df 1605 cache->pinned = 0;
9078a3e1
CM
1606 key.objectid = found_key.objectid + found_key.offset;
1607 btrfs_release_path(root, path);
5f39d397 1608
f84a8b36
CM
1609 if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) {
1610 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
1611 cache->data = BTRFS_BLOCK_GROUP_MIXED;
1612 } else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
96b5179d 1613 bit = BLOCK_GROUP_DATA;
f84a8b36 1614 cache->data = BTRFS_BLOCK_GROUP_DATA;
96b5179d
CM
1615 } else {
1616 bit = BLOCK_GROUP_METADATA;
1617 cache->data = 0;
31f3c99b 1618 }
96b5179d
CM
1619
1620 /* use EXTENT_LOCKED to prevent merging */
1621 set_extent_bits(block_group_cache, found_key.objectid,
1622 found_key.objectid + found_key.offset - 1,
1623 bit | EXTENT_LOCKED, GFP_NOFS);
1624 set_state_private(block_group_cache, found_key.objectid,
ae2f5411 1625 (unsigned long)cache);
96b5179d 1626
9078a3e1 1627 if (key.objectid >=
db94535d 1628 btrfs_super_total_bytes(&info->super_copy))
9078a3e1
CM
1629 break;
1630 }
1631
1632 btrfs_free_path(path);
1633 return 0;
1634}