Btrfs: Make sure page mapping dirty tag is properly cleared
[linux-2.6-block.git] / fs / btrfs / extent-tree.c
CommitLineData
6cbd5570
CM
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
ec6b910f 19#include <linux/sched.h>
fec577fb
CM
20#include "ctree.h"
21#include "disk-io.h"
22#include "print-tree.h"
e089f05c 23#include "transaction.h"
fec577fb 24
96b5179d
CM
25#define BLOCK_GROUP_DATA EXTENT_WRITEBACK
26#define BLOCK_GROUP_METADATA EXTENT_UPTODATE
27#define BLOCK_GROUP_DIRTY EXTENT_DIRTY
28
e089f05c
CM
29static int finish_current_insert(struct btrfs_trans_handle *trans, struct
30 btrfs_root *extent_root);
e20d96d6
CM
31static int del_pending_extents(struct btrfs_trans_handle *trans, struct
32 btrfs_root *extent_root);
fec577fb 33
e37c9e69
CM
34static int cache_block_group(struct btrfs_root *root,
35 struct btrfs_block_group_cache *block_group)
36{
37 struct btrfs_path *path;
38 int ret;
39 struct btrfs_key key;
5f39d397 40 struct extent_buffer *leaf;
f510cfec 41 struct extent_map_tree *free_space_cache;
e37c9e69 42 int slot;
e37c9e69
CM
43 u64 last = 0;
44 u64 hole_size;
7d7d6068 45 u64 first_free;
e37c9e69
CM
46 int found = 0;
47
48 root = root->fs_info->extent_root;
f510cfec 49 free_space_cache = &root->fs_info->free_space_cache;
e37c9e69
CM
50
51 if (block_group->cached)
52 return 0;
f510cfec 53
e37c9e69
CM
54 path = btrfs_alloc_path();
55 if (!path)
56 return -ENOMEM;
7d7d6068 57
2cc58cf2 58 path->reada = 2;
7d7d6068 59 first_free = block_group->key.objectid;
e37c9e69 60 key.objectid = block_group->key.objectid;
e37c9e69 61 key.offset = 0;
7d7d6068 62
e37c9e69
CM
63 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
64 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7d7d6068 65
e37c9e69
CM
66 if (ret < 0)
67 return ret;
7d7d6068 68
e37c9e69
CM
69 if (ret && path->slots[0] > 0)
70 path->slots[0]--;
7d7d6068 71
e37c9e69 72 while(1) {
5f39d397 73 leaf = path->nodes[0];
e37c9e69 74 slot = path->slots[0];
5f39d397 75 if (slot >= btrfs_header_nritems(leaf)) {
e37c9e69 76 ret = btrfs_next_leaf(root, path);
54aa1f4d
CM
77 if (ret < 0)
78 goto err;
de428b63 79 if (ret == 0) {
e37c9e69 80 continue;
de428b63 81 } else {
e37c9e69
CM
82 break;
83 }
84 }
7d7d6068 85
5f39d397 86 btrfs_item_key_to_cpu(leaf, &key, slot);
7d7d6068
Y
87 if (key.objectid < block_group->key.objectid) {
88 if (key.objectid + key.offset > first_free)
89 first_free = key.objectid + key.offset;
90 goto next;
91 }
92
e37c9e69
CM
93 if (key.objectid >= block_group->key.objectid +
94 block_group->key.offset) {
e37c9e69
CM
95 break;
96 }
7d7d6068 97
e37c9e69
CM
98 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
99 if (!found) {
7d7d6068 100 last = first_free;
e37c9e69 101 found = 1;
e37c9e69 102 }
f510cfec
CM
103 if (key.objectid > last) {
104 hole_size = key.objectid - last;
105 set_extent_dirty(free_space_cache, last,
106 last + hole_size - 1,
107 GFP_NOFS);
7d7d6068
Y
108 }
109 last = key.objectid + key.offset;
e37c9e69 110 }
7d7d6068 111next:
e37c9e69
CM
112 path->slots[0]++;
113 }
114
7d7d6068
Y
115 if (!found)
116 last = first_free;
117 if (block_group->key.objectid +
118 block_group->key.offset > last) {
119 hole_size = block_group->key.objectid +
120 block_group->key.offset - last;
f510cfec
CM
121 set_extent_dirty(free_space_cache, last,
122 last + hole_size - 1, GFP_NOFS);
7d7d6068 123 }
e37c9e69 124 block_group->cached = 1;
54aa1f4d 125err:
e37c9e69
CM
126 btrfs_free_path(path);
127 return 0;
128}
129
5276aeda
CM
130struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
131 btrfs_fs_info *info,
db94535d 132 u64 bytenr)
be744175 133{
96b5179d
CM
134 struct extent_map_tree *block_group_cache;
135 struct btrfs_block_group_cache *block_group = NULL;
136 u64 ptr;
137 u64 start;
138 u64 end;
be744175
CM
139 int ret;
140
96b5179d
CM
141 block_group_cache = &info->block_group_cache;
142 ret = find_first_extent_bit(block_group_cache,
db94535d 143 bytenr, &start, &end,
96b5179d 144 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
be744175 145 if (ret) {
96b5179d 146 return NULL;
be744175 147 }
96b5179d
CM
148 ret = get_state_private(block_group_cache, start, &ptr);
149 if (ret)
150 return NULL;
151
ae2f5411 152 block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
5cf66426 153 if (block_group->key.objectid <= bytenr && bytenr <
96b5179d
CM
154 block_group->key.objectid + block_group->key.offset)
155 return block_group;
be744175
CM
156 return NULL;
157}
e37c9e69
CM
158static u64 find_search_start(struct btrfs_root *root,
159 struct btrfs_block_group_cache **cache_ret,
5e5745dc
Y
160 u64 search_start, int num,
161 int data, int full_scan)
e37c9e69 162{
e37c9e69
CM
163 int ret;
164 struct btrfs_block_group_cache *cache = *cache_ret;
e19caa5f 165 u64 last;
f510cfec
CM
166 u64 start = 0;
167 u64 end = 0;
257d0ce3 168 u64 cache_miss = 0;
f84a8b36 169 int wrapped = 0;
e37c9e69 170
e37c9e69 171again:
54aa1f4d
CM
172 ret = cache_block_group(root, cache);
173 if (ret)
174 goto out;
f84a8b36 175
e19caa5f
CM
176 last = max(search_start, cache->key.objectid);
177
e37c9e69 178 while(1) {
f510cfec
CM
179 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
180 last, &start, &end, EXTENT_DIRTY);
e19caa5f 181 if (ret) {
257d0ce3
CM
182 if (!cache_miss)
183 cache_miss = last;
e19caa5f
CM
184 goto new_group;
185 }
f510cfec
CM
186
187 start = max(last, start);
188 last = end + 1;
257d0ce3
CM
189 if (last - start < num) {
190 if (last == cache->key.objectid + cache->key.offset)
191 cache_miss = start;
f510cfec 192 continue;
257d0ce3
CM
193 }
194 if (data != BTRFS_BLOCK_GROUP_MIXED &&
5cf66426 195 start + num > cache->key.objectid + cache->key.offset)
e37c9e69 196 goto new_group;
f510cfec 197 return start;
e37c9e69
CM
198 }
199out:
1a5bc167 200 return search_start;
e37c9e69
CM
201
202new_group:
e19caa5f 203 last = cache->key.objectid + cache->key.offset;
f84a8b36 204wrapped:
e19caa5f 205 cache = btrfs_lookup_block_group(root->fs_info, last);
e37c9e69 206 if (!cache) {
f84a8b36
CM
207 if (!wrapped) {
208 wrapped = 1;
209 last = search_start;
210 data = BTRFS_BLOCK_GROUP_MIXED;
211 goto wrapped;
212 }
1a5bc167 213 return search_start;
e37c9e69 214 }
257d0ce3
CM
215 if (cache_miss && !cache->cached) {
216 cache_block_group(root, cache);
217 last = cache_miss;
257d0ce3
CM
218 cache = btrfs_lookup_block_group(root->fs_info, last);
219 }
5e5745dc
Y
220 if (!full_scan)
221 cache = btrfs_find_block_group(root, cache, last, data, 0);
e37c9e69 222 *cache_ret = cache;
257d0ce3 223 cache_miss = 0;
e37c9e69
CM
224 goto again;
225}
226
84f54cfa
CM
227static u64 div_factor(u64 num, int factor)
228{
257d0ce3
CM
229 if (factor == 10)
230 return num;
84f54cfa
CM
231 num *= factor;
232 do_div(num, 10);
233 return num;
234}
235
31f3c99b
CM
236struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
237 struct btrfs_block_group_cache
be744175 238 *hint, u64 search_start,
de428b63 239 int data, int owner)
cd1bc465 240{
96b5179d
CM
241 struct btrfs_block_group_cache *cache;
242 struct extent_map_tree *block_group_cache;
31f3c99b 243 struct btrfs_block_group_cache *found_group = NULL;
cd1bc465
CM
244 struct btrfs_fs_info *info = root->fs_info;
245 u64 used;
31f3c99b
CM
246 u64 last = 0;
247 u64 hint_last;
96b5179d
CM
248 u64 start;
249 u64 end;
250 u64 free_check;
251 u64 ptr;
252 int bit;
cd1bc465 253 int ret;
31f3c99b 254 int full_search = 0;
de428b63 255 int factor = 8;
1e2677e0 256 int data_swap = 0;
de428b63 257
96b5179d
CM
258 block_group_cache = &info->block_group_cache;
259
de428b63 260 if (!owner)
f84a8b36 261 factor = 8;
be744175 262
257d0ce3 263 if (data == BTRFS_BLOCK_GROUP_MIXED) {
f84a8b36 264 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
257d0ce3
CM
265 factor = 10;
266 } else if (data)
96b5179d
CM
267 bit = BLOCK_GROUP_DATA;
268 else
269 bit = BLOCK_GROUP_METADATA;
be744175
CM
270
271 if (search_start) {
272 struct btrfs_block_group_cache *shint;
5276aeda 273 shint = btrfs_lookup_block_group(info, search_start);
f84a8b36
CM
274 if (shint && (shint->data == data ||
275 shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
be744175 276 used = btrfs_block_group_used(&shint->item);
324ae4df
Y
277 if (used + shint->pinned <
278 div_factor(shint->key.offset, factor)) {
be744175
CM
279 return shint;
280 }
281 }
282 }
f84a8b36
CM
283 if (hint && (hint->data == data ||
284 hint->data == BTRFS_BLOCK_GROUP_MIXED)) {
31f3c99b 285 used = btrfs_block_group_used(&hint->item);
324ae4df
Y
286 if (used + hint->pinned <
287 div_factor(hint->key.offset, factor)) {
31f3c99b
CM
288 return hint;
289 }
e19caa5f 290 last = hint->key.objectid + hint->key.offset;
31f3c99b
CM
291 hint_last = last;
292 } else {
e37c9e69
CM
293 if (hint)
294 hint_last = max(hint->key.objectid, search_start);
295 else
296 hint_last = search_start;
297
298 last = hint_last;
31f3c99b 299 }
31f3c99b 300again:
cd1bc465 301 while(1) {
96b5179d
CM
302 ret = find_first_extent_bit(block_group_cache, last,
303 &start, &end, bit);
304 if (ret)
cd1bc465 305 break;
96b5179d
CM
306
307 ret = get_state_private(block_group_cache, start, &ptr);
308 if (ret)
309 break;
310
ae2f5411 311 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
96b5179d
CM
312 last = cache->key.objectid + cache->key.offset;
313 used = btrfs_block_group_used(&cache->item);
314
315 if (full_search)
316 free_check = cache->key.offset;
317 else
318 free_check = div_factor(cache->key.offset, factor);
324ae4df 319 if (used + cache->pinned < free_check) {
96b5179d
CM
320 found_group = cache;
321 goto found;
cd1bc465 322 }
de428b63 323 cond_resched();
cd1bc465 324 }
31f3c99b 325 if (!full_search) {
be744175 326 last = search_start;
31f3c99b
CM
327 full_search = 1;
328 goto again;
329 }
1e2677e0 330 if (!data_swap) {
1e2677e0 331 data_swap = 1;
96b5179d 332 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
1e2677e0
CM
333 last = search_start;
334 goto again;
335 }
be744175 336found:
31f3c99b 337 return found_group;
cd1bc465
CM
338}
339
b18c6685
CM
340int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
341 struct btrfs_root *root,
db94535d 342 u64 bytenr, u64 num_bytes)
02217ed2 343{
5caf2a00 344 struct btrfs_path *path;
02217ed2 345 int ret;
e2fa7227 346 struct btrfs_key key;
5f39d397 347 struct extent_buffer *l;
234b63a0 348 struct btrfs_extent_item *item;
cf27e1ee 349 u32 refs;
037e6390 350
db94535d 351 WARN_ON(num_bytes < root->sectorsize);
5caf2a00 352 path = btrfs_alloc_path();
54aa1f4d
CM
353 if (!path)
354 return -ENOMEM;
26b8003f 355
db94535d 356 key.objectid = bytenr;
62e2749e 357 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
db94535d 358 key.offset = num_bytes;
5caf2a00 359 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
9f5fae2f 360 0, 1);
54aa1f4d
CM
361 if (ret < 0)
362 return ret;
a429e513 363 if (ret != 0) {
a28ec197 364 BUG();
a429e513 365 }
02217ed2 366 BUG_ON(ret != 0);
5f39d397 367 l = path->nodes[0];
5caf2a00 368 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
5f39d397
CM
369 refs = btrfs_extent_refs(l, item);
370 btrfs_set_extent_refs(l, item, refs + 1);
5caf2a00 371 btrfs_mark_buffer_dirty(path->nodes[0]);
a28ec197 372
5caf2a00
CM
373 btrfs_release_path(root->fs_info->extent_root, path);
374 btrfs_free_path(path);
9f5fae2f 375 finish_current_insert(trans, root->fs_info->extent_root);
e20d96d6 376 del_pending_extents(trans, root->fs_info->extent_root);
02217ed2
CM
377 return 0;
378}
379
e9d0b13b
CM
380int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
381 struct btrfs_root *root)
382{
383 finish_current_insert(trans, root->fs_info->extent_root);
384 del_pending_extents(trans, root->fs_info->extent_root);
385 return 0;
386}
387
b18c6685 388static int lookup_extent_ref(struct btrfs_trans_handle *trans,
db94535d
CM
389 struct btrfs_root *root, u64 bytenr,
390 u64 num_bytes, u32 *refs)
a28ec197 391{
5caf2a00 392 struct btrfs_path *path;
a28ec197 393 int ret;
e2fa7227 394 struct btrfs_key key;
5f39d397 395 struct extent_buffer *l;
234b63a0 396 struct btrfs_extent_item *item;
5caf2a00 397
db94535d 398 WARN_ON(num_bytes < root->sectorsize);
5caf2a00 399 path = btrfs_alloc_path();
db94535d
CM
400 key.objectid = bytenr;
401 key.offset = num_bytes;
62e2749e 402 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
5caf2a00 403 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
9f5fae2f 404 0, 0);
54aa1f4d
CM
405 if (ret < 0)
406 goto out;
5f39d397
CM
407 if (ret != 0) {
408 btrfs_print_leaf(root, path->nodes[0]);
db94535d 409 printk("failed to find block number %Lu\n", bytenr);
a28ec197 410 BUG();
5f39d397
CM
411 }
412 l = path->nodes[0];
5caf2a00 413 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
5f39d397 414 *refs = btrfs_extent_refs(l, item);
54aa1f4d 415out:
5caf2a00 416 btrfs_free_path(path);
a28ec197
CM
417 return 0;
418}
419
c5739bba
CM
420int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
421 struct btrfs_root *root)
422{
db94535d
CM
423 return btrfs_inc_extent_ref(trans, root, root->node->start,
424 root->node->len);
c5739bba
CM
425}
426
e089f05c 427int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
5f39d397 428 struct extent_buffer *buf)
02217ed2 429{
db94535d 430 u64 bytenr;
5f39d397
CM
431 u32 nritems;
432 struct btrfs_key key;
6407bf6d 433 struct btrfs_file_extent_item *fi;
02217ed2 434 int i;
db94535d 435 int level;
6407bf6d 436 int ret;
54aa1f4d
CM
437 int faili;
438 int err;
a28ec197 439
3768f368 440 if (!root->ref_cows)
a28ec197 441 return 0;
5f39d397 442
db94535d 443 level = btrfs_header_level(buf);
5f39d397
CM
444 nritems = btrfs_header_nritems(buf);
445 for (i = 0; i < nritems; i++) {
db94535d
CM
446 if (level == 0) {
447 u64 disk_bytenr;
5f39d397
CM
448 btrfs_item_key_to_cpu(buf, &key, i);
449 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
6407bf6d 450 continue;
5f39d397 451 fi = btrfs_item_ptr(buf, i,
6407bf6d 452 struct btrfs_file_extent_item);
5f39d397 453 if (btrfs_file_extent_type(buf, fi) ==
236454df
CM
454 BTRFS_FILE_EXTENT_INLINE)
455 continue;
db94535d
CM
456 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
457 if (disk_bytenr == 0)
3a686375 458 continue;
db94535d
CM
459 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
460 btrfs_file_extent_disk_num_bytes(buf, fi));
54aa1f4d
CM
461 if (ret) {
462 faili = i;
463 goto fail;
464 }
6407bf6d 465 } else {
db94535d
CM
466 bytenr = btrfs_node_blockptr(buf, i);
467 ret = btrfs_inc_extent_ref(trans, root, bytenr,
468 btrfs_level_size(root, level - 1));
54aa1f4d
CM
469 if (ret) {
470 faili = i;
471 goto fail;
472 }
6407bf6d 473 }
02217ed2
CM
474 }
475 return 0;
54aa1f4d 476fail:
ccd467d6 477 WARN_ON(1);
54aa1f4d 478 for (i =0; i < faili; i++) {
db94535d
CM
479 if (level == 0) {
480 u64 disk_bytenr;
5f39d397
CM
481 btrfs_item_key_to_cpu(buf, &key, i);
482 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
54aa1f4d 483 continue;
5f39d397 484 fi = btrfs_item_ptr(buf, i,
54aa1f4d 485 struct btrfs_file_extent_item);
5f39d397 486 if (btrfs_file_extent_type(buf, fi) ==
54aa1f4d
CM
487 BTRFS_FILE_EXTENT_INLINE)
488 continue;
db94535d
CM
489 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
490 if (disk_bytenr == 0)
54aa1f4d 491 continue;
db94535d
CM
492 err = btrfs_free_extent(trans, root, disk_bytenr,
493 btrfs_file_extent_disk_num_bytes(buf,
5f39d397 494 fi), 0);
54aa1f4d
CM
495 BUG_ON(err);
496 } else {
db94535d
CM
497 bytenr = btrfs_node_blockptr(buf, i);
498 err = btrfs_free_extent(trans, root, bytenr,
499 btrfs_level_size(root, level - 1), 0);
54aa1f4d
CM
500 BUG_ON(err);
501 }
502 }
503 return ret;
02217ed2
CM
504}
505
9078a3e1
CM
506static int write_one_cache_group(struct btrfs_trans_handle *trans,
507 struct btrfs_root *root,
508 struct btrfs_path *path,
509 struct btrfs_block_group_cache *cache)
510{
511 int ret;
512 int pending_ret;
513 struct btrfs_root *extent_root = root->fs_info->extent_root;
5f39d397
CM
514 unsigned long bi;
515 struct extent_buffer *leaf;
9078a3e1 516
9078a3e1 517 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
54aa1f4d
CM
518 if (ret < 0)
519 goto fail;
9078a3e1 520 BUG_ON(ret);
5f39d397
CM
521
522 leaf = path->nodes[0];
523 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
524 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
525 btrfs_mark_buffer_dirty(leaf);
9078a3e1 526 btrfs_release_path(extent_root, path);
54aa1f4d 527fail:
9078a3e1
CM
528 finish_current_insert(trans, extent_root);
529 pending_ret = del_pending_extents(trans, extent_root);
530 if (ret)
531 return ret;
532 if (pending_ret)
533 return pending_ret;
534 return 0;
535
536}
537
96b5179d
CM
538int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
539 struct btrfs_root *root)
9078a3e1 540{
96b5179d
CM
541 struct extent_map_tree *block_group_cache;
542 struct btrfs_block_group_cache *cache;
9078a3e1
CM
543 int ret;
544 int err = 0;
545 int werr = 0;
9078a3e1 546 struct btrfs_path *path;
96b5179d
CM
547 u64 last = 0;
548 u64 start;
549 u64 end;
550 u64 ptr;
9078a3e1 551
96b5179d 552 block_group_cache = &root->fs_info->block_group_cache;
9078a3e1
CM
553 path = btrfs_alloc_path();
554 if (!path)
555 return -ENOMEM;
556
557 while(1) {
96b5179d
CM
558 ret = find_first_extent_bit(block_group_cache, last,
559 &start, &end, BLOCK_GROUP_DIRTY);
560 if (ret)
9078a3e1 561 break;
54aa1f4d 562
96b5179d
CM
563 last = end + 1;
564 ret = get_state_private(block_group_cache, start, &ptr);
565 if (ret)
566 break;
567
ae2f5411 568 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
96b5179d
CM
569 err = write_one_cache_group(trans, root,
570 path, cache);
571 /*
572 * if we fail to write the cache group, we want
573 * to keep it marked dirty in hopes that a later
574 * write will work
575 */
576 if (err) {
577 werr = err;
578 continue;
9078a3e1 579 }
96b5179d
CM
580 clear_extent_bits(block_group_cache, start, end,
581 BLOCK_GROUP_DIRTY, GFP_NOFS);
9078a3e1
CM
582 }
583 btrfs_free_path(path);
584 return werr;
585}
586
587static int update_block_group(struct btrfs_trans_handle *trans,
588 struct btrfs_root *root,
db94535d
CM
589 u64 bytenr, u64 num_bytes, int alloc,
590 int mark_free, int data)
9078a3e1
CM
591{
592 struct btrfs_block_group_cache *cache;
593 struct btrfs_fs_info *info = root->fs_info;
db94535d 594 u64 total = num_bytes;
9078a3e1 595 u64 old_val;
db94535d 596 u64 byte_in_group;
96b5179d
CM
597 u64 start;
598 u64 end;
3e1ad54f 599
9078a3e1 600 while(total) {
db94535d 601 cache = btrfs_lookup_block_group(info, bytenr);
3e1ad54f 602 if (!cache) {
9078a3e1 603 return -1;
cd1bc465 604 }
db94535d
CM
605 byte_in_group = bytenr - cache->key.objectid;
606 WARN_ON(byte_in_group > cache->key.offset);
96b5179d
CM
607 start = cache->key.objectid;
608 end = start + cache->key.offset - 1;
609 set_extent_bits(&info->block_group_cache, start, end,
610 BLOCK_GROUP_DIRTY, GFP_NOFS);
9078a3e1
CM
611
612 old_val = btrfs_block_group_used(&cache->item);
db94535d 613 num_bytes = min(total, cache->key.offset - byte_in_group);
cd1bc465 614 if (alloc) {
1e2677e0 615 if (cache->data != data &&
84f54cfa 616 old_val < (cache->key.offset >> 1)) {
96b5179d
CM
617 int bit_to_clear;
618 int bit_to_set;
96b5179d 619 cache->data = data;
1e2677e0 620 if (data) {
b97f9203
Y
621 bit_to_clear = BLOCK_GROUP_METADATA;
622 bit_to_set = BLOCK_GROUP_DATA;
f84a8b36
CM
623 cache->item.flags &=
624 ~BTRFS_BLOCK_GROUP_MIXED;
1e2677e0
CM
625 cache->item.flags |=
626 BTRFS_BLOCK_GROUP_DATA;
627 } else {
b97f9203
Y
628 bit_to_clear = BLOCK_GROUP_DATA;
629 bit_to_set = BLOCK_GROUP_METADATA;
f84a8b36
CM
630 cache->item.flags &=
631 ~BTRFS_BLOCK_GROUP_MIXED;
1e2677e0
CM
632 cache->item.flags &=
633 ~BTRFS_BLOCK_GROUP_DATA;
634 }
96b5179d
CM
635 clear_extent_bits(&info->block_group_cache,
636 start, end, bit_to_clear,
637 GFP_NOFS);
638 set_extent_bits(&info->block_group_cache,
639 start, end, bit_to_set,
640 GFP_NOFS);
f84a8b36
CM
641 } else if (cache->data != data &&
642 cache->data != BTRFS_BLOCK_GROUP_MIXED) {
643 cache->data = BTRFS_BLOCK_GROUP_MIXED;
644 set_extent_bits(&info->block_group_cache,
645 start, end,
646 BLOCK_GROUP_DATA |
647 BLOCK_GROUP_METADATA,
648 GFP_NOFS);
1e2677e0 649 }
db94535d 650 old_val += num_bytes;
cd1bc465 651 } else {
db94535d 652 old_val -= num_bytes;
f510cfec
CM
653 if (mark_free) {
654 set_extent_dirty(&info->free_space_cache,
db94535d 655 bytenr, bytenr + num_bytes - 1,
f510cfec 656 GFP_NOFS);
e37c9e69 657 }
cd1bc465 658 }
9078a3e1 659 btrfs_set_block_group_used(&cache->item, old_val);
db94535d
CM
660 total -= num_bytes;
661 bytenr += num_bytes;
9078a3e1
CM
662 }
663 return 0;
664}
324ae4df
Y
665static int update_pinned_extents(struct btrfs_root *root,
666 u64 bytenr, u64 num, int pin)
667{
668 u64 len;
669 struct btrfs_block_group_cache *cache;
670 struct btrfs_fs_info *fs_info = root->fs_info;
671
672 if (pin) {
673 set_extent_dirty(&fs_info->pinned_extents,
674 bytenr, bytenr + num - 1, GFP_NOFS);
675 } else {
676 clear_extent_dirty(&fs_info->pinned_extents,
677 bytenr, bytenr + num - 1, GFP_NOFS);
678 }
679 while (num > 0) {
680 cache = btrfs_lookup_block_group(fs_info, bytenr);
681 WARN_ON(!cache);
682 len = min(num, cache->key.offset -
683 (bytenr - cache->key.objectid));
684 if (pin) {
685 cache->pinned += len;
686 fs_info->total_pinned += len;
687 } else {
688 cache->pinned -= len;
689 fs_info->total_pinned -= len;
690 }
691 bytenr += len;
692 num -= len;
693 }
694 return 0;
695}
9078a3e1 696
1a5bc167 697int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
ccd467d6 698{
ccd467d6 699 u64 last = 0;
1a5bc167
CM
700 u64 start;
701 u64 end;
702 struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
ccd467d6 703 int ret;
ccd467d6
CM
704
705 while(1) {
1a5bc167
CM
706 ret = find_first_extent_bit(pinned_extents, last,
707 &start, &end, EXTENT_DIRTY);
708 if (ret)
ccd467d6 709 break;
1a5bc167
CM
710 set_extent_dirty(copy, start, end, GFP_NOFS);
711 last = end + 1;
ccd467d6
CM
712 }
713 return 0;
714}
715
716int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
717 struct btrfs_root *root,
1a5bc167 718 struct extent_map_tree *unpin)
a28ec197 719{
1a5bc167
CM
720 u64 start;
721 u64 end;
a28ec197 722 int ret;
f510cfec 723 struct extent_map_tree *free_space_cache;
f510cfec 724 free_space_cache = &root->fs_info->free_space_cache;
a28ec197
CM
725
726 while(1) {
1a5bc167
CM
727 ret = find_first_extent_bit(unpin, 0, &start, &end,
728 EXTENT_DIRTY);
729 if (ret)
a28ec197 730 break;
324ae4df 731 update_pinned_extents(root, start, end + 1 - start, 0);
1a5bc167
CM
732 clear_extent_dirty(unpin, start, end, GFP_NOFS);
733 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
a28ec197
CM
734 }
735 return 0;
736}
737
e089f05c
CM
738static int finish_current_insert(struct btrfs_trans_handle *trans, struct
739 btrfs_root *extent_root)
037e6390 740{
e2fa7227 741 struct btrfs_key ins;
234b63a0 742 struct btrfs_extent_item extent_item;
037e6390 743 int ret;
1a5bc167
CM
744 int err = 0;
745 u64 start;
746 u64 end;
1261ec42 747 struct btrfs_fs_info *info = extent_root->fs_info;
037e6390 748
5f39d397 749 btrfs_set_stack_extent_refs(&extent_item, 1);
62e2749e 750 btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
5f39d397
CM
751 btrfs_set_stack_extent_owner(&extent_item,
752 extent_root->root_key.objectid);
037e6390 753
26b8003f 754 while(1) {
1a5bc167
CM
755 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
756 &end, EXTENT_LOCKED);
757 if (ret)
26b8003f
CM
758 break;
759
1a5bc167
CM
760 ins.objectid = start;
761 ins.offset = end + 1 - start;
762 err = btrfs_insert_item(trans, extent_root, &ins,
763 &extent_item, sizeof(extent_item));
764 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
765 GFP_NOFS);
037e6390 766 }
037e6390
CM
767 return 0;
768}
769
db94535d
CM
770static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
771 int pending)
e20d96d6 772{
1a5bc167 773 int err = 0;
5f39d397 774 struct extent_buffer *buf;
8ef97622 775
f4b9aa8d 776 if (!pending) {
db94535d 777 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
5f39d397
CM
778 if (buf) {
779 if (btrfs_buffer_uptodate(buf)) {
2c90e5d6
CM
780 u64 transid =
781 root->fs_info->running_transaction->transid;
5f39d397
CM
782 if (btrfs_header_generation(buf) == transid) {
783 free_extent_buffer(buf);
c549228f 784 return 1;
2c90e5d6 785 }
f4b9aa8d 786 }
5f39d397 787 free_extent_buffer(buf);
8ef97622 788 }
324ae4df 789 update_pinned_extents(root, bytenr, num_bytes, 1);
f4b9aa8d 790 } else {
1a5bc167 791 set_extent_bits(&root->fs_info->pending_del,
db94535d
CM
792 bytenr, bytenr + num_bytes - 1,
793 EXTENT_LOCKED, GFP_NOFS);
f4b9aa8d 794 }
be744175 795 BUG_ON(err < 0);
e20d96d6
CM
796 return 0;
797}
798
fec577fb 799/*
a28ec197 800 * remove an extent from the root, returns 0 on success
fec577fb 801 */
e089f05c 802static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
db94535d 803 *root, u64 bytenr, u64 num_bytes, int pin,
e37c9e69 804 int mark_free)
a28ec197 805{
5caf2a00 806 struct btrfs_path *path;
e2fa7227 807 struct btrfs_key key;
1261ec42
CM
808 struct btrfs_fs_info *info = root->fs_info;
809 struct btrfs_root *extent_root = info->extent_root;
5f39d397 810 struct extent_buffer *leaf;
a28ec197 811 int ret;
234b63a0 812 struct btrfs_extent_item *ei;
cf27e1ee 813 u32 refs;
037e6390 814
db94535d 815 key.objectid = bytenr;
62e2749e 816 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
db94535d 817 key.offset = num_bytes;
a28ec197 818
5caf2a00 819 path = btrfs_alloc_path();
54aa1f4d
CM
820 if (!path)
821 return -ENOMEM;
5f26f772 822
54aa1f4d
CM
823 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
824 if (ret < 0)
825 return ret;
826 BUG_ON(ret);
5f39d397
CM
827
828 leaf = path->nodes[0];
829 ei = btrfs_item_ptr(leaf, path->slots[0],
123abc88 830 struct btrfs_extent_item);
5f39d397
CM
831 refs = btrfs_extent_refs(leaf, ei);
832 BUG_ON(refs == 0);
833 refs -= 1;
834 btrfs_set_extent_refs(leaf, ei, refs);
835 btrfs_mark_buffer_dirty(leaf);
836
cf27e1ee 837 if (refs == 0) {
db94535d
CM
838 u64 super_used;
839 u64 root_used;
78fae27e
CM
840
841 if (pin) {
db94535d 842 ret = pin_down_bytes(root, bytenr, num_bytes, 0);
c549228f
Y
843 if (ret > 0)
844 mark_free = 1;
845 BUG_ON(ret < 0);
78fae27e
CM
846 }
847
58176a96 848 /* block accounting for super block */
db94535d
CM
849 super_used = btrfs_super_bytes_used(&info->super_copy);
850 btrfs_set_super_bytes_used(&info->super_copy,
851 super_used - num_bytes);
58176a96
JB
852
853 /* block accounting for root item */
db94535d 854 root_used = btrfs_root_used(&root->root_item);
5f39d397 855 btrfs_set_root_used(&root->root_item,
db94535d 856 root_used - num_bytes);
58176a96 857
5caf2a00 858 ret = btrfs_del_item(trans, extent_root, path);
54aa1f4d
CM
859 if (ret) {
860 return ret;
861 }
db94535d 862 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1e2677e0 863 mark_free, 0);
9078a3e1 864 BUG_ON(ret);
a28ec197 865 }
5caf2a00 866 btrfs_free_path(path);
e089f05c 867 finish_current_insert(trans, extent_root);
a28ec197
CM
868 return ret;
869}
870
a28ec197
CM
871/*
872 * find all the blocks marked as pending in the radix tree and remove
873 * them from the extent map
874 */
e089f05c
CM
875static int del_pending_extents(struct btrfs_trans_handle *trans, struct
876 btrfs_root *extent_root)
a28ec197
CM
877{
878 int ret;
e20d96d6 879 int err = 0;
1a5bc167
CM
880 u64 start;
881 u64 end;
882 struct extent_map_tree *pending_del;
883 struct extent_map_tree *pinned_extents;
8ef97622 884
1a5bc167
CM
885 pending_del = &extent_root->fs_info->pending_del;
886 pinned_extents = &extent_root->fs_info->pinned_extents;
a28ec197
CM
887
888 while(1) {
1a5bc167
CM
889 ret = find_first_extent_bit(pending_del, 0, &start, &end,
890 EXTENT_LOCKED);
891 if (ret)
a28ec197 892 break;
324ae4df 893 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1a5bc167
CM
894 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
895 GFP_NOFS);
896 ret = __free_extent(trans, extent_root,
897 start, end + 1 - start, 0, 0);
898 if (ret)
899 err = ret;
fec577fb 900 }
e20d96d6 901 return err;
fec577fb
CM
902}
903
904/*
905 * remove an extent from the root, returns 0 on success
906 */
e089f05c 907int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
db94535d 908 *root, u64 bytenr, u64 num_bytes, int pin)
fec577fb 909{
9f5fae2f 910 struct btrfs_root *extent_root = root->fs_info->extent_root;
fec577fb
CM
911 int pending_ret;
912 int ret;
a28ec197 913
db94535d 914 WARN_ON(num_bytes < root->sectorsize);
fec577fb 915 if (root == extent_root) {
db94535d 916 pin_down_bytes(root, bytenr, num_bytes, 1);
fec577fb
CM
917 return 0;
918 }
db94535d 919 ret = __free_extent(trans, root, bytenr, num_bytes, pin, pin == 0);
e20d96d6 920 pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
fec577fb
CM
921 return ret ? ret : pending_ret;
922}
923
924/*
925 * walks the btree of allocated extents and find a hole of a given size.
926 * The key ins is changed to record the hole:
927 * ins->objectid == block start
62e2749e 928 * ins->flags = BTRFS_EXTENT_ITEM_KEY
fec577fb
CM
929 * ins->offset == number of blocks
930 * Any available blocks before search_start are skipped.
931 */
e089f05c 932static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
db94535d
CM
933 *orig_root, u64 num_bytes, u64 empty_size,
934 u64 search_start, u64 search_end, u64 hint_byte,
f2654de4
CM
935 struct btrfs_key *ins, u64 exclude_start,
936 u64 exclude_nr, int data)
fec577fb 937{
5caf2a00 938 struct btrfs_path *path;
e2fa7227 939 struct btrfs_key key;
fec577fb
CM
940 int ret;
941 u64 hole_size = 0;
942 int slot = 0;
db94535d 943 u64 last_byte = 0;
be744175 944 u64 orig_search_start = search_start;
fec577fb 945 int start_found;
5f39d397 946 struct extent_buffer *l;
9f5fae2f 947 struct btrfs_root * root = orig_root->fs_info->extent_root;
f2458e1d 948 struct btrfs_fs_info *info = root->fs_info;
db94535d 949 u64 total_needed = num_bytes;
e20d96d6 950 int level;
be08c1b9 951 struct btrfs_block_group_cache *block_group;
be744175 952 int full_scan = 0;
fbdc762b 953 int wrapped = 0;
f84a8b36 954 u64 cached_start;
fec577fb 955
db94535d 956 WARN_ON(num_bytes < root->sectorsize);
b1a4d965
CM
957 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
958
5f39d397
CM
959 level = btrfs_header_level(root->node);
960
257d0ce3
CM
961 if (num_bytes >= 96 * 1024 * 1024 && hint_byte) {
962 data = BTRFS_BLOCK_GROUP_MIXED;
963 }
964
3e1ad54f 965 if (search_end == (u64)-1)
db94535d
CM
966 search_end = btrfs_super_total_bytes(&info->super_copy);
967 if (hint_byte) {
968 block_group = btrfs_lookup_block_group(info, hint_byte);
be744175 969 block_group = btrfs_find_block_group(root, block_group,
db94535d 970 hint_byte, data, 1);
be744175
CM
971 } else {
972 block_group = btrfs_find_block_group(root,
973 trans->block_group, 0,
de428b63 974 data, 1);
be744175
CM
975 }
976
6702ed49 977 total_needed += empty_size;
e011599b 978 path = btrfs_alloc_path();
be744175 979check_failed:
5e5745dc
Y
980 search_start = find_search_start(root, &block_group, search_start,
981 total_needed, data, full_scan);
f84a8b36 982 cached_start = search_start;
5caf2a00 983 btrfs_init_path(path);
fec577fb
CM
984 ins->objectid = search_start;
985 ins->offset = 0;
fec577fb 986 start_found = 0;
2cc58cf2 987 path->reada = 2;
e37c9e69 988
5caf2a00 989 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
0f70abe2
CM
990 if (ret < 0)
991 goto error;
aa5d6bed 992
e37c9e69 993 if (path->slots[0] > 0) {
5caf2a00 994 path->slots[0]--;
e37c9e69
CM
995 }
996
5f39d397
CM
997 l = path->nodes[0];
998 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
999
e37c9e69
CM
1000 /*
1001 * a rare case, go back one key if we hit a block group item
1002 * instead of an extent item
1003 */
1004 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
1005 key.objectid + key.offset >= search_start) {
1006 ins->objectid = key.objectid;
1007 ins->offset = key.offset - 1;
1008 btrfs_release_path(root, path);
1009 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1010 if (ret < 0)
1011 goto error;
1012
1013 if (path->slots[0] > 0) {
1014 path->slots[0]--;
1015 }
1016 }
0579da42 1017
fec577fb 1018 while (1) {
5f39d397 1019 l = path->nodes[0];
5caf2a00 1020 slot = path->slots[0];
5f39d397 1021 if (slot >= btrfs_header_nritems(l)) {
5caf2a00 1022 ret = btrfs_next_leaf(root, path);
fec577fb
CM
1023 if (ret == 0)
1024 continue;
0f70abe2
CM
1025 if (ret < 0)
1026 goto error;
e19caa5f
CM
1027
1028 search_start = max(search_start,
1029 block_group->key.objectid);
fec577fb
CM
1030 if (!start_found) {
1031 ins->objectid = search_start;
3e1ad54f 1032 ins->offset = search_end - search_start;
fec577fb
CM
1033 start_found = 1;
1034 goto check_pending;
1035 }
db94535d
CM
1036 ins->objectid = last_byte > search_start ?
1037 last_byte : search_start;
3e1ad54f 1038 ins->offset = search_end - ins->objectid;
e19caa5f 1039 BUG_ON(ins->objectid >= search_end);
fec577fb
CM
1040 goto check_pending;
1041 }
5f39d397 1042 btrfs_item_key_to_cpu(l, &key, slot);
96b5179d 1043
db94535d 1044 if (key.objectid >= search_start && key.objectid > last_byte &&
e37c9e69 1045 start_found) {
db94535d
CM
1046 if (last_byte < search_start)
1047 last_byte = search_start;
1048 hole_size = key.objectid - last_byte;
1049 if (hole_size >= num_bytes) {
1050 ins->objectid = last_byte;
e37c9e69
CM
1051 ins->offset = hole_size;
1052 goto check_pending;
0579da42 1053 }
fec577fb 1054 }
96b5179d
CM
1055 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
1056 if (!start_found) {
db94535d 1057 last_byte = key.objectid;
96b5179d
CM
1058 start_found = 1;
1059 }
e37c9e69 1060 goto next;
96b5179d
CM
1061 }
1062
e37c9e69 1063
0579da42 1064 start_found = 1;
db94535d 1065 last_byte = key.objectid + key.offset;
f510cfec 1066
257d0ce3
CM
1067 if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
1068 last_byte >= block_group->key.objectid +
be744175
CM
1069 block_group->key.offset) {
1070 btrfs_release_path(root, path);
1071 search_start = block_group->key.objectid +
e19caa5f 1072 block_group->key.offset;
be744175
CM
1073 goto new_group;
1074 }
9078a3e1 1075next:
5caf2a00 1076 path->slots[0]++;
de428b63 1077 cond_resched();
fec577fb 1078 }
fec577fb
CM
1079check_pending:
1080 /* we have to make sure we didn't find an extent that has already
1081 * been allocated by the map tree or the original allocation
1082 */
5caf2a00 1083 btrfs_release_path(root, path);
fec577fb 1084 BUG_ON(ins->objectid < search_start);
e37c9e69 1085
db94535d 1086 if (ins->objectid + num_bytes >= search_end)
cf67582b 1087 goto enospc;
257d0ce3 1088 if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
5cf66426 1089 ins->objectid + num_bytes > block_group->
e19caa5f
CM
1090 key.objectid + block_group->key.offset) {
1091 search_start = block_group->key.objectid +
1092 block_group->key.offset;
1093 goto new_group;
1094 }
1a5bc167 1095 if (test_range_bit(&info->extent_ins, ins->objectid,
db94535d
CM
1096 ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1097 search_start = ins->objectid + num_bytes;
1a5bc167
CM
1098 goto new_group;
1099 }
1100 if (test_range_bit(&info->pinned_extents, ins->objectid,
db94535d
CM
1101 ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1102 search_start = ins->objectid + num_bytes;
1a5bc167 1103 goto new_group;
fec577fb 1104 }
db94535d 1105 if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
f2654de4
CM
1106 ins->objectid < exclude_start + exclude_nr)) {
1107 search_start = exclude_start + exclude_nr;
1108 goto new_group;
1109 }
e37c9e69 1110 if (!data) {
5276aeda 1111 block_group = btrfs_lookup_block_group(info, ins->objectid);
26b8003f
CM
1112 if (block_group)
1113 trans->block_group = block_group;
f2458e1d 1114 }
db94535d 1115 ins->offset = num_bytes;
5caf2a00 1116 btrfs_free_path(path);
fec577fb 1117 return 0;
be744175
CM
1118
1119new_group:
db94535d 1120 if (search_start + num_bytes >= search_end) {
cf67582b 1121enospc:
be744175 1122 search_start = orig_search_start;
fbdc762b
CM
1123 if (full_scan) {
1124 ret = -ENOSPC;
1125 goto error;
1126 }
6702ed49
CM
1127 if (wrapped) {
1128 if (!full_scan)
1129 total_needed -= empty_size;
fbdc762b 1130 full_scan = 1;
6702ed49 1131 } else
fbdc762b 1132 wrapped = 1;
be744175 1133 }
5276aeda 1134 block_group = btrfs_lookup_block_group(info, search_start);
fbdc762b 1135 cond_resched();
be744175
CM
1136 if (!full_scan)
1137 block_group = btrfs_find_block_group(root, block_group,
de428b63 1138 search_start, data, 0);
be744175
CM
1139 goto check_failed;
1140
0f70abe2 1141error:
5caf2a00
CM
1142 btrfs_release_path(root, path);
1143 btrfs_free_path(path);
0f70abe2 1144 return ret;
fec577fb 1145}
fec577fb
CM
1146/*
1147 * finds a free extent and does all the dirty work required for allocation
1148 * returns the key for the extent through ins, and a tree buffer for
1149 * the first block of the extent through buf.
1150 *
1151 * returns 0 if everything worked, non-zero otherwise.
1152 */
4d775673
CM
1153int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1154 struct btrfs_root *root, u64 owner,
db94535d 1155 u64 num_bytes, u64 empty_size, u64 hint_byte,
be08c1b9 1156 u64 search_end, struct btrfs_key *ins, int data)
fec577fb
CM
1157{
1158 int ret;
1159 int pending_ret;
db94535d 1160 u64 super_used, root_used;
fbdc762b 1161 u64 search_start = 0;
1261ec42
CM
1162 struct btrfs_fs_info *info = root->fs_info;
1163 struct btrfs_root *extent_root = info->extent_root;
234b63a0 1164 struct btrfs_extent_item extent_item;
037e6390 1165
5f39d397
CM
1166 btrfs_set_stack_extent_refs(&extent_item, 1);
1167 btrfs_set_stack_extent_owner(&extent_item, owner);
fec577fb 1168
db94535d
CM
1169 WARN_ON(num_bytes < root->sectorsize);
1170 ret = find_free_extent(trans, root, num_bytes, empty_size,
1171 search_start, search_end, hint_byte, ins,
26b8003f
CM
1172 trans->alloc_exclude_start,
1173 trans->alloc_exclude_nr, data);
ccd467d6 1174 BUG_ON(ret);
f2654de4
CM
1175 if (ret)
1176 return ret;
fec577fb 1177
58176a96 1178 /* block accounting for super block */
db94535d
CM
1179 super_used = btrfs_super_bytes_used(&info->super_copy);
1180 btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
26b8003f 1181
58176a96 1182 /* block accounting for root item */
db94535d
CM
1183 root_used = btrfs_root_used(&root->root_item);
1184 btrfs_set_root_used(&root->root_item, root_used + num_bytes);
58176a96 1185
f510cfec
CM
1186 clear_extent_dirty(&root->fs_info->free_space_cache,
1187 ins->objectid, ins->objectid + ins->offset - 1,
1188 GFP_NOFS);
1189
26b8003f 1190 if (root == extent_root) {
1a5bc167
CM
1191 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1192 ins->objectid + ins->offset - 1,
1193 EXTENT_LOCKED, GFP_NOFS);
e19caa5f 1194 WARN_ON(data == 1);
26b8003f
CM
1195 goto update_block;
1196 }
1197
1198 WARN_ON(trans->alloc_exclude_nr);
1199 trans->alloc_exclude_start = ins->objectid;
1200 trans->alloc_exclude_nr = ins->offset;
e089f05c
CM
1201 ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1202 sizeof(extent_item));
037e6390 1203
26b8003f
CM
1204 trans->alloc_exclude_start = 0;
1205 trans->alloc_exclude_nr = 0;
1206
ccd467d6 1207 BUG_ON(ret);
e089f05c 1208 finish_current_insert(trans, extent_root);
e20d96d6 1209 pending_ret = del_pending_extents(trans, extent_root);
f510cfec 1210
e37c9e69 1211 if (ret) {
037e6390 1212 return ret;
e37c9e69
CM
1213 }
1214 if (pending_ret) {
037e6390 1215 return pending_ret;
e37c9e69 1216 }
26b8003f
CM
1217
1218update_block:
1e2677e0
CM
1219 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1220 data);
fabb5681 1221 BUG_ON(ret);
037e6390 1222 return 0;
fec577fb
CM
1223}
1224
1225/*
1226 * helper function to allocate a block for a given tree
1227 * returns the tree buffer or NULL.
1228 */
5f39d397 1229struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
db94535d
CM
1230 struct btrfs_root *root,
1231 u32 blocksize, u64 hint,
5f39d397 1232 u64 empty_size)
fec577fb 1233{
e2fa7227 1234 struct btrfs_key ins;
fec577fb 1235 int ret;
5f39d397 1236 struct extent_buffer *buf;
fec577fb 1237
4d775673 1238 ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
db94535d
CM
1239 blocksize, empty_size, hint,
1240 (u64)-1, &ins, 0);
fec577fb 1241 if (ret) {
54aa1f4d
CM
1242 BUG_ON(ret > 0);
1243 return ERR_PTR(ret);
fec577fb 1244 }
db94535d 1245 buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
54aa1f4d 1246 if (!buf) {
db94535d 1247 btrfs_free_extent(trans, root, ins.objectid, blocksize, 0);
54aa1f4d
CM
1248 return ERR_PTR(-ENOMEM);
1249 }
5f39d397
CM
1250 btrfs_set_buffer_uptodate(buf);
1251 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
1252 buf->start + buf->len - 1, GFP_NOFS);
19c00ddc
CM
1253 set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
1254 buf->start, buf->start + buf->len - 1,
1255 EXTENT_CSUM, GFP_NOFS);
1256 buf->flags |= EXTENT_CSUM;
6b80053d 1257 btrfs_set_buffer_defrag(buf);
d3c2fdcf 1258 trans->blocks_used++;
fec577fb
CM
1259 return buf;
1260}
a28ec197 1261
6407bf6d 1262static int drop_leaf_ref(struct btrfs_trans_handle *trans,
5f39d397 1263 struct btrfs_root *root, struct extent_buffer *leaf)
6407bf6d 1264{
5f39d397 1265 struct btrfs_key key;
6407bf6d
CM
1266 struct btrfs_file_extent_item *fi;
1267 int i;
1268 int nritems;
1269 int ret;
1270
5f39d397
CM
1271 BUG_ON(!btrfs_is_leaf(leaf));
1272 nritems = btrfs_header_nritems(leaf);
6407bf6d 1273 for (i = 0; i < nritems; i++) {
db94535d 1274 u64 disk_bytenr;
5f39d397
CM
1275
1276 btrfs_item_key_to_cpu(leaf, &key, i);
1277 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
6407bf6d
CM
1278 continue;
1279 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
5f39d397
CM
1280 if (btrfs_file_extent_type(leaf, fi) ==
1281 BTRFS_FILE_EXTENT_INLINE)
236454df 1282 continue;
6407bf6d
CM
1283 /*
1284 * FIXME make sure to insert a trans record that
1285 * repeats the snapshot del on crash
1286 */
db94535d
CM
1287 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1288 if (disk_bytenr == 0)
3a686375 1289 continue;
db94535d
CM
1290 ret = btrfs_free_extent(trans, root, disk_bytenr,
1291 btrfs_file_extent_disk_num_bytes(leaf, fi), 0);
6407bf6d
CM
1292 BUG_ON(ret);
1293 }
1294 return 0;
1295}
1296
e011599b 1297static void reada_walk_down(struct btrfs_root *root,
5f39d397 1298 struct extent_buffer *node)
e011599b
CM
1299{
1300 int i;
1301 u32 nritems;
db94535d 1302 u64 bytenr;
e011599b
CM
1303 int ret;
1304 u32 refs;
db94535d
CM
1305 int level;
1306 u32 blocksize;
e011599b 1307
5f39d397 1308 nritems = btrfs_header_nritems(node);
db94535d 1309 level = btrfs_header_level(node);
e011599b 1310 for (i = 0; i < nritems; i++) {
db94535d
CM
1311 bytenr = btrfs_node_blockptr(node, i);
1312 blocksize = btrfs_level_size(root, level - 1);
1313 ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs);
e011599b
CM
1314 BUG_ON(ret);
1315 if (refs != 1)
1316 continue;
409eb95d 1317 mutex_unlock(&root->fs_info->fs_mutex);
db94535d 1318 ret = readahead_tree_block(root, bytenr, blocksize);
409eb95d
CM
1319 cond_resched();
1320 mutex_lock(&root->fs_info->fs_mutex);
e011599b
CM
1321 if (ret)
1322 break;
1323 }
1324}
1325
9aca1d51
CM
1326/*
1327 * helper function for drop_snapshot, this walks down the tree dropping ref
1328 * counts as it goes.
1329 */
e089f05c
CM
1330static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1331 *root, struct btrfs_path *path, int *level)
20524f02 1332{
5f39d397
CM
1333 struct extent_buffer *next;
1334 struct extent_buffer *cur;
db94535d
CM
1335 u64 bytenr;
1336 u32 blocksize;
20524f02
CM
1337 int ret;
1338 u32 refs;
1339
5caf2a00
CM
1340 WARN_ON(*level < 0);
1341 WARN_ON(*level >= BTRFS_MAX_LEVEL);
5f39d397 1342 ret = lookup_extent_ref(trans, root,
db94535d
CM
1343 path->nodes[*level]->start,
1344 path->nodes[*level]->len, &refs);
20524f02
CM
1345 BUG_ON(ret);
1346 if (refs > 1)
1347 goto out;
e011599b 1348
9aca1d51
CM
1349 /*
1350 * walk down to the last node level and free all the leaves
1351 */
6407bf6d 1352 while(*level >= 0) {
5caf2a00
CM
1353 WARN_ON(*level < 0);
1354 WARN_ON(*level >= BTRFS_MAX_LEVEL);
20524f02 1355 cur = path->nodes[*level];
e011599b
CM
1356
1357 if (*level > 0 && path->slots[*level] == 0)
5f39d397 1358 reada_walk_down(root, cur);
e011599b 1359
5f39d397 1360 if (btrfs_header_level(cur) != *level)
2c90e5d6 1361 WARN_ON(1);
e011599b 1362
7518a238 1363 if (path->slots[*level] >=
5f39d397 1364 btrfs_header_nritems(cur))
20524f02 1365 break;
6407bf6d
CM
1366 if (*level == 0) {
1367 ret = drop_leaf_ref(trans, root, cur);
1368 BUG_ON(ret);
1369 break;
1370 }
db94535d
CM
1371 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1372 blocksize = btrfs_level_size(root, *level - 1);
1373 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
6407bf6d
CM
1374 BUG_ON(ret);
1375 if (refs != 1) {
20524f02 1376 path->slots[*level]++;
db94535d
CM
1377 ret = btrfs_free_extent(trans, root, bytenr,
1378 blocksize, 1);
20524f02
CM
1379 BUG_ON(ret);
1380 continue;
1381 }
db94535d 1382 next = btrfs_find_tree_block(root, bytenr, blocksize);
5f39d397
CM
1383 if (!next || !btrfs_buffer_uptodate(next)) {
1384 free_extent_buffer(next);
e9d0b13b 1385 mutex_unlock(&root->fs_info->fs_mutex);
db94535d 1386 next = read_tree_block(root, bytenr, blocksize);
e9d0b13b
CM
1387 mutex_lock(&root->fs_info->fs_mutex);
1388
1389 /* we dropped the lock, check one more time */
db94535d
CM
1390 ret = lookup_extent_ref(trans, root, bytenr,
1391 blocksize, &refs);
e9d0b13b
CM
1392 BUG_ON(ret);
1393 if (refs != 1) {
1394 path->slots[*level]++;
5f39d397 1395 free_extent_buffer(next);
e9d0b13b 1396 ret = btrfs_free_extent(trans, root,
db94535d 1397 bytenr, blocksize, 1);
e9d0b13b
CM
1398 BUG_ON(ret);
1399 continue;
1400 }
1401 }
5caf2a00 1402 WARN_ON(*level <= 0);
83e15a28 1403 if (path->nodes[*level-1])
5f39d397 1404 free_extent_buffer(path->nodes[*level-1]);
20524f02 1405 path->nodes[*level-1] = next;
5f39d397 1406 *level = btrfs_header_level(next);
20524f02
CM
1407 path->slots[*level] = 0;
1408 }
1409out:
5caf2a00
CM
1410 WARN_ON(*level < 0);
1411 WARN_ON(*level >= BTRFS_MAX_LEVEL);
db94535d
CM
1412 ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
1413 path->nodes[*level]->len, 1);
5f39d397 1414 free_extent_buffer(path->nodes[*level]);
20524f02
CM
1415 path->nodes[*level] = NULL;
1416 *level += 1;
1417 BUG_ON(ret);
1418 return 0;
1419}
1420
9aca1d51
CM
1421/*
1422 * helper for dropping snapshots. This walks back up the tree in the path
1423 * to find the first node higher up where we haven't yet gone through
1424 * all the slots
1425 */
e089f05c
CM
1426static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1427 *root, struct btrfs_path *path, int *level)
20524f02
CM
1428{
1429 int i;
1430 int slot;
1431 int ret;
9f3a7427
CM
1432 struct btrfs_root_item *root_item = &root->root_item;
1433
234b63a0 1434 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
20524f02 1435 slot = path->slots[i];
5f39d397
CM
1436 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
1437 struct extent_buffer *node;
1438 struct btrfs_disk_key disk_key;
1439 node = path->nodes[i];
20524f02
CM
1440 path->slots[i]++;
1441 *level = i;
9f3a7427 1442 WARN_ON(*level == 0);
5f39d397 1443 btrfs_node_key(node, &disk_key, path->slots[i]);
9f3a7427 1444 memcpy(&root_item->drop_progress,
5f39d397 1445 &disk_key, sizeof(disk_key));
9f3a7427 1446 root_item->drop_level = i;
20524f02
CM
1447 return 0;
1448 } else {
e089f05c 1449 ret = btrfs_free_extent(trans, root,
db94535d
CM
1450 path->nodes[*level]->start,
1451 path->nodes[*level]->len, 1);
6407bf6d 1452 BUG_ON(ret);
5f39d397 1453 free_extent_buffer(path->nodes[*level]);
83e15a28 1454 path->nodes[*level] = NULL;
20524f02 1455 *level = i + 1;
20524f02
CM
1456 }
1457 }
1458 return 1;
1459}
1460
9aca1d51
CM
1461/*
1462 * drop the reference count on the tree rooted at 'snap'. This traverses
1463 * the tree freeing any blocks that have a ref count of zero after being
1464 * decremented.
1465 */
e089f05c 1466int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
9f3a7427 1467 *root)
20524f02 1468{
3768f368 1469 int ret = 0;
9aca1d51 1470 int wret;
20524f02 1471 int level;
5caf2a00 1472 struct btrfs_path *path;
20524f02
CM
1473 int i;
1474 int orig_level;
9f3a7427 1475 struct btrfs_root_item *root_item = &root->root_item;
20524f02 1476
5caf2a00
CM
1477 path = btrfs_alloc_path();
1478 BUG_ON(!path);
20524f02 1479
5f39d397 1480 level = btrfs_header_level(root->node);
20524f02 1481 orig_level = level;
9f3a7427
CM
1482 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1483 path->nodes[level] = root->node;
f510cfec 1484 extent_buffer_get(root->node);
9f3a7427
CM
1485 path->slots[level] = 0;
1486 } else {
1487 struct btrfs_key key;
5f39d397
CM
1488 struct btrfs_disk_key found_key;
1489 struct extent_buffer *node;
6702ed49 1490
9f3a7427 1491 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6702ed49
CM
1492 level = root_item->drop_level;
1493 path->lowest_level = level;
9f3a7427 1494 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6702ed49 1495 if (wret < 0) {
9f3a7427
CM
1496 ret = wret;
1497 goto out;
1498 }
5f39d397
CM
1499 node = path->nodes[level];
1500 btrfs_node_key(node, &found_key, path->slots[level]);
1501 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
1502 sizeof(found_key)));
9f3a7427 1503 }
20524f02 1504 while(1) {
5caf2a00 1505 wret = walk_down_tree(trans, root, path, &level);
9aca1d51 1506 if (wret > 0)
20524f02 1507 break;
9aca1d51
CM
1508 if (wret < 0)
1509 ret = wret;
1510
5caf2a00 1511 wret = walk_up_tree(trans, root, path, &level);
9aca1d51 1512 if (wret > 0)
20524f02 1513 break;
9aca1d51
CM
1514 if (wret < 0)
1515 ret = wret;
409eb95d 1516 ret = -EAGAIN;
409eb95d 1517 break;
20524f02 1518 }
83e15a28 1519 for (i = 0; i <= orig_level; i++) {
5caf2a00 1520 if (path->nodes[i]) {
5f39d397 1521 free_extent_buffer(path->nodes[i]);
0f82731f 1522 path->nodes[i] = NULL;
83e15a28 1523 }
20524f02 1524 }
9f3a7427 1525out:
5caf2a00 1526 btrfs_free_path(path);
9aca1d51 1527 return ret;
20524f02 1528}
9078a3e1 1529
96b5179d 1530int btrfs_free_block_groups(struct btrfs_fs_info *info)
9078a3e1 1531{
96b5179d
CM
1532 u64 start;
1533 u64 end;
b97f9203 1534 u64 ptr;
9078a3e1 1535 int ret;
9078a3e1 1536 while(1) {
96b5179d
CM
1537 ret = find_first_extent_bit(&info->block_group_cache, 0,
1538 &start, &end, (unsigned int)-1);
1539 if (ret)
9078a3e1 1540 break;
b97f9203
Y
1541 ret = get_state_private(&info->block_group_cache, start, &ptr);
1542 if (!ret)
1543 kfree((void *)(unsigned long)ptr);
96b5179d
CM
1544 clear_extent_bits(&info->block_group_cache, start,
1545 end, (unsigned int)-1, GFP_NOFS);
9078a3e1 1546 }
e37c9e69 1547 while(1) {
f510cfec
CM
1548 ret = find_first_extent_bit(&info->free_space_cache, 0,
1549 &start, &end, EXTENT_DIRTY);
1550 if (ret)
e37c9e69 1551 break;
f510cfec
CM
1552 clear_extent_dirty(&info->free_space_cache, start,
1553 end, GFP_NOFS);
e37c9e69 1554 }
be744175
CM
1555 return 0;
1556}
1557
9078a3e1
CM
1558int btrfs_read_block_groups(struct btrfs_root *root)
1559{
1560 struct btrfs_path *path;
1561 int ret;
1562 int err = 0;
96b5179d 1563 int bit;
9078a3e1 1564 struct btrfs_block_group_cache *cache;
be744175 1565 struct btrfs_fs_info *info = root->fs_info;
96b5179d 1566 struct extent_map_tree *block_group_cache;
9078a3e1
CM
1567 struct btrfs_key key;
1568 struct btrfs_key found_key;
5f39d397 1569 struct extent_buffer *leaf;
96b5179d
CM
1570
1571 block_group_cache = &info->block_group_cache;
9078a3e1 1572
be744175 1573 root = info->extent_root;
9078a3e1 1574 key.objectid = 0;
db94535d 1575 key.offset = BTRFS_BLOCK_GROUP_SIZE;
9078a3e1
CM
1576 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1577
1578 path = btrfs_alloc_path();
1579 if (!path)
1580 return -ENOMEM;
1581
1582 while(1) {
be744175 1583 ret = btrfs_search_slot(NULL, info->extent_root,
9078a3e1
CM
1584 &key, path, 0, 0);
1585 if (ret != 0) {
1586 err = ret;
1587 break;
1588 }
5f39d397
CM
1589 leaf = path->nodes[0];
1590 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9078a3e1
CM
1591 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1592 if (!cache) {
1593 err = -1;
1594 break;
1595 }
3e1ad54f 1596
5f39d397
CM
1597 read_extent_buffer(leaf, &cache->item,
1598 btrfs_item_ptr_offset(leaf, path->slots[0]),
1599 sizeof(cache->item));
9078a3e1 1600 memcpy(&cache->key, &found_key, sizeof(found_key));
e37c9e69 1601 cache->cached = 0;
324ae4df 1602 cache->pinned = 0;
9078a3e1
CM
1603 key.objectid = found_key.objectid + found_key.offset;
1604 btrfs_release_path(root, path);
5f39d397 1605
f84a8b36
CM
1606 if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) {
1607 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
1608 cache->data = BTRFS_BLOCK_GROUP_MIXED;
1609 } else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
96b5179d 1610 bit = BLOCK_GROUP_DATA;
f84a8b36 1611 cache->data = BTRFS_BLOCK_GROUP_DATA;
96b5179d
CM
1612 } else {
1613 bit = BLOCK_GROUP_METADATA;
1614 cache->data = 0;
31f3c99b 1615 }
96b5179d
CM
1616
1617 /* use EXTENT_LOCKED to prevent merging */
1618 set_extent_bits(block_group_cache, found_key.objectid,
1619 found_key.objectid + found_key.offset - 1,
1620 bit | EXTENT_LOCKED, GFP_NOFS);
1621 set_state_private(block_group_cache, found_key.objectid,
ae2f5411 1622 (unsigned long)cache);
96b5179d 1623
9078a3e1 1624 if (key.objectid >=
db94535d 1625 btrfs_super_total_bytes(&info->super_copy))
9078a3e1
CM
1626 break;
1627 }
1628
1629 btrfs_free_path(path);
1630 return 0;
1631}