Btrfs: Add missing hunk from Yan Zheng's cache reclaim patch
[linux-2.6-block.git] / fs / btrfs / extent-tree.c
CommitLineData
6cbd5570
CM
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
ec6b910f 18#include <linux/sched.h>
edbd8d4e 19#include <linux/pagemap.h>
ec44a35c 20#include <linux/writeback.h>
74493f7a 21#include "hash.h"
a5eb62e3 22#include "crc32c.h"
fec577fb
CM
23#include "ctree.h"
24#include "disk-io.h"
25#include "print-tree.h"
e089f05c 26#include "transaction.h"
0b86a832 27#include "volumes.h"
925baedd 28#include "locking.h"
31153d81 29#include "ref-cache.h"
fec577fb 30
0b86a832 31#define BLOCK_GROUP_DATA EXTENT_WRITEBACK
96b5179d 32#define BLOCK_GROUP_METADATA EXTENT_UPTODATE
0b86a832
CM
33#define BLOCK_GROUP_SYSTEM EXTENT_NEW
34
96b5179d
CM
35#define BLOCK_GROUP_DIRTY EXTENT_DIRTY
36
e089f05c
CM
37static int finish_current_insert(struct btrfs_trans_handle *trans, struct
38 btrfs_root *extent_root);
e20d96d6
CM
39static int del_pending_extents(struct btrfs_trans_handle *trans, struct
40 btrfs_root *extent_root);
925baedd
CM
41static struct btrfs_block_group_cache *
42__btrfs_find_block_group(struct btrfs_root *root,
43 struct btrfs_block_group_cache *hint,
44 u64 search_start, int data, int owner);
d548ee51 45
925baedd
CM
46void maybe_lock_mutex(struct btrfs_root *root)
47{
48 if (root != root->fs_info->extent_root &&
49 root != root->fs_info->chunk_root &&
50 root != root->fs_info->dev_root) {
51 mutex_lock(&root->fs_info->alloc_mutex);
52 }
53}
54
55void maybe_unlock_mutex(struct btrfs_root *root)
56{
57 if (root != root->fs_info->extent_root &&
58 root != root->fs_info->chunk_root &&
59 root != root->fs_info->dev_root) {
60 mutex_unlock(&root->fs_info->alloc_mutex);
61 }
62}
fec577fb 63
e37c9e69
CM
64static int cache_block_group(struct btrfs_root *root,
65 struct btrfs_block_group_cache *block_group)
66{
67 struct btrfs_path *path;
68 int ret;
69 struct btrfs_key key;
5f39d397 70 struct extent_buffer *leaf;
d1310b2e 71 struct extent_io_tree *free_space_cache;
e37c9e69 72 int slot;
e37c9e69
CM
73 u64 last = 0;
74 u64 hole_size;
7d7d6068 75 u64 first_free;
e37c9e69
CM
76 int found = 0;
77
00f5c795
CM
78 if (!block_group)
79 return 0;
80
e37c9e69 81 root = root->fs_info->extent_root;
f510cfec 82 free_space_cache = &root->fs_info->free_space_cache;
e37c9e69
CM
83
84 if (block_group->cached)
85 return 0;
f510cfec 86
e37c9e69
CM
87 path = btrfs_alloc_path();
88 if (!path)
89 return -ENOMEM;
7d7d6068 90
2cc58cf2 91 path->reada = 2;
5cd57b2c
CM
92 /*
93 * we get into deadlocks with paths held by callers of this function.
94 * since the alloc_mutex is protecting things right now, just
95 * skip the locking here
96 */
97 path->skip_locking = 1;
7d7d6068 98 first_free = block_group->key.objectid;
e37c9e69 99 key.objectid = block_group->key.objectid;
e37c9e69
CM
100 key.offset = 0;
101 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
102 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
103 if (ret < 0)
104 return ret;
0b86a832 105 ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
d548ee51
Y
106 if (ret < 0)
107 return ret;
108 if (ret == 0) {
109 leaf = path->nodes[0];
110 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
111 if (key.objectid + key.offset > first_free)
112 first_free = key.objectid + key.offset;
113 }
e37c9e69 114 while(1) {
5f39d397 115 leaf = path->nodes[0];
e37c9e69 116 slot = path->slots[0];
5f39d397 117 if (slot >= btrfs_header_nritems(leaf)) {
e37c9e69 118 ret = btrfs_next_leaf(root, path);
54aa1f4d
CM
119 if (ret < 0)
120 goto err;
de428b63 121 if (ret == 0) {
e37c9e69 122 continue;
de428b63 123 } else {
e37c9e69
CM
124 break;
125 }
126 }
5f39d397 127 btrfs_item_key_to_cpu(leaf, &key, slot);
7d7d6068 128 if (key.objectid < block_group->key.objectid) {
7d7d6068
Y
129 goto next;
130 }
e37c9e69
CM
131 if (key.objectid >= block_group->key.objectid +
132 block_group->key.offset) {
e37c9e69
CM
133 break;
134 }
7d7d6068 135
e37c9e69
CM
136 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
137 if (!found) {
7d7d6068 138 last = first_free;
e37c9e69 139 found = 1;
e37c9e69 140 }
f510cfec
CM
141 if (key.objectid > last) {
142 hole_size = key.objectid - last;
143 set_extent_dirty(free_space_cache, last,
144 last + hole_size - 1,
145 GFP_NOFS);
7d7d6068
Y
146 }
147 last = key.objectid + key.offset;
e37c9e69 148 }
7d7d6068 149next:
e37c9e69
CM
150 path->slots[0]++;
151 }
152
7d7d6068
Y
153 if (!found)
154 last = first_free;
155 if (block_group->key.objectid +
156 block_group->key.offset > last) {
157 hole_size = block_group->key.objectid +
158 block_group->key.offset - last;
f510cfec
CM
159 set_extent_dirty(free_space_cache, last,
160 last + hole_size - 1, GFP_NOFS);
7d7d6068 161 }
e37c9e69 162 block_group->cached = 1;
54aa1f4d 163err:
e37c9e69
CM
164 btrfs_free_path(path);
165 return 0;
166}
167
0ef3e66b
CM
168struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
169 btrfs_fs_info *info,
170 u64 bytenr)
171{
172 struct extent_io_tree *block_group_cache;
173 struct btrfs_block_group_cache *block_group = NULL;
174 u64 ptr;
175 u64 start;
176 u64 end;
177 int ret;
178
179 bytenr = max_t(u64, bytenr,
180 BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
181 block_group_cache = &info->block_group_cache;
182 ret = find_first_extent_bit(block_group_cache,
183 bytenr, &start, &end,
184 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
185 BLOCK_GROUP_SYSTEM);
186 if (ret) {
187 return NULL;
188 }
189 ret = get_state_private(block_group_cache, start, &ptr);
190 if (ret)
191 return NULL;
192
193 block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
194 return block_group;
195}
196
5276aeda
CM
197struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
198 btrfs_fs_info *info,
db94535d 199 u64 bytenr)
be744175 200{
d1310b2e 201 struct extent_io_tree *block_group_cache;
96b5179d
CM
202 struct btrfs_block_group_cache *block_group = NULL;
203 u64 ptr;
204 u64 start;
205 u64 end;
be744175
CM
206 int ret;
207
a061fc8d
CM
208 bytenr = max_t(u64, bytenr,
209 BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
96b5179d
CM
210 block_group_cache = &info->block_group_cache;
211 ret = find_first_extent_bit(block_group_cache,
db94535d 212 bytenr, &start, &end,
0b86a832
CM
213 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
214 BLOCK_GROUP_SYSTEM);
be744175 215 if (ret) {
96b5179d 216 return NULL;
be744175 217 }
96b5179d
CM
218 ret = get_state_private(block_group_cache, start, &ptr);
219 if (ret)
220 return NULL;
221
ae2f5411 222 block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
5cf66426 223 if (block_group->key.objectid <= bytenr && bytenr <
96b5179d
CM
224 block_group->key.objectid + block_group->key.offset)
225 return block_group;
be744175
CM
226 return NULL;
227}
0b86a832
CM
228
229static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
230{
593060d7 231 return (cache->flags & bits) == bits;
0b86a832
CM
232}
233
234static int noinline find_search_start(struct btrfs_root *root,
98ed5174 235 struct btrfs_block_group_cache **cache_ret,
0ef3e66b 236 u64 *start_ret, u64 num, int data)
e37c9e69 237{
e37c9e69
CM
238 int ret;
239 struct btrfs_block_group_cache *cache = *cache_ret;
d7fc640e 240 struct extent_io_tree *free_space_cache;
7d1660d4 241 struct extent_state *state;
e19caa5f 242 u64 last;
f510cfec 243 u64 start = 0;
257d0ce3 244 u64 cache_miss = 0;
c31f8830 245 u64 total_fs_bytes;
0b86a832 246 u64 search_start = *start_ret;
f84a8b36 247 int wrapped = 0;
e37c9e69 248
7d9eb12c 249 WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
c31f8830 250 total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
d7fc640e
CM
251 free_space_cache = &root->fs_info->free_space_cache;
252
0ef3e66b
CM
253 if (!cache)
254 goto out;
255
e37c9e69 256again:
54aa1f4d 257 ret = cache_block_group(root, cache);
0ef3e66b 258 if (ret) {
54aa1f4d 259 goto out;
0ef3e66b 260 }
f84a8b36 261
e19caa5f 262 last = max(search_start, cache->key.objectid);
0ef3e66b 263 if (!block_group_bits(cache, data) || cache->ro)
0b86a832 264 goto new_group;
e19caa5f 265
7d1660d4
CM
266 spin_lock_irq(&free_space_cache->lock);
267 state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY);
e37c9e69 268 while(1) {
7d1660d4 269 if (!state) {
257d0ce3
CM
270 if (!cache_miss)
271 cache_miss = last;
7d1660d4 272 spin_unlock_irq(&free_space_cache->lock);
e19caa5f
CM
273 goto new_group;
274 }
f510cfec 275
7d1660d4
CM
276 start = max(last, state->start);
277 last = state->end + 1;
257d0ce3 278 if (last - start < num) {
7d1660d4
CM
279 do {
280 state = extent_state_next(state);
281 } while(state && !(state->state & EXTENT_DIRTY));
f510cfec 282 continue;
257d0ce3 283 }
7d1660d4 284 spin_unlock_irq(&free_space_cache->lock);
0ef3e66b 285 if (cache->ro) {
8f18cf13 286 goto new_group;
0ef3e66b 287 }
0b86a832 288 if (start + num > cache->key.objectid + cache->key.offset)
e37c9e69 289 goto new_group;
8790d502 290 if (!block_group_bits(cache, data)) {
611f0e00 291 printk("block group bits don't match %Lu %d\n", cache->flags, data);
8790d502 292 }
0b86a832
CM
293 *start_ret = start;
294 return 0;
8790d502
CM
295 }
296out:
1a2b2ac7
CM
297 cache = btrfs_lookup_block_group(root->fs_info, search_start);
298 if (!cache) {
0b86a832 299 printk("Unable to find block group for %Lu\n", search_start);
1a2b2ac7 300 WARN_ON(1);
1a2b2ac7 301 }
0b86a832 302 return -ENOSPC;
e37c9e69
CM
303
304new_group:
e19caa5f 305 last = cache->key.objectid + cache->key.offset;
f84a8b36 306wrapped:
0ef3e66b 307 cache = btrfs_lookup_first_block_group(root->fs_info, last);
c31f8830 308 if (!cache || cache->key.objectid >= total_fs_bytes) {
0e4de584 309no_cache:
f84a8b36
CM
310 if (!wrapped) {
311 wrapped = 1;
312 last = search_start;
f84a8b36
CM
313 goto wrapped;
314 }
1a2b2ac7 315 goto out;
e37c9e69 316 }
257d0ce3
CM
317 if (cache_miss && !cache->cached) {
318 cache_block_group(root, cache);
319 last = cache_miss;
0ef3e66b 320 cache = btrfs_lookup_first_block_group(root->fs_info, last);
257d0ce3 321 }
0ef3e66b 322 cache_miss = 0;
c286ac48 323 cache = btrfs_find_block_group(root, cache, last, data, 0);
0e4de584
CM
324 if (!cache)
325 goto no_cache;
e37c9e69
CM
326 *cache_ret = cache;
327 goto again;
328}
329
84f54cfa
CM
330static u64 div_factor(u64 num, int factor)
331{
257d0ce3
CM
332 if (factor == 10)
333 return num;
84f54cfa
CM
334 num *= factor;
335 do_div(num, 10);
336 return num;
337}
338
6324fbf3
CM
339static int block_group_state_bits(u64 flags)
340{
341 int bits = 0;
342 if (flags & BTRFS_BLOCK_GROUP_DATA)
343 bits |= BLOCK_GROUP_DATA;
344 if (flags & BTRFS_BLOCK_GROUP_METADATA)
345 bits |= BLOCK_GROUP_METADATA;
346 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
347 bits |= BLOCK_GROUP_SYSTEM;
348 return bits;
349}
350
925baedd
CM
351static struct btrfs_block_group_cache *
352__btrfs_find_block_group(struct btrfs_root *root,
353 struct btrfs_block_group_cache *hint,
354 u64 search_start, int data, int owner)
cd1bc465 355{
96b5179d 356 struct btrfs_block_group_cache *cache;
d1310b2e 357 struct extent_io_tree *block_group_cache;
31f3c99b 358 struct btrfs_block_group_cache *found_group = NULL;
cd1bc465
CM
359 struct btrfs_fs_info *info = root->fs_info;
360 u64 used;
31f3c99b 361 u64 last = 0;
96b5179d
CM
362 u64 start;
363 u64 end;
364 u64 free_check;
365 u64 ptr;
366 int bit;
cd1bc465 367 int ret;
31f3c99b 368 int full_search = 0;
bce4eae9 369 int factor = 10;
0ef3e66b 370 int wrapped = 0;
de428b63 371
96b5179d
CM
372 block_group_cache = &info->block_group_cache;
373
a236aed1
CM
374 if (data & BTRFS_BLOCK_GROUP_METADATA)
375 factor = 9;
be744175 376
6324fbf3 377 bit = block_group_state_bits(data);
be744175 378
0ef3e66b 379 if (search_start) {
be744175 380 struct btrfs_block_group_cache *shint;
0ef3e66b 381 shint = btrfs_lookup_first_block_group(info, search_start);
8f18cf13 382 if (shint && block_group_bits(shint, data) && !shint->ro) {
c286ac48 383 spin_lock(&shint->lock);
be744175 384 used = btrfs_block_group_used(&shint->item);
324ae4df
Y
385 if (used + shint->pinned <
386 div_factor(shint->key.offset, factor)) {
c286ac48 387 spin_unlock(&shint->lock);
be744175
CM
388 return shint;
389 }
c286ac48 390 spin_unlock(&shint->lock);
be744175
CM
391 }
392 }
0ef3e66b 393 if (hint && !hint->ro && block_group_bits(hint, data)) {
c286ac48 394 spin_lock(&hint->lock);
31f3c99b 395 used = btrfs_block_group_used(&hint->item);
324ae4df
Y
396 if (used + hint->pinned <
397 div_factor(hint->key.offset, factor)) {
c286ac48 398 spin_unlock(&hint->lock);
31f3c99b
CM
399 return hint;
400 }
c286ac48 401 spin_unlock(&hint->lock);
e19caa5f 402 last = hint->key.objectid + hint->key.offset;
31f3c99b 403 } else {
e37c9e69 404 if (hint)
0ef3e66b 405 last = max(hint->key.objectid, search_start);
e37c9e69 406 else
0ef3e66b 407 last = search_start;
31f3c99b 408 }
31f3c99b 409again:
cd1bc465 410 while(1) {
96b5179d
CM
411 ret = find_first_extent_bit(block_group_cache, last,
412 &start, &end, bit);
413 if (ret)
cd1bc465 414 break;
96b5179d
CM
415
416 ret = get_state_private(block_group_cache, start, &ptr);
0ef3e66b
CM
417 if (ret) {
418 last = end + 1;
419 continue;
420 }
96b5179d 421
ae2f5411 422 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
c286ac48 423 spin_lock(&cache->lock);
96b5179d
CM
424 last = cache->key.objectid + cache->key.offset;
425 used = btrfs_block_group_used(&cache->item);
426
8f18cf13 427 if (!cache->ro && block_group_bits(cache, data)) {
0ef3e66b 428 free_check = div_factor(cache->key.offset, factor);
8790d502
CM
429 if (used + cache->pinned < free_check) {
430 found_group = cache;
c286ac48 431 spin_unlock(&cache->lock);
8790d502
CM
432 goto found;
433 }
6324fbf3 434 }
c286ac48 435 spin_unlock(&cache->lock);
de428b63 436 cond_resched();
cd1bc465 437 }
0ef3e66b
CM
438 if (!wrapped) {
439 last = search_start;
440 wrapped = 1;
441 goto again;
442 }
443 if (!full_search && factor < 10) {
be744175 444 last = search_start;
31f3c99b 445 full_search = 1;
0ef3e66b 446 factor = 10;
31f3c99b
CM
447 goto again;
448 }
be744175 449found:
31f3c99b 450 return found_group;
cd1bc465
CM
451}
452
925baedd
CM
453struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
454 struct btrfs_block_group_cache
455 *hint, u64 search_start,
456 int data, int owner)
457{
458
459 struct btrfs_block_group_cache *ret;
925baedd 460 ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
925baedd
CM
461 return ret;
462}
7bb86316 463static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
74493f7a
CM
464 u64 owner, u64 owner_offset)
465{
466 u32 high_crc = ~(u32)0;
467 u32 low_crc = ~(u32)0;
468 __le64 lenum;
74493f7a 469 lenum = cpu_to_le64(root_objectid);
a5eb62e3 470 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
7bb86316 471 lenum = cpu_to_le64(ref_generation);
a5eb62e3 472 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
21a4989d
CM
473 if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
474 lenum = cpu_to_le64(owner);
a5eb62e3 475 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
21a4989d 476 lenum = cpu_to_le64(owner_offset);
a5eb62e3 477 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
21a4989d 478 }
74493f7a
CM
479 return ((u64)high_crc << 32) | (u64)low_crc;
480}
481
7bb86316
CM
482static int match_extent_ref(struct extent_buffer *leaf,
483 struct btrfs_extent_ref *disk_ref,
484 struct btrfs_extent_ref *cpu_ref)
485{
486 int ret;
487 int len;
488
489 if (cpu_ref->objectid)
490 len = sizeof(*cpu_ref);
491 else
492 len = 2 * sizeof(u64);
493 ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
494 len);
495 return ret == 0;
496}
497
98ed5174
CM
498static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
499 struct btrfs_root *root,
500 struct btrfs_path *path, u64 bytenr,
501 u64 root_objectid,
502 u64 ref_generation, u64 owner,
503 u64 owner_offset, int del)
74493f7a
CM
504{
505 u64 hash;
506 struct btrfs_key key;
7bb86316 507 struct btrfs_key found_key;
74493f7a 508 struct btrfs_extent_ref ref;
7bb86316
CM
509 struct extent_buffer *leaf;
510 struct btrfs_extent_ref *disk_ref;
511 int ret;
512 int ret2;
513
514 btrfs_set_stack_ref_root(&ref, root_objectid);
515 btrfs_set_stack_ref_generation(&ref, ref_generation);
516 btrfs_set_stack_ref_objectid(&ref, owner);
517 btrfs_set_stack_ref_offset(&ref, owner_offset);
518
519 hash = hash_extent_ref(root_objectid, ref_generation, owner,
520 owner_offset);
521 key.offset = hash;
522 key.objectid = bytenr;
523 key.type = BTRFS_EXTENT_REF_KEY;
524
525 while (1) {
526 ret = btrfs_search_slot(trans, root, &key, path,
527 del ? -1 : 0, del);
528 if (ret < 0)
529 goto out;
530 leaf = path->nodes[0];
531 if (ret != 0) {
532 u32 nritems = btrfs_header_nritems(leaf);
533 if (path->slots[0] >= nritems) {
534 ret2 = btrfs_next_leaf(root, path);
535 if (ret2)
536 goto out;
537 leaf = path->nodes[0];
538 }
539 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
540 if (found_key.objectid != bytenr ||
541 found_key.type != BTRFS_EXTENT_REF_KEY)
542 goto out;
543 key.offset = found_key.offset;
544 if (del) {
545 btrfs_release_path(root, path);
546 continue;
547 }
548 }
549 disk_ref = btrfs_item_ptr(path->nodes[0],
550 path->slots[0],
551 struct btrfs_extent_ref);
552 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
553 ret = 0;
554 goto out;
555 }
556 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
557 key.offset = found_key.offset + 1;
558 btrfs_release_path(root, path);
559 }
560out:
561 return ret;
562}
563
d8d5f3e1
CM
564/*
565 * Back reference rules. Back refs have three main goals:
566 *
567 * 1) differentiate between all holders of references to an extent so that
568 * when a reference is dropped we can make sure it was a valid reference
569 * before freeing the extent.
570 *
571 * 2) Provide enough information to quickly find the holders of an extent
572 * if we notice a given block is corrupted or bad.
573 *
574 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
575 * maintenance. This is actually the same as #2, but with a slightly
576 * different use case.
577 *
578 * File extents can be referenced by:
579 *
580 * - multiple snapshots, subvolumes, or different generations in one subvol
581 * - different files inside a single subvolume (in theory, not implemented yet)
582 * - different offsets inside a file (bookend extents in file.c)
583 *
584 * The extent ref structure has fields for:
585 *
586 * - Objectid of the subvolume root
587 * - Generation number of the tree holding the reference
588 * - objectid of the file holding the reference
589 * - offset in the file corresponding to the key holding the reference
590 *
591 * When a file extent is allocated the fields are filled in:
592 * (root_key.objectid, trans->transid, inode objectid, offset in file)
593 *
594 * When a leaf is cow'd new references are added for every file extent found
595 * in the leaf. It looks the same as the create case, but trans->transid
596 * will be different when the block is cow'd.
597 *
598 * (root_key.objectid, trans->transid, inode objectid, offset in file)
599 *
600 * When a file extent is removed either during snapshot deletion or file
601 * truncation, the corresponding back reference is found
602 * by searching for:
603 *
604 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
605 * inode objectid, offset in file)
606 *
607 * Btree extents can be referenced by:
608 *
609 * - Different subvolumes
610 * - Different generations of the same subvolume
611 *
612 * Storing sufficient information for a full reverse mapping of a btree
613 * block would require storing the lowest key of the block in the backref,
614 * and it would require updating that lowest key either before write out or
615 * every time it changed. Instead, the objectid of the lowest key is stored
616 * along with the level of the tree block. This provides a hint
617 * about where in the btree the block can be found. Searches through the
618 * btree only need to look for a pointer to that block, so they stop one
619 * level higher than the level recorded in the backref.
620 *
621 * Some btrees do not do reference counting on their extents. These
622 * include the extent tree and the tree of tree roots. Backrefs for these
623 * trees always have a generation of zero.
624 *
625 * When a tree block is created, back references are inserted:
626 *
f6dbff55 627 * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
d8d5f3e1
CM
628 *
629 * When a tree block is cow'd in a reference counted root,
630 * new back references are added for all the blocks it points to.
631 * These are of the form (trans->transid will have increased since creation):
632 *
f6dbff55 633 * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
d8d5f3e1
CM
634 *
635 * Because the lowest_key_objectid and the level are just hints
636 * they are not used when backrefs are deleted. When a backref is deleted:
637 *
638 * if backref was for a tree root:
639 * root_objectid = root->root_key.objectid
640 * else
641 * root_objectid = btrfs_header_owner(parent)
642 *
643 * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
644 *
645 * Back Reference Key hashing:
646 *
647 * Back references have four fields, each 64 bits long. Unfortunately,
648 * they are hashed into a single 64 bit number and placed into the key offset.
649 * The key objectid corresponds to the first byte in the extent, and the
650 * key type is set to BTRFS_EXTENT_REF_KEY
651 */
7bb86316
CM
652int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
653 struct btrfs_root *root,
654 struct btrfs_path *path, u64 bytenr,
655 u64 root_objectid, u64 ref_generation,
656 u64 owner, u64 owner_offset)
657{
658 u64 hash;
659 struct btrfs_key key;
660 struct btrfs_extent_ref ref;
661 struct btrfs_extent_ref *disk_ref;
74493f7a
CM
662 int ret;
663
664 btrfs_set_stack_ref_root(&ref, root_objectid);
7bb86316 665 btrfs_set_stack_ref_generation(&ref, ref_generation);
74493f7a
CM
666 btrfs_set_stack_ref_objectid(&ref, owner);
667 btrfs_set_stack_ref_offset(&ref, owner_offset);
668
7bb86316
CM
669 hash = hash_extent_ref(root_objectid, ref_generation, owner,
670 owner_offset);
74493f7a
CM
671 key.offset = hash;
672 key.objectid = bytenr;
673 key.type = BTRFS_EXTENT_REF_KEY;
674
675 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
676 while (ret == -EEXIST) {
7bb86316
CM
677 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
678 struct btrfs_extent_ref);
679 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
680 goto out;
681 key.offset++;
682 btrfs_release_path(root, path);
683 ret = btrfs_insert_empty_item(trans, root, path, &key,
684 sizeof(ref));
74493f7a 685 }
7bb86316
CM
686 if (ret)
687 goto out;
688 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
689 struct btrfs_extent_ref);
690 write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
691 sizeof(ref));
692 btrfs_mark_buffer_dirty(path->nodes[0]);
693out:
694 btrfs_release_path(root, path);
695 return ret;
74493f7a
CM
696}
697
925baedd 698static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
b18c6685 699 struct btrfs_root *root,
74493f7a 700 u64 bytenr, u64 num_bytes,
7bb86316 701 u64 root_objectid, u64 ref_generation,
74493f7a 702 u64 owner, u64 owner_offset)
02217ed2 703{
5caf2a00 704 struct btrfs_path *path;
02217ed2 705 int ret;
e2fa7227 706 struct btrfs_key key;
5f39d397 707 struct extent_buffer *l;
234b63a0 708 struct btrfs_extent_item *item;
cf27e1ee 709 u32 refs;
037e6390 710
db94535d 711 WARN_ON(num_bytes < root->sectorsize);
5caf2a00 712 path = btrfs_alloc_path();
54aa1f4d
CM
713 if (!path)
714 return -ENOMEM;
26b8003f 715
3c12ac72 716 path->reada = 1;
db94535d 717 key.objectid = bytenr;
62e2749e 718 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
db94535d 719 key.offset = num_bytes;
5caf2a00 720 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
9f5fae2f 721 0, 1);
54aa1f4d
CM
722 if (ret < 0)
723 return ret;
a429e513 724 if (ret != 0) {
a28ec197 725 BUG();
a429e513 726 }
02217ed2 727 BUG_ON(ret != 0);
5f39d397 728 l = path->nodes[0];
5caf2a00 729 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
5f39d397
CM
730 refs = btrfs_extent_refs(l, item);
731 btrfs_set_extent_refs(l, item, refs + 1);
5caf2a00 732 btrfs_mark_buffer_dirty(path->nodes[0]);
a28ec197 733
5caf2a00 734 btrfs_release_path(root->fs_info->extent_root, path);
7bb86316 735
3c12ac72 736 path->reada = 1;
7bb86316
CM
737 ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
738 path, bytenr, root_objectid,
739 ref_generation, owner, owner_offset);
740 BUG_ON(ret);
9f5fae2f 741 finish_current_insert(trans, root->fs_info->extent_root);
e20d96d6 742 del_pending_extents(trans, root->fs_info->extent_root);
74493f7a
CM
743
744 btrfs_free_path(path);
02217ed2
CM
745 return 0;
746}
747
925baedd
CM
748int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
749 struct btrfs_root *root,
750 u64 bytenr, u64 num_bytes,
751 u64 root_objectid, u64 ref_generation,
752 u64 owner, u64 owner_offset)
753{
754 int ret;
755
756 mutex_lock(&root->fs_info->alloc_mutex);
757 ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
758 root_objectid, ref_generation,
759 owner, owner_offset);
760 mutex_unlock(&root->fs_info->alloc_mutex);
761 return ret;
762}
763
e9d0b13b
CM
764int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
765 struct btrfs_root *root)
766{
767 finish_current_insert(trans, root->fs_info->extent_root);
768 del_pending_extents(trans, root->fs_info->extent_root);
769 return 0;
770}
771
b18c6685 772static int lookup_extent_ref(struct btrfs_trans_handle *trans,
db94535d
CM
773 struct btrfs_root *root, u64 bytenr,
774 u64 num_bytes, u32 *refs)
a28ec197 775{
5caf2a00 776 struct btrfs_path *path;
a28ec197 777 int ret;
e2fa7227 778 struct btrfs_key key;
5f39d397 779 struct extent_buffer *l;
234b63a0 780 struct btrfs_extent_item *item;
5caf2a00 781
db94535d 782 WARN_ON(num_bytes < root->sectorsize);
5caf2a00 783 path = btrfs_alloc_path();
3c12ac72 784 path->reada = 1;
db94535d
CM
785 key.objectid = bytenr;
786 key.offset = num_bytes;
62e2749e 787 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
5caf2a00 788 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
9f5fae2f 789 0, 0);
54aa1f4d
CM
790 if (ret < 0)
791 goto out;
5f39d397
CM
792 if (ret != 0) {
793 btrfs_print_leaf(root, path->nodes[0]);
db94535d 794 printk("failed to find block number %Lu\n", bytenr);
a28ec197 795 BUG();
5f39d397
CM
796 }
797 l = path->nodes[0];
5caf2a00 798 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
5f39d397 799 *refs = btrfs_extent_refs(l, item);
54aa1f4d 800out:
5caf2a00 801 btrfs_free_path(path);
a28ec197
CM
802 return 0;
803}
804
f321e491
YZ
805
806static int get_reference_status(struct btrfs_root *root, u64 bytenr,
807 u64 parent_gen, u64 ref_objectid,
808 u64 *min_generation, u32 *ref_count)
be20aa9d
CM
809{
810 struct btrfs_root *extent_root = root->fs_info->extent_root;
811 struct btrfs_path *path;
f321e491
YZ
812 struct extent_buffer *leaf;
813 struct btrfs_extent_ref *ref_item;
814 struct btrfs_key key;
815 struct btrfs_key found_key;
56b453c9 816 u64 root_objectid = root->root_key.objectid;
f321e491 817 u64 ref_generation;
be20aa9d
CM
818 u32 nritems;
819 int ret;
925baedd 820
be20aa9d
CM
821 key.objectid = bytenr;
822 key.offset = 0;
f321e491 823 key.type = BTRFS_EXTENT_ITEM_KEY;
be20aa9d 824
f321e491
YZ
825 path = btrfs_alloc_path();
826 mutex_lock(&root->fs_info->alloc_mutex);
be20aa9d
CM
827 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
828 if (ret < 0)
829 goto out;
830 BUG_ON(ret == 0);
831
f321e491
YZ
832 leaf = path->nodes[0];
833 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
be20aa9d
CM
834
835 if (found_key.objectid != bytenr ||
836 found_key.type != BTRFS_EXTENT_ITEM_KEY) {
f321e491 837 ret = 1;
be20aa9d
CM
838 goto out;
839 }
840
f321e491
YZ
841 *ref_count = 0;
842 *min_generation = (u64)-1;
843
be20aa9d 844 while (1) {
f321e491
YZ
845 leaf = path->nodes[0];
846 nritems = btrfs_header_nritems(leaf);
be20aa9d
CM
847 if (path->slots[0] >= nritems) {
848 ret = btrfs_next_leaf(extent_root, path);
f321e491
YZ
849 if (ret < 0)
850 goto out;
be20aa9d
CM
851 if (ret == 0)
852 continue;
853 break;
854 }
f321e491 855 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
be20aa9d
CM
856 if (found_key.objectid != bytenr)
857 break;
bd09835d 858
be20aa9d
CM
859 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
860 path->slots[0]++;
861 continue;
862 }
863
f321e491 864 ref_item = btrfs_item_ptr(leaf, path->slots[0],
be20aa9d 865 struct btrfs_extent_ref);
f321e491
YZ
866 ref_generation = btrfs_ref_generation(leaf, ref_item);
867 /*
868 * For (parent_gen > 0 && parent_gen > ref_gen):
869 *
bcc63abb
Y
870 * we reach here through the oldest root, therefore
871 * all other reference from same snapshot should have
f321e491
YZ
872 * a larger generation.
873 */
874 if ((root_objectid != btrfs_ref_root(leaf, ref_item)) ||
875 (parent_gen > 0 && parent_gen > ref_generation) ||
876 (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
877 ref_objectid != btrfs_ref_objectid(leaf, ref_item))) {
878 if (ref_count)
879 *ref_count = 2;
880 break;
a68d5933 881 }
f321e491
YZ
882
883 *ref_count = 1;
884 if (*min_generation > ref_generation)
885 *min_generation = ref_generation;
886
be20aa9d
CM
887 path->slots[0]++;
888 }
f321e491
YZ
889 ret = 0;
890out:
891 mutex_unlock(&root->fs_info->alloc_mutex);
892 btrfs_free_path(path);
893 return ret;
894}
895
896int btrfs_cross_ref_exists(struct btrfs_root *root,
897 struct btrfs_key *key, u64 bytenr)
898{
899 struct btrfs_trans_handle *trans;
900 struct btrfs_root *old_root;
901 struct btrfs_path *path = NULL;
902 struct extent_buffer *eb;
903 struct btrfs_file_extent_item *item;
904 u64 ref_generation;
905 u64 min_generation;
906 u64 extent_start;
907 u32 ref_count;
908 int level;
909 int ret;
910
911 BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY);
912 ret = get_reference_status(root, bytenr, 0, key->objectid,
913 &min_generation, &ref_count);
914 if (ret)
915 return ret;
916
917 if (ref_count != 1)
918 return 1;
919
920 trans = btrfs_start_transaction(root, 0);
921 old_root = root->dirty_root->root;
922 ref_generation = old_root->root_key.offset;
923
924 /* all references are created in running transaction */
925 if (min_generation > ref_generation) {
926 ret = 0;
bbaf549e
CM
927 goto out;
928 }
f321e491
YZ
929
930 path = btrfs_alloc_path();
931 if (!path) {
932 ret = -ENOMEM;
be20aa9d
CM
933 goto out;
934 }
f321e491
YZ
935
936 path->skip_locking = 1;
937 /* if no item found, the extent is referenced by other snapshot */
938 ret = btrfs_search_slot(NULL, old_root, key, path, 0, 0);
939 if (ret)
be20aa9d 940 goto out;
be20aa9d 941
f321e491
YZ
942 eb = path->nodes[0];
943 item = btrfs_item_ptr(eb, path->slots[0],
944 struct btrfs_file_extent_item);
945 if (btrfs_file_extent_type(eb, item) != BTRFS_FILE_EXTENT_REG ||
946 btrfs_file_extent_disk_bytenr(eb, item) != bytenr) {
947 ret = 1;
948 goto out;
949 }
950
951 for (level = BTRFS_MAX_LEVEL - 1; level >= -1; level--) {
952 if (level >= 0) {
953 eb = path->nodes[level];
954 if (!eb)
955 continue;
956 extent_start = eb->start;
bcc63abb 957 } else
f321e491
YZ
958 extent_start = bytenr;
959
960 ret = get_reference_status(root, extent_start, ref_generation,
961 0, &min_generation, &ref_count);
962 if (ret)
963 goto out;
964
965 if (ref_count != 1) {
966 ret = 1;
967 goto out;
968 }
969 if (level >= 0)
970 ref_generation = btrfs_header_generation(eb);
971 }
972 ret = 0;
be20aa9d 973out:
f321e491
YZ
974 if (path)
975 btrfs_free_path(path);
976 btrfs_end_transaction(trans, root);
977 return ret;
be20aa9d 978}
c5739bba 979
e089f05c 980int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
31153d81 981 struct extent_buffer *buf, int cache_ref)
02217ed2 982{
db94535d 983 u64 bytenr;
5f39d397
CM
984 u32 nritems;
985 struct btrfs_key key;
6407bf6d 986 struct btrfs_file_extent_item *fi;
02217ed2 987 int i;
db94535d 988 int level;
6407bf6d 989 int ret;
54aa1f4d 990 int faili;
31153d81 991 int nr_file_extents = 0;
a28ec197 992
3768f368 993 if (!root->ref_cows)
a28ec197 994 return 0;
5f39d397 995
db94535d 996 level = btrfs_header_level(buf);
5f39d397
CM
997 nritems = btrfs_header_nritems(buf);
998 for (i = 0; i < nritems; i++) {
e34a5b4f 999 cond_resched();
db94535d
CM
1000 if (level == 0) {
1001 u64 disk_bytenr;
5f39d397
CM
1002 btrfs_item_key_to_cpu(buf, &key, i);
1003 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
6407bf6d 1004 continue;
5f39d397 1005 fi = btrfs_item_ptr(buf, i,
6407bf6d 1006 struct btrfs_file_extent_item);
5f39d397 1007 if (btrfs_file_extent_type(buf, fi) ==
236454df
CM
1008 BTRFS_FILE_EXTENT_INLINE)
1009 continue;
db94535d
CM
1010 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1011 if (disk_bytenr == 0)
3a686375 1012 continue;
4a096752 1013
31153d81
YZ
1014 if (buf != root->commit_root)
1015 nr_file_extents++;
1016
4a096752 1017 mutex_lock(&root->fs_info->alloc_mutex);
925baedd 1018 ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr,
7bb86316
CM
1019 btrfs_file_extent_disk_num_bytes(buf, fi),
1020 root->root_key.objectid, trans->transid,
1021 key.objectid, key.offset);
4a096752 1022 mutex_unlock(&root->fs_info->alloc_mutex);
54aa1f4d
CM
1023 if (ret) {
1024 faili = i;
4a096752 1025 WARN_ON(1);
54aa1f4d
CM
1026 goto fail;
1027 }
6407bf6d 1028 } else {
db94535d 1029 bytenr = btrfs_node_blockptr(buf, i);
6caab489 1030 btrfs_node_key_to_cpu(buf, &key, i);
4a096752
CM
1031
1032 mutex_lock(&root->fs_info->alloc_mutex);
925baedd 1033 ret = __btrfs_inc_extent_ref(trans, root, bytenr,
7bb86316
CM
1034 btrfs_level_size(root, level - 1),
1035 root->root_key.objectid,
f6dbff55
CM
1036 trans->transid,
1037 level - 1, key.objectid);
4a096752 1038 mutex_unlock(&root->fs_info->alloc_mutex);
54aa1f4d
CM
1039 if (ret) {
1040 faili = i;
4a096752 1041 WARN_ON(1);
54aa1f4d
CM
1042 goto fail;
1043 }
6407bf6d 1044 }
02217ed2 1045 }
31153d81
YZ
1046 /* cache orignal leaf block's references */
1047 if (level == 0 && cache_ref && buf != root->commit_root) {
1048 struct btrfs_leaf_ref *ref;
1049 struct btrfs_extent_info *info;
1050
bcc63abb 1051 ref = btrfs_alloc_leaf_ref(root, nr_file_extents);
31153d81
YZ
1052 if (!ref) {
1053 WARN_ON(1);
1054 goto out;
1055 }
1056
47ac14fa 1057 ref->root_gen = root->root_key.offset;
31153d81
YZ
1058 ref->bytenr = buf->start;
1059 ref->owner = btrfs_header_owner(buf);
1060 ref->generation = btrfs_header_generation(buf);
1061 ref->nritems = nr_file_extents;
1062 info = ref->extents;
bcc63abb 1063
31153d81
YZ
1064 for (i = 0; nr_file_extents > 0 && i < nritems; i++) {
1065 u64 disk_bytenr;
1066 btrfs_item_key_to_cpu(buf, &key, i);
1067 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1068 continue;
1069 fi = btrfs_item_ptr(buf, i,
1070 struct btrfs_file_extent_item);
1071 if (btrfs_file_extent_type(buf, fi) ==
1072 BTRFS_FILE_EXTENT_INLINE)
1073 continue;
1074 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1075 if (disk_bytenr == 0)
1076 continue;
1077
1078 info->bytenr = disk_bytenr;
1079 info->num_bytes =
1080 btrfs_file_extent_disk_num_bytes(buf, fi);
1081 info->objectid = key.objectid;
1082 info->offset = key.offset;
1083 info++;
1084 }
1085
1086 BUG_ON(!root->ref_tree);
1087 ret = btrfs_add_leaf_ref(root, ref);
1088 WARN_ON(ret);
bcc63abb 1089 btrfs_free_leaf_ref(root, ref);
31153d81
YZ
1090 }
1091out:
02217ed2 1092 return 0;
54aa1f4d 1093fail:
ccd467d6 1094 WARN_ON(1);
7bb86316 1095#if 0
54aa1f4d 1096 for (i =0; i < faili; i++) {
db94535d
CM
1097 if (level == 0) {
1098 u64 disk_bytenr;
5f39d397
CM
1099 btrfs_item_key_to_cpu(buf, &key, i);
1100 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
54aa1f4d 1101 continue;
5f39d397 1102 fi = btrfs_item_ptr(buf, i,
54aa1f4d 1103 struct btrfs_file_extent_item);
5f39d397 1104 if (btrfs_file_extent_type(buf, fi) ==
54aa1f4d
CM
1105 BTRFS_FILE_EXTENT_INLINE)
1106 continue;
db94535d
CM
1107 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1108 if (disk_bytenr == 0)
54aa1f4d 1109 continue;
db94535d
CM
1110 err = btrfs_free_extent(trans, root, disk_bytenr,
1111 btrfs_file_extent_disk_num_bytes(buf,
5f39d397 1112 fi), 0);
54aa1f4d
CM
1113 BUG_ON(err);
1114 } else {
db94535d
CM
1115 bytenr = btrfs_node_blockptr(buf, i);
1116 err = btrfs_free_extent(trans, root, bytenr,
1117 btrfs_level_size(root, level - 1), 0);
54aa1f4d
CM
1118 BUG_ON(err);
1119 }
1120 }
7bb86316 1121#endif
54aa1f4d 1122 return ret;
02217ed2
CM
1123}
1124
9078a3e1
CM
1125static int write_one_cache_group(struct btrfs_trans_handle *trans,
1126 struct btrfs_root *root,
1127 struct btrfs_path *path,
1128 struct btrfs_block_group_cache *cache)
1129{
1130 int ret;
1131 int pending_ret;
1132 struct btrfs_root *extent_root = root->fs_info->extent_root;
5f39d397
CM
1133 unsigned long bi;
1134 struct extent_buffer *leaf;
9078a3e1 1135
9078a3e1 1136 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
54aa1f4d
CM
1137 if (ret < 0)
1138 goto fail;
9078a3e1 1139 BUG_ON(ret);
5f39d397
CM
1140
1141 leaf = path->nodes[0];
1142 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1143 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1144 btrfs_mark_buffer_dirty(leaf);
9078a3e1 1145 btrfs_release_path(extent_root, path);
54aa1f4d 1146fail:
9078a3e1
CM
1147 finish_current_insert(trans, extent_root);
1148 pending_ret = del_pending_extents(trans, extent_root);
1149 if (ret)
1150 return ret;
1151 if (pending_ret)
1152 return pending_ret;
1153 return 0;
1154
1155}
1156
96b5179d
CM
1157int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1158 struct btrfs_root *root)
9078a3e1 1159{
d1310b2e 1160 struct extent_io_tree *block_group_cache;
96b5179d 1161 struct btrfs_block_group_cache *cache;
9078a3e1
CM
1162 int ret;
1163 int err = 0;
1164 int werr = 0;
9078a3e1 1165 struct btrfs_path *path;
96b5179d
CM
1166 u64 last = 0;
1167 u64 start;
1168 u64 end;
1169 u64 ptr;
9078a3e1 1170
96b5179d 1171 block_group_cache = &root->fs_info->block_group_cache;
9078a3e1
CM
1172 path = btrfs_alloc_path();
1173 if (!path)
1174 return -ENOMEM;
1175
925baedd 1176 mutex_lock(&root->fs_info->alloc_mutex);
9078a3e1 1177 while(1) {
96b5179d
CM
1178 ret = find_first_extent_bit(block_group_cache, last,
1179 &start, &end, BLOCK_GROUP_DIRTY);
1180 if (ret)
9078a3e1 1181 break;
54aa1f4d 1182
96b5179d
CM
1183 last = end + 1;
1184 ret = get_state_private(block_group_cache, start, &ptr);
1185 if (ret)
1186 break;
ae2f5411 1187 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
96b5179d
CM
1188 err = write_one_cache_group(trans, root,
1189 path, cache);
1190 /*
1191 * if we fail to write the cache group, we want
1192 * to keep it marked dirty in hopes that a later
1193 * write will work
1194 */
1195 if (err) {
1196 werr = err;
1197 continue;
9078a3e1 1198 }
96b5179d
CM
1199 clear_extent_bits(block_group_cache, start, end,
1200 BLOCK_GROUP_DIRTY, GFP_NOFS);
9078a3e1
CM
1201 }
1202 btrfs_free_path(path);
925baedd 1203 mutex_unlock(&root->fs_info->alloc_mutex);
9078a3e1
CM
1204 return werr;
1205}
1206
6324fbf3
CM
1207static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
1208 u64 flags)
1209{
1210 struct list_head *head = &info->space_info;
1211 struct list_head *cur;
1212 struct btrfs_space_info *found;
1213 list_for_each(cur, head) {
1214 found = list_entry(cur, struct btrfs_space_info, list);
1215 if (found->flags == flags)
1216 return found;
1217 }
1218 return NULL;
1219
1220}
1221
593060d7
CM
1222static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1223 u64 total_bytes, u64 bytes_used,
1224 struct btrfs_space_info **space_info)
1225{
1226 struct btrfs_space_info *found;
1227
1228 found = __find_space_info(info, flags);
1229 if (found) {
1230 found->total_bytes += total_bytes;
1231 found->bytes_used += bytes_used;
8f18cf13 1232 found->full = 0;
593060d7
CM
1233 WARN_ON(found->total_bytes < found->bytes_used);
1234 *space_info = found;
1235 return 0;
1236 }
1237 found = kmalloc(sizeof(*found), GFP_NOFS);
1238 if (!found)
1239 return -ENOMEM;
1240
1241 list_add(&found->list, &info->space_info);
1242 found->flags = flags;
1243 found->total_bytes = total_bytes;
1244 found->bytes_used = bytes_used;
1245 found->bytes_pinned = 0;
1246 found->full = 0;
0ef3e66b 1247 found->force_alloc = 0;
593060d7
CM
1248 *space_info = found;
1249 return 0;
1250}
1251
8790d502
CM
1252static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1253{
1254 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
611f0e00 1255 BTRFS_BLOCK_GROUP_RAID1 |
321aecc6 1256 BTRFS_BLOCK_GROUP_RAID10 |
611f0e00 1257 BTRFS_BLOCK_GROUP_DUP);
8790d502
CM
1258 if (extra_flags) {
1259 if (flags & BTRFS_BLOCK_GROUP_DATA)
1260 fs_info->avail_data_alloc_bits |= extra_flags;
1261 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1262 fs_info->avail_metadata_alloc_bits |= extra_flags;
1263 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1264 fs_info->avail_system_alloc_bits |= extra_flags;
1265 }
1266}
593060d7 1267
a061fc8d 1268static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
ec44a35c 1269{
a061fc8d
CM
1270 u64 num_devices = root->fs_info->fs_devices->num_devices;
1271
1272 if (num_devices == 1)
1273 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1274 if (num_devices < 4)
1275 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1276
ec44a35c
CM
1277 if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1278 (flags & (BTRFS_BLOCK_GROUP_RAID1 |
a061fc8d 1279 BTRFS_BLOCK_GROUP_RAID10))) {
ec44a35c 1280 flags &= ~BTRFS_BLOCK_GROUP_DUP;
a061fc8d 1281 }
ec44a35c
CM
1282
1283 if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
a061fc8d 1284 (flags & BTRFS_BLOCK_GROUP_RAID10)) {
ec44a35c 1285 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
a061fc8d 1286 }
ec44a35c
CM
1287
1288 if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1289 ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1290 (flags & BTRFS_BLOCK_GROUP_RAID10) |
1291 (flags & BTRFS_BLOCK_GROUP_DUP)))
1292 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1293 return flags;
1294}
1295
6324fbf3
CM
1296static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1297 struct btrfs_root *extent_root, u64 alloc_bytes,
0ef3e66b 1298 u64 flags, int force)
6324fbf3
CM
1299{
1300 struct btrfs_space_info *space_info;
1301 u64 thresh;
1302 u64 start;
1303 u64 num_bytes;
1304 int ret;
1305
a061fc8d 1306 flags = reduce_alloc_profile(extent_root, flags);
ec44a35c 1307
6324fbf3 1308 space_info = __find_space_info(extent_root->fs_info, flags);
593060d7
CM
1309 if (!space_info) {
1310 ret = update_space_info(extent_root->fs_info, flags,
1311 0, 0, &space_info);
1312 BUG_ON(ret);
1313 }
6324fbf3
CM
1314 BUG_ON(!space_info);
1315
0ef3e66b
CM
1316 if (space_info->force_alloc) {
1317 force = 1;
1318 space_info->force_alloc = 0;
1319 }
6324fbf3 1320 if (space_info->full)
925baedd 1321 goto out;
6324fbf3 1322
8790d502 1323 thresh = div_factor(space_info->total_bytes, 6);
0ef3e66b
CM
1324 if (!force &&
1325 (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
6324fbf3 1326 thresh)
925baedd 1327 goto out;
6324fbf3 1328
925baedd 1329 mutex_lock(&extent_root->fs_info->chunk_mutex);
6324fbf3
CM
1330 ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1331 if (ret == -ENOSPC) {
1332printk("space info full %Lu\n", flags);
1333 space_info->full = 1;
a74a4b97 1334 goto out_unlock;
6324fbf3 1335 }
6324fbf3
CM
1336 BUG_ON(ret);
1337
1338 ret = btrfs_make_block_group(trans, extent_root, 0, flags,
e17cade2 1339 BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
6324fbf3 1340 BUG_ON(ret);
a74a4b97 1341out_unlock:
333db94c 1342 mutex_unlock(&extent_root->fs_info->chunk_mutex);
a74a4b97 1343out:
6324fbf3
CM
1344 return 0;
1345}
1346
9078a3e1
CM
1347static int update_block_group(struct btrfs_trans_handle *trans,
1348 struct btrfs_root *root,
db94535d 1349 u64 bytenr, u64 num_bytes, int alloc,
0b86a832 1350 int mark_free)
9078a3e1
CM
1351{
1352 struct btrfs_block_group_cache *cache;
1353 struct btrfs_fs_info *info = root->fs_info;
db94535d 1354 u64 total = num_bytes;
9078a3e1 1355 u64 old_val;
db94535d 1356 u64 byte_in_group;
96b5179d
CM
1357 u64 start;
1358 u64 end;
3e1ad54f 1359
7d9eb12c 1360 WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
9078a3e1 1361 while(total) {
db94535d 1362 cache = btrfs_lookup_block_group(info, bytenr);
3e1ad54f 1363 if (!cache) {
9078a3e1 1364 return -1;
cd1bc465 1365 }
db94535d
CM
1366 byte_in_group = bytenr - cache->key.objectid;
1367 WARN_ON(byte_in_group > cache->key.offset);
96b5179d
CM
1368 start = cache->key.objectid;
1369 end = start + cache->key.offset - 1;
1370 set_extent_bits(&info->block_group_cache, start, end,
1371 BLOCK_GROUP_DIRTY, GFP_NOFS);
9078a3e1 1372
c286ac48 1373 spin_lock(&cache->lock);
9078a3e1 1374 old_val = btrfs_block_group_used(&cache->item);
db94535d 1375 num_bytes = min(total, cache->key.offset - byte_in_group);
cd1bc465 1376 if (alloc) {
db94535d 1377 old_val += num_bytes;
6324fbf3 1378 cache->space_info->bytes_used += num_bytes;
c286ac48
CM
1379 btrfs_set_block_group_used(&cache->item, old_val);
1380 spin_unlock(&cache->lock);
cd1bc465 1381 } else {
db94535d 1382 old_val -= num_bytes;
6324fbf3 1383 cache->space_info->bytes_used -= num_bytes;
c286ac48
CM
1384 btrfs_set_block_group_used(&cache->item, old_val);
1385 spin_unlock(&cache->lock);
f510cfec
CM
1386 if (mark_free) {
1387 set_extent_dirty(&info->free_space_cache,
db94535d 1388 bytenr, bytenr + num_bytes - 1,
f510cfec 1389 GFP_NOFS);
e37c9e69 1390 }
cd1bc465 1391 }
db94535d
CM
1392 total -= num_bytes;
1393 bytenr += num_bytes;
9078a3e1
CM
1394 }
1395 return 0;
1396}
6324fbf3 1397
a061fc8d
CM
1398static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
1399{
1400 u64 start;
1401 u64 end;
1402 int ret;
1403 ret = find_first_extent_bit(&root->fs_info->block_group_cache,
1404 search_start, &start, &end,
1405 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
1406 BLOCK_GROUP_SYSTEM);
1407 if (ret)
1408 return 0;
1409 return start;
1410}
1411
1412
324ae4df
Y
1413static int update_pinned_extents(struct btrfs_root *root,
1414 u64 bytenr, u64 num, int pin)
1415{
1416 u64 len;
1417 struct btrfs_block_group_cache *cache;
1418 struct btrfs_fs_info *fs_info = root->fs_info;
1419
7d9eb12c 1420 WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
324ae4df
Y
1421 if (pin) {
1422 set_extent_dirty(&fs_info->pinned_extents,
1423 bytenr, bytenr + num - 1, GFP_NOFS);
1424 } else {
1425 clear_extent_dirty(&fs_info->pinned_extents,
1426 bytenr, bytenr + num - 1, GFP_NOFS);
1427 }
1428 while (num > 0) {
1429 cache = btrfs_lookup_block_group(fs_info, bytenr);
a061fc8d
CM
1430 if (!cache) {
1431 u64 first = first_logical_byte(root, bytenr);
1432 WARN_ON(first < bytenr);
1433 len = min(first - bytenr, num);
1434 } else {
1435 len = min(num, cache->key.offset -
1436 (bytenr - cache->key.objectid));
1437 }
324ae4df 1438 if (pin) {
a061fc8d 1439 if (cache) {
c286ac48 1440 spin_lock(&cache->lock);
a061fc8d
CM
1441 cache->pinned += len;
1442 cache->space_info->bytes_pinned += len;
c286ac48 1443 spin_unlock(&cache->lock);
a061fc8d 1444 }
324ae4df
Y
1445 fs_info->total_pinned += len;
1446 } else {
a061fc8d 1447 if (cache) {
c286ac48 1448 spin_lock(&cache->lock);
a061fc8d
CM
1449 cache->pinned -= len;
1450 cache->space_info->bytes_pinned -= len;
c286ac48 1451 spin_unlock(&cache->lock);
a061fc8d 1452 }
324ae4df
Y
1453 fs_info->total_pinned -= len;
1454 }
1455 bytenr += len;
1456 num -= len;
1457 }
1458 return 0;
1459}
9078a3e1 1460
d1310b2e 1461int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
ccd467d6 1462{
ccd467d6 1463 u64 last = 0;
1a5bc167
CM
1464 u64 start;
1465 u64 end;
d1310b2e 1466 struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
ccd467d6 1467 int ret;
ccd467d6
CM
1468
1469 while(1) {
1a5bc167
CM
1470 ret = find_first_extent_bit(pinned_extents, last,
1471 &start, &end, EXTENT_DIRTY);
1472 if (ret)
ccd467d6 1473 break;
1a5bc167
CM
1474 set_extent_dirty(copy, start, end, GFP_NOFS);
1475 last = end + 1;
ccd467d6
CM
1476 }
1477 return 0;
1478}
1479
1480int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1481 struct btrfs_root *root,
d1310b2e 1482 struct extent_io_tree *unpin)
a28ec197 1483{
1a5bc167
CM
1484 u64 start;
1485 u64 end;
a28ec197 1486 int ret;
d1310b2e 1487 struct extent_io_tree *free_space_cache;
f510cfec 1488 free_space_cache = &root->fs_info->free_space_cache;
a28ec197 1489
925baedd 1490 mutex_lock(&root->fs_info->alloc_mutex);
a28ec197 1491 while(1) {
1a5bc167
CM
1492 ret = find_first_extent_bit(unpin, 0, &start, &end,
1493 EXTENT_DIRTY);
1494 if (ret)
a28ec197 1495 break;
324ae4df 1496 update_pinned_extents(root, start, end + 1 - start, 0);
1a5bc167
CM
1497 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1498 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
c286ac48
CM
1499 if (need_resched()) {
1500 mutex_unlock(&root->fs_info->alloc_mutex);
1501 cond_resched();
1502 mutex_lock(&root->fs_info->alloc_mutex);
1503 }
a28ec197 1504 }
925baedd 1505 mutex_unlock(&root->fs_info->alloc_mutex);
a28ec197
CM
1506 return 0;
1507}
1508
98ed5174
CM
1509static int finish_current_insert(struct btrfs_trans_handle *trans,
1510 struct btrfs_root *extent_root)
037e6390 1511{
7bb86316
CM
1512 u64 start;
1513 u64 end;
1514 struct btrfs_fs_info *info = extent_root->fs_info;
d8d5f3e1 1515 struct extent_buffer *eb;
7bb86316 1516 struct btrfs_path *path;
e2fa7227 1517 struct btrfs_key ins;
d8d5f3e1 1518 struct btrfs_disk_key first;
234b63a0 1519 struct btrfs_extent_item extent_item;
037e6390 1520 int ret;
d8d5f3e1 1521 int level;
1a5bc167 1522 int err = 0;
037e6390 1523
7d9eb12c 1524 WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
5f39d397 1525 btrfs_set_stack_extent_refs(&extent_item, 1);
62e2749e 1526 btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
7bb86316 1527 path = btrfs_alloc_path();
037e6390 1528
26b8003f 1529 while(1) {
1a5bc167
CM
1530 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1531 &end, EXTENT_LOCKED);
1532 if (ret)
26b8003f
CM
1533 break;
1534
1a5bc167
CM
1535 ins.objectid = start;
1536 ins.offset = end + 1 - start;
1537 err = btrfs_insert_item(trans, extent_root, &ins,
1538 &extent_item, sizeof(extent_item));
1539 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1540 GFP_NOFS);
c286ac48
CM
1541
1542 eb = btrfs_find_tree_block(extent_root, ins.objectid,
1543 ins.offset);
1544
1545 if (!btrfs_buffer_uptodate(eb, trans->transid)) {
1546 mutex_unlock(&extent_root->fs_info->alloc_mutex);
1547 btrfs_read_buffer(eb, trans->transid);
1548 mutex_lock(&extent_root->fs_info->alloc_mutex);
1549 }
1550
925baedd 1551 btrfs_tree_lock(eb);
d8d5f3e1
CM
1552 level = btrfs_header_level(eb);
1553 if (level == 0) {
1554 btrfs_item_key(eb, &first, 0);
1555 } else {
1556 btrfs_node_key(eb, &first, 0);
1557 }
925baedd
CM
1558 btrfs_tree_unlock(eb);
1559 free_extent_buffer(eb);
1560 /*
1561 * the first key is just a hint, so the race we've created
1562 * against reading it is fine
1563 */
7bb86316
CM
1564 err = btrfs_insert_extent_backref(trans, extent_root, path,
1565 start, extent_root->root_key.objectid,
f6dbff55
CM
1566 0, level,
1567 btrfs_disk_key_objectid(&first));
7bb86316 1568 BUG_ON(err);
c286ac48
CM
1569 if (need_resched()) {
1570 mutex_unlock(&extent_root->fs_info->alloc_mutex);
1571 cond_resched();
1572 mutex_lock(&extent_root->fs_info->alloc_mutex);
1573 }
037e6390 1574 }
7bb86316 1575 btrfs_free_path(path);
037e6390
CM
1576 return 0;
1577}
1578
db94535d
CM
1579static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1580 int pending)
e20d96d6 1581{
1a5bc167 1582 int err = 0;
8ef97622 1583
7d9eb12c 1584 WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
f4b9aa8d 1585 if (!pending) {
925baedd 1586 struct extent_buffer *buf;
db94535d 1587 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
5f39d397 1588 if (buf) {
974e35a8
Y
1589 if (btrfs_buffer_uptodate(buf, 0) &&
1590 btrfs_try_tree_lock(buf)) {
2c90e5d6
CM
1591 u64 transid =
1592 root->fs_info->running_transaction->transid;
dc17ff8f
CM
1593 u64 header_transid =
1594 btrfs_header_generation(buf);
6bc34676
CM
1595 if (header_transid == transid &&
1596 !btrfs_header_flag(buf,
1597 BTRFS_HEADER_FLAG_WRITTEN)) {
55c69072 1598 clean_tree_block(NULL, root, buf);
925baedd 1599 btrfs_tree_unlock(buf);
5f39d397 1600 free_extent_buffer(buf);
c549228f 1601 return 1;
2c90e5d6 1602 }
925baedd 1603 btrfs_tree_unlock(buf);
f4b9aa8d 1604 }
5f39d397 1605 free_extent_buffer(buf);
8ef97622 1606 }
324ae4df 1607 update_pinned_extents(root, bytenr, num_bytes, 1);
f4b9aa8d 1608 } else {
1a5bc167 1609 set_extent_bits(&root->fs_info->pending_del,
db94535d
CM
1610 bytenr, bytenr + num_bytes - 1,
1611 EXTENT_LOCKED, GFP_NOFS);
f4b9aa8d 1612 }
be744175 1613 BUG_ON(err < 0);
e20d96d6
CM
1614 return 0;
1615}
1616
fec577fb 1617/*
a28ec197 1618 * remove an extent from the root, returns 0 on success
fec577fb 1619 */
e089f05c 1620static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
7bb86316
CM
1621 *root, u64 bytenr, u64 num_bytes,
1622 u64 root_objectid, u64 ref_generation,
1623 u64 owner_objectid, u64 owner_offset, int pin,
e37c9e69 1624 int mark_free)
a28ec197 1625{
5caf2a00 1626 struct btrfs_path *path;
e2fa7227 1627 struct btrfs_key key;
1261ec42
CM
1628 struct btrfs_fs_info *info = root->fs_info;
1629 struct btrfs_root *extent_root = info->extent_root;
5f39d397 1630 struct extent_buffer *leaf;
a28ec197 1631 int ret;
952fccac
CM
1632 int extent_slot = 0;
1633 int found_extent = 0;
1634 int num_to_del = 1;
234b63a0 1635 struct btrfs_extent_item *ei;
cf27e1ee 1636 u32 refs;
037e6390 1637
7d9eb12c 1638 WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
db94535d 1639 key.objectid = bytenr;
62e2749e 1640 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
db94535d 1641 key.offset = num_bytes;
5caf2a00 1642 path = btrfs_alloc_path();
54aa1f4d
CM
1643 if (!path)
1644 return -ENOMEM;
5f26f772 1645
3c12ac72 1646 path->reada = 1;
7bb86316
CM
1647 ret = lookup_extent_backref(trans, extent_root, path,
1648 bytenr, root_objectid,
1649 ref_generation,
1650 owner_objectid, owner_offset, 1);
1651 if (ret == 0) {
952fccac
CM
1652 struct btrfs_key found_key;
1653 extent_slot = path->slots[0];
1654 while(extent_slot > 0) {
1655 extent_slot--;
1656 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1657 extent_slot);
1658 if (found_key.objectid != bytenr)
1659 break;
1660 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1661 found_key.offset == num_bytes) {
1662 found_extent = 1;
1663 break;
1664 }
1665 if (path->slots[0] - extent_slot > 5)
1666 break;
1667 }
1668 if (!found_extent)
1669 ret = btrfs_del_item(trans, extent_root, path);
7bb86316
CM
1670 } else {
1671 btrfs_print_leaf(extent_root, path->nodes[0]);
1672 WARN_ON(1);
1673 printk("Unable to find ref byte nr %Lu root %Lu "
1674 " gen %Lu owner %Lu offset %Lu\n", bytenr,
1675 root_objectid, ref_generation, owner_objectid,
1676 owner_offset);
1677 }
952fccac
CM
1678 if (!found_extent) {
1679 btrfs_release_path(extent_root, path);
1680 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1681 if (ret < 0)
1682 return ret;
1683 BUG_ON(ret);
1684 extent_slot = path->slots[0];
1685 }
5f39d397
CM
1686
1687 leaf = path->nodes[0];
952fccac 1688 ei = btrfs_item_ptr(leaf, extent_slot,
123abc88 1689 struct btrfs_extent_item);
5f39d397
CM
1690 refs = btrfs_extent_refs(leaf, ei);
1691 BUG_ON(refs == 0);
1692 refs -= 1;
1693 btrfs_set_extent_refs(leaf, ei, refs);
952fccac 1694
5f39d397
CM
1695 btrfs_mark_buffer_dirty(leaf);
1696
952fccac
CM
1697 if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1698 /* if the back ref and the extent are next to each other
1699 * they get deleted below in one shot
1700 */
1701 path->slots[0] = extent_slot;
1702 num_to_del = 2;
1703 } else if (found_extent) {
1704 /* otherwise delete the extent back ref */
1705 ret = btrfs_del_item(trans, extent_root, path);
1706 BUG_ON(ret);
1707 /* if refs are 0, we need to setup the path for deletion */
1708 if (refs == 0) {
1709 btrfs_release_path(extent_root, path);
1710 ret = btrfs_search_slot(trans, extent_root, &key, path,
1711 -1, 1);
1712 if (ret < 0)
1713 return ret;
1714 BUG_ON(ret);
1715 }
1716 }
1717
cf27e1ee 1718 if (refs == 0) {
db94535d
CM
1719 u64 super_used;
1720 u64 root_used;
78fae27e
CM
1721
1722 if (pin) {
db94535d 1723 ret = pin_down_bytes(root, bytenr, num_bytes, 0);
c549228f
Y
1724 if (ret > 0)
1725 mark_free = 1;
1726 BUG_ON(ret < 0);
78fae27e
CM
1727 }
1728
58176a96 1729 /* block accounting for super block */
a2135011 1730 spin_lock_irq(&info->delalloc_lock);
db94535d
CM
1731 super_used = btrfs_super_bytes_used(&info->super_copy);
1732 btrfs_set_super_bytes_used(&info->super_copy,
1733 super_used - num_bytes);
a2135011 1734 spin_unlock_irq(&info->delalloc_lock);
58176a96
JB
1735
1736 /* block accounting for root item */
db94535d 1737 root_used = btrfs_root_used(&root->root_item);
5f39d397 1738 btrfs_set_root_used(&root->root_item,
db94535d 1739 root_used - num_bytes);
952fccac
CM
1740 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1741 num_to_del);
54aa1f4d
CM
1742 if (ret) {
1743 return ret;
1744 }
db94535d 1745 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
0b86a832 1746 mark_free);
9078a3e1 1747 BUG_ON(ret);
a28ec197 1748 }
5caf2a00 1749 btrfs_free_path(path);
e089f05c 1750 finish_current_insert(trans, extent_root);
a28ec197
CM
1751 return ret;
1752}
1753
a28ec197
CM
1754/*
1755 * find all the blocks marked as pending in the radix tree and remove
1756 * them from the extent map
1757 */
e089f05c
CM
1758static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1759 btrfs_root *extent_root)
a28ec197
CM
1760{
1761 int ret;
e20d96d6 1762 int err = 0;
1a5bc167
CM
1763 u64 start;
1764 u64 end;
d1310b2e
CM
1765 struct extent_io_tree *pending_del;
1766 struct extent_io_tree *pinned_extents;
8ef97622 1767
7d9eb12c 1768 WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
1a5bc167
CM
1769 pending_del = &extent_root->fs_info->pending_del;
1770 pinned_extents = &extent_root->fs_info->pinned_extents;
a28ec197
CM
1771
1772 while(1) {
1a5bc167
CM
1773 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1774 EXTENT_LOCKED);
1775 if (ret)
a28ec197 1776 break;
1a5bc167
CM
1777 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1778 GFP_NOFS);
c286ac48
CM
1779 if (!test_range_bit(&extent_root->fs_info->extent_ins,
1780 start, end, EXTENT_LOCKED, 0)) {
1781 update_pinned_extents(extent_root, start,
1782 end + 1 - start, 1);
1783 ret = __free_extent(trans, extent_root,
1784 start, end + 1 - start,
1785 extent_root->root_key.objectid,
1786 0, 0, 0, 0, 0);
1787 } else {
1788 clear_extent_bits(&extent_root->fs_info->extent_ins,
1789 start, end, EXTENT_LOCKED, GFP_NOFS);
1790 }
1a5bc167
CM
1791 if (ret)
1792 err = ret;
c286ac48
CM
1793
1794 if (need_resched()) {
1795 mutex_unlock(&extent_root->fs_info->alloc_mutex);
1796 cond_resched();
1797 mutex_lock(&extent_root->fs_info->alloc_mutex);
1798 }
fec577fb 1799 }
e20d96d6 1800 return err;
fec577fb
CM
1801}
1802
1803/*
1804 * remove an extent from the root, returns 0 on success
1805 */
925baedd
CM
1806static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
1807 struct btrfs_root *root, u64 bytenr,
1808 u64 num_bytes, u64 root_objectid,
1809 u64 ref_generation, u64 owner_objectid,
1810 u64 owner_offset, int pin)
fec577fb 1811{
9f5fae2f 1812 struct btrfs_root *extent_root = root->fs_info->extent_root;
fec577fb
CM
1813 int pending_ret;
1814 int ret;
a28ec197 1815
db94535d 1816 WARN_ON(num_bytes < root->sectorsize);
7bb86316
CM
1817 if (!root->ref_cows)
1818 ref_generation = 0;
1819
fec577fb 1820 if (root == extent_root) {
db94535d 1821 pin_down_bytes(root, bytenr, num_bytes, 1);
fec577fb
CM
1822 return 0;
1823 }
7bb86316
CM
1824 ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1825 ref_generation, owner_objectid, owner_offset,
1826 pin, pin == 0);
ee6e6504
CM
1827
1828 finish_current_insert(trans, root->fs_info->extent_root);
e20d96d6 1829 pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
fec577fb
CM
1830 return ret ? ret : pending_ret;
1831}
1832
925baedd
CM
1833int btrfs_free_extent(struct btrfs_trans_handle *trans,
1834 struct btrfs_root *root, u64 bytenr,
1835 u64 num_bytes, u64 root_objectid,
1836 u64 ref_generation, u64 owner_objectid,
1837 u64 owner_offset, int pin)
1838{
1839 int ret;
1840
1841 maybe_lock_mutex(root);
1842 ret = __btrfs_free_extent(trans, root, bytenr, num_bytes,
1843 root_objectid, ref_generation,
1844 owner_objectid, owner_offset, pin);
1845 maybe_unlock_mutex(root);
1846 return ret;
1847}
1848
87ee04eb
CM
1849static u64 stripe_align(struct btrfs_root *root, u64 val)
1850{
1851 u64 mask = ((u64)root->stripesize - 1);
1852 u64 ret = (val + mask) & ~mask;
1853 return ret;
1854}
1855
fec577fb
CM
1856/*
1857 * walks the btree of allocated extents and find a hole of a given size.
1858 * The key ins is changed to record the hole:
1859 * ins->objectid == block start
62e2749e 1860 * ins->flags = BTRFS_EXTENT_ITEM_KEY
fec577fb
CM
1861 * ins->offset == number of blocks
1862 * Any available blocks before search_start are skipped.
1863 */
98ed5174
CM
/*
 * Search the extent tree root's block groups for a free range of num_bytes
 * and describe it in *ins (objectid = start byte, offset = num_bytes, type =
 * BTRFS_EXTENT_ITEM_KEY).
 *
 * orig_root:     root the allocation is for; the search itself always runs
 *                against orig_root->fs_info->extent_root.
 * num_bytes:     size of the extent wanted.
 * empty_size:    extra slack added to the amount searched for
 *                (total_needed); dropped again before the final pass.
 * search_start:  lowest byte to consider (raised to the first logical byte).
 * search_end:    upper bound; (u64)-1 means the super block's total_bytes.
 * hint_byte:     preferred starting point; replaced by the remembered
 *                last-allocation pointer when one is in use and non-zero.
 * exclude_start/exclude_nr: byte range that must not be handed out.
 * data:          block group type/profile bits for the allocation.
 *
 * Returns 0 on success, -ENOSPC once a full scan has failed, or any other
 * error reported by find_search_start().
 */
1864static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1865 struct btrfs_root *orig_root,
1866 u64 num_bytes, u64 empty_size,
1867 u64 search_start, u64 search_end,
1868 u64 hint_byte, struct btrfs_key *ins,
1869 u64 exclude_start, u64 exclude_nr,
1870 int data)
fec577fb 1871{
87ee04eb 1872 int ret;
a061fc8d 1873 u64 orig_search_start;
9f5fae2f 1874 struct btrfs_root * root = orig_root->fs_info->extent_root;
f2458e1d 1875 struct btrfs_fs_info *info = root->fs_info;
db94535d 1876 u64 total_needed = num_bytes;
239b14b3 1877 u64 *last_ptr = NULL;
be08c1b9 1878 struct btrfs_block_group_cache *block_group;
be744175 1879 int full_scan = 0;
fbdc762b 1880 int wrapped = 0;
0ef3e66b 1881 int chunk_alloc_done = 0;
239b14b3 1882 int empty_cluster = 2 * 1024 * 1024;
0ef3e66b 1883 int allowed_chunk_alloc = 0;
fec577fb 1884
db94535d 1885 WARN_ON(num_bytes < root->sectorsize);
b1a4d965
CM
1886 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1887
0ef3e66b
CM
/* a new chunk may only be allocated for ref_cows roots or when slack was requested */
1888 if (orig_root->ref_cows || empty_size)
1889 allowed_chunk_alloc = 1;
1890
239b14b3
CM
/* metadata allocations remember where the last one ended and use a 256K cluster */
1891 if (data & BTRFS_BLOCK_GROUP_METADATA) {
1892 last_ptr = &root->fs_info->last_alloc;
8790d502 1893 empty_cluster = 256 * 1024;
239b14b3
CM
1894 }
1895
/* data allocations only track the last allocation when the SSD option is set */
1896 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
1897 last_ptr = &root->fs_info->last_data_alloc;
1898 }
1899
/*
 * with a remembered position, continue from it; without one, ask for an
 * extra empty_cluster bytes of slack
 */
1900 if (last_ptr) {
1901 if (*last_ptr)
1902 hint_byte = *last_ptr;
1903 else {
1904 empty_size += empty_cluster;
1905 }
1906 }
1907
a061fc8d
CM
1908 search_start = max(search_start, first_logical_byte(root, 0));
1909 orig_search_start = search_start;
1910
7f93bf8d
CM
1911 if (search_end == (u64)-1)
1912 search_end = btrfs_super_total_bytes(&info->super_copy);
0b86a832 1913
/* pick the starting block group, from the hint if there is one */
db94535d 1914 if (hint_byte) {
0ef3e66b 1915 block_group = btrfs_lookup_first_block_group(info, hint_byte);
1a2b2ac7
CM
1916 if (!block_group)
1917 hint_byte = search_start;
c286ac48 1918 block_group = btrfs_find_block_group(root, block_group,
db94535d 1919 hint_byte, data, 1);
239b14b3
CM
1920 if (last_ptr && *last_ptr == 0 && block_group)
1921 hint_byte = block_group->key.objectid;
be744175 1922 } else {
c286ac48 1923 block_group = btrfs_find_block_group(root,
1a2b2ac7
CM
1924 trans->block_group,
1925 search_start, data, 1);
be744175 1926 }
239b14b3 1927 search_start = max(search_start, hint_byte);
be744175 1928
6702ed49 1929 total_needed += empty_size;
0b86a832 1930
/* re-entered from new_group each time a candidate range is rejected */
be744175 1931check_failed:
70b043f0 1932 if (!block_group) {
0ef3e66b
CM
1933 block_group = btrfs_lookup_first_block_group(info,
1934 search_start);
70b043f0 1935 if (!block_group)
0ef3e66b 1936 block_group = btrfs_lookup_first_block_group(info,
70b043f0
CM
1937 orig_search_start);
1938 }
0ef3e66b
CM
/*
 * on the final (full scan) pass, once only: either allocate a chunk now
 * or, if that is not allowed, set force_alloc on the space info
 */
1939 if (full_scan && !chunk_alloc_done) {
1940 if (allowed_chunk_alloc) {
1941 do_chunk_alloc(trans, root,
1942 num_bytes + 2 * 1024 * 1024, data, 1);
1943 allowed_chunk_alloc = 0;
1944 } else if (block_group && block_group_bits(block_group, data)) {
1945 block_group->space_info->force_alloc = 1;
1946 }
1947 chunk_alloc_done = 1;
1948 }
0b86a832
CM
1949 ret = find_search_start(root, &block_group, &search_start,
1950 total_needed, data);
239b14b3
CM
/* nothing found after the remembered position: forget it and retry from the start */
1951 if (ret == -ENOSPC && last_ptr && *last_ptr) {
1952 *last_ptr = 0;
0ef3e66b
CM
1953 block_group = btrfs_lookup_first_block_group(info,
1954 orig_search_start);
239b14b3
CM
1955 search_start = orig_search_start;
1956 ret = find_search_start(root, &block_group, &search_start,
1957 total_needed, data);
1958 }
1959 if (ret == -ENOSPC)
1960 goto enospc;
0b86a832 1961 if (ret)
d548ee51 1962 goto error;
e19caa5f 1963
239b14b3
CM
/*
 * the range found does not start at the remembered position: reset the
 * position, add cluster slack if none was requested, and search again
 * from the original start
 */
1964 if (last_ptr && *last_ptr && search_start != *last_ptr) {
1965 *last_ptr = 0;
1966 if (!empty_size) {
1967 empty_size += empty_cluster;
1968 total_needed += empty_size;
1969 }
0ef3e66b 1970 block_group = btrfs_lookup_first_block_group(info,
239b14b3
CM
1971 orig_search_start);
1972 search_start = orig_search_start;
1973 ret = find_search_start(root, &block_group,
1974 &search_start, total_needed, data);
1975 if (ret == -ENOSPC)
1976 goto enospc;
1977 if (ret)
1978 goto error;
1979 }
1980
0b86a832
CM
1981 search_start = stripe_align(root, search_start);
1982 ins->objectid = search_start;
1983 ins->offset = num_bytes;
e37c9e69 1984
db94535d 1985 if (ins->objectid + num_bytes >= search_end)
cf67582b 1986 goto enospc;
0b86a832
CM
1987
/* candidate runs past the end of its block group: move on to the next group */
1988 if (ins->objectid + num_bytes >
1989 block_group->key.objectid + block_group->key.offset) {
e19caa5f
CM
1990 search_start = block_group->key.objectid +
1991 block_group->key.offset;
1992 goto new_group;
1993 }
0b86a832 1994
/* reject ranges carrying EXTENT_LOCKED in the extent_ins tree */
1a5bc167 1995 if (test_range_bit(&info->extent_ins, ins->objectid,
db94535d
CM
1996 ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1997 search_start = ins->objectid + num_bytes;
1a5bc167
CM
1998 goto new_group;
1999 }
0b86a832 2000
/* reject ranges marked EXTENT_DIRTY in the pinned_extents tree */
1a5bc167 2001 if (test_range_bit(&info->pinned_extents, ins->objectid,
db94535d
CM
2002 ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
2003 search_start = ins->objectid + num_bytes;
1a5bc167 2004 goto new_group;
fec577fb 2005 }
0b86a832 2006
/* reject ranges overlapping the caller's exclusion window */
db94535d 2007 if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
f2654de4
CM
2008 ins->objectid < exclude_start + exclude_nr)) {
2009 search_start = exclude_start + exclude_nr;
2010 goto new_group;
2011 }
0b86a832 2012
/* for non-data allocations, make the containing group the transaction's block group */
6324fbf3 2013 if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
5276aeda 2014 block_group = btrfs_lookup_block_group(info, ins->objectid);
26b8003f
CM
2015 if (block_group)
2016 trans->block_group = block_group;
f2458e1d 2017 }
db94535d 2018 ins->offset = num_bytes;
239b14b3
CM
/* remember the end of this extent; cleared when it coincides with total_bytes */
2019 if (last_ptr) {
2020 *last_ptr = ins->objectid + ins->offset;
2021 if (*last_ptr ==
2022 btrfs_super_total_bytes(&root->fs_info->super_copy)) {
2023 *last_ptr = 0;
2024 }
2025 }
fec577fb 2026 return 0;
be744175
CM
2027
/*
 * Retry escalation.  Each time the search runs off the end (or jumps to
 * enospc) it restarts from orig_search_start: the first time only sets
 * 'wrapped', the second time drops the empty_size slack and sets
 * 'full_scan', and the third time gives up with -ENOSPC.
 */
2028new_group:
db94535d 2029 if (search_start + num_bytes >= search_end) {
cf67582b 2030enospc:
be744175 2031 search_start = orig_search_start;
fbdc762b
CM
2032 if (full_scan) {
2033 ret = -ENOSPC;
2034 goto error;
2035 }
6702ed49
CM
2036 if (wrapped) {
2037 if (!full_scan)
2038 total_needed -= empty_size;
fbdc762b 2039 full_scan = 1;
6702ed49 2040 } else
fbdc762b 2041 wrapped = 1;
be744175 2042 }
0ef3e66b 2043 block_group = btrfs_lookup_first_block_group(info, search_start);
fbdc762b 2044 cond_resched();
c286ac48 2045 block_group = btrfs_find_block_group(root, block_group,
1a2b2ac7 2046 search_start, data, 0);
be744175
CM
2047 goto check_failed;
2048
0f70abe2 2049error:
0f70abe2 2050 return ret;
fec577fb 2051}
ec44a35c 2052
e6dcd2dc
CM
2053static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2054 struct btrfs_root *root,
2055 u64 num_bytes, u64 min_alloc_size,
2056 u64 empty_size, u64 hint_byte,
2057 u64 search_end, struct btrfs_key *ins,
2058 u64 data)
fec577fb
CM
2059{
2060 int ret;
fbdc762b 2061 u64 search_start = 0;
8790d502 2062 u64 alloc_profile;
1261ec42 2063 struct btrfs_fs_info *info = root->fs_info;
925baedd 2064
6324fbf3 2065 if (data) {
8790d502
CM
2066 alloc_profile = info->avail_data_alloc_bits &
2067 info->data_alloc_profile;
2068 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
6324fbf3 2069 } else if (root == root->fs_info->chunk_root) {
8790d502
CM
2070 alloc_profile = info->avail_system_alloc_bits &
2071 info->system_alloc_profile;
2072 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
6324fbf3 2073 } else {
8790d502
CM
2074 alloc_profile = info->avail_metadata_alloc_bits &
2075 info->metadata_alloc_profile;
2076 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
6324fbf3 2077 }
98d20f67 2078again:
a061fc8d 2079 data = reduce_alloc_profile(root, data);
0ef3e66b
CM
2080 /*
2081 * the only place that sets empty_size is btrfs_realloc_node, which
2082 * is not called recursively on allocations
2083 */
2084 if (empty_size || root->ref_cows) {
593060d7 2085 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
6324fbf3 2086 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
0ef3e66b
CM
2087 2 * 1024 * 1024,
2088 BTRFS_BLOCK_GROUP_METADATA |
2089 (info->metadata_alloc_profile &
2090 info->avail_metadata_alloc_bits), 0);
6324fbf3
CM
2091 BUG_ON(ret);
2092 }
2093 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
0ef3e66b 2094 num_bytes + 2 * 1024 * 1024, data, 0);
6324fbf3
CM
2095 BUG_ON(ret);
2096 }
0b86a832 2097
db94535d
CM
2098 WARN_ON(num_bytes < root->sectorsize);
2099 ret = find_free_extent(trans, root, num_bytes, empty_size,
2100 search_start, search_end, hint_byte, ins,
26b8003f
CM
2101 trans->alloc_exclude_start,
2102 trans->alloc_exclude_nr, data);
3b951516 2103
98d20f67
CM
2104 if (ret == -ENOSPC && num_bytes > min_alloc_size) {
2105 num_bytes = num_bytes >> 1;
2106 num_bytes = max(num_bytes, min_alloc_size);
0ef3e66b
CM
2107 do_chunk_alloc(trans, root->fs_info->extent_root,
2108 num_bytes, data, 1);
98d20f67
CM
2109 goto again;
2110 }
ec44a35c
CM
2111 if (ret) {
2112 printk("allocation failed flags %Lu\n", data);
925baedd 2113 BUG();
925baedd 2114 }
e6dcd2dc
CM
2115 clear_extent_dirty(&root->fs_info->free_space_cache,
2116 ins->objectid, ins->objectid + ins->offset - 1,
2117 GFP_NOFS);
2118 return 0;
2119}
2120
2121int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2122 struct btrfs_root *root,
2123 u64 num_bytes, u64 min_alloc_size,
2124 u64 empty_size, u64 hint_byte,
2125 u64 search_end, struct btrfs_key *ins,
2126 u64 data)
2127{
2128 int ret;
2129 maybe_lock_mutex(root);
2130 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
2131 empty_size, hint_byte, search_end, ins,
2132 data);
2133 maybe_unlock_mutex(root);
2134 return ret;
2135}
2136
2137static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2138 struct btrfs_root *root,
2139 u64 root_objectid, u64 ref_generation,
2140 u64 owner, u64 owner_offset,
2141 struct btrfs_key *ins)
2142{
2143 int ret;
2144 int pending_ret;
2145 u64 super_used;
2146 u64 root_used;
2147 u64 num_bytes = ins->offset;
2148 u32 sizes[2];
2149 struct btrfs_fs_info *info = root->fs_info;
2150 struct btrfs_root *extent_root = info->extent_root;
2151 struct btrfs_extent_item *extent_item;
2152 struct btrfs_extent_ref *ref;
2153 struct btrfs_path *path;
2154 struct btrfs_key keys[2];
fec577fb 2155
58176a96 2156 /* block accounting for super block */
a2135011 2157 spin_lock_irq(&info->delalloc_lock);
db94535d
CM
2158 super_used = btrfs_super_bytes_used(&info->super_copy);
2159 btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
a2135011 2160 spin_unlock_irq(&info->delalloc_lock);
26b8003f 2161
58176a96 2162 /* block accounting for root item */
db94535d
CM
2163 root_used = btrfs_root_used(&root->root_item);
2164 btrfs_set_root_used(&root->root_item, root_used + num_bytes);
58176a96 2165
26b8003f 2166 if (root == extent_root) {
1a5bc167
CM
2167 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
2168 ins->objectid + ins->offset - 1,
2169 EXTENT_LOCKED, GFP_NOFS);
26b8003f
CM
2170 goto update_block;
2171 }
2172
47e4bb98
CM
2173 memcpy(&keys[0], ins, sizeof(*ins));
2174 keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
2175 owner, owner_offset);
2176 keys[1].objectid = ins->objectid;
2177 keys[1].type = BTRFS_EXTENT_REF_KEY;
2178 sizes[0] = sizeof(*extent_item);
2179 sizes[1] = sizeof(*ref);
7bb86316
CM
2180
2181 path = btrfs_alloc_path();
2182 BUG_ON(!path);
47e4bb98
CM
2183
2184 ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
2185 sizes, 2);
26b8003f 2186
ccd467d6 2187 BUG_ON(ret);
47e4bb98
CM
2188 extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2189 struct btrfs_extent_item);
2190 btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
2191 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
2192 struct btrfs_extent_ref);
2193
2194 btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
2195 btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
2196 btrfs_set_ref_objectid(path->nodes[0], ref, owner);
2197 btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
2198
2199 btrfs_mark_buffer_dirty(path->nodes[0]);
2200
2201 trans->alloc_exclude_start = 0;
2202 trans->alloc_exclude_nr = 0;
7bb86316 2203 btrfs_free_path(path);
e089f05c 2204 finish_current_insert(trans, extent_root);
e20d96d6 2205 pending_ret = del_pending_extents(trans, extent_root);
f510cfec 2206
925baedd
CM
2207 if (ret)
2208 goto out;
e37c9e69 2209 if (pending_ret) {
925baedd
CM
2210 ret = pending_ret;
2211 goto out;
e37c9e69 2212 }
26b8003f
CM
2213
2214update_block:
0b86a832 2215 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
f5947066
CM
2216 if (ret) {
2217 printk("update block group failed for %Lu %Lu\n",
2218 ins->objectid, ins->offset);
2219 BUG();
2220 }
925baedd 2221out:
e6dcd2dc
CM
2222 return ret;
2223}
2224
2225int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2226 struct btrfs_root *root,
2227 u64 root_objectid, u64 ref_generation,
2228 u64 owner, u64 owner_offset,
2229 struct btrfs_key *ins)
2230{
2231 int ret;
2232 maybe_lock_mutex(root);
2233 ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
2234 ref_generation, owner,
2235 owner_offset, ins);
2236 maybe_unlock_mutex(root);
2237 return ret;
2238}
2239/*
2240 * finds a free extent and does all the dirty work required for allocation
2241 * returns the key for the extent through ins, and a tree buffer for
2242 * the first block of the extent through buf.
2243 *
2244 * returns 0 if everything worked, non-zero otherwise.
2245 */
2246int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
2247 struct btrfs_root *root,
2248 u64 num_bytes, u64 min_alloc_size,
2249 u64 root_objectid, u64 ref_generation,
2250 u64 owner, u64 owner_offset,
2251 u64 empty_size, u64 hint_byte,
2252 u64 search_end, struct btrfs_key *ins, u64 data)
2253{
2254 int ret;
2255
2256 maybe_lock_mutex(root);
2257
2258 ret = __btrfs_reserve_extent(trans, root, num_bytes,
2259 min_alloc_size, empty_size, hint_byte,
2260 search_end, ins, data);
2261 BUG_ON(ret);
2262 ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
2263 ref_generation, owner,
2264 owner_offset, ins);
2265 BUG_ON(ret);
2266
925baedd
CM
2267 maybe_unlock_mutex(root);
2268 return ret;
fec577fb 2269}
fec577fb
CM
2270/*
2271 * helper function to allocate a block for a given tree
2272 * returns the tree buffer or NULL.
2273 */
5f39d397 2274struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
7bb86316
CM
2275 struct btrfs_root *root,
2276 u32 blocksize,
2277 u64 root_objectid,
2278 u64 ref_generation,
2279 u64 first_objectid,
2280 int level,
2281 u64 hint,
5f39d397 2282 u64 empty_size)
fec577fb 2283{
e2fa7227 2284 struct btrfs_key ins;
fec577fb 2285 int ret;
5f39d397 2286 struct extent_buffer *buf;
fec577fb 2287
98d20f67 2288 ret = btrfs_alloc_extent(trans, root, blocksize, blocksize,
7bb86316 2289 root_objectid, ref_generation,
f6dbff55 2290 level, first_objectid, empty_size, hint,
db94535d 2291 (u64)-1, &ins, 0);
fec577fb 2292 if (ret) {
54aa1f4d
CM
2293 BUG_ON(ret > 0);
2294 return ERR_PTR(ret);
fec577fb 2295 }
db94535d 2296 buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
54aa1f4d 2297 if (!buf) {
7bb86316
CM
2298 btrfs_free_extent(trans, root, ins.objectid, blocksize,
2299 root->root_key.objectid, ref_generation,
2300 0, 0, 0);
54aa1f4d
CM
2301 return ERR_PTR(-ENOMEM);
2302 }
55c69072 2303 btrfs_set_header_generation(buf, trans->transid);
925baedd 2304 btrfs_tree_lock(buf);
55c69072 2305 clean_tree_block(trans, root, buf);
5f39d397 2306 btrfs_set_buffer_uptodate(buf);
55c69072
CM
2307
2308 if (PageDirty(buf->first_page)) {
2309 printk("page %lu dirty\n", buf->first_page->index);
2310 WARN_ON(1);
2311 }
2312
5f39d397
CM
2313 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
2314 buf->start + buf->len - 1, GFP_NOFS);
d3c2fdcf 2315 trans->blocks_used++;
fec577fb
CM
2316 return buf;
2317}
a28ec197 2318
31153d81 2319static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans,
bcc63abb 2320 struct btrfs_root *root,
31153d81 2321 struct extent_buffer *leaf)
6407bf6d 2322{
7bb86316
CM
2323 u64 leaf_owner;
2324 u64 leaf_generation;
5f39d397 2325 struct btrfs_key key;
6407bf6d
CM
2326 struct btrfs_file_extent_item *fi;
2327 int i;
2328 int nritems;
2329 int ret;
2330
5f39d397
CM
2331 BUG_ON(!btrfs_is_leaf(leaf));
2332 nritems = btrfs_header_nritems(leaf);
7bb86316
CM
2333 leaf_owner = btrfs_header_owner(leaf);
2334 leaf_generation = btrfs_header_generation(leaf);
2335
4a096752
CM
2336 mutex_unlock(&root->fs_info->alloc_mutex);
2337
6407bf6d 2338 for (i = 0; i < nritems; i++) {
db94535d 2339 u64 disk_bytenr;
e34a5b4f 2340 cond_resched();
5f39d397
CM
2341
2342 btrfs_item_key_to_cpu(leaf, &key, i);
2343 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
6407bf6d
CM
2344 continue;
2345 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
5f39d397
CM
2346 if (btrfs_file_extent_type(leaf, fi) ==
2347 BTRFS_FILE_EXTENT_INLINE)
236454df 2348 continue;
6407bf6d
CM
2349 /*
2350 * FIXME make sure to insert a trans record that
2351 * repeats the snapshot del on crash
2352 */
db94535d
CM
2353 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2354 if (disk_bytenr == 0)
3a686375 2355 continue;
4a096752
CM
2356
2357 mutex_lock(&root->fs_info->alloc_mutex);
925baedd 2358 ret = __btrfs_free_extent(trans, root, disk_bytenr,
7bb86316
CM
2359 btrfs_file_extent_disk_num_bytes(leaf, fi),
2360 leaf_owner, leaf_generation,
2361 key.objectid, key.offset, 0);
4a096752 2362 mutex_unlock(&root->fs_info->alloc_mutex);
6407bf6d
CM
2363 BUG_ON(ret);
2364 }
4a096752
CM
2365
2366 mutex_lock(&root->fs_info->alloc_mutex);
6407bf6d
CM
2367 return 0;
2368}
2369
31153d81 2370static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
bcc63abb 2371 struct btrfs_root *root,
31153d81
YZ
2372 struct btrfs_leaf_ref *ref)
2373{
2374 int i;
2375 int ret;
2376 struct btrfs_extent_info *info = ref->extents;
2377
2378 mutex_unlock(&root->fs_info->alloc_mutex);
2379 for (i = 0; i < ref->nritems; i++) {
2380 mutex_lock(&root->fs_info->alloc_mutex);
2381 ret = __btrfs_free_extent(trans, root,
2382 info->bytenr, info->num_bytes,
2383 ref->owner, ref->generation,
2384 info->objectid, info->offset, 0);
2385 mutex_unlock(&root->fs_info->alloc_mutex);
2386 BUG_ON(ret);
2387 info++;
2388 }
2389 mutex_lock(&root->fs_info->alloc_mutex);
2390
2391 return 0;
2392}
2393
98ed5174 2394static void noinline reada_walk_down(struct btrfs_root *root,
bea495e5
CM
2395 struct extent_buffer *node,
2396 int slot)
e011599b 2397{
db94535d 2398 u64 bytenr;
bea495e5
CM
2399 u64 last = 0;
2400 u32 nritems;
e011599b 2401 u32 refs;
db94535d 2402 u32 blocksize;
bea495e5
CM
2403 int ret;
2404 int i;
2405 int level;
2406 int skipped = 0;
e011599b 2407
5f39d397 2408 nritems = btrfs_header_nritems(node);
db94535d 2409 level = btrfs_header_level(node);
bea495e5
CM
2410 if (level)
2411 return;
2412
2413 for (i = slot; i < nritems && skipped < 32; i++) {
db94535d 2414 bytenr = btrfs_node_blockptr(node, i);
bea495e5
CM
2415 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
2416 (last > bytenr && last - bytenr > 32 * 1024))) {
2417 skipped++;
e011599b 2418 continue;
bea495e5
CM
2419 }
2420 blocksize = btrfs_level_size(root, level - 1);
2421 if (i != slot) {
2422 ret = lookup_extent_ref(NULL, root, bytenr,
2423 blocksize, &refs);
2424 BUG_ON(ret);
2425 if (refs != 1) {
2426 skipped++;
2427 continue;
2428 }
2429 }
ca7a79ad
CM
2430 ret = readahead_tree_block(root, bytenr, blocksize,
2431 btrfs_node_ptr_generation(node, i));
bea495e5 2432 last = bytenr + blocksize;
409eb95d 2433 cond_resched();
e011599b
CM
2434 if (ret)
2435 break;
2436 }
2437}
2438
333db94c
CM
2439int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
2440 u32 *refs)
2441{
017e5369 2442 int ret;
333db94c 2443 mutex_unlock(&root->fs_info->alloc_mutex);
017e5369 2444 ret = lookup_extent_ref(NULL, root, start, len, refs);
e7a84565 2445 cond_resched();
333db94c 2446 mutex_lock(&root->fs_info->alloc_mutex);
017e5369 2447 return ret;
333db94c
CM
2448}
2449
9aca1d51
CM
2450/*
2451 * helper function for drop_snapshot, this walks down the tree dropping ref
2452 * counts as it goes.
2453 */
98ed5174
CM
/*
 * path/level: on entry path->nodes[*level] is the block to start from and
 * path->slots[*level] the next child to visit.  On return the block the
 * walk stopped at has had one reference dropped, has been removed from the
 * path, and *level has been incremented to point at its parent's level.
 *
 * fs_info->alloc_mutex is taken on entry and released on exit; it is
 * dropped while a child block is read from disk, and the helpers used for
 * reference lookups and leaf processing drop it temporarily as well.
 *
 * Always returns 0; every failure in here is a BUG.
 */
2454static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2455 struct btrfs_root *root,
2456 struct btrfs_path *path, int *level)
20524f02 2457{
7bb86316
CM
2458 u64 root_owner;
2459 u64 root_gen;
2460 u64 bytenr;
ca7a79ad 2461 u64 ptr_gen;
5f39d397
CM
2462 struct extent_buffer *next;
2463 struct extent_buffer *cur;
7bb86316 2464 struct extent_buffer *parent;
31153d81 2465 struct btrfs_leaf_ref *ref;
db94535d 2466 u32 blocksize;
20524f02
CM
2467 int ret;
2468 u32 refs;
2469
925baedd
CM
2470 mutex_lock(&root->fs_info->alloc_mutex);
2471
5caf2a00
CM
2472 WARN_ON(*level < 0);
2473 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* a starting block with more than one reference is not descended into */
333db94c 2474 ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
db94535d 2475 path->nodes[*level]->len, &refs);
20524f02
CM
2476 BUG_ON(ret);
2477 if (refs > 1)
2478 goto out;
e011599b 2479
9aca1d51
CM
2480 /*
2481 * walk down to the last node level and free all the leaves
2482 */
6407bf6d 2483 while(*level >= 0) {
5caf2a00
CM
2484 WARN_ON(*level < 0);
2485 WARN_ON(*level >= BTRFS_MAX_LEVEL);
20524f02 2486 cur = path->nodes[*level];
e011599b 2487
5f39d397 2488 if (btrfs_header_level(cur) != *level)
2c90e5d6 2489 WARN_ON(1);
e011599b 2490
/* every slot of this block has been visited */
7518a238 2491 if (path->slots[*level] >=
5f39d397 2492 btrfs_header_nritems(cur))
20524f02 2493 break;
/* reached a leaf: drop the references held by its file extent items */
6407bf6d 2494 if (*level == 0) {
31153d81 2495 ret = drop_leaf_ref_no_cache(trans, root, cur);
6407bf6d
CM
2496 BUG_ON(ret);
2497 break;
2498 }
db94535d 2499 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
ca7a79ad 2500 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
db94535d 2501 blocksize = btrfs_level_size(root, *level - 1);
925baedd 2502
/*
 * a child whose reference count is not 1 is not walked: drop one
 * reference on it and move on to the next slot
 */
333db94c 2503 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
6407bf6d
CM
2504 BUG_ON(ret);
2505 if (refs != 1) {
7bb86316
CM
2506 parent = path->nodes[*level];
2507 root_owner = btrfs_header_owner(parent);
2508 root_gen = btrfs_header_generation(parent);
20524f02 2509 path->slots[*level]++;
925baedd 2510 ret = __btrfs_free_extent(trans, root, bytenr,
7bb86316
CM
2511 blocksize, root_owner,
2512 root_gen, 0, 0, 1);
20524f02
CM
2513 BUG_ON(ret);
2514 continue;
2515 }
017e5369 2516
31153d81
YZ
/*
 * the child is a leaf: if a cached leaf reference exists for it, drop its
 * extents from the cache entry without reading the leaf, then finish at
 * level 0
 */
2517 if (*level == 1) {
2518 struct btrfs_key key;
2519 btrfs_node_key_to_cpu(cur, &key, path->slots[*level]);
017e5369 2520 ref = btrfs_lookup_leaf_ref(root, bytenr);
31153d81
YZ
2521 if (ref) {
2522 ret = drop_leaf_ref(trans, root, ref);
2523 BUG_ON(ret);
2524 btrfs_remove_leaf_ref(root, ref);
bcc63abb 2525 btrfs_free_leaf_ref(root, ref);
31153d81
YZ
2526 *level = 0;
2527 break;
2528 }
2529 }
/* child not cached or not up to date: read it with alloc_mutex released */
db94535d 2530 next = btrfs_find_tree_block(root, bytenr, blocksize);
1259ab75 2531 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
5f39d397 2532 free_extent_buffer(next);
333db94c
CM
2533 mutex_unlock(&root->fs_info->alloc_mutex);
2534
a74a4b97
CM
2535 if (path->slots[*level] == 0)
2536 reada_walk_down(root, cur, path->slots[*level]);
ca7a79ad
CM
2537 next = read_tree_block(root, bytenr, blocksize,
2538 ptr_gen);
e7a84565 2539 cond_resched();
925baedd 2540 mutex_lock(&root->fs_info->alloc_mutex);
e9d0b13b 2541
8790d502 2542 /* we've dropped the lock, double check */
e6dcd2dc
CM
2543 ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
2544 &refs);
e9d0b13b
CM
2545 BUG_ON(ret);
2546 if (refs != 1) {
7bb86316
CM
2547 parent = path->nodes[*level];
2548 root_owner = btrfs_header_owner(parent);
2549 root_gen = btrfs_header_generation(parent);
2550
e9d0b13b 2551 path->slots[*level]++;
5f39d397 2552 free_extent_buffer(next);
925baedd 2553 ret = __btrfs_free_extent(trans, root, bytenr,
7bb86316
CM
2554 blocksize,
2555 root_owner,
2556 root_gen, 0, 0, 1);
e9d0b13b
CM
2557 BUG_ON(ret);
2558 continue;
2559 }
2560 }
/* descend: install the child in the path and restart at its first slot */
5caf2a00 2561 WARN_ON(*level <= 0);
83e15a28 2562 if (path->nodes[*level-1])
5f39d397 2563 free_extent_buffer(path->nodes[*level-1]);
20524f02 2564 path->nodes[*level-1] = next;
5f39d397 2565 *level = btrfs_header_level(next);
20524f02
CM
2566 path->slots[*level] = 0;
2567 }
2568out:
5caf2a00
CM
2569 WARN_ON(*level < 0);
2570 WARN_ON(*level >= BTRFS_MAX_LEVEL);
7bb86316
CM
2571
/*
 * drop one reference on the block the walk stopped at; its address comes
 * from the buffer itself for the root node and from the parent's block
 * pointer otherwise
 */
2572 if (path->nodes[*level] == root->node) {
7bb86316 2573 parent = path->nodes[*level];
31153d81 2574 bytenr = path->nodes[*level]->start;
7bb86316
CM
2575 } else {
2576 parent = path->nodes[*level + 1];
31153d81 2577 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
7bb86316
CM
2578 }
2579
31153d81
YZ
2580 blocksize = btrfs_level_size(root, *level);
2581 root_owner = btrfs_header_owner(parent);
7bb86316 2582 root_gen = btrfs_header_generation(parent);
31153d81
YZ
2583
2584 ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
2585 root_owner, root_gen, 0, 0, 1);
5f39d397 2586 free_extent_buffer(path->nodes[*level]);
20524f02
CM
2587 path->nodes[*level] = NULL;
2588 *level += 1;
2589 BUG_ON(ret);
925baedd 2590 mutex_unlock(&root->fs_info->alloc_mutex);
e7a84565 2591 cond_resched();
20524f02
CM
2592 return 0;
2593}
2594
9aca1d51
CM
2595/*
2596 * helper for dropping snapshots. This walks back up the tree in the path
2597 * to find the first node higher up where we haven't yet gone through
2598 * all the slots
2599 */
98ed5174
CM
2600static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2601 struct btrfs_root *root,
2602 struct btrfs_path *path, int *level)
20524f02 2603{
7bb86316
CM
2604 u64 root_owner;
2605 u64 root_gen;
2606 struct btrfs_root_item *root_item = &root->root_item;
20524f02
CM
2607 int i;
2608 int slot;
2609 int ret;
9f3a7427 2610
234b63a0 2611 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
20524f02 2612 slot = path->slots[i];
5f39d397
CM
2613 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2614 struct extent_buffer *node;
2615 struct btrfs_disk_key disk_key;
2616 node = path->nodes[i];
20524f02
CM
2617 path->slots[i]++;
2618 *level = i;
9f3a7427 2619 WARN_ON(*level == 0);
5f39d397 2620 btrfs_node_key(node, &disk_key, path->slots[i]);
9f3a7427 2621 memcpy(&root_item->drop_progress,
5f39d397 2622 &disk_key, sizeof(disk_key));
9f3a7427 2623 root_item->drop_level = i;
20524f02
CM
2624 return 0;
2625 } else {
7bb86316
CM
2626 if (path->nodes[*level] == root->node) {
2627 root_owner = root->root_key.objectid;
2628 root_gen =
2629 btrfs_header_generation(path->nodes[*level]);
2630 } else {
2631 struct extent_buffer *node;
2632 node = path->nodes[*level + 1];
2633 root_owner = btrfs_header_owner(node);
2634 root_gen = btrfs_header_generation(node);
2635 }
e089f05c 2636 ret = btrfs_free_extent(trans, root,
db94535d 2637 path->nodes[*level]->start,
7bb86316
CM
2638 path->nodes[*level]->len,
2639 root_owner, root_gen, 0, 0, 1);
6407bf6d 2640 BUG_ON(ret);
5f39d397 2641 free_extent_buffer(path->nodes[*level]);
83e15a28 2642 path->nodes[*level] = NULL;
20524f02 2643 *level = i + 1;
20524f02
CM
2644 }
2645 }
2646 return 1;
2647}
2648
9aca1d51
CM
2649/*
2650 * drop the reference count on the tree rooted at 'snap'. This traverses
2651 * the tree freeing any blocks that have a ref count of zero after being
2652 * decremented.
2653 */
e089f05c 2654int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
9f3a7427 2655 *root)
20524f02 2656{
3768f368 2657 int ret = 0;
9aca1d51 2658 int wret;
20524f02 2659 int level;
5caf2a00 2660 struct btrfs_path *path;
20524f02
CM
2661 int i;
2662 int orig_level;
9f3a7427 2663 struct btrfs_root_item *root_item = &root->root_item;
20524f02 2664
a2135011 2665 WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
5caf2a00
CM
2666 path = btrfs_alloc_path();
2667 BUG_ON(!path);
20524f02 2668
5f39d397 2669 level = btrfs_header_level(root->node);
20524f02 2670 orig_level = level;
9f3a7427
CM
2671 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2672 path->nodes[level] = root->node;
f510cfec 2673 extent_buffer_get(root->node);
9f3a7427
CM
2674 path->slots[level] = 0;
2675 } else {
2676 struct btrfs_key key;
5f39d397
CM
2677 struct btrfs_disk_key found_key;
2678 struct extent_buffer *node;
6702ed49 2679
9f3a7427 2680 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6702ed49
CM
2681 level = root_item->drop_level;
2682 path->lowest_level = level;
9f3a7427 2683 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6702ed49 2684 if (wret < 0) {
9f3a7427
CM
2685 ret = wret;
2686 goto out;
2687 }
5f39d397
CM
2688 node = path->nodes[level];
2689 btrfs_node_key(node, &found_key, path->slots[level]);
2690 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2691 sizeof(found_key)));
7d9eb12c
CM
2692 /*
2693 * unlock our path, this is safe because only this
2694 * function is allowed to delete this snapshot
2695 */
925baedd
CM
2696 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
2697 if (path->nodes[i] && path->locks[i]) {
2698 path->locks[i] = 0;
2699 btrfs_tree_unlock(path->nodes[i]);
2700 }
2701 }
9f3a7427 2702 }
20524f02 2703 while(1) {
ab78c84d 2704 atomic_inc(&root->fs_info->throttle_gen);
5caf2a00 2705 wret = walk_down_tree(trans, root, path, &level);
9aca1d51 2706 if (wret > 0)
20524f02 2707 break;
9aca1d51
CM
2708 if (wret < 0)
2709 ret = wret;
2710
5caf2a00 2711 wret = walk_up_tree(trans, root, path, &level);
9aca1d51 2712 if (wret > 0)
20524f02 2713 break;
9aca1d51
CM
2714 if (wret < 0)
2715 ret = wret;
e7a84565
CM
2716 if (trans->transaction->in_commit) {
2717 ret = -EAGAIN;
2718 break;
2719 }
017e5369 2720 wake_up(&root->fs_info->transaction_throttle);
20524f02 2721 }
83e15a28 2722 for (i = 0; i <= orig_level; i++) {
5caf2a00 2723 if (path->nodes[i]) {
5f39d397 2724 free_extent_buffer(path->nodes[i]);
0f82731f 2725 path->nodes[i] = NULL;
83e15a28 2726 }
20524f02 2727 }
9f3a7427 2728out:
5caf2a00 2729 btrfs_free_path(path);
9aca1d51 2730 return ret;
20524f02 2731}
9078a3e1 2732
96b5179d 2733int btrfs_free_block_groups(struct btrfs_fs_info *info)
9078a3e1 2734{
96b5179d
CM
2735 u64 start;
2736 u64 end;
b97f9203 2737 u64 ptr;
9078a3e1 2738 int ret;
925baedd
CM
2739
2740 mutex_lock(&info->alloc_mutex);
9078a3e1 2741 while(1) {
96b5179d
CM
2742 ret = find_first_extent_bit(&info->block_group_cache, 0,
2743 &start, &end, (unsigned int)-1);
2744 if (ret)
9078a3e1 2745 break;
b97f9203
Y
2746 ret = get_state_private(&info->block_group_cache, start, &ptr);
2747 if (!ret)
2748 kfree((void *)(unsigned long)ptr);
96b5179d
CM
2749 clear_extent_bits(&info->block_group_cache, start,
2750 end, (unsigned int)-1, GFP_NOFS);
9078a3e1 2751 }
e37c9e69 2752 while(1) {
f510cfec
CM
2753 ret = find_first_extent_bit(&info->free_space_cache, 0,
2754 &start, &end, EXTENT_DIRTY);
2755 if (ret)
e37c9e69 2756 break;
f510cfec
CM
2757 clear_extent_dirty(&info->free_space_cache, start,
2758 end, GFP_NOFS);
e37c9e69 2759 }
925baedd 2760 mutex_unlock(&info->alloc_mutex);
be744175
CM
2761 return 0;
2762}
2763
8e7bf94f
CM
/*
 * Last page index of a readahead window of nr pages beginning at start,
 * clamped so that it never goes beyond last.
 */
static unsigned long calc_ra(unsigned long start, unsigned long last,
			     unsigned long nr)
{
	unsigned long window_end = start + nr - 1;

	if (last < window_end)
		return last;
	return window_end;
}
2769
98ed5174
CM
2770static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2771 u64 len)
edbd8d4e
CM
2772{
2773 u64 page_start;
2774 u64 page_end;
edbd8d4e 2775 unsigned long last_index;
edbd8d4e
CM
2776 unsigned long i;
2777 struct page *page;
d1310b2e 2778 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
4313b399 2779 struct file_ra_state *ra;
8e7bf94f
CM
2780 unsigned long total_read = 0;
2781 unsigned long ra_pages;
3eaa2885 2782 struct btrfs_ordered_extent *ordered;
a061fc8d 2783 struct btrfs_trans_handle *trans;
4313b399
CM
2784
2785 ra = kzalloc(sizeof(*ra), GFP_NOFS);
edbd8d4e
CM
2786
2787 mutex_lock(&inode->i_mutex);
4313b399 2788 i = start >> PAGE_CACHE_SHIFT;
edbd8d4e
CM
2789 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2790
8e7bf94f
CM
2791 ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages;
2792
4313b399 2793 file_ra_state_init(ra, inode->i_mapping);
edbd8d4e 2794
4313b399 2795 for (; i <= last_index; i++) {
8e7bf94f
CM
2796 if (total_read % ra_pages == 0) {
2797 btrfs_force_ra(inode->i_mapping, ra, NULL, i,
2798 calc_ra(i, last_index, ra_pages));
2799 }
2800 total_read++;
3eaa2885
CM
2801again:
2802 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
a061fc8d 2803 goto truncate_racing;
edbd8d4e 2804 page = grab_cache_page(inode->i_mapping, i);
a061fc8d 2805 if (!page) {
edbd8d4e 2806 goto out_unlock;
a061fc8d 2807 }
edbd8d4e
CM
2808 if (!PageUptodate(page)) {
2809 btrfs_readpage(NULL, page);
2810 lock_page(page);
2811 if (!PageUptodate(page)) {
2812 unlock_page(page);
2813 page_cache_release(page);
2814 goto out_unlock;
2815 }
2816 }
ec44a35c 2817 wait_on_page_writeback(page);
3eaa2885 2818
edbd8d4e
CM
2819 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2820 page_end = page_start + PAGE_CACHE_SIZE - 1;
d1310b2e 2821 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
edbd8d4e 2822
3eaa2885
CM
2823 ordered = btrfs_lookup_ordered_extent(inode, page_start);
2824 if (ordered) {
2825 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2826 unlock_page(page);
2827 page_cache_release(page);
2828 btrfs_start_ordered_extent(inode, ordered, 1);
2829 btrfs_put_ordered_extent(ordered);
2830 goto again;
2831 }
2832 set_page_extent_mapped(page);
2833
2834
d1310b2e 2835 set_extent_delalloc(io_tree, page_start,
edbd8d4e 2836 page_end, GFP_NOFS);
a061fc8d 2837 set_page_dirty(page);
edbd8d4e 2838
d1310b2e 2839 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
edbd8d4e
CM
2840 unlock_page(page);
2841 page_cache_release(page);
2842 }
2843
2844out_unlock:
3eaa2885
CM
2845 /* we have to start the IO in order to get the ordered extents
2846 * instantiated. This allows the relocation to code to wait
2847 * for all the ordered extents to hit the disk.
2848 *
2849 * Otherwise, it would constantly loop over the same extents
2850 * because the old ones don't get deleted until the IO is
2851 * started
2852 */
2853 btrfs_fdatawrite_range(inode->i_mapping, start, start + len - 1,
2854 WB_SYNC_NONE);
ec44a35c 2855 kfree(ra);
a061fc8d
CM
2856 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
2857 if (trans) {
a061fc8d
CM
2858 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
2859 mark_inode_dirty(inode);
2860 }
edbd8d4e
CM
2861 mutex_unlock(&inode->i_mutex);
2862 return 0;
a061fc8d
CM
2863
2864truncate_racing:
2865 vmtruncate(inode, inode->i_size);
2866 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2867 total_read);
2868 goto out_unlock;
edbd8d4e
CM
2869}
2870
bf4ef679
CM
2871/*
2872 * The back references tell us which tree holds a ref on a block,
2873 * but it is possible for the tree root field in the reference to
2874 * reflect the original root before a snapshot was made. In this
2875 * case we should search through all the children of a given root
2876 * to find potential holders of references on a block.
2877 *
2878 * Instead, we do something a little less fancy and just search
2879 * all the roots for a given key/block combination.
2880 */
2881static int find_root_for_ref(struct btrfs_root *root,
2882 struct btrfs_path *path,
2883 struct btrfs_key *key0,
2884 int level,
2885 int file_key,
2886 struct btrfs_root **found_root,
2887 u64 bytenr)
2888{
2889 struct btrfs_key root_location;
2890 struct btrfs_root *cur_root = *found_root;
2891 struct btrfs_file_extent_item *file_extent;
2892 u64 root_search_start = BTRFS_FS_TREE_OBJECTID;
2893 u64 found_bytenr;
2894 int ret;
bf4ef679
CM
2895
2896 root_location.offset = (u64)-1;
2897 root_location.type = BTRFS_ROOT_ITEM_KEY;
2898 path->lowest_level = level;
2899 path->reada = 0;
2900 while(1) {
2901 ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0);
2902 found_bytenr = 0;
2903 if (ret == 0 && file_key) {
2904 struct extent_buffer *leaf = path->nodes[0];
2905 file_extent = btrfs_item_ptr(leaf, path->slots[0],
2906 struct btrfs_file_extent_item);
2907 if (btrfs_file_extent_type(leaf, file_extent) ==
2908 BTRFS_FILE_EXTENT_REG) {
2909 found_bytenr =
2910 btrfs_file_extent_disk_bytenr(leaf,
2911 file_extent);
2912 }
323da79c 2913 } else if (!file_key) {
bf4ef679
CM
2914 if (path->nodes[level])
2915 found_bytenr = path->nodes[level]->start;
2916 }
2917
bf4ef679
CM
2918 btrfs_release_path(cur_root, path);
2919
2920 if (found_bytenr == bytenr) {
2921 *found_root = cur_root;
2922 ret = 0;
2923 goto out;
2924 }
2925 ret = btrfs_search_root(root->fs_info->tree_root,
2926 root_search_start, &root_search_start);
2927 if (ret)
2928 break;
2929
2930 root_location.objectid = root_search_start;
2931 cur_root = btrfs_read_fs_root_no_name(root->fs_info,
2932 &root_location);
2933 if (!cur_root) {
2934 ret = 1;
2935 break;
2936 }
2937 }
2938out:
2939 path->lowest_level = 0;
2940 return ret;
2941}
2942
4313b399
CM
2943/*
2944 * note, this releases the path
2945 */
98ed5174 2946static int noinline relocate_one_reference(struct btrfs_root *extent_root,
edbd8d4e 2947 struct btrfs_path *path,
0ef3e66b
CM
2948 struct btrfs_key *extent_key,
2949 u64 *last_file_objectid,
2950 u64 *last_file_offset,
2951 u64 *last_file_root,
2952 u64 last_extent)
edbd8d4e
CM
2953{
2954 struct inode *inode;
2955 struct btrfs_root *found_root;
bf4ef679
CM
2956 struct btrfs_key root_location;
2957 struct btrfs_key found_key;
4313b399
CM
2958 struct btrfs_extent_ref *ref;
2959 u64 ref_root;
2960 u64 ref_gen;
2961 u64 ref_objectid;
2962 u64 ref_offset;
edbd8d4e 2963 int ret;
bf4ef679 2964 int level;
edbd8d4e 2965
7d9eb12c
CM
2966 WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
2967
4313b399
CM
2968 ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
2969 struct btrfs_extent_ref);
2970 ref_root = btrfs_ref_root(path->nodes[0], ref);
2971 ref_gen = btrfs_ref_generation(path->nodes[0], ref);
2972 ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
2973 ref_offset = btrfs_ref_offset(path->nodes[0], ref);
2974 btrfs_release_path(extent_root, path);
2975
bf4ef679 2976 root_location.objectid = ref_root;
edbd8d4e 2977 if (ref_gen == 0)
bf4ef679 2978 root_location.offset = 0;
edbd8d4e 2979 else
bf4ef679
CM
2980 root_location.offset = (u64)-1;
2981 root_location.type = BTRFS_ROOT_ITEM_KEY;
edbd8d4e
CM
2982
2983 found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
bf4ef679 2984 &root_location);
edbd8d4e 2985 BUG_ON(!found_root);
7d9eb12c 2986 mutex_unlock(&extent_root->fs_info->alloc_mutex);
edbd8d4e
CM
2987
2988 if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
bf4ef679
CM
2989 found_key.objectid = ref_objectid;
2990 found_key.type = BTRFS_EXTENT_DATA_KEY;
2991 found_key.offset = ref_offset;
2992 level = 0;
2993
0ef3e66b
CM
2994 if (last_extent == extent_key->objectid &&
2995 *last_file_objectid == ref_objectid &&
2996 *last_file_offset == ref_offset &&
2997 *last_file_root == ref_root)
2998 goto out;
2999
bf4ef679
CM
3000 ret = find_root_for_ref(extent_root, path, &found_key,
3001 level, 1, &found_root,
3002 extent_key->objectid);
3003
3004 if (ret)
3005 goto out;
3006
0ef3e66b
CM
3007 if (last_extent == extent_key->objectid &&
3008 *last_file_objectid == ref_objectid &&
3009 *last_file_offset == ref_offset &&
3010 *last_file_root == ref_root)
3011 goto out;
3012
edbd8d4e
CM
3013 inode = btrfs_iget_locked(extent_root->fs_info->sb,
3014 ref_objectid, found_root);
3015 if (inode->i_state & I_NEW) {
3016 /* the inode and parent dir are two different roots */
3017 BTRFS_I(inode)->root = found_root;
3018 BTRFS_I(inode)->location.objectid = ref_objectid;
3019 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
3020 BTRFS_I(inode)->location.offset = 0;
3021 btrfs_read_locked_inode(inode);
3022 unlock_new_inode(inode);
3023
3024 }
3025 /* this can happen if the reference is not against
3026 * the latest version of the tree root
3027 */
7d9eb12c 3028 if (is_bad_inode(inode))
edbd8d4e 3029 goto out;
7d9eb12c 3030
0ef3e66b
CM
3031 *last_file_objectid = inode->i_ino;
3032 *last_file_root = found_root->root_key.objectid;
3033 *last_file_offset = ref_offset;
3034
edbd8d4e 3035 relocate_inode_pages(inode, ref_offset, extent_key->offset);
edbd8d4e 3036 iput(inode);
edbd8d4e
CM
3037 } else {
3038 struct btrfs_trans_handle *trans;
edbd8d4e 3039 struct extent_buffer *eb;
7d9eb12c 3040 int needs_lock = 0;
edbd8d4e 3041
edbd8d4e 3042 eb = read_tree_block(found_root, extent_key->objectid,
ca7a79ad 3043 extent_key->offset, 0);
925baedd 3044 btrfs_tree_lock(eb);
edbd8d4e
CM
3045 level = btrfs_header_level(eb);
3046
3047 if (level == 0)
3048 btrfs_item_key_to_cpu(eb, &found_key, 0);
3049 else
3050 btrfs_node_key_to_cpu(eb, &found_key, 0);
3051
925baedd 3052 btrfs_tree_unlock(eb);
edbd8d4e
CM
3053 free_extent_buffer(eb);
3054
bf4ef679
CM
3055 ret = find_root_for_ref(extent_root, path, &found_key,
3056 level, 0, &found_root,
3057 extent_key->objectid);
3058
3059 if (ret)
3060 goto out;
3061
7d9eb12c
CM
3062 /*
3063 * right here almost anything could happen to our key,
3064 * but that's ok. The cow below will either relocate it
3065 * or someone else will have relocated it. Either way,
3066 * it is in a different spot than it was before and
3067 * we're happy.
3068 */
3069
bf4ef679
CM
3070 trans = btrfs_start_transaction(found_root, 1);
3071
7d9eb12c
CM
3072 if (found_root == extent_root->fs_info->extent_root ||
3073 found_root == extent_root->fs_info->chunk_root ||
3074 found_root == extent_root->fs_info->dev_root) {
3075 needs_lock = 1;
3076 mutex_lock(&extent_root->fs_info->alloc_mutex);
3077 }
3078
edbd8d4e 3079 path->lowest_level = level;
8f662a76 3080 path->reada = 2;
edbd8d4e
CM
3081 ret = btrfs_search_slot(trans, found_root, &found_key, path,
3082 0, 1);
3083 path->lowest_level = 0;
edbd8d4e 3084 btrfs_release_path(found_root, path);
7d9eb12c 3085
0ef3e66b
CM
3086 if (found_root == found_root->fs_info->extent_root)
3087 btrfs_extent_post_op(trans, found_root);
7d9eb12c
CM
3088 if (needs_lock)
3089 mutex_unlock(&extent_root->fs_info->alloc_mutex);
3090
edbd8d4e 3091 btrfs_end_transaction(trans, found_root);
edbd8d4e 3092
7d9eb12c 3093 }
edbd8d4e 3094out:
7d9eb12c 3095 mutex_lock(&extent_root->fs_info->alloc_mutex);
edbd8d4e
CM
3096 return 0;
3097}
3098
a061fc8d
CM
3099static int noinline del_extent_zero(struct btrfs_root *extent_root,
3100 struct btrfs_path *path,
3101 struct btrfs_key *extent_key)
3102{
3103 int ret;
3104 struct btrfs_trans_handle *trans;
3105
3106 trans = btrfs_start_transaction(extent_root, 1);
3107 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
3108 if (ret > 0) {
3109 ret = -EIO;
3110 goto out;
3111 }
3112 if (ret < 0)
3113 goto out;
3114 ret = btrfs_del_item(trans, extent_root, path);
3115out:
3116 btrfs_end_transaction(trans, extent_root);
3117 return ret;
3118}
3119
98ed5174
CM
3120static int noinline relocate_one_extent(struct btrfs_root *extent_root,
3121 struct btrfs_path *path,
3122 struct btrfs_key *extent_key)
edbd8d4e
CM
3123{
3124 struct btrfs_key key;
3125 struct btrfs_key found_key;
edbd8d4e 3126 struct extent_buffer *leaf;
0ef3e66b
CM
3127 u64 last_file_objectid = 0;
3128 u64 last_file_root = 0;
3129 u64 last_file_offset = (u64)-1;
3130 u64 last_extent = 0;
edbd8d4e
CM
3131 u32 nritems;
3132 u32 item_size;
3133 int ret = 0;
3134
a061fc8d
CM
3135 if (extent_key->objectid == 0) {
3136 ret = del_extent_zero(extent_root, path, extent_key);
3137 goto out;
3138 }
edbd8d4e
CM
3139 key.objectid = extent_key->objectid;
3140 key.type = BTRFS_EXTENT_REF_KEY;
3141 key.offset = 0;
3142
3143 while(1) {
3144 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
3145
edbd8d4e
CM
3146 if (ret < 0)
3147 goto out;
3148
3149 ret = 0;
3150 leaf = path->nodes[0];
3151 nritems = btrfs_header_nritems(leaf);
a061fc8d
CM
3152 if (path->slots[0] == nritems) {
3153 ret = btrfs_next_leaf(extent_root, path);
3154 if (ret > 0) {
3155 ret = 0;
3156 goto out;
3157 }
3158 if (ret < 0)
3159 goto out;
bf4ef679 3160 leaf = path->nodes[0];
a061fc8d 3161 }
edbd8d4e
CM
3162
3163 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
a061fc8d 3164 if (found_key.objectid != extent_key->objectid) {
edbd8d4e 3165 break;
a061fc8d 3166 }
edbd8d4e 3167
a061fc8d 3168 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
edbd8d4e 3169 break;
a061fc8d 3170 }
edbd8d4e
CM
3171
3172 key.offset = found_key.offset + 1;
3173 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3174
0ef3e66b
CM
3175 ret = relocate_one_reference(extent_root, path, extent_key,
3176 &last_file_objectid,
3177 &last_file_offset,
3178 &last_file_root, last_extent);
edbd8d4e
CM
3179 if (ret)
3180 goto out;
0ef3e66b 3181 last_extent = extent_key->objectid;
edbd8d4e
CM
3182 }
3183 ret = 0;
3184out:
3185 btrfs_release_path(extent_root, path);
3186 return ret;
3187}
3188
ec44a35c
CM
3189static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
3190{
3191 u64 num_devices;
3192 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
3193 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
3194
a061fc8d 3195 num_devices = root->fs_info->fs_devices->num_devices;
ec44a35c
CM
3196 if (num_devices == 1) {
3197 stripped |= BTRFS_BLOCK_GROUP_DUP;
3198 stripped = flags & ~stripped;
3199
3200 /* turn raid0 into single device chunks */
3201 if (flags & BTRFS_BLOCK_GROUP_RAID0)
3202 return stripped;
3203
3204 /* turn mirroring into duplication */
3205 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
3206 BTRFS_BLOCK_GROUP_RAID10))
3207 return stripped | BTRFS_BLOCK_GROUP_DUP;
3208 return flags;
3209 } else {
3210 /* they already had raid on here, just return */
ec44a35c
CM
3211 if (flags & stripped)
3212 return flags;
3213
3214 stripped |= BTRFS_BLOCK_GROUP_DUP;
3215 stripped = flags & ~stripped;
3216
3217 /* switch duplicated blocks with raid1 */
3218 if (flags & BTRFS_BLOCK_GROUP_DUP)
3219 return stripped | BTRFS_BLOCK_GROUP_RAID1;
3220
3221 /* turn single device chunks into raid0 */
3222 return stripped | BTRFS_BLOCK_GROUP_RAID0;
3223 }
3224 return flags;
3225}
3226
0ef3e66b
CM
3227int __alloc_chunk_for_shrink(struct btrfs_root *root,
3228 struct btrfs_block_group_cache *shrink_block_group,
3229 int force)
3230{
3231 struct btrfs_trans_handle *trans;
3232 u64 new_alloc_flags;
3233 u64 calc;
3234
c286ac48 3235 spin_lock(&shrink_block_group->lock);
0ef3e66b 3236 if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
c286ac48 3237 spin_unlock(&shrink_block_group->lock);
7d9eb12c 3238 mutex_unlock(&root->fs_info->alloc_mutex);
c286ac48 3239
0ef3e66b 3240 trans = btrfs_start_transaction(root, 1);
7d9eb12c 3241 mutex_lock(&root->fs_info->alloc_mutex);
c286ac48 3242 spin_lock(&shrink_block_group->lock);
7d9eb12c 3243
0ef3e66b
CM
3244 new_alloc_flags = update_block_group_flags(root,
3245 shrink_block_group->flags);
3246 if (new_alloc_flags != shrink_block_group->flags) {
3247 calc =
3248 btrfs_block_group_used(&shrink_block_group->item);
3249 } else {
3250 calc = shrink_block_group->key.offset;
3251 }
c286ac48
CM
3252 spin_unlock(&shrink_block_group->lock);
3253
0ef3e66b
CM
3254 do_chunk_alloc(trans, root->fs_info->extent_root,
3255 calc + 2 * 1024 * 1024, new_alloc_flags, force);
7d9eb12c
CM
3256
3257 mutex_unlock(&root->fs_info->alloc_mutex);
0ef3e66b 3258 btrfs_end_transaction(trans, root);
7d9eb12c 3259 mutex_lock(&root->fs_info->alloc_mutex);
c286ac48
CM
3260 } else
3261 spin_unlock(&shrink_block_group->lock);
0ef3e66b
CM
3262 return 0;
3263}
3264
8f18cf13 3265int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
edbd8d4e
CM
3266{
3267 struct btrfs_trans_handle *trans;
3268 struct btrfs_root *tree_root = root->fs_info->tree_root;
3269 struct btrfs_path *path;
3270 u64 cur_byte;
3271 u64 total_found;
8f18cf13
CM
3272 u64 shrink_last_byte;
3273 struct btrfs_block_group_cache *shrink_block_group;
edbd8d4e 3274 struct btrfs_fs_info *info = root->fs_info;
edbd8d4e 3275 struct btrfs_key key;
73e48b27 3276 struct btrfs_key found_key;
edbd8d4e
CM
3277 struct extent_buffer *leaf;
3278 u32 nritems;
3279 int ret;
a061fc8d 3280 int progress;
edbd8d4e 3281
925baedd 3282 mutex_lock(&root->fs_info->alloc_mutex);
8f18cf13
CM
3283 shrink_block_group = btrfs_lookup_block_group(root->fs_info,
3284 shrink_start);
3285 BUG_ON(!shrink_block_group);
3286
0ef3e66b
CM
3287 shrink_last_byte = shrink_block_group->key.objectid +
3288 shrink_block_group->key.offset;
8f18cf13
CM
3289
3290 shrink_block_group->space_info->total_bytes -=
3291 shrink_block_group->key.offset;
edbd8d4e
CM
3292 path = btrfs_alloc_path();
3293 root = root->fs_info->extent_root;
8f662a76 3294 path->reada = 2;
edbd8d4e 3295
323da79c
CM
3296 printk("btrfs relocating block group %llu flags %llu\n",
3297 (unsigned long long)shrink_start,
3298 (unsigned long long)shrink_block_group->flags);
3299
0ef3e66b
CM
3300 __alloc_chunk_for_shrink(root, shrink_block_group, 1);
3301
edbd8d4e 3302again:
323da79c 3303
8f18cf13
CM
3304 shrink_block_group->ro = 1;
3305
edbd8d4e 3306 total_found = 0;
a061fc8d 3307 progress = 0;
8f18cf13 3308 key.objectid = shrink_start;
edbd8d4e
CM
3309 key.offset = 0;
3310 key.type = 0;
73e48b27 3311 cur_byte = key.objectid;
4313b399 3312
73e48b27
Y
3313 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3314 if (ret < 0)
3315 goto out;
3316
0b86a832 3317 ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
73e48b27
Y
3318 if (ret < 0)
3319 goto out;
8f18cf13 3320
73e48b27
Y
3321 if (ret == 0) {
3322 leaf = path->nodes[0];
3323 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
8f18cf13
CM
3324 if (found_key.objectid + found_key.offset > shrink_start &&
3325 found_key.objectid < shrink_last_byte) {
73e48b27
Y
3326 cur_byte = found_key.objectid;
3327 key.objectid = cur_byte;
3328 }
3329 }
3330 btrfs_release_path(root, path);
3331
3332 while(1) {
edbd8d4e
CM
3333 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3334 if (ret < 0)
3335 goto out;
73e48b27 3336
7d9eb12c 3337next:
edbd8d4e 3338 leaf = path->nodes[0];
73e48b27 3339 nritems = btrfs_header_nritems(leaf);
73e48b27
Y
3340 if (path->slots[0] >= nritems) {
3341 ret = btrfs_next_leaf(root, path);
3342 if (ret < 0)
3343 goto out;
3344 if (ret == 1) {
3345 ret = 0;
3346 break;
edbd8d4e 3347 }
73e48b27
Y
3348 leaf = path->nodes[0];
3349 nritems = btrfs_header_nritems(leaf);
edbd8d4e 3350 }
73e48b27
Y
3351
3352 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
725c8463 3353
8f18cf13
CM
3354 if (found_key.objectid >= shrink_last_byte)
3355 break;
3356
725c8463
CM
3357 if (progress && need_resched()) {
3358 memcpy(&key, &found_key, sizeof(key));
725c8463 3359 cond_resched();
725c8463
CM
3360 btrfs_release_path(root, path);
3361 btrfs_search_slot(NULL, root, &key, path, 0, 0);
3362 progress = 0;
3363 goto next;
3364 }
3365 progress = 1;
3366
73e48b27
Y
3367 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
3368 found_key.objectid + found_key.offset <= cur_byte) {
0ef3e66b
CM
3369 memcpy(&key, &found_key, sizeof(key));
3370 key.offset++;
edbd8d4e 3371 path->slots[0]++;
edbd8d4e
CM
3372 goto next;
3373 }
73e48b27 3374
edbd8d4e
CM
3375 total_found++;
3376 cur_byte = found_key.objectid + found_key.offset;
3377 key.objectid = cur_byte;
3378 btrfs_release_path(root, path);
3379 ret = relocate_one_extent(root, path, &found_key);
0ef3e66b 3380 __alloc_chunk_for_shrink(root, shrink_block_group, 0);
edbd8d4e
CM
3381 }
3382
3383 btrfs_release_path(root, path);
3384
3385 if (total_found > 0) {
323da79c
CM
3386 printk("btrfs relocate found %llu last extent was %llu\n",
3387 (unsigned long long)total_found,
3388 (unsigned long long)found_key.objectid);
7d9eb12c 3389 mutex_unlock(&root->fs_info->alloc_mutex);
edbd8d4e
CM
3390 trans = btrfs_start_transaction(tree_root, 1);
3391 btrfs_commit_transaction(trans, tree_root);
3392
edbd8d4e 3393 btrfs_clean_old_snapshots(tree_root);
edbd8d4e 3394
3eaa2885
CM
3395 btrfs_wait_ordered_extents(tree_root);
3396
edbd8d4e
CM
3397 trans = btrfs_start_transaction(tree_root, 1);
3398 btrfs_commit_transaction(trans, tree_root);
7d9eb12c 3399 mutex_lock(&root->fs_info->alloc_mutex);
edbd8d4e
CM
3400 goto again;
3401 }
3402
8f18cf13
CM
3403 /*
3404 * we've freed all the extents, now remove the block
3405 * group item from the tree
3406 */
7d9eb12c
CM
3407 mutex_unlock(&root->fs_info->alloc_mutex);
3408
edbd8d4e 3409 trans = btrfs_start_transaction(root, 1);
c286ac48 3410
7d9eb12c 3411 mutex_lock(&root->fs_info->alloc_mutex);
8f18cf13 3412 memcpy(&key, &shrink_block_group->key, sizeof(key));
1372f8e6 3413
8f18cf13
CM
3414 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3415 if (ret > 0)
3416 ret = -EIO;
8e8a1e31
JB
3417 if (ret < 0) {
3418 btrfs_end_transaction(trans, root);
8f18cf13 3419 goto out;
8e8a1e31 3420 }
73e48b27 3421
0ef3e66b
CM
3422 clear_extent_bits(&info->block_group_cache, key.objectid,
3423 key.objectid + key.offset - 1,
8f18cf13 3424 (unsigned int)-1, GFP_NOFS);
edbd8d4e 3425
0ef3e66b
CM
3426
3427 clear_extent_bits(&info->free_space_cache,
3428 key.objectid, key.objectid + key.offset - 1,
3429 (unsigned int)-1, GFP_NOFS);
3430
3431 memset(shrink_block_group, 0, sizeof(*shrink_block_group));
3432 kfree(shrink_block_group);
3433
8f18cf13 3434 btrfs_del_item(trans, root, path);
7d9eb12c
CM
3435 btrfs_release_path(root, path);
3436 mutex_unlock(&root->fs_info->alloc_mutex);
edbd8d4e 3437 btrfs_commit_transaction(trans, root);
0ef3e66b 3438
7d9eb12c
CM
3439 mutex_lock(&root->fs_info->alloc_mutex);
3440
0ef3e66b
CM
3441 /* the code to unpin extents might set a few bits in the free
3442 * space cache for this range again
3443 */
3444 clear_extent_bits(&info->free_space_cache,
3445 key.objectid, key.objectid + key.offset - 1,
3446 (unsigned int)-1, GFP_NOFS);
edbd8d4e
CM
3447out:
3448 btrfs_free_path(path);
925baedd 3449 mutex_unlock(&root->fs_info->alloc_mutex);
edbd8d4e
CM
3450 return ret;
3451}
3452
0b86a832
CM
3453int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
3454 struct btrfs_key *key)
3455{
925baedd 3456 int ret = 0;
0b86a832
CM
3457 struct btrfs_key found_key;
3458 struct extent_buffer *leaf;
3459 int slot;
edbd8d4e 3460
0b86a832
CM
3461 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
3462 if (ret < 0)
925baedd
CM
3463 goto out;
3464
0b86a832
CM
3465 while(1) {
3466 slot = path->slots[0];
edbd8d4e 3467 leaf = path->nodes[0];
0b86a832
CM
3468 if (slot >= btrfs_header_nritems(leaf)) {
3469 ret = btrfs_next_leaf(root, path);
3470 if (ret == 0)
3471 continue;
3472 if (ret < 0)
925baedd 3473 goto out;
0b86a832 3474 break;
edbd8d4e 3475 }
0b86a832 3476 btrfs_item_key_to_cpu(leaf, &found_key, slot);
edbd8d4e 3477
0b86a832 3478 if (found_key.objectid >= key->objectid &&
925baedd
CM
3479 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3480 ret = 0;
3481 goto out;
3482 }
0b86a832 3483 path->slots[0]++;
edbd8d4e 3484 }
0b86a832 3485 ret = -ENOENT;
925baedd 3486out:
0b86a832 3487 return ret;
edbd8d4e
CM
3488}
3489
9078a3e1
CM
3490int btrfs_read_block_groups(struct btrfs_root *root)
3491{
3492 struct btrfs_path *path;
3493 int ret;
96b5179d 3494 int bit;
9078a3e1 3495 struct btrfs_block_group_cache *cache;
be744175 3496 struct btrfs_fs_info *info = root->fs_info;
6324fbf3 3497 struct btrfs_space_info *space_info;
d1310b2e 3498 struct extent_io_tree *block_group_cache;
9078a3e1
CM
3499 struct btrfs_key key;
3500 struct btrfs_key found_key;
5f39d397 3501 struct extent_buffer *leaf;
96b5179d
CM
3502
3503 block_group_cache = &info->block_group_cache;
be744175 3504 root = info->extent_root;
9078a3e1 3505 key.objectid = 0;
0b86a832 3506 key.offset = 0;
9078a3e1 3507 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
9078a3e1
CM
3508 path = btrfs_alloc_path();
3509 if (!path)
3510 return -ENOMEM;
3511
925baedd 3512 mutex_lock(&root->fs_info->alloc_mutex);
9078a3e1 3513 while(1) {
0b86a832
CM
3514 ret = find_first_block_group(root, path, &key);
3515 if (ret > 0) {
3516 ret = 0;
3517 goto error;
9078a3e1 3518 }
0b86a832
CM
3519 if (ret != 0)
3520 goto error;
3521
5f39d397
CM
3522 leaf = path->nodes[0];
3523 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
8f18cf13 3524 cache = kzalloc(sizeof(*cache), GFP_NOFS);
9078a3e1 3525 if (!cache) {
0b86a832 3526 ret = -ENOMEM;
9078a3e1
CM
3527 break;
3528 }
3e1ad54f 3529
c286ac48 3530 spin_lock_init(&cache->lock);
5f39d397
CM
3531 read_extent_buffer(leaf, &cache->item,
3532 btrfs_item_ptr_offset(leaf, path->slots[0]),
3533 sizeof(cache->item));
9078a3e1 3534 memcpy(&cache->key, &found_key, sizeof(found_key));
0b86a832 3535
9078a3e1
CM
3536 key.objectid = found_key.objectid + found_key.offset;
3537 btrfs_release_path(root, path);
0b86a832
CM
3538 cache->flags = btrfs_block_group_flags(&cache->item);
3539 bit = 0;
3540 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
96b5179d 3541 bit = BLOCK_GROUP_DATA;
0b86a832
CM
3542 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
3543 bit = BLOCK_GROUP_SYSTEM;
3544 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
96b5179d 3545 bit = BLOCK_GROUP_METADATA;
31f3c99b 3546 }
8790d502 3547 set_avail_alloc_bits(info, cache->flags);
96b5179d 3548
6324fbf3
CM
3549 ret = update_space_info(info, cache->flags, found_key.offset,
3550 btrfs_block_group_used(&cache->item),
3551 &space_info);
3552 BUG_ON(ret);
3553 cache->space_info = space_info;
3554
96b5179d
CM
3555 /* use EXTENT_LOCKED to prevent merging */
3556 set_extent_bits(block_group_cache, found_key.objectid,
3557 found_key.objectid + found_key.offset - 1,
c286ac48 3558 EXTENT_LOCKED, GFP_NOFS);
96b5179d 3559 set_state_private(block_group_cache, found_key.objectid,
ae2f5411 3560 (unsigned long)cache);
c286ac48
CM
3561 set_extent_bits(block_group_cache, found_key.objectid,
3562 found_key.objectid + found_key.offset - 1,
3563 bit | EXTENT_LOCKED, GFP_NOFS);
9078a3e1 3564 if (key.objectid >=
db94535d 3565 btrfs_super_total_bytes(&info->super_copy))
9078a3e1
CM
3566 break;
3567 }
0b86a832
CM
3568 ret = 0;
3569error:
9078a3e1 3570 btrfs_free_path(path);
925baedd 3571 mutex_unlock(&root->fs_info->alloc_mutex);
0b86a832 3572 return ret;
9078a3e1 3573}
6324fbf3
CM
3574
3575int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3576 struct btrfs_root *root, u64 bytes_used,
e17cade2 3577 u64 type, u64 chunk_objectid, u64 chunk_offset,
6324fbf3
CM
3578 u64 size)
3579{
3580 int ret;
3581 int bit = 0;
3582 struct btrfs_root *extent_root;
3583 struct btrfs_block_group_cache *cache;
3584 struct extent_io_tree *block_group_cache;
3585
7d9eb12c 3586 WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
6324fbf3
CM
3587 extent_root = root->fs_info->extent_root;
3588 block_group_cache = &root->fs_info->block_group_cache;
3589
8f18cf13 3590 cache = kzalloc(sizeof(*cache), GFP_NOFS);
6324fbf3 3591 BUG_ON(!cache);
e17cade2 3592 cache->key.objectid = chunk_offset;
6324fbf3 3593 cache->key.offset = size;
c286ac48 3594 spin_lock_init(&cache->lock);
6324fbf3 3595 btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
0ef3e66b 3596
6324fbf3 3597 btrfs_set_block_group_used(&cache->item, bytes_used);
6324fbf3
CM
3598 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
3599 cache->flags = type;
3600 btrfs_set_block_group_flags(&cache->item, type);
3601
3602 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
3603 &cache->space_info);
3604 BUG_ON(ret);
3605
d18a2c44 3606 bit = block_group_state_bits(type);
e17cade2
CM
3607 set_extent_bits(block_group_cache, chunk_offset,
3608 chunk_offset + size - 1,
c286ac48 3609 EXTENT_LOCKED, GFP_NOFS);
e17cade2
CM
3610 set_state_private(block_group_cache, chunk_offset,
3611 (unsigned long)cache);
c286ac48
CM
3612 set_extent_bits(block_group_cache, chunk_offset,
3613 chunk_offset + size - 1,
3614 bit | EXTENT_LOCKED, GFP_NOFS);
3615
6324fbf3
CM
3616 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3617 sizeof(cache->item));
3618 BUG_ON(ret);
3619
3620 finish_current_insert(trans, extent_root);
3621 ret = del_pending_extents(trans, extent_root);
3622 BUG_ON(ret);
d18a2c44 3623 set_avail_alloc_bits(extent_root->fs_info, type);
925baedd 3624
6324fbf3
CM
3625 return 0;
3626}