Btrfs: Implement our own copy of the nfsd readdir hack, for older kernels
[linux-2.6-block.git] / fs / btrfs / extent-tree.c
CommitLineData
6cbd5570
CM
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
ec6b910f 18#include <linux/sched.h>
edbd8d4e 19#include <linux/pagemap.h>
ec44a35c 20#include <linux/writeback.h>
74493f7a 21#include "hash.h"
a5eb62e3 22#include "crc32c.h"
fec577fb
CM
23#include "ctree.h"
24#include "disk-io.h"
25#include "print-tree.h"
e089f05c 26#include "transaction.h"
0b86a832 27#include "volumes.h"
925baedd 28#include "locking.h"
31153d81 29#include "ref-cache.h"
fec577fb 30
0b86a832 31#define BLOCK_GROUP_DATA EXTENT_WRITEBACK
96b5179d 32#define BLOCK_GROUP_METADATA EXTENT_UPTODATE
0b86a832
CM
33#define BLOCK_GROUP_SYSTEM EXTENT_NEW
34
96b5179d
CM
35#define BLOCK_GROUP_DIRTY EXTENT_DIRTY
36
e089f05c
CM
37static int finish_current_insert(struct btrfs_trans_handle *trans, struct
38 btrfs_root *extent_root);
e20d96d6
CM
39static int del_pending_extents(struct btrfs_trans_handle *trans, struct
40 btrfs_root *extent_root);
925baedd
CM
41static struct btrfs_block_group_cache *
42__btrfs_find_block_group(struct btrfs_root *root,
43 struct btrfs_block_group_cache *hint,
44 u64 search_start, int data, int owner);
d548ee51 45
925baedd
CM
46void maybe_lock_mutex(struct btrfs_root *root)
47{
48 if (root != root->fs_info->extent_root &&
49 root != root->fs_info->chunk_root &&
50 root != root->fs_info->dev_root) {
51 mutex_lock(&root->fs_info->alloc_mutex);
52 }
53}
54
55void maybe_unlock_mutex(struct btrfs_root *root)
56{
57 if (root != root->fs_info->extent_root &&
58 root != root->fs_info->chunk_root &&
59 root != root->fs_info->dev_root) {
60 mutex_unlock(&root->fs_info->alloc_mutex);
61 }
62}
fec577fb 63
e37c9e69
CM
64static int cache_block_group(struct btrfs_root *root,
65 struct btrfs_block_group_cache *block_group)
66{
67 struct btrfs_path *path;
68 int ret;
69 struct btrfs_key key;
5f39d397 70 struct extent_buffer *leaf;
d1310b2e 71 struct extent_io_tree *free_space_cache;
e37c9e69 72 int slot;
e37c9e69
CM
73 u64 last = 0;
74 u64 hole_size;
7d7d6068 75 u64 first_free;
e37c9e69
CM
76 int found = 0;
77
00f5c795
CM
78 if (!block_group)
79 return 0;
80
e37c9e69 81 root = root->fs_info->extent_root;
f510cfec 82 free_space_cache = &root->fs_info->free_space_cache;
e37c9e69
CM
83
84 if (block_group->cached)
85 return 0;
f510cfec 86
e37c9e69
CM
87 path = btrfs_alloc_path();
88 if (!path)
89 return -ENOMEM;
7d7d6068 90
2cc58cf2 91 path->reada = 2;
5cd57b2c
CM
92 /*
93 * we get into deadlocks with paths held by callers of this function.
94 * since the alloc_mutex is protecting things right now, just
95 * skip the locking here
96 */
97 path->skip_locking = 1;
7d7d6068 98 first_free = block_group->key.objectid;
e37c9e69 99 key.objectid = block_group->key.objectid;
e37c9e69
CM
100 key.offset = 0;
101 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
102 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
103 if (ret < 0)
104 return ret;
0b86a832 105 ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
d548ee51
Y
106 if (ret < 0)
107 return ret;
108 if (ret == 0) {
109 leaf = path->nodes[0];
110 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
111 if (key.objectid + key.offset > first_free)
112 first_free = key.objectid + key.offset;
113 }
e37c9e69 114 while(1) {
5f39d397 115 leaf = path->nodes[0];
e37c9e69 116 slot = path->slots[0];
5f39d397 117 if (slot >= btrfs_header_nritems(leaf)) {
e37c9e69 118 ret = btrfs_next_leaf(root, path);
54aa1f4d
CM
119 if (ret < 0)
120 goto err;
de428b63 121 if (ret == 0) {
e37c9e69 122 continue;
de428b63 123 } else {
e37c9e69
CM
124 break;
125 }
126 }
5f39d397 127 btrfs_item_key_to_cpu(leaf, &key, slot);
7d7d6068 128 if (key.objectid < block_group->key.objectid) {
7d7d6068
Y
129 goto next;
130 }
e37c9e69
CM
131 if (key.objectid >= block_group->key.objectid +
132 block_group->key.offset) {
e37c9e69
CM
133 break;
134 }
7d7d6068 135
e37c9e69
CM
136 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
137 if (!found) {
7d7d6068 138 last = first_free;
e37c9e69 139 found = 1;
e37c9e69 140 }
f510cfec
CM
141 if (key.objectid > last) {
142 hole_size = key.objectid - last;
143 set_extent_dirty(free_space_cache, last,
144 last + hole_size - 1,
145 GFP_NOFS);
7d7d6068
Y
146 }
147 last = key.objectid + key.offset;
e37c9e69 148 }
7d7d6068 149next:
e37c9e69
CM
150 path->slots[0]++;
151 }
152
7d7d6068
Y
153 if (!found)
154 last = first_free;
155 if (block_group->key.objectid +
156 block_group->key.offset > last) {
157 hole_size = block_group->key.objectid +
158 block_group->key.offset - last;
f510cfec
CM
159 set_extent_dirty(free_space_cache, last,
160 last + hole_size - 1, GFP_NOFS);
7d7d6068 161 }
e37c9e69 162 block_group->cached = 1;
54aa1f4d 163err:
e37c9e69
CM
164 btrfs_free_path(path);
165 return 0;
166}
167
0ef3e66b
CM
168struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
169 btrfs_fs_info *info,
170 u64 bytenr)
171{
172 struct extent_io_tree *block_group_cache;
173 struct btrfs_block_group_cache *block_group = NULL;
174 u64 ptr;
175 u64 start;
176 u64 end;
177 int ret;
178
179 bytenr = max_t(u64, bytenr,
180 BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
181 block_group_cache = &info->block_group_cache;
182 ret = find_first_extent_bit(block_group_cache,
183 bytenr, &start, &end,
184 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
185 BLOCK_GROUP_SYSTEM);
186 if (ret) {
187 return NULL;
188 }
189 ret = get_state_private(block_group_cache, start, &ptr);
190 if (ret)
191 return NULL;
192
193 block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
194 return block_group;
195}
196
5276aeda
CM
197struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
198 btrfs_fs_info *info,
db94535d 199 u64 bytenr)
be744175 200{
d1310b2e 201 struct extent_io_tree *block_group_cache;
96b5179d
CM
202 struct btrfs_block_group_cache *block_group = NULL;
203 u64 ptr;
204 u64 start;
205 u64 end;
be744175
CM
206 int ret;
207
a061fc8d
CM
208 bytenr = max_t(u64, bytenr,
209 BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
96b5179d
CM
210 block_group_cache = &info->block_group_cache;
211 ret = find_first_extent_bit(block_group_cache,
db94535d 212 bytenr, &start, &end,
0b86a832
CM
213 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
214 BLOCK_GROUP_SYSTEM);
be744175 215 if (ret) {
96b5179d 216 return NULL;
be744175 217 }
96b5179d
CM
218 ret = get_state_private(block_group_cache, start, &ptr);
219 if (ret)
220 return NULL;
221
ae2f5411 222 block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
5cf66426 223 if (block_group->key.objectid <= bytenr && bytenr <
96b5179d
CM
224 block_group->key.objectid + block_group->key.offset)
225 return block_group;
be744175
CM
226 return NULL;
227}
0b86a832
CM
228
229static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
230{
593060d7 231 return (cache->flags & bits) == bits;
0b86a832
CM
232}
233
234static int noinline find_search_start(struct btrfs_root *root,
98ed5174 235 struct btrfs_block_group_cache **cache_ret,
0ef3e66b 236 u64 *start_ret, u64 num, int data)
e37c9e69 237{
e37c9e69
CM
238 int ret;
239 struct btrfs_block_group_cache *cache = *cache_ret;
d7fc640e 240 struct extent_io_tree *free_space_cache;
7d1660d4 241 struct extent_state *state;
e19caa5f 242 u64 last;
f510cfec 243 u64 start = 0;
257d0ce3 244 u64 cache_miss = 0;
c31f8830 245 u64 total_fs_bytes;
0b86a832 246 u64 search_start = *start_ret;
f84a8b36 247 int wrapped = 0;
e37c9e69 248
7d9eb12c 249 WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
c31f8830 250 total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
d7fc640e
CM
251 free_space_cache = &root->fs_info->free_space_cache;
252
0ef3e66b
CM
253 if (!cache)
254 goto out;
255
e37c9e69 256again:
54aa1f4d 257 ret = cache_block_group(root, cache);
0ef3e66b 258 if (ret) {
54aa1f4d 259 goto out;
0ef3e66b 260 }
f84a8b36 261
e19caa5f 262 last = max(search_start, cache->key.objectid);
0ef3e66b 263 if (!block_group_bits(cache, data) || cache->ro)
0b86a832 264 goto new_group;
e19caa5f 265
7d1660d4
CM
266 spin_lock_irq(&free_space_cache->lock);
267 state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY);
e37c9e69 268 while(1) {
7d1660d4 269 if (!state) {
257d0ce3
CM
270 if (!cache_miss)
271 cache_miss = last;
7d1660d4 272 spin_unlock_irq(&free_space_cache->lock);
e19caa5f
CM
273 goto new_group;
274 }
f510cfec 275
7d1660d4
CM
276 start = max(last, state->start);
277 last = state->end + 1;
257d0ce3 278 if (last - start < num) {
7d1660d4
CM
279 do {
280 state = extent_state_next(state);
281 } while(state && !(state->state & EXTENT_DIRTY));
f510cfec 282 continue;
257d0ce3 283 }
7d1660d4 284 spin_unlock_irq(&free_space_cache->lock);
0ef3e66b 285 if (cache->ro) {
8f18cf13 286 goto new_group;
0ef3e66b 287 }
0b86a832 288 if (start + num > cache->key.objectid + cache->key.offset)
e37c9e69 289 goto new_group;
8790d502 290 if (!block_group_bits(cache, data)) {
611f0e00 291 printk("block group bits don't match %Lu %d\n", cache->flags, data);
8790d502 292 }
0b86a832
CM
293 *start_ret = start;
294 return 0;
8790d502
CM
295 }
296out:
1a2b2ac7
CM
297 cache = btrfs_lookup_block_group(root->fs_info, search_start);
298 if (!cache) {
0b86a832 299 printk("Unable to find block group for %Lu\n", search_start);
1a2b2ac7 300 WARN_ON(1);
1a2b2ac7 301 }
0b86a832 302 return -ENOSPC;
e37c9e69
CM
303
304new_group:
e19caa5f 305 last = cache->key.objectid + cache->key.offset;
f84a8b36 306wrapped:
0ef3e66b 307 cache = btrfs_lookup_first_block_group(root->fs_info, last);
c31f8830 308 if (!cache || cache->key.objectid >= total_fs_bytes) {
0e4de584 309no_cache:
f84a8b36
CM
310 if (!wrapped) {
311 wrapped = 1;
312 last = search_start;
f84a8b36
CM
313 goto wrapped;
314 }
1a2b2ac7 315 goto out;
e37c9e69 316 }
257d0ce3
CM
317 if (cache_miss && !cache->cached) {
318 cache_block_group(root, cache);
319 last = cache_miss;
0ef3e66b 320 cache = btrfs_lookup_first_block_group(root->fs_info, last);
257d0ce3 321 }
0ef3e66b 322 cache_miss = 0;
c286ac48 323 cache = btrfs_find_block_group(root, cache, last, data, 0);
0e4de584
CM
324 if (!cache)
325 goto no_cache;
e37c9e69
CM
326 *cache_ret = cache;
327 goto again;
328}
329
84f54cfa
CM
330static u64 div_factor(u64 num, int factor)
331{
257d0ce3
CM
332 if (factor == 10)
333 return num;
84f54cfa
CM
334 num *= factor;
335 do_div(num, 10);
336 return num;
337}
338
6324fbf3
CM
339static int block_group_state_bits(u64 flags)
340{
341 int bits = 0;
342 if (flags & BTRFS_BLOCK_GROUP_DATA)
343 bits |= BLOCK_GROUP_DATA;
344 if (flags & BTRFS_BLOCK_GROUP_METADATA)
345 bits |= BLOCK_GROUP_METADATA;
346 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
347 bits |= BLOCK_GROUP_SYSTEM;
348 return bits;
349}
350
925baedd
CM
351static struct btrfs_block_group_cache *
352__btrfs_find_block_group(struct btrfs_root *root,
353 struct btrfs_block_group_cache *hint,
354 u64 search_start, int data, int owner)
cd1bc465 355{
96b5179d 356 struct btrfs_block_group_cache *cache;
d1310b2e 357 struct extent_io_tree *block_group_cache;
31f3c99b 358 struct btrfs_block_group_cache *found_group = NULL;
cd1bc465
CM
359 struct btrfs_fs_info *info = root->fs_info;
360 u64 used;
31f3c99b 361 u64 last = 0;
96b5179d
CM
362 u64 start;
363 u64 end;
364 u64 free_check;
365 u64 ptr;
366 int bit;
cd1bc465 367 int ret;
31f3c99b 368 int full_search = 0;
bce4eae9 369 int factor = 10;
0ef3e66b 370 int wrapped = 0;
de428b63 371
96b5179d
CM
372 block_group_cache = &info->block_group_cache;
373
a236aed1
CM
374 if (data & BTRFS_BLOCK_GROUP_METADATA)
375 factor = 9;
be744175 376
6324fbf3 377 bit = block_group_state_bits(data);
be744175 378
0ef3e66b 379 if (search_start) {
be744175 380 struct btrfs_block_group_cache *shint;
0ef3e66b 381 shint = btrfs_lookup_first_block_group(info, search_start);
8f18cf13 382 if (shint && block_group_bits(shint, data) && !shint->ro) {
c286ac48 383 spin_lock(&shint->lock);
be744175 384 used = btrfs_block_group_used(&shint->item);
324ae4df
Y
385 if (used + shint->pinned <
386 div_factor(shint->key.offset, factor)) {
c286ac48 387 spin_unlock(&shint->lock);
be744175
CM
388 return shint;
389 }
c286ac48 390 spin_unlock(&shint->lock);
be744175
CM
391 }
392 }
0ef3e66b 393 if (hint && !hint->ro && block_group_bits(hint, data)) {
c286ac48 394 spin_lock(&hint->lock);
31f3c99b 395 used = btrfs_block_group_used(&hint->item);
324ae4df
Y
396 if (used + hint->pinned <
397 div_factor(hint->key.offset, factor)) {
c286ac48 398 spin_unlock(&hint->lock);
31f3c99b
CM
399 return hint;
400 }
c286ac48 401 spin_unlock(&hint->lock);
e19caa5f 402 last = hint->key.objectid + hint->key.offset;
31f3c99b 403 } else {
e37c9e69 404 if (hint)
0ef3e66b 405 last = max(hint->key.objectid, search_start);
e37c9e69 406 else
0ef3e66b 407 last = search_start;
31f3c99b 408 }
31f3c99b 409again:
cd1bc465 410 while(1) {
96b5179d
CM
411 ret = find_first_extent_bit(block_group_cache, last,
412 &start, &end, bit);
413 if (ret)
cd1bc465 414 break;
96b5179d
CM
415
416 ret = get_state_private(block_group_cache, start, &ptr);
0ef3e66b
CM
417 if (ret) {
418 last = end + 1;
419 continue;
420 }
96b5179d 421
ae2f5411 422 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
c286ac48 423 spin_lock(&cache->lock);
96b5179d
CM
424 last = cache->key.objectid + cache->key.offset;
425 used = btrfs_block_group_used(&cache->item);
426
8f18cf13 427 if (!cache->ro && block_group_bits(cache, data)) {
0ef3e66b 428 free_check = div_factor(cache->key.offset, factor);
8790d502
CM
429 if (used + cache->pinned < free_check) {
430 found_group = cache;
c286ac48 431 spin_unlock(&cache->lock);
8790d502
CM
432 goto found;
433 }
6324fbf3 434 }
c286ac48 435 spin_unlock(&cache->lock);
de428b63 436 cond_resched();
cd1bc465 437 }
0ef3e66b
CM
438 if (!wrapped) {
439 last = search_start;
440 wrapped = 1;
441 goto again;
442 }
443 if (!full_search && factor < 10) {
be744175 444 last = search_start;
31f3c99b 445 full_search = 1;
0ef3e66b 446 factor = 10;
31f3c99b
CM
447 goto again;
448 }
be744175 449found:
31f3c99b 450 return found_group;
cd1bc465
CM
451}
452
925baedd
CM
453struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
454 struct btrfs_block_group_cache
455 *hint, u64 search_start,
456 int data, int owner)
457{
458
459 struct btrfs_block_group_cache *ret;
925baedd 460 ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
925baedd
CM
461 return ret;
462}
7bb86316 463static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
74493f7a
CM
464 u64 owner, u64 owner_offset)
465{
466 u32 high_crc = ~(u32)0;
467 u32 low_crc = ~(u32)0;
468 __le64 lenum;
74493f7a 469 lenum = cpu_to_le64(root_objectid);
a5eb62e3 470 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
7bb86316 471 lenum = cpu_to_le64(ref_generation);
a5eb62e3 472 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
21a4989d
CM
473 if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
474 lenum = cpu_to_le64(owner);
a5eb62e3 475 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
21a4989d 476 lenum = cpu_to_le64(owner_offset);
a5eb62e3 477 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
21a4989d 478 }
74493f7a
CM
479 return ((u64)high_crc << 32) | (u64)low_crc;
480}
481
7bb86316
CM
482static int match_extent_ref(struct extent_buffer *leaf,
483 struct btrfs_extent_ref *disk_ref,
484 struct btrfs_extent_ref *cpu_ref)
485{
486 int ret;
487 int len;
488
489 if (cpu_ref->objectid)
490 len = sizeof(*cpu_ref);
491 else
492 len = 2 * sizeof(u64);
493 ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
494 len);
495 return ret == 0;
496}
497
98ed5174
CM
498static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
499 struct btrfs_root *root,
500 struct btrfs_path *path, u64 bytenr,
501 u64 root_objectid,
502 u64 ref_generation, u64 owner,
503 u64 owner_offset, int del)
74493f7a
CM
504{
505 u64 hash;
506 struct btrfs_key key;
7bb86316 507 struct btrfs_key found_key;
74493f7a 508 struct btrfs_extent_ref ref;
7bb86316
CM
509 struct extent_buffer *leaf;
510 struct btrfs_extent_ref *disk_ref;
511 int ret;
512 int ret2;
513
514 btrfs_set_stack_ref_root(&ref, root_objectid);
515 btrfs_set_stack_ref_generation(&ref, ref_generation);
516 btrfs_set_stack_ref_objectid(&ref, owner);
517 btrfs_set_stack_ref_offset(&ref, owner_offset);
518
519 hash = hash_extent_ref(root_objectid, ref_generation, owner,
520 owner_offset);
521 key.offset = hash;
522 key.objectid = bytenr;
523 key.type = BTRFS_EXTENT_REF_KEY;
524
525 while (1) {
526 ret = btrfs_search_slot(trans, root, &key, path,
527 del ? -1 : 0, del);
528 if (ret < 0)
529 goto out;
530 leaf = path->nodes[0];
531 if (ret != 0) {
532 u32 nritems = btrfs_header_nritems(leaf);
533 if (path->slots[0] >= nritems) {
534 ret2 = btrfs_next_leaf(root, path);
535 if (ret2)
536 goto out;
537 leaf = path->nodes[0];
538 }
539 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
540 if (found_key.objectid != bytenr ||
541 found_key.type != BTRFS_EXTENT_REF_KEY)
542 goto out;
543 key.offset = found_key.offset;
544 if (del) {
545 btrfs_release_path(root, path);
546 continue;
547 }
548 }
549 disk_ref = btrfs_item_ptr(path->nodes[0],
550 path->slots[0],
551 struct btrfs_extent_ref);
552 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
553 ret = 0;
554 goto out;
555 }
556 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
557 key.offset = found_key.offset + 1;
558 btrfs_release_path(root, path);
559 }
560out:
561 return ret;
562}
563
d8d5f3e1
CM
564/*
565 * Back reference rules. Back refs have three main goals:
566 *
567 * 1) differentiate between all holders of references to an extent so that
568 * when a reference is dropped we can make sure it was a valid reference
569 * before freeing the extent.
570 *
571 * 2) Provide enough information to quickly find the holders of an extent
572 * if we notice a given block is corrupted or bad.
573 *
574 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
575 * maintenance. This is actually the same as #2, but with a slightly
576 * different use case.
577 *
578 * File extents can be referenced by:
579 *
580 * - multiple snapshots, subvolumes, or different generations in one subvol
581 * - different files inside a single subvolume (in theory, not implemented yet)
582 * - different offsets inside a file (bookend extents in file.c)
583 *
584 * The extent ref structure has fields for:
585 *
586 * - Objectid of the subvolume root
587 * - Generation number of the tree holding the reference
588 * - objectid of the file holding the reference
589 * - offset in the file corresponding to the key holding the reference
590 *
591 * When a file extent is allocated the fields are filled in:
592 * (root_key.objectid, trans->transid, inode objectid, offset in file)
593 *
594 * When a leaf is cow'd new references are added for every file extent found
595 * in the leaf. It looks the same as the create case, but trans->transid
596 * will be different when the block is cow'd.
597 *
598 * (root_key.objectid, trans->transid, inode objectid, offset in file)
599 *
600 * When a file extent is removed either during snapshot deletion or file
601 * truncation, the corresponding back reference is found
602 * by searching for:
603 *
604 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
605 * inode objectid, offset in file)
606 *
607 * Btree extents can be referenced by:
608 *
609 * - Different subvolumes
610 * - Different generations of the same subvolume
611 *
612 * Storing sufficient information for a full reverse mapping of a btree
613 * block would require storing the lowest key of the block in the backref,
614 * and it would require updating that lowest key either before write out or
615 * every time it changed. Instead, the objectid of the lowest key is stored
616 * along with the level of the tree block. This provides a hint
617 * about where in the btree the block can be found. Searches through the
618 * btree only need to look for a pointer to that block, so they stop one
619 * level higher than the level recorded in the backref.
620 *
621 * Some btrees do not do reference counting on their extents. These
622 * include the extent tree and the tree of tree roots. Backrefs for these
623 * trees always have a generation of zero.
624 *
625 * When a tree block is created, back references are inserted:
626 *
f6dbff55 627 * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
d8d5f3e1
CM
628 *
629 * When a tree block is cow'd in a reference counted root,
630 * new back references are added for all the blocks it points to.
631 * These are of the form (trans->transid will have increased since creation):
632 *
f6dbff55 633 * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
d8d5f3e1
CM
634 *
635 * Because the lowest_key_objectid and the level are just hints
636 * they are not used when backrefs are deleted. When a backref is deleted:
637 *
638 * if backref was for a tree root:
639 * root_objectid = root->root_key.objectid
640 * else
641 * root_objectid = btrfs_header_owner(parent)
642 *
643 * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
644 *
645 * Back Reference Key hashing:
646 *
 * Back references have four fields, each 64 bits long.  Unfortunately,
 * this is hashed into a single 64 bit number and placed into the key offset.
649 * The key objectid corresponds to the first byte in the extent, and the
650 * key type is set to BTRFS_EXTENT_REF_KEY
651 */
7bb86316
CM
652int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
653 struct btrfs_root *root,
654 struct btrfs_path *path, u64 bytenr,
655 u64 root_objectid, u64 ref_generation,
656 u64 owner, u64 owner_offset)
657{
658 u64 hash;
659 struct btrfs_key key;
660 struct btrfs_extent_ref ref;
661 struct btrfs_extent_ref *disk_ref;
74493f7a
CM
662 int ret;
663
664 btrfs_set_stack_ref_root(&ref, root_objectid);
7bb86316 665 btrfs_set_stack_ref_generation(&ref, ref_generation);
74493f7a
CM
666 btrfs_set_stack_ref_objectid(&ref, owner);
667 btrfs_set_stack_ref_offset(&ref, owner_offset);
668
7bb86316
CM
669 hash = hash_extent_ref(root_objectid, ref_generation, owner,
670 owner_offset);
74493f7a
CM
671 key.offset = hash;
672 key.objectid = bytenr;
673 key.type = BTRFS_EXTENT_REF_KEY;
674
675 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
676 while (ret == -EEXIST) {
7bb86316
CM
677 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
678 struct btrfs_extent_ref);
679 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
680 goto out;
681 key.offset++;
682 btrfs_release_path(root, path);
683 ret = btrfs_insert_empty_item(trans, root, path, &key,
684 sizeof(ref));
74493f7a 685 }
7bb86316
CM
686 if (ret)
687 goto out;
688 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
689 struct btrfs_extent_ref);
690 write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
691 sizeof(ref));
692 btrfs_mark_buffer_dirty(path->nodes[0]);
693out:
694 btrfs_release_path(root, path);
695 return ret;
74493f7a
CM
696}
697
925baedd 698static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
b18c6685 699 struct btrfs_root *root,
74493f7a 700 u64 bytenr, u64 num_bytes,
7bb86316 701 u64 root_objectid, u64 ref_generation,
74493f7a 702 u64 owner, u64 owner_offset)
02217ed2 703{
5caf2a00 704 struct btrfs_path *path;
02217ed2 705 int ret;
e2fa7227 706 struct btrfs_key key;
5f39d397 707 struct extent_buffer *l;
234b63a0 708 struct btrfs_extent_item *item;
cf27e1ee 709 u32 refs;
037e6390 710
db94535d 711 WARN_ON(num_bytes < root->sectorsize);
5caf2a00 712 path = btrfs_alloc_path();
54aa1f4d
CM
713 if (!path)
714 return -ENOMEM;
26b8003f 715
3c12ac72 716 path->reada = 1;
db94535d 717 key.objectid = bytenr;
62e2749e 718 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
db94535d 719 key.offset = num_bytes;
5caf2a00 720 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
9f5fae2f 721 0, 1);
54aa1f4d
CM
722 if (ret < 0)
723 return ret;
a429e513 724 if (ret != 0) {
a28ec197 725 BUG();
a429e513 726 }
02217ed2 727 BUG_ON(ret != 0);
5f39d397 728 l = path->nodes[0];
5caf2a00 729 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
5f39d397
CM
730 refs = btrfs_extent_refs(l, item);
731 btrfs_set_extent_refs(l, item, refs + 1);
5caf2a00 732 btrfs_mark_buffer_dirty(path->nodes[0]);
a28ec197 733
5caf2a00 734 btrfs_release_path(root->fs_info->extent_root, path);
7bb86316 735
3c12ac72 736 path->reada = 1;
7bb86316
CM
737 ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
738 path, bytenr, root_objectid,
739 ref_generation, owner, owner_offset);
740 BUG_ON(ret);
9f5fae2f 741 finish_current_insert(trans, root->fs_info->extent_root);
e20d96d6 742 del_pending_extents(trans, root->fs_info->extent_root);
74493f7a
CM
743
744 btrfs_free_path(path);
02217ed2
CM
745 return 0;
746}
747
925baedd
CM
748int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
749 struct btrfs_root *root,
750 u64 bytenr, u64 num_bytes,
751 u64 root_objectid, u64 ref_generation,
752 u64 owner, u64 owner_offset)
753{
754 int ret;
755
756 mutex_lock(&root->fs_info->alloc_mutex);
757 ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
758 root_objectid, ref_generation,
759 owner, owner_offset);
760 mutex_unlock(&root->fs_info->alloc_mutex);
761 return ret;
762}
763
e9d0b13b
CM
764int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
765 struct btrfs_root *root)
766{
767 finish_current_insert(trans, root->fs_info->extent_root);
768 del_pending_extents(trans, root->fs_info->extent_root);
769 return 0;
770}
771
b18c6685 772static int lookup_extent_ref(struct btrfs_trans_handle *trans,
db94535d
CM
773 struct btrfs_root *root, u64 bytenr,
774 u64 num_bytes, u32 *refs)
a28ec197 775{
5caf2a00 776 struct btrfs_path *path;
a28ec197 777 int ret;
e2fa7227 778 struct btrfs_key key;
5f39d397 779 struct extent_buffer *l;
234b63a0 780 struct btrfs_extent_item *item;
5caf2a00 781
db94535d 782 WARN_ON(num_bytes < root->sectorsize);
5caf2a00 783 path = btrfs_alloc_path();
3c12ac72 784 path->reada = 1;
db94535d
CM
785 key.objectid = bytenr;
786 key.offset = num_bytes;
62e2749e 787 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
5caf2a00 788 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
9f5fae2f 789 0, 0);
54aa1f4d
CM
790 if (ret < 0)
791 goto out;
5f39d397
CM
792 if (ret != 0) {
793 btrfs_print_leaf(root, path->nodes[0]);
db94535d 794 printk("failed to find block number %Lu\n", bytenr);
a28ec197 795 BUG();
5f39d397
CM
796 }
797 l = path->nodes[0];
5caf2a00 798 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
5f39d397 799 *refs = btrfs_extent_refs(l, item);
54aa1f4d 800out:
5caf2a00 801 btrfs_free_path(path);
a28ec197
CM
802 return 0;
803}
804
f321e491
YZ
805
806static int get_reference_status(struct btrfs_root *root, u64 bytenr,
807 u64 parent_gen, u64 ref_objectid,
808 u64 *min_generation, u32 *ref_count)
be20aa9d
CM
809{
810 struct btrfs_root *extent_root = root->fs_info->extent_root;
811 struct btrfs_path *path;
f321e491
YZ
812 struct extent_buffer *leaf;
813 struct btrfs_extent_ref *ref_item;
814 struct btrfs_key key;
815 struct btrfs_key found_key;
56b453c9 816 u64 root_objectid = root->root_key.objectid;
f321e491 817 u64 ref_generation;
be20aa9d
CM
818 u32 nritems;
819 int ret;
925baedd 820
be20aa9d
CM
821 key.objectid = bytenr;
822 key.offset = 0;
f321e491 823 key.type = BTRFS_EXTENT_ITEM_KEY;
be20aa9d 824
f321e491
YZ
825 path = btrfs_alloc_path();
826 mutex_lock(&root->fs_info->alloc_mutex);
be20aa9d
CM
827 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
828 if (ret < 0)
829 goto out;
830 BUG_ON(ret == 0);
831
f321e491
YZ
832 leaf = path->nodes[0];
833 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
be20aa9d
CM
834
835 if (found_key.objectid != bytenr ||
836 found_key.type != BTRFS_EXTENT_ITEM_KEY) {
f321e491 837 ret = 1;
be20aa9d
CM
838 goto out;
839 }
840
f321e491
YZ
841 *ref_count = 0;
842 *min_generation = (u64)-1;
843
be20aa9d 844 while (1) {
f321e491
YZ
845 leaf = path->nodes[0];
846 nritems = btrfs_header_nritems(leaf);
be20aa9d
CM
847 if (path->slots[0] >= nritems) {
848 ret = btrfs_next_leaf(extent_root, path);
f321e491
YZ
849 if (ret < 0)
850 goto out;
be20aa9d
CM
851 if (ret == 0)
852 continue;
853 break;
854 }
f321e491 855 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
be20aa9d
CM
856 if (found_key.objectid != bytenr)
857 break;
bd09835d 858
be20aa9d
CM
859 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
860 path->slots[0]++;
861 continue;
862 }
863
f321e491 864 ref_item = btrfs_item_ptr(leaf, path->slots[0],
be20aa9d 865 struct btrfs_extent_ref);
f321e491
YZ
866 ref_generation = btrfs_ref_generation(leaf, ref_item);
867 /*
868 * For (parent_gen > 0 && parent_gen > ref_gen):
869 *
bcc63abb
Y
870 * we reach here through the oldest root, therefore
871 * all other reference from same snapshot should have
f321e491
YZ
872 * a larger generation.
873 */
874 if ((root_objectid != btrfs_ref_root(leaf, ref_item)) ||
875 (parent_gen > 0 && parent_gen > ref_generation) ||
876 (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
877 ref_objectid != btrfs_ref_objectid(leaf, ref_item))) {
878 if (ref_count)
879 *ref_count = 2;
880 break;
a68d5933 881 }
f321e491
YZ
882
883 *ref_count = 1;
884 if (*min_generation > ref_generation)
885 *min_generation = ref_generation;
886
be20aa9d
CM
887 path->slots[0]++;
888 }
f321e491
YZ
889 ret = 0;
890out:
891 mutex_unlock(&root->fs_info->alloc_mutex);
892 btrfs_free_path(path);
893 return ret;
894}
895
7ea394f1
YZ
896int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
897 struct btrfs_root *root,
f321e491
YZ
898 struct btrfs_key *key, u64 bytenr)
899{
f321e491
YZ
900 struct btrfs_root *old_root;
901 struct btrfs_path *path = NULL;
902 struct extent_buffer *eb;
903 struct btrfs_file_extent_item *item;
904 u64 ref_generation;
905 u64 min_generation;
906 u64 extent_start;
907 u32 ref_count;
908 int level;
909 int ret;
910
7ea394f1 911 BUG_ON(trans == NULL);
f321e491
YZ
912 BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY);
913 ret = get_reference_status(root, bytenr, 0, key->objectid,
914 &min_generation, &ref_count);
915 if (ret)
916 return ret;
917
918 if (ref_count != 1)
919 return 1;
920
f321e491
YZ
921 old_root = root->dirty_root->root;
922 ref_generation = old_root->root_key.offset;
923
924 /* all references are created in running transaction */
925 if (min_generation > ref_generation) {
926 ret = 0;
bbaf549e
CM
927 goto out;
928 }
f321e491
YZ
929
930 path = btrfs_alloc_path();
931 if (!path) {
932 ret = -ENOMEM;
be20aa9d
CM
933 goto out;
934 }
f321e491
YZ
935
936 path->skip_locking = 1;
937 /* if no item found, the extent is referenced by other snapshot */
938 ret = btrfs_search_slot(NULL, old_root, key, path, 0, 0);
939 if (ret)
be20aa9d 940 goto out;
be20aa9d 941
f321e491
YZ
942 eb = path->nodes[0];
943 item = btrfs_item_ptr(eb, path->slots[0],
944 struct btrfs_file_extent_item);
945 if (btrfs_file_extent_type(eb, item) != BTRFS_FILE_EXTENT_REG ||
946 btrfs_file_extent_disk_bytenr(eb, item) != bytenr) {
947 ret = 1;
948 goto out;
949 }
950
951 for (level = BTRFS_MAX_LEVEL - 1; level >= -1; level--) {
952 if (level >= 0) {
953 eb = path->nodes[level];
954 if (!eb)
955 continue;
956 extent_start = eb->start;
bcc63abb 957 } else
f321e491
YZ
958 extent_start = bytenr;
959
960 ret = get_reference_status(root, extent_start, ref_generation,
961 0, &min_generation, &ref_count);
962 if (ret)
963 goto out;
964
965 if (ref_count != 1) {
966 ret = 1;
967 goto out;
968 }
969 if (level >= 0)
970 ref_generation = btrfs_header_generation(eb);
971 }
972 ret = 0;
be20aa9d 973out:
f321e491
YZ
974 if (path)
975 btrfs_free_path(path);
f321e491 976 return ret;
be20aa9d 977}
c5739bba 978
e089f05c 979int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
31153d81 980 struct extent_buffer *buf, int cache_ref)
02217ed2 981{
db94535d 982 u64 bytenr;
5f39d397
CM
983 u32 nritems;
984 struct btrfs_key key;
6407bf6d 985 struct btrfs_file_extent_item *fi;
02217ed2 986 int i;
db94535d 987 int level;
6407bf6d 988 int ret;
54aa1f4d 989 int faili;
31153d81 990 int nr_file_extents = 0;
a28ec197 991
3768f368 992 if (!root->ref_cows)
a28ec197 993 return 0;
5f39d397 994
db94535d 995 level = btrfs_header_level(buf);
5f39d397
CM
996 nritems = btrfs_header_nritems(buf);
997 for (i = 0; i < nritems; i++) {
e34a5b4f 998 cond_resched();
db94535d
CM
999 if (level == 0) {
1000 u64 disk_bytenr;
5f39d397
CM
1001 btrfs_item_key_to_cpu(buf, &key, i);
1002 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
6407bf6d 1003 continue;
5f39d397 1004 fi = btrfs_item_ptr(buf, i,
6407bf6d 1005 struct btrfs_file_extent_item);
5f39d397 1006 if (btrfs_file_extent_type(buf, fi) ==
236454df
CM
1007 BTRFS_FILE_EXTENT_INLINE)
1008 continue;
db94535d
CM
1009 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1010 if (disk_bytenr == 0)
3a686375 1011 continue;
4a096752 1012
31153d81
YZ
1013 if (buf != root->commit_root)
1014 nr_file_extents++;
1015
4a096752 1016 mutex_lock(&root->fs_info->alloc_mutex);
925baedd 1017 ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr,
7bb86316
CM
1018 btrfs_file_extent_disk_num_bytes(buf, fi),
1019 root->root_key.objectid, trans->transid,
1020 key.objectid, key.offset);
4a096752 1021 mutex_unlock(&root->fs_info->alloc_mutex);
54aa1f4d
CM
1022 if (ret) {
1023 faili = i;
4a096752 1024 WARN_ON(1);
54aa1f4d
CM
1025 goto fail;
1026 }
6407bf6d 1027 } else {
db94535d 1028 bytenr = btrfs_node_blockptr(buf, i);
6caab489 1029 btrfs_node_key_to_cpu(buf, &key, i);
4a096752
CM
1030
1031 mutex_lock(&root->fs_info->alloc_mutex);
925baedd 1032 ret = __btrfs_inc_extent_ref(trans, root, bytenr,
7bb86316
CM
1033 btrfs_level_size(root, level - 1),
1034 root->root_key.objectid,
f6dbff55
CM
1035 trans->transid,
1036 level - 1, key.objectid);
4a096752 1037 mutex_unlock(&root->fs_info->alloc_mutex);
54aa1f4d
CM
1038 if (ret) {
1039 faili = i;
4a096752 1040 WARN_ON(1);
54aa1f4d
CM
1041 goto fail;
1042 }
6407bf6d 1043 }
02217ed2 1044 }
31153d81
YZ
1045 /* cache orignal leaf block's references */
1046 if (level == 0 && cache_ref && buf != root->commit_root) {
1047 struct btrfs_leaf_ref *ref;
1048 struct btrfs_extent_info *info;
1049
bcc63abb 1050 ref = btrfs_alloc_leaf_ref(root, nr_file_extents);
31153d81
YZ
1051 if (!ref) {
1052 WARN_ON(1);
1053 goto out;
1054 }
1055
47ac14fa 1056 ref->root_gen = root->root_key.offset;
31153d81
YZ
1057 ref->bytenr = buf->start;
1058 ref->owner = btrfs_header_owner(buf);
1059 ref->generation = btrfs_header_generation(buf);
1060 ref->nritems = nr_file_extents;
1061 info = ref->extents;
bcc63abb 1062
31153d81
YZ
1063 for (i = 0; nr_file_extents > 0 && i < nritems; i++) {
1064 u64 disk_bytenr;
1065 btrfs_item_key_to_cpu(buf, &key, i);
1066 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1067 continue;
1068 fi = btrfs_item_ptr(buf, i,
1069 struct btrfs_file_extent_item);
1070 if (btrfs_file_extent_type(buf, fi) ==
1071 BTRFS_FILE_EXTENT_INLINE)
1072 continue;
1073 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1074 if (disk_bytenr == 0)
1075 continue;
1076
1077 info->bytenr = disk_bytenr;
1078 info->num_bytes =
1079 btrfs_file_extent_disk_num_bytes(buf, fi);
1080 info->objectid = key.objectid;
1081 info->offset = key.offset;
1082 info++;
1083 }
1084
1085 BUG_ON(!root->ref_tree);
1086 ret = btrfs_add_leaf_ref(root, ref);
1087 WARN_ON(ret);
bcc63abb 1088 btrfs_free_leaf_ref(root, ref);
31153d81
YZ
1089 }
1090out:
02217ed2 1091 return 0;
54aa1f4d 1092fail:
ccd467d6 1093 WARN_ON(1);
7bb86316 1094#if 0
54aa1f4d 1095 for (i =0; i < faili; i++) {
db94535d
CM
1096 if (level == 0) {
1097 u64 disk_bytenr;
5f39d397
CM
1098 btrfs_item_key_to_cpu(buf, &key, i);
1099 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
54aa1f4d 1100 continue;
5f39d397 1101 fi = btrfs_item_ptr(buf, i,
54aa1f4d 1102 struct btrfs_file_extent_item);
5f39d397 1103 if (btrfs_file_extent_type(buf, fi) ==
54aa1f4d
CM
1104 BTRFS_FILE_EXTENT_INLINE)
1105 continue;
db94535d
CM
1106 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1107 if (disk_bytenr == 0)
54aa1f4d 1108 continue;
db94535d
CM
1109 err = btrfs_free_extent(trans, root, disk_bytenr,
1110 btrfs_file_extent_disk_num_bytes(buf,
5f39d397 1111 fi), 0);
54aa1f4d
CM
1112 BUG_ON(err);
1113 } else {
db94535d
CM
1114 bytenr = btrfs_node_blockptr(buf, i);
1115 err = btrfs_free_extent(trans, root, bytenr,
1116 btrfs_level_size(root, level - 1), 0);
54aa1f4d
CM
1117 BUG_ON(err);
1118 }
1119 }
7bb86316 1120#endif
54aa1f4d 1121 return ret;
02217ed2
CM
1122}
1123
9078a3e1
CM
1124static int write_one_cache_group(struct btrfs_trans_handle *trans,
1125 struct btrfs_root *root,
1126 struct btrfs_path *path,
1127 struct btrfs_block_group_cache *cache)
1128{
1129 int ret;
1130 int pending_ret;
1131 struct btrfs_root *extent_root = root->fs_info->extent_root;
5f39d397
CM
1132 unsigned long bi;
1133 struct extent_buffer *leaf;
9078a3e1 1134
9078a3e1 1135 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
54aa1f4d
CM
1136 if (ret < 0)
1137 goto fail;
9078a3e1 1138 BUG_ON(ret);
5f39d397
CM
1139
1140 leaf = path->nodes[0];
1141 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1142 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1143 btrfs_mark_buffer_dirty(leaf);
9078a3e1 1144 btrfs_release_path(extent_root, path);
54aa1f4d 1145fail:
9078a3e1
CM
1146 finish_current_insert(trans, extent_root);
1147 pending_ret = del_pending_extents(trans, extent_root);
1148 if (ret)
1149 return ret;
1150 if (pending_ret)
1151 return pending_ret;
1152 return 0;
1153
1154}
1155
96b5179d
CM
1156int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1157 struct btrfs_root *root)
9078a3e1 1158{
d1310b2e 1159 struct extent_io_tree *block_group_cache;
96b5179d 1160 struct btrfs_block_group_cache *cache;
9078a3e1
CM
1161 int ret;
1162 int err = 0;
1163 int werr = 0;
9078a3e1 1164 struct btrfs_path *path;
96b5179d
CM
1165 u64 last = 0;
1166 u64 start;
1167 u64 end;
1168 u64 ptr;
9078a3e1 1169
96b5179d 1170 block_group_cache = &root->fs_info->block_group_cache;
9078a3e1
CM
1171 path = btrfs_alloc_path();
1172 if (!path)
1173 return -ENOMEM;
1174
925baedd 1175 mutex_lock(&root->fs_info->alloc_mutex);
9078a3e1 1176 while(1) {
96b5179d
CM
1177 ret = find_first_extent_bit(block_group_cache, last,
1178 &start, &end, BLOCK_GROUP_DIRTY);
1179 if (ret)
9078a3e1 1180 break;
54aa1f4d 1181
96b5179d
CM
1182 last = end + 1;
1183 ret = get_state_private(block_group_cache, start, &ptr);
1184 if (ret)
1185 break;
ae2f5411 1186 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
96b5179d
CM
1187 err = write_one_cache_group(trans, root,
1188 path, cache);
1189 /*
1190 * if we fail to write the cache group, we want
1191 * to keep it marked dirty in hopes that a later
1192 * write will work
1193 */
1194 if (err) {
1195 werr = err;
1196 continue;
9078a3e1 1197 }
96b5179d
CM
1198 clear_extent_bits(block_group_cache, start, end,
1199 BLOCK_GROUP_DIRTY, GFP_NOFS);
9078a3e1
CM
1200 }
1201 btrfs_free_path(path);
925baedd 1202 mutex_unlock(&root->fs_info->alloc_mutex);
9078a3e1
CM
1203 return werr;
1204}
1205
6324fbf3
CM
1206static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
1207 u64 flags)
1208{
1209 struct list_head *head = &info->space_info;
1210 struct list_head *cur;
1211 struct btrfs_space_info *found;
1212 list_for_each(cur, head) {
1213 found = list_entry(cur, struct btrfs_space_info, list);
1214 if (found->flags == flags)
1215 return found;
1216 }
1217 return NULL;
1218
1219}
1220
593060d7
CM
1221static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1222 u64 total_bytes, u64 bytes_used,
1223 struct btrfs_space_info **space_info)
1224{
1225 struct btrfs_space_info *found;
1226
1227 found = __find_space_info(info, flags);
1228 if (found) {
1229 found->total_bytes += total_bytes;
1230 found->bytes_used += bytes_used;
8f18cf13 1231 found->full = 0;
593060d7
CM
1232 *space_info = found;
1233 return 0;
1234 }
1235 found = kmalloc(sizeof(*found), GFP_NOFS);
1236 if (!found)
1237 return -ENOMEM;
1238
1239 list_add(&found->list, &info->space_info);
1240 found->flags = flags;
1241 found->total_bytes = total_bytes;
1242 found->bytes_used = bytes_used;
1243 found->bytes_pinned = 0;
1244 found->full = 0;
0ef3e66b 1245 found->force_alloc = 0;
593060d7
CM
1246 *space_info = found;
1247 return 0;
1248}
1249
8790d502
CM
1250static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1251{
1252 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
611f0e00 1253 BTRFS_BLOCK_GROUP_RAID1 |
321aecc6 1254 BTRFS_BLOCK_GROUP_RAID10 |
611f0e00 1255 BTRFS_BLOCK_GROUP_DUP);
8790d502
CM
1256 if (extra_flags) {
1257 if (flags & BTRFS_BLOCK_GROUP_DATA)
1258 fs_info->avail_data_alloc_bits |= extra_flags;
1259 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1260 fs_info->avail_metadata_alloc_bits |= extra_flags;
1261 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1262 fs_info->avail_system_alloc_bits |= extra_flags;
1263 }
1264}
593060d7 1265
a061fc8d 1266static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
ec44a35c 1267{
a061fc8d
CM
1268 u64 num_devices = root->fs_info->fs_devices->num_devices;
1269
1270 if (num_devices == 1)
1271 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1272 if (num_devices < 4)
1273 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1274
ec44a35c
CM
1275 if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1276 (flags & (BTRFS_BLOCK_GROUP_RAID1 |
a061fc8d 1277 BTRFS_BLOCK_GROUP_RAID10))) {
ec44a35c 1278 flags &= ~BTRFS_BLOCK_GROUP_DUP;
a061fc8d 1279 }
ec44a35c
CM
1280
1281 if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
a061fc8d 1282 (flags & BTRFS_BLOCK_GROUP_RAID10)) {
ec44a35c 1283 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
a061fc8d 1284 }
ec44a35c
CM
1285
1286 if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1287 ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1288 (flags & BTRFS_BLOCK_GROUP_RAID10) |
1289 (flags & BTRFS_BLOCK_GROUP_DUP)))
1290 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1291 return flags;
1292}
1293
6324fbf3
CM
1294static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1295 struct btrfs_root *extent_root, u64 alloc_bytes,
0ef3e66b 1296 u64 flags, int force)
6324fbf3
CM
1297{
1298 struct btrfs_space_info *space_info;
1299 u64 thresh;
1300 u64 start;
1301 u64 num_bytes;
1302 int ret;
1303
a061fc8d 1304 flags = reduce_alloc_profile(extent_root, flags);
ec44a35c 1305
6324fbf3 1306 space_info = __find_space_info(extent_root->fs_info, flags);
593060d7
CM
1307 if (!space_info) {
1308 ret = update_space_info(extent_root->fs_info, flags,
1309 0, 0, &space_info);
1310 BUG_ON(ret);
1311 }
6324fbf3
CM
1312 BUG_ON(!space_info);
1313
0ef3e66b
CM
1314 if (space_info->force_alloc) {
1315 force = 1;
1316 space_info->force_alloc = 0;
1317 }
6324fbf3 1318 if (space_info->full)
925baedd 1319 goto out;
6324fbf3 1320
8790d502 1321 thresh = div_factor(space_info->total_bytes, 6);
0ef3e66b
CM
1322 if (!force &&
1323 (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
6324fbf3 1324 thresh)
925baedd 1325 goto out;
6324fbf3 1326
925baedd 1327 mutex_lock(&extent_root->fs_info->chunk_mutex);
6324fbf3
CM
1328 ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1329 if (ret == -ENOSPC) {
1330printk("space info full %Lu\n", flags);
1331 space_info->full = 1;
a74a4b97 1332 goto out_unlock;
6324fbf3 1333 }
6324fbf3
CM
1334 BUG_ON(ret);
1335
1336 ret = btrfs_make_block_group(trans, extent_root, 0, flags,
e17cade2 1337 BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
6324fbf3 1338 BUG_ON(ret);
a74a4b97 1339out_unlock:
333db94c 1340 mutex_unlock(&extent_root->fs_info->chunk_mutex);
a74a4b97 1341out:
6324fbf3
CM
1342 return 0;
1343}
1344
9078a3e1
CM
1345static int update_block_group(struct btrfs_trans_handle *trans,
1346 struct btrfs_root *root,
db94535d 1347 u64 bytenr, u64 num_bytes, int alloc,
0b86a832 1348 int mark_free)
9078a3e1
CM
1349{
1350 struct btrfs_block_group_cache *cache;
1351 struct btrfs_fs_info *info = root->fs_info;
db94535d 1352 u64 total = num_bytes;
9078a3e1 1353 u64 old_val;
db94535d 1354 u64 byte_in_group;
96b5179d
CM
1355 u64 start;
1356 u64 end;
3e1ad54f 1357
7d9eb12c 1358 WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
9078a3e1 1359 while(total) {
db94535d 1360 cache = btrfs_lookup_block_group(info, bytenr);
3e1ad54f 1361 if (!cache) {
9078a3e1 1362 return -1;
cd1bc465 1363 }
db94535d
CM
1364 byte_in_group = bytenr - cache->key.objectid;
1365 WARN_ON(byte_in_group > cache->key.offset);
96b5179d
CM
1366 start = cache->key.objectid;
1367 end = start + cache->key.offset - 1;
1368 set_extent_bits(&info->block_group_cache, start, end,
1369 BLOCK_GROUP_DIRTY, GFP_NOFS);
9078a3e1 1370
c286ac48 1371 spin_lock(&cache->lock);
9078a3e1 1372 old_val = btrfs_block_group_used(&cache->item);
db94535d 1373 num_bytes = min(total, cache->key.offset - byte_in_group);
cd1bc465 1374 if (alloc) {
db94535d 1375 old_val += num_bytes;
6324fbf3 1376 cache->space_info->bytes_used += num_bytes;
c286ac48
CM
1377 btrfs_set_block_group_used(&cache->item, old_val);
1378 spin_unlock(&cache->lock);
cd1bc465 1379 } else {
db94535d 1380 old_val -= num_bytes;
6324fbf3 1381 cache->space_info->bytes_used -= num_bytes;
c286ac48
CM
1382 btrfs_set_block_group_used(&cache->item, old_val);
1383 spin_unlock(&cache->lock);
f510cfec
CM
1384 if (mark_free) {
1385 set_extent_dirty(&info->free_space_cache,
db94535d 1386 bytenr, bytenr + num_bytes - 1,
f510cfec 1387 GFP_NOFS);
e37c9e69 1388 }
cd1bc465 1389 }
db94535d
CM
1390 total -= num_bytes;
1391 bytenr += num_bytes;
9078a3e1
CM
1392 }
1393 return 0;
1394}
6324fbf3 1395
a061fc8d
CM
1396static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
1397{
1398 u64 start;
1399 u64 end;
1400 int ret;
1401 ret = find_first_extent_bit(&root->fs_info->block_group_cache,
1402 search_start, &start, &end,
1403 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
1404 BLOCK_GROUP_SYSTEM);
1405 if (ret)
1406 return 0;
1407 return start;
1408}
1409
1410
324ae4df
Y
1411static int update_pinned_extents(struct btrfs_root *root,
1412 u64 bytenr, u64 num, int pin)
1413{
1414 u64 len;
1415 struct btrfs_block_group_cache *cache;
1416 struct btrfs_fs_info *fs_info = root->fs_info;
1417
7d9eb12c 1418 WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
324ae4df
Y
1419 if (pin) {
1420 set_extent_dirty(&fs_info->pinned_extents,
1421 bytenr, bytenr + num - 1, GFP_NOFS);
1422 } else {
1423 clear_extent_dirty(&fs_info->pinned_extents,
1424 bytenr, bytenr + num - 1, GFP_NOFS);
1425 }
1426 while (num > 0) {
1427 cache = btrfs_lookup_block_group(fs_info, bytenr);
a061fc8d
CM
1428 if (!cache) {
1429 u64 first = first_logical_byte(root, bytenr);
1430 WARN_ON(first < bytenr);
1431 len = min(first - bytenr, num);
1432 } else {
1433 len = min(num, cache->key.offset -
1434 (bytenr - cache->key.objectid));
1435 }
324ae4df 1436 if (pin) {
a061fc8d 1437 if (cache) {
c286ac48 1438 spin_lock(&cache->lock);
a061fc8d
CM
1439 cache->pinned += len;
1440 cache->space_info->bytes_pinned += len;
c286ac48 1441 spin_unlock(&cache->lock);
a061fc8d 1442 }
324ae4df
Y
1443 fs_info->total_pinned += len;
1444 } else {
a061fc8d 1445 if (cache) {
c286ac48 1446 spin_lock(&cache->lock);
a061fc8d
CM
1447 cache->pinned -= len;
1448 cache->space_info->bytes_pinned -= len;
c286ac48 1449 spin_unlock(&cache->lock);
a061fc8d 1450 }
324ae4df
Y
1451 fs_info->total_pinned -= len;
1452 }
1453 bytenr += len;
1454 num -= len;
1455 }
1456 return 0;
1457}
9078a3e1 1458
d1310b2e 1459int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
ccd467d6 1460{
ccd467d6 1461 u64 last = 0;
1a5bc167
CM
1462 u64 start;
1463 u64 end;
d1310b2e 1464 struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
ccd467d6 1465 int ret;
ccd467d6
CM
1466
1467 while(1) {
1a5bc167
CM
1468 ret = find_first_extent_bit(pinned_extents, last,
1469 &start, &end, EXTENT_DIRTY);
1470 if (ret)
ccd467d6 1471 break;
1a5bc167
CM
1472 set_extent_dirty(copy, start, end, GFP_NOFS);
1473 last = end + 1;
ccd467d6
CM
1474 }
1475 return 0;
1476}
1477
1478int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1479 struct btrfs_root *root,
d1310b2e 1480 struct extent_io_tree *unpin)
a28ec197 1481{
1a5bc167
CM
1482 u64 start;
1483 u64 end;
a28ec197 1484 int ret;
d1310b2e 1485 struct extent_io_tree *free_space_cache;
f510cfec 1486 free_space_cache = &root->fs_info->free_space_cache;
a28ec197 1487
925baedd 1488 mutex_lock(&root->fs_info->alloc_mutex);
a28ec197 1489 while(1) {
1a5bc167
CM
1490 ret = find_first_extent_bit(unpin, 0, &start, &end,
1491 EXTENT_DIRTY);
1492 if (ret)
a28ec197 1493 break;
324ae4df 1494 update_pinned_extents(root, start, end + 1 - start, 0);
1a5bc167
CM
1495 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1496 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
c286ac48
CM
1497 if (need_resched()) {
1498 mutex_unlock(&root->fs_info->alloc_mutex);
1499 cond_resched();
1500 mutex_lock(&root->fs_info->alloc_mutex);
1501 }
a28ec197 1502 }
925baedd 1503 mutex_unlock(&root->fs_info->alloc_mutex);
a28ec197
CM
1504 return 0;
1505}
1506
98ed5174
CM
1507static int finish_current_insert(struct btrfs_trans_handle *trans,
1508 struct btrfs_root *extent_root)
037e6390 1509{
7bb86316
CM
1510 u64 start;
1511 u64 end;
1512 struct btrfs_fs_info *info = extent_root->fs_info;
d8d5f3e1 1513 struct extent_buffer *eb;
7bb86316 1514 struct btrfs_path *path;
e2fa7227 1515 struct btrfs_key ins;
d8d5f3e1 1516 struct btrfs_disk_key first;
234b63a0 1517 struct btrfs_extent_item extent_item;
037e6390 1518 int ret;
d8d5f3e1 1519 int level;
1a5bc167 1520 int err = 0;
037e6390 1521
7d9eb12c 1522 WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
5f39d397 1523 btrfs_set_stack_extent_refs(&extent_item, 1);
62e2749e 1524 btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
7bb86316 1525 path = btrfs_alloc_path();
037e6390 1526
26b8003f 1527 while(1) {
1a5bc167
CM
1528 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1529 &end, EXTENT_LOCKED);
1530 if (ret)
26b8003f
CM
1531 break;
1532
1a5bc167
CM
1533 ins.objectid = start;
1534 ins.offset = end + 1 - start;
1535 err = btrfs_insert_item(trans, extent_root, &ins,
1536 &extent_item, sizeof(extent_item));
1537 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1538 GFP_NOFS);
c286ac48
CM
1539
1540 eb = btrfs_find_tree_block(extent_root, ins.objectid,
1541 ins.offset);
1542
1543 if (!btrfs_buffer_uptodate(eb, trans->transid)) {
1544 mutex_unlock(&extent_root->fs_info->alloc_mutex);
1545 btrfs_read_buffer(eb, trans->transid);
1546 mutex_lock(&extent_root->fs_info->alloc_mutex);
1547 }
1548
925baedd 1549 btrfs_tree_lock(eb);
d8d5f3e1
CM
1550 level = btrfs_header_level(eb);
1551 if (level == 0) {
1552 btrfs_item_key(eb, &first, 0);
1553 } else {
1554 btrfs_node_key(eb, &first, 0);
1555 }
925baedd
CM
1556 btrfs_tree_unlock(eb);
1557 free_extent_buffer(eb);
1558 /*
1559 * the first key is just a hint, so the race we've created
1560 * against reading it is fine
1561 */
7bb86316
CM
1562 err = btrfs_insert_extent_backref(trans, extent_root, path,
1563 start, extent_root->root_key.objectid,
f6dbff55
CM
1564 0, level,
1565 btrfs_disk_key_objectid(&first));
7bb86316 1566 BUG_ON(err);
c286ac48
CM
1567 if (need_resched()) {
1568 mutex_unlock(&extent_root->fs_info->alloc_mutex);
1569 cond_resched();
1570 mutex_lock(&extent_root->fs_info->alloc_mutex);
1571 }
037e6390 1572 }
7bb86316 1573 btrfs_free_path(path);
037e6390
CM
1574 return 0;
1575}
1576
db94535d
CM
1577static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1578 int pending)
e20d96d6 1579{
1a5bc167 1580 int err = 0;
8ef97622 1581
7d9eb12c 1582 WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
f4b9aa8d 1583 if (!pending) {
925baedd 1584 struct extent_buffer *buf;
db94535d 1585 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
5f39d397 1586 if (buf) {
974e35a8
Y
1587 if (btrfs_buffer_uptodate(buf, 0) &&
1588 btrfs_try_tree_lock(buf)) {
2c90e5d6
CM
1589 u64 transid =
1590 root->fs_info->running_transaction->transid;
dc17ff8f
CM
1591 u64 header_transid =
1592 btrfs_header_generation(buf);
6bc34676
CM
1593 if (header_transid == transid &&
1594 !btrfs_header_flag(buf,
1595 BTRFS_HEADER_FLAG_WRITTEN)) {
55c69072 1596 clean_tree_block(NULL, root, buf);
925baedd 1597 btrfs_tree_unlock(buf);
5f39d397 1598 free_extent_buffer(buf);
c549228f 1599 return 1;
2c90e5d6 1600 }
925baedd 1601 btrfs_tree_unlock(buf);
f4b9aa8d 1602 }
5f39d397 1603 free_extent_buffer(buf);
8ef97622 1604 }
324ae4df 1605 update_pinned_extents(root, bytenr, num_bytes, 1);
f4b9aa8d 1606 } else {
1a5bc167 1607 set_extent_bits(&root->fs_info->pending_del,
db94535d
CM
1608 bytenr, bytenr + num_bytes - 1,
1609 EXTENT_LOCKED, GFP_NOFS);
f4b9aa8d 1610 }
be744175 1611 BUG_ON(err < 0);
e20d96d6
CM
1612 return 0;
1613}
1614
fec577fb 1615/*
a28ec197 1616 * remove an extent from the root, returns 0 on success
fec577fb 1617 */
e089f05c 1618static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
7bb86316
CM
1619 *root, u64 bytenr, u64 num_bytes,
1620 u64 root_objectid, u64 ref_generation,
1621 u64 owner_objectid, u64 owner_offset, int pin,
e37c9e69 1622 int mark_free)
a28ec197 1623{
5caf2a00 1624 struct btrfs_path *path;
e2fa7227 1625 struct btrfs_key key;
1261ec42
CM
1626 struct btrfs_fs_info *info = root->fs_info;
1627 struct btrfs_root *extent_root = info->extent_root;
5f39d397 1628 struct extent_buffer *leaf;
a28ec197 1629 int ret;
952fccac
CM
1630 int extent_slot = 0;
1631 int found_extent = 0;
1632 int num_to_del = 1;
234b63a0 1633 struct btrfs_extent_item *ei;
cf27e1ee 1634 u32 refs;
037e6390 1635
7d9eb12c 1636 WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
db94535d 1637 key.objectid = bytenr;
62e2749e 1638 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
db94535d 1639 key.offset = num_bytes;
5caf2a00 1640 path = btrfs_alloc_path();
54aa1f4d
CM
1641 if (!path)
1642 return -ENOMEM;
5f26f772 1643
3c12ac72 1644 path->reada = 1;
7bb86316
CM
1645 ret = lookup_extent_backref(trans, extent_root, path,
1646 bytenr, root_objectid,
1647 ref_generation,
1648 owner_objectid, owner_offset, 1);
1649 if (ret == 0) {
952fccac
CM
1650 struct btrfs_key found_key;
1651 extent_slot = path->slots[0];
1652 while(extent_slot > 0) {
1653 extent_slot--;
1654 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1655 extent_slot);
1656 if (found_key.objectid != bytenr)
1657 break;
1658 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1659 found_key.offset == num_bytes) {
1660 found_extent = 1;
1661 break;
1662 }
1663 if (path->slots[0] - extent_slot > 5)
1664 break;
1665 }
1666 if (!found_extent)
1667 ret = btrfs_del_item(trans, extent_root, path);
7bb86316
CM
1668 } else {
1669 btrfs_print_leaf(extent_root, path->nodes[0]);
1670 WARN_ON(1);
1671 printk("Unable to find ref byte nr %Lu root %Lu "
1672 " gen %Lu owner %Lu offset %Lu\n", bytenr,
1673 root_objectid, ref_generation, owner_objectid,
1674 owner_offset);
1675 }
952fccac
CM
1676 if (!found_extent) {
1677 btrfs_release_path(extent_root, path);
1678 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1679 if (ret < 0)
1680 return ret;
1681 BUG_ON(ret);
1682 extent_slot = path->slots[0];
1683 }
5f39d397
CM
1684
1685 leaf = path->nodes[0];
952fccac 1686 ei = btrfs_item_ptr(leaf, extent_slot,
123abc88 1687 struct btrfs_extent_item);
5f39d397
CM
1688 refs = btrfs_extent_refs(leaf, ei);
1689 BUG_ON(refs == 0);
1690 refs -= 1;
1691 btrfs_set_extent_refs(leaf, ei, refs);
952fccac 1692
5f39d397
CM
1693 btrfs_mark_buffer_dirty(leaf);
1694
952fccac
CM
1695 if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1696 /* if the back ref and the extent are next to each other
1697 * they get deleted below in one shot
1698 */
1699 path->slots[0] = extent_slot;
1700 num_to_del = 2;
1701 } else if (found_extent) {
1702 /* otherwise delete the extent back ref */
1703 ret = btrfs_del_item(trans, extent_root, path);
1704 BUG_ON(ret);
1705 /* if refs are 0, we need to setup the path for deletion */
1706 if (refs == 0) {
1707 btrfs_release_path(extent_root, path);
1708 ret = btrfs_search_slot(trans, extent_root, &key, path,
1709 -1, 1);
1710 if (ret < 0)
1711 return ret;
1712 BUG_ON(ret);
1713 }
1714 }
1715
cf27e1ee 1716 if (refs == 0) {
db94535d
CM
1717 u64 super_used;
1718 u64 root_used;
78fae27e
CM
1719
1720 if (pin) {
db94535d 1721 ret = pin_down_bytes(root, bytenr, num_bytes, 0);
c549228f
Y
1722 if (ret > 0)
1723 mark_free = 1;
1724 BUG_ON(ret < 0);
78fae27e
CM
1725 }
1726
58176a96 1727 /* block accounting for super block */
a2135011 1728 spin_lock_irq(&info->delalloc_lock);
db94535d
CM
1729 super_used = btrfs_super_bytes_used(&info->super_copy);
1730 btrfs_set_super_bytes_used(&info->super_copy,
1731 super_used - num_bytes);
a2135011 1732 spin_unlock_irq(&info->delalloc_lock);
58176a96
JB
1733
1734 /* block accounting for root item */
db94535d 1735 root_used = btrfs_root_used(&root->root_item);
5f39d397 1736 btrfs_set_root_used(&root->root_item,
db94535d 1737 root_used - num_bytes);
952fccac
CM
1738 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1739 num_to_del);
54aa1f4d
CM
1740 if (ret) {
1741 return ret;
1742 }
db94535d 1743 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
0b86a832 1744 mark_free);
9078a3e1 1745 BUG_ON(ret);
a28ec197 1746 }
5caf2a00 1747 btrfs_free_path(path);
e089f05c 1748 finish_current_insert(trans, extent_root);
a28ec197
CM
1749 return ret;
1750}
1751
a28ec197
CM
1752/*
1753 * find all the blocks marked as pending in the radix tree and remove
1754 * them from the extent map
1755 */
e089f05c
CM
1756static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1757 btrfs_root *extent_root)
a28ec197
CM
1758{
1759 int ret;
e20d96d6 1760 int err = 0;
1a5bc167
CM
1761 u64 start;
1762 u64 end;
d1310b2e
CM
1763 struct extent_io_tree *pending_del;
1764 struct extent_io_tree *pinned_extents;
8ef97622 1765
7d9eb12c 1766 WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
1a5bc167
CM
1767 pending_del = &extent_root->fs_info->pending_del;
1768 pinned_extents = &extent_root->fs_info->pinned_extents;
a28ec197
CM
1769
1770 while(1) {
1a5bc167
CM
1771 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1772 EXTENT_LOCKED);
1773 if (ret)
a28ec197 1774 break;
1a5bc167
CM
1775 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1776 GFP_NOFS);
c286ac48
CM
1777 if (!test_range_bit(&extent_root->fs_info->extent_ins,
1778 start, end, EXTENT_LOCKED, 0)) {
1779 update_pinned_extents(extent_root, start,
1780 end + 1 - start, 1);
1781 ret = __free_extent(trans, extent_root,
1782 start, end + 1 - start,
1783 extent_root->root_key.objectid,
1784 0, 0, 0, 0, 0);
1785 } else {
1786 clear_extent_bits(&extent_root->fs_info->extent_ins,
1787 start, end, EXTENT_LOCKED, GFP_NOFS);
1788 }
1a5bc167
CM
1789 if (ret)
1790 err = ret;
c286ac48
CM
1791
1792 if (need_resched()) {
1793 mutex_unlock(&extent_root->fs_info->alloc_mutex);
1794 cond_resched();
1795 mutex_lock(&extent_root->fs_info->alloc_mutex);
1796 }
fec577fb 1797 }
e20d96d6 1798 return err;
fec577fb
CM
1799}
1800
1801/*
1802 * remove an extent from the root, returns 0 on success
1803 */
925baedd
CM
1804static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
1805 struct btrfs_root *root, u64 bytenr,
1806 u64 num_bytes, u64 root_objectid,
1807 u64 ref_generation, u64 owner_objectid,
1808 u64 owner_offset, int pin)
fec577fb 1809{
9f5fae2f 1810 struct btrfs_root *extent_root = root->fs_info->extent_root;
fec577fb
CM
1811 int pending_ret;
1812 int ret;
a28ec197 1813
db94535d 1814 WARN_ON(num_bytes < root->sectorsize);
7bb86316
CM
1815 if (!root->ref_cows)
1816 ref_generation = 0;
1817
fec577fb 1818 if (root == extent_root) {
db94535d 1819 pin_down_bytes(root, bytenr, num_bytes, 1);
fec577fb
CM
1820 return 0;
1821 }
7bb86316
CM
1822 ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1823 ref_generation, owner_objectid, owner_offset,
1824 pin, pin == 0);
ee6e6504
CM
1825
1826 finish_current_insert(trans, root->fs_info->extent_root);
e20d96d6 1827 pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
fec577fb
CM
1828 return ret ? ret : pending_ret;
1829}
1830
925baedd
CM
1831int btrfs_free_extent(struct btrfs_trans_handle *trans,
1832 struct btrfs_root *root, u64 bytenr,
1833 u64 num_bytes, u64 root_objectid,
1834 u64 ref_generation, u64 owner_objectid,
1835 u64 owner_offset, int pin)
1836{
1837 int ret;
1838
1839 maybe_lock_mutex(root);
1840 ret = __btrfs_free_extent(trans, root, bytenr, num_bytes,
1841 root_objectid, ref_generation,
1842 owner_objectid, owner_offset, pin);
1843 maybe_unlock_mutex(root);
1844 return ret;
1845}
1846
87ee04eb
CM
1847static u64 stripe_align(struct btrfs_root *root, u64 val)
1848{
1849 u64 mask = ((u64)root->stripesize - 1);
1850 u64 ret = (val + mask) & ~mask;
1851 return ret;
1852}
1853
fec577fb
CM
1854/*
1855 * walks the btree of allocated extents and find a hole of a given size.
1856 * The key ins is changed to record the hole:
1857 * ins->objectid == block start
62e2749e 1858 * ins->flags = BTRFS_EXTENT_ITEM_KEY
fec577fb
CM
1859 * ins->offset == number of blocks
1860 * Any available blocks before search_start are skipped.
1861 */
98ed5174
CM
1862static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1863 struct btrfs_root *orig_root,
1864 u64 num_bytes, u64 empty_size,
1865 u64 search_start, u64 search_end,
1866 u64 hint_byte, struct btrfs_key *ins,
1867 u64 exclude_start, u64 exclude_nr,
1868 int data)
fec577fb 1869{
87ee04eb 1870 int ret;
a061fc8d 1871 u64 orig_search_start;
9f5fae2f 1872 struct btrfs_root * root = orig_root->fs_info->extent_root;
f2458e1d 1873 struct btrfs_fs_info *info = root->fs_info;
db94535d 1874 u64 total_needed = num_bytes;
239b14b3 1875 u64 *last_ptr = NULL;
be08c1b9 1876 struct btrfs_block_group_cache *block_group;
be744175 1877 int full_scan = 0;
fbdc762b 1878 int wrapped = 0;
0ef3e66b 1879 int chunk_alloc_done = 0;
239b14b3 1880 int empty_cluster = 2 * 1024 * 1024;
0ef3e66b 1881 int allowed_chunk_alloc = 0;
fec577fb 1882
db94535d 1883 WARN_ON(num_bytes < root->sectorsize);
b1a4d965
CM
1884 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1885
0ef3e66b
CM
1886 if (orig_root->ref_cows || empty_size)
1887 allowed_chunk_alloc = 1;
1888
239b14b3
CM
1889 if (data & BTRFS_BLOCK_GROUP_METADATA) {
1890 last_ptr = &root->fs_info->last_alloc;
8790d502 1891 empty_cluster = 256 * 1024;
239b14b3
CM
1892 }
1893
1894 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
1895 last_ptr = &root->fs_info->last_data_alloc;
1896 }
1897
1898 if (last_ptr) {
1899 if (*last_ptr)
1900 hint_byte = *last_ptr;
1901 else {
1902 empty_size += empty_cluster;
1903 }
1904 }
1905
a061fc8d
CM
1906 search_start = max(search_start, first_logical_byte(root, 0));
1907 orig_search_start = search_start;
1908
7f93bf8d
CM
1909 if (search_end == (u64)-1)
1910 search_end = btrfs_super_total_bytes(&info->super_copy);
0b86a832 1911
db94535d 1912 if (hint_byte) {
0ef3e66b 1913 block_group = btrfs_lookup_first_block_group(info, hint_byte);
1a2b2ac7
CM
1914 if (!block_group)
1915 hint_byte = search_start;
c286ac48 1916 block_group = btrfs_find_block_group(root, block_group,
db94535d 1917 hint_byte, data, 1);
239b14b3
CM
1918 if (last_ptr && *last_ptr == 0 && block_group)
1919 hint_byte = block_group->key.objectid;
be744175 1920 } else {
c286ac48 1921 block_group = btrfs_find_block_group(root,
1a2b2ac7
CM
1922 trans->block_group,
1923 search_start, data, 1);
be744175 1924 }
239b14b3 1925 search_start = max(search_start, hint_byte);
be744175 1926
6702ed49 1927 total_needed += empty_size;
0b86a832 1928
be744175 1929check_failed:
70b043f0 1930 if (!block_group) {
0ef3e66b
CM
1931 block_group = btrfs_lookup_first_block_group(info,
1932 search_start);
70b043f0 1933 if (!block_group)
0ef3e66b 1934 block_group = btrfs_lookup_first_block_group(info,
70b043f0
CM
1935 orig_search_start);
1936 }
0ef3e66b
CM
1937 if (full_scan && !chunk_alloc_done) {
1938 if (allowed_chunk_alloc) {
1939 do_chunk_alloc(trans, root,
1940 num_bytes + 2 * 1024 * 1024, data, 1);
1941 allowed_chunk_alloc = 0;
1942 } else if (block_group && block_group_bits(block_group, data)) {
1943 block_group->space_info->force_alloc = 1;
1944 }
1945 chunk_alloc_done = 1;
1946 }
0b86a832
CM
1947 ret = find_search_start(root, &block_group, &search_start,
1948 total_needed, data);
239b14b3
CM
1949 if (ret == -ENOSPC && last_ptr && *last_ptr) {
1950 *last_ptr = 0;
0ef3e66b
CM
1951 block_group = btrfs_lookup_first_block_group(info,
1952 orig_search_start);
239b14b3
CM
1953 search_start = orig_search_start;
1954 ret = find_search_start(root, &block_group, &search_start,
1955 total_needed, data);
1956 }
1957 if (ret == -ENOSPC)
1958 goto enospc;
0b86a832 1959 if (ret)
d548ee51 1960 goto error;
e19caa5f 1961
239b14b3
CM
1962 if (last_ptr && *last_ptr && search_start != *last_ptr) {
1963 *last_ptr = 0;
1964 if (!empty_size) {
1965 empty_size += empty_cluster;
1966 total_needed += empty_size;
1967 }
0ef3e66b 1968 block_group = btrfs_lookup_first_block_group(info,
239b14b3
CM
1969 orig_search_start);
1970 search_start = orig_search_start;
1971 ret = find_search_start(root, &block_group,
1972 &search_start, total_needed, data);
1973 if (ret == -ENOSPC)
1974 goto enospc;
1975 if (ret)
1976 goto error;
1977 }
1978
0b86a832
CM
1979 search_start = stripe_align(root, search_start);
1980 ins->objectid = search_start;
1981 ins->offset = num_bytes;
e37c9e69 1982
db94535d 1983 if (ins->objectid + num_bytes >= search_end)
cf67582b 1984 goto enospc;
0b86a832
CM
1985
1986 if (ins->objectid + num_bytes >
1987 block_group->key.objectid + block_group->key.offset) {
e19caa5f
CM
1988 search_start = block_group->key.objectid +
1989 block_group->key.offset;
1990 goto new_group;
1991 }
0b86a832 1992
1a5bc167 1993 if (test_range_bit(&info->extent_ins, ins->objectid,
db94535d
CM
1994 ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1995 search_start = ins->objectid + num_bytes;
1a5bc167
CM
1996 goto new_group;
1997 }
0b86a832 1998
1a5bc167 1999 if (test_range_bit(&info->pinned_extents, ins->objectid,
db94535d
CM
2000 ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
2001 search_start = ins->objectid + num_bytes;
1a5bc167 2002 goto new_group;
fec577fb 2003 }
0b86a832 2004
db94535d 2005 if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
f2654de4
CM
2006 ins->objectid < exclude_start + exclude_nr)) {
2007 search_start = exclude_start + exclude_nr;
2008 goto new_group;
2009 }
0b86a832 2010
6324fbf3 2011 if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
5276aeda 2012 block_group = btrfs_lookup_block_group(info, ins->objectid);
26b8003f
CM
2013 if (block_group)
2014 trans->block_group = block_group;
f2458e1d 2015 }
db94535d 2016 ins->offset = num_bytes;
239b14b3
CM
2017 if (last_ptr) {
2018 *last_ptr = ins->objectid + ins->offset;
2019 if (*last_ptr ==
2020 btrfs_super_total_bytes(&root->fs_info->super_copy)) {
2021 *last_ptr = 0;
2022 }
2023 }
fec577fb 2024 return 0;
be744175
CM
2025
2026new_group:
db94535d 2027 if (search_start + num_bytes >= search_end) {
cf67582b 2028enospc:
be744175 2029 search_start = orig_search_start;
fbdc762b
CM
2030 if (full_scan) {
2031 ret = -ENOSPC;
2032 goto error;
2033 }
6702ed49
CM
2034 if (wrapped) {
2035 if (!full_scan)
2036 total_needed -= empty_size;
fbdc762b 2037 full_scan = 1;
6702ed49 2038 } else
fbdc762b 2039 wrapped = 1;
be744175 2040 }
0ef3e66b 2041 block_group = btrfs_lookup_first_block_group(info, search_start);
fbdc762b 2042 cond_resched();
c286ac48 2043 block_group = btrfs_find_block_group(root, block_group,
1a2b2ac7 2044 search_start, data, 0);
be744175
CM
2045 goto check_failed;
2046
0f70abe2 2047error:
0f70abe2 2048 return ret;
fec577fb 2049}
ec44a35c 2050
e6dcd2dc
CM
2051static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2052 struct btrfs_root *root,
2053 u64 num_bytes, u64 min_alloc_size,
2054 u64 empty_size, u64 hint_byte,
2055 u64 search_end, struct btrfs_key *ins,
2056 u64 data)
fec577fb
CM
2057{
2058 int ret;
fbdc762b 2059 u64 search_start = 0;
8790d502 2060 u64 alloc_profile;
1261ec42 2061 struct btrfs_fs_info *info = root->fs_info;
925baedd 2062
6324fbf3 2063 if (data) {
8790d502
CM
2064 alloc_profile = info->avail_data_alloc_bits &
2065 info->data_alloc_profile;
2066 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
6324fbf3 2067 } else if (root == root->fs_info->chunk_root) {
8790d502
CM
2068 alloc_profile = info->avail_system_alloc_bits &
2069 info->system_alloc_profile;
2070 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
6324fbf3 2071 } else {
8790d502
CM
2072 alloc_profile = info->avail_metadata_alloc_bits &
2073 info->metadata_alloc_profile;
2074 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
6324fbf3 2075 }
98d20f67 2076again:
a061fc8d 2077 data = reduce_alloc_profile(root, data);
0ef3e66b
CM
2078 /*
2079 * the only place that sets empty_size is btrfs_realloc_node, which
2080 * is not called recursively on allocations
2081 */
2082 if (empty_size || root->ref_cows) {
593060d7 2083 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
6324fbf3 2084 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
0ef3e66b
CM
2085 2 * 1024 * 1024,
2086 BTRFS_BLOCK_GROUP_METADATA |
2087 (info->metadata_alloc_profile &
2088 info->avail_metadata_alloc_bits), 0);
6324fbf3
CM
2089 BUG_ON(ret);
2090 }
2091 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
0ef3e66b 2092 num_bytes + 2 * 1024 * 1024, data, 0);
6324fbf3
CM
2093 BUG_ON(ret);
2094 }
0b86a832 2095
db94535d
CM
2096 WARN_ON(num_bytes < root->sectorsize);
2097 ret = find_free_extent(trans, root, num_bytes, empty_size,
2098 search_start, search_end, hint_byte, ins,
26b8003f
CM
2099 trans->alloc_exclude_start,
2100 trans->alloc_exclude_nr, data);
3b951516 2101
98d20f67
CM
2102 if (ret == -ENOSPC && num_bytes > min_alloc_size) {
2103 num_bytes = num_bytes >> 1;
2104 num_bytes = max(num_bytes, min_alloc_size);
0ef3e66b
CM
2105 do_chunk_alloc(trans, root->fs_info->extent_root,
2106 num_bytes, data, 1);
98d20f67
CM
2107 goto again;
2108 }
ec44a35c
CM
2109 if (ret) {
2110 printk("allocation failed flags %Lu\n", data);
925baedd 2111 BUG();
925baedd 2112 }
e6dcd2dc
CM
2113 clear_extent_dirty(&root->fs_info->free_space_cache,
2114 ins->objectid, ins->objectid + ins->offset - 1,
2115 GFP_NOFS);
2116 return 0;
2117}
2118
65b51a00
CM
2119int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
2120{
2121 maybe_lock_mutex(root);
2122 set_extent_dirty(&root->fs_info->free_space_cache,
2123 start, start + len - 1, GFP_NOFS);
2124 maybe_unlock_mutex(root);
2125 return 0;
2126}
2127
e6dcd2dc
CM
2128int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2129 struct btrfs_root *root,
2130 u64 num_bytes, u64 min_alloc_size,
2131 u64 empty_size, u64 hint_byte,
2132 u64 search_end, struct btrfs_key *ins,
2133 u64 data)
2134{
2135 int ret;
2136 maybe_lock_mutex(root);
2137 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
2138 empty_size, hint_byte, search_end, ins,
2139 data);
2140 maybe_unlock_mutex(root);
2141 return ret;
2142}
2143
2144static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2145 struct btrfs_root *root,
2146 u64 root_objectid, u64 ref_generation,
2147 u64 owner, u64 owner_offset,
2148 struct btrfs_key *ins)
2149{
2150 int ret;
2151 int pending_ret;
2152 u64 super_used;
2153 u64 root_used;
2154 u64 num_bytes = ins->offset;
2155 u32 sizes[2];
2156 struct btrfs_fs_info *info = root->fs_info;
2157 struct btrfs_root *extent_root = info->extent_root;
2158 struct btrfs_extent_item *extent_item;
2159 struct btrfs_extent_ref *ref;
2160 struct btrfs_path *path;
2161 struct btrfs_key keys[2];
fec577fb 2162
58176a96 2163 /* block accounting for super block */
a2135011 2164 spin_lock_irq(&info->delalloc_lock);
db94535d
CM
2165 super_used = btrfs_super_bytes_used(&info->super_copy);
2166 btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
a2135011 2167 spin_unlock_irq(&info->delalloc_lock);
26b8003f 2168
58176a96 2169 /* block accounting for root item */
db94535d
CM
2170 root_used = btrfs_root_used(&root->root_item);
2171 btrfs_set_root_used(&root->root_item, root_used + num_bytes);
58176a96 2172
26b8003f 2173 if (root == extent_root) {
1a5bc167
CM
2174 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
2175 ins->objectid + ins->offset - 1,
2176 EXTENT_LOCKED, GFP_NOFS);
26b8003f
CM
2177 goto update_block;
2178 }
2179
47e4bb98
CM
2180 memcpy(&keys[0], ins, sizeof(*ins));
2181 keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
2182 owner, owner_offset);
2183 keys[1].objectid = ins->objectid;
2184 keys[1].type = BTRFS_EXTENT_REF_KEY;
2185 sizes[0] = sizeof(*extent_item);
2186 sizes[1] = sizeof(*ref);
7bb86316
CM
2187
2188 path = btrfs_alloc_path();
2189 BUG_ON(!path);
47e4bb98
CM
2190
2191 ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
2192 sizes, 2);
26b8003f 2193
ccd467d6 2194 BUG_ON(ret);
47e4bb98
CM
2195 extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2196 struct btrfs_extent_item);
2197 btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
2198 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
2199 struct btrfs_extent_ref);
2200
2201 btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
2202 btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
2203 btrfs_set_ref_objectid(path->nodes[0], ref, owner);
2204 btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
2205
2206 btrfs_mark_buffer_dirty(path->nodes[0]);
2207
2208 trans->alloc_exclude_start = 0;
2209 trans->alloc_exclude_nr = 0;
7bb86316 2210 btrfs_free_path(path);
e089f05c 2211 finish_current_insert(trans, extent_root);
e20d96d6 2212 pending_ret = del_pending_extents(trans, extent_root);
f510cfec 2213
925baedd
CM
2214 if (ret)
2215 goto out;
e37c9e69 2216 if (pending_ret) {
925baedd
CM
2217 ret = pending_ret;
2218 goto out;
e37c9e69 2219 }
26b8003f
CM
2220
2221update_block:
0b86a832 2222 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
f5947066
CM
2223 if (ret) {
2224 printk("update block group failed for %Lu %Lu\n",
2225 ins->objectid, ins->offset);
2226 BUG();
2227 }
925baedd 2228out:
e6dcd2dc
CM
2229 return ret;
2230}
2231
2232int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2233 struct btrfs_root *root,
2234 u64 root_objectid, u64 ref_generation,
2235 u64 owner, u64 owner_offset,
2236 struct btrfs_key *ins)
2237{
2238 int ret;
2239 maybe_lock_mutex(root);
2240 ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
2241 ref_generation, owner,
2242 owner_offset, ins);
2243 maybe_unlock_mutex(root);
2244 return ret;
2245}
2246/*
2247 * finds a free extent and does all the dirty work required for allocation
2248 * returns the key for the extent through ins, and a tree buffer for
2249 * the first block of the extent through buf.
2250 *
2251 * returns 0 if everything worked, non-zero otherwise.
2252 */
2253int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
2254 struct btrfs_root *root,
2255 u64 num_bytes, u64 min_alloc_size,
2256 u64 root_objectid, u64 ref_generation,
2257 u64 owner, u64 owner_offset,
2258 u64 empty_size, u64 hint_byte,
2259 u64 search_end, struct btrfs_key *ins, u64 data)
2260{
2261 int ret;
2262
2263 maybe_lock_mutex(root);
2264
2265 ret = __btrfs_reserve_extent(trans, root, num_bytes,
2266 min_alloc_size, empty_size, hint_byte,
2267 search_end, ins, data);
2268 BUG_ON(ret);
2269 ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
2270 ref_generation, owner,
2271 owner_offset, ins);
2272 BUG_ON(ret);
2273
925baedd
CM
2274 maybe_unlock_mutex(root);
2275 return ret;
fec577fb 2276}
65b51a00
CM
2277
2278struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
2279 struct btrfs_root *root,
2280 u64 bytenr, u32 blocksize)
2281{
2282 struct extent_buffer *buf;
2283
2284 buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
2285 if (!buf)
2286 return ERR_PTR(-ENOMEM);
2287 btrfs_set_header_generation(buf, trans->transid);
2288 btrfs_tree_lock(buf);
2289 clean_tree_block(trans, root, buf);
2290 btrfs_set_buffer_uptodate(buf);
2291 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
2292 buf->start + buf->len - 1, GFP_NOFS);
2293 trans->blocks_used++;
2294 return buf;
2295}
2296
fec577fb
CM
2297/*
2298 * helper function to allocate a block for a given tree
2299 * returns the tree buffer or NULL.
2300 */
5f39d397 2301struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
7bb86316
CM
2302 struct btrfs_root *root,
2303 u32 blocksize,
2304 u64 root_objectid,
2305 u64 ref_generation,
2306 u64 first_objectid,
2307 int level,
2308 u64 hint,
5f39d397 2309 u64 empty_size)
fec577fb 2310{
e2fa7227 2311 struct btrfs_key ins;
fec577fb 2312 int ret;
5f39d397 2313 struct extent_buffer *buf;
fec577fb 2314
98d20f67 2315 ret = btrfs_alloc_extent(trans, root, blocksize, blocksize,
7bb86316 2316 root_objectid, ref_generation,
f6dbff55 2317 level, first_objectid, empty_size, hint,
db94535d 2318 (u64)-1, &ins, 0);
fec577fb 2319 if (ret) {
54aa1f4d
CM
2320 BUG_ON(ret > 0);
2321 return ERR_PTR(ret);
fec577fb 2322 }
55c69072 2323
65b51a00 2324 buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize);
fec577fb
CM
2325 return buf;
2326}
a28ec197 2327
31153d81 2328static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans,
bcc63abb 2329 struct btrfs_root *root,
31153d81 2330 struct extent_buffer *leaf)
6407bf6d 2331{
7bb86316
CM
2332 u64 leaf_owner;
2333 u64 leaf_generation;
5f39d397 2334 struct btrfs_key key;
6407bf6d
CM
2335 struct btrfs_file_extent_item *fi;
2336 int i;
2337 int nritems;
2338 int ret;
2339
5f39d397
CM
2340 BUG_ON(!btrfs_is_leaf(leaf));
2341 nritems = btrfs_header_nritems(leaf);
7bb86316
CM
2342 leaf_owner = btrfs_header_owner(leaf);
2343 leaf_generation = btrfs_header_generation(leaf);
2344
6407bf6d 2345 for (i = 0; i < nritems; i++) {
db94535d 2346 u64 disk_bytenr;
e34a5b4f 2347 cond_resched();
5f39d397
CM
2348
2349 btrfs_item_key_to_cpu(leaf, &key, i);
2350 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
6407bf6d
CM
2351 continue;
2352 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
5f39d397
CM
2353 if (btrfs_file_extent_type(leaf, fi) ==
2354 BTRFS_FILE_EXTENT_INLINE)
236454df 2355 continue;
6407bf6d
CM
2356 /*
2357 * FIXME make sure to insert a trans record that
2358 * repeats the snapshot del on crash
2359 */
db94535d
CM
2360 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2361 if (disk_bytenr == 0)
3a686375 2362 continue;
4a096752
CM
2363
2364 mutex_lock(&root->fs_info->alloc_mutex);
925baedd 2365 ret = __btrfs_free_extent(trans, root, disk_bytenr,
7bb86316
CM
2366 btrfs_file_extent_disk_num_bytes(leaf, fi),
2367 leaf_owner, leaf_generation,
2368 key.objectid, key.offset, 0);
4a096752 2369 mutex_unlock(&root->fs_info->alloc_mutex);
2dd3e67b
CM
2370
2371 atomic_inc(&root->fs_info->throttle_gen);
2372 wake_up(&root->fs_info->transaction_throttle);
2373 cond_resched();
2374
6407bf6d
CM
2375 BUG_ON(ret);
2376 }
2377 return 0;
2378}
2379
31153d81 2380static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
bcc63abb 2381 struct btrfs_root *root,
31153d81
YZ
2382 struct btrfs_leaf_ref *ref)
2383{
2384 int i;
2385 int ret;
2386 struct btrfs_extent_info *info = ref->extents;
2387
31153d81
YZ
2388 for (i = 0; i < ref->nritems; i++) {
2389 mutex_lock(&root->fs_info->alloc_mutex);
2390 ret = __btrfs_free_extent(trans, root,
2391 info->bytenr, info->num_bytes,
2392 ref->owner, ref->generation,
2393 info->objectid, info->offset, 0);
2394 mutex_unlock(&root->fs_info->alloc_mutex);
2dd3e67b
CM
2395
2396 atomic_inc(&root->fs_info->throttle_gen);
2397 wake_up(&root->fs_info->transaction_throttle);
2398 cond_resched();
2399
31153d81
YZ
2400 BUG_ON(ret);
2401 info++;
2402 }
31153d81
YZ
2403
2404 return 0;
2405}
2406
333db94c
CM
2407int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
2408 u32 *refs)
2409{
017e5369 2410 int ret;
f87f057b 2411
017e5369 2412 ret = lookup_extent_ref(NULL, root, start, len, refs);
f87f057b
CM
2413 BUG_ON(ret);
2414
2415#if 0 // some debugging code in case we see problems here
2416 /* if the refs count is one, it won't get increased again. But
2417 * if the ref count is > 1, someone may be decreasing it at
2418 * the same time we are.
2419 */
2420 if (*refs != 1) {
2421 struct extent_buffer *eb = NULL;
2422 eb = btrfs_find_create_tree_block(root, start, len);
2423 if (eb)
2424 btrfs_tree_lock(eb);
2425
2426 mutex_lock(&root->fs_info->alloc_mutex);
2427 ret = lookup_extent_ref(NULL, root, start, len, refs);
2428 BUG_ON(ret);
2429 mutex_unlock(&root->fs_info->alloc_mutex);
2430
2431 if (eb) {
2432 btrfs_tree_unlock(eb);
2433 free_extent_buffer(eb);
2434 }
2435 if (*refs == 1) {
2436 printk("block %llu went down to one during drop_snap\n",
2437 (unsigned long long)start);
2438 }
2439
2440 }
2441#endif
2442
e7a84565 2443 cond_resched();
017e5369 2444 return ret;
333db94c
CM
2445}
2446
9aca1d51
CM
2447/*
2448 * helper function for drop_snapshot, this walks down the tree dropping ref
2449 * counts as it goes.
2450 */
98ed5174
CM
2451static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2452 struct btrfs_root *root,
2453 struct btrfs_path *path, int *level)
20524f02 2454{
7bb86316
CM
2455 u64 root_owner;
2456 u64 root_gen;
2457 u64 bytenr;
ca7a79ad 2458 u64 ptr_gen;
5f39d397
CM
2459 struct extent_buffer *next;
2460 struct extent_buffer *cur;
7bb86316 2461 struct extent_buffer *parent;
31153d81 2462 struct btrfs_leaf_ref *ref;
db94535d 2463 u32 blocksize;
20524f02
CM
2464 int ret;
2465 u32 refs;
2466
5caf2a00
CM
2467 WARN_ON(*level < 0);
2468 WARN_ON(*level >= BTRFS_MAX_LEVEL);
333db94c 2469 ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
db94535d 2470 path->nodes[*level]->len, &refs);
20524f02
CM
2471 BUG_ON(ret);
2472 if (refs > 1)
2473 goto out;
e011599b 2474
9aca1d51
CM
2475 /*
2476 * walk down to the last node level and free all the leaves
2477 */
6407bf6d 2478 while(*level >= 0) {
5caf2a00
CM
2479 WARN_ON(*level < 0);
2480 WARN_ON(*level >= BTRFS_MAX_LEVEL);
20524f02 2481 cur = path->nodes[*level];
e011599b 2482
5f39d397 2483 if (btrfs_header_level(cur) != *level)
2c90e5d6 2484 WARN_ON(1);
e011599b 2485
7518a238 2486 if (path->slots[*level] >=
5f39d397 2487 btrfs_header_nritems(cur))
20524f02 2488 break;
6407bf6d 2489 if (*level == 0) {
31153d81 2490 ret = drop_leaf_ref_no_cache(trans, root, cur);
6407bf6d
CM
2491 BUG_ON(ret);
2492 break;
2493 }
db94535d 2494 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
ca7a79ad 2495 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
db94535d 2496 blocksize = btrfs_level_size(root, *level - 1);
925baedd 2497
333db94c 2498 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
6407bf6d
CM
2499 BUG_ON(ret);
2500 if (refs != 1) {
7bb86316
CM
2501 parent = path->nodes[*level];
2502 root_owner = btrfs_header_owner(parent);
2503 root_gen = btrfs_header_generation(parent);
20524f02 2504 path->slots[*level]++;
f87f057b
CM
2505
2506 mutex_lock(&root->fs_info->alloc_mutex);
925baedd 2507 ret = __btrfs_free_extent(trans, root, bytenr,
7bb86316
CM
2508 blocksize, root_owner,
2509 root_gen, 0, 0, 1);
20524f02 2510 BUG_ON(ret);
f87f057b 2511 mutex_unlock(&root->fs_info->alloc_mutex);
18e35e0a
CM
2512
2513 atomic_inc(&root->fs_info->throttle_gen);
2514 wake_up(&root->fs_info->transaction_throttle);
2dd3e67b 2515 cond_resched();
18e35e0a 2516
20524f02
CM
2517 continue;
2518 }
f87f057b
CM
2519 /*
2520 * at this point, we have a single ref, and since the
2521 * only place referencing this extent is a dead root
2522 * the reference count should never go higher.
2523 * So, we don't need to check it again
2524 */
31153d81
YZ
2525 if (*level == 1) {
2526 struct btrfs_key key;
2527 btrfs_node_key_to_cpu(cur, &key, path->slots[*level]);
017e5369 2528 ref = btrfs_lookup_leaf_ref(root, bytenr);
31153d81
YZ
2529 if (ref) {
2530 ret = drop_leaf_ref(trans, root, ref);
2531 BUG_ON(ret);
2532 btrfs_remove_leaf_ref(root, ref);
bcc63abb 2533 btrfs_free_leaf_ref(root, ref);
31153d81
YZ
2534 *level = 0;
2535 break;
2536 }
37d1aeee
CM
2537 if (printk_ratelimit())
2538 printk("leaf ref miss for bytenr %llu\n",
2539 (unsigned long long)bytenr);
31153d81 2540 }
db94535d 2541 next = btrfs_find_tree_block(root, bytenr, blocksize);
1259ab75 2542 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
5f39d397 2543 free_extent_buffer(next);
333db94c 2544
ca7a79ad
CM
2545 next = read_tree_block(root, bytenr, blocksize,
2546 ptr_gen);
e7a84565 2547 cond_resched();
f87f057b
CM
2548#if 0
2549 /*
2550 * this is a debugging check and can go away
2551 * the ref should never go all the way down to 1
2552 * at this point
2553 */
e6dcd2dc
CM
2554 ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
2555 &refs);
e9d0b13b 2556 BUG_ON(ret);
f87f057b
CM
2557 WARN_ON(refs != 1);
2558#endif
e9d0b13b 2559 }
5caf2a00 2560 WARN_ON(*level <= 0);
83e15a28 2561 if (path->nodes[*level-1])
5f39d397 2562 free_extent_buffer(path->nodes[*level-1]);
20524f02 2563 path->nodes[*level-1] = next;
5f39d397 2564 *level = btrfs_header_level(next);
20524f02 2565 path->slots[*level] = 0;
2dd3e67b 2566 cond_resched();
20524f02
CM
2567 }
2568out:
5caf2a00
CM
2569 WARN_ON(*level < 0);
2570 WARN_ON(*level >= BTRFS_MAX_LEVEL);
7bb86316
CM
2571
2572 if (path->nodes[*level] == root->node) {
7bb86316 2573 parent = path->nodes[*level];
31153d81 2574 bytenr = path->nodes[*level]->start;
7bb86316
CM
2575 } else {
2576 parent = path->nodes[*level + 1];
31153d81 2577 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
7bb86316
CM
2578 }
2579
31153d81
YZ
2580 blocksize = btrfs_level_size(root, *level);
2581 root_owner = btrfs_header_owner(parent);
7bb86316 2582 root_gen = btrfs_header_generation(parent);
31153d81 2583
f87f057b 2584 mutex_lock(&root->fs_info->alloc_mutex);
31153d81
YZ
2585 ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
2586 root_owner, root_gen, 0, 0, 1);
5f39d397 2587 free_extent_buffer(path->nodes[*level]);
20524f02
CM
2588 path->nodes[*level] = NULL;
2589 *level += 1;
2590 BUG_ON(ret);
925baedd 2591 mutex_unlock(&root->fs_info->alloc_mutex);
f87f057b 2592
e7a84565 2593 cond_resched();
20524f02
CM
2594 return 0;
2595}
2596
9aca1d51
CM
2597/*
2598 * helper for dropping snapshots. This walks back up the tree in the path
2599 * to find the first node higher up where we haven't yet gone through
2600 * all the slots
2601 */
98ed5174
CM
2602static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2603 struct btrfs_root *root,
2604 struct btrfs_path *path, int *level)
20524f02 2605{
7bb86316
CM
2606 u64 root_owner;
2607 u64 root_gen;
2608 struct btrfs_root_item *root_item = &root->root_item;
20524f02
CM
2609 int i;
2610 int slot;
2611 int ret;
9f3a7427 2612
234b63a0 2613 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
20524f02 2614 slot = path->slots[i];
5f39d397
CM
2615 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2616 struct extent_buffer *node;
2617 struct btrfs_disk_key disk_key;
2618 node = path->nodes[i];
20524f02
CM
2619 path->slots[i]++;
2620 *level = i;
9f3a7427 2621 WARN_ON(*level == 0);
5f39d397 2622 btrfs_node_key(node, &disk_key, path->slots[i]);
9f3a7427 2623 memcpy(&root_item->drop_progress,
5f39d397 2624 &disk_key, sizeof(disk_key));
9f3a7427 2625 root_item->drop_level = i;
20524f02
CM
2626 return 0;
2627 } else {
7bb86316
CM
2628 if (path->nodes[*level] == root->node) {
2629 root_owner = root->root_key.objectid;
2630 root_gen =
2631 btrfs_header_generation(path->nodes[*level]);
2632 } else {
2633 struct extent_buffer *node;
2634 node = path->nodes[*level + 1];
2635 root_owner = btrfs_header_owner(node);
2636 root_gen = btrfs_header_generation(node);
2637 }
e089f05c 2638 ret = btrfs_free_extent(trans, root,
db94535d 2639 path->nodes[*level]->start,
7bb86316
CM
2640 path->nodes[*level]->len,
2641 root_owner, root_gen, 0, 0, 1);
6407bf6d 2642 BUG_ON(ret);
5f39d397 2643 free_extent_buffer(path->nodes[*level]);
83e15a28 2644 path->nodes[*level] = NULL;
20524f02 2645 *level = i + 1;
20524f02
CM
2646 }
2647 }
2648 return 1;
2649}
2650
9aca1d51
CM
2651/*
2652 * drop the reference count on the tree rooted at 'snap'. This traverses
2653 * the tree freeing any blocks that have a ref count of zero after being
2654 * decremented.
2655 */
e089f05c 2656int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
9f3a7427 2657 *root)
20524f02 2658{
3768f368 2659 int ret = 0;
9aca1d51 2660 int wret;
20524f02 2661 int level;
5caf2a00 2662 struct btrfs_path *path;
20524f02
CM
2663 int i;
2664 int orig_level;
9f3a7427 2665 struct btrfs_root_item *root_item = &root->root_item;
20524f02 2666
a2135011 2667 WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
5caf2a00
CM
2668 path = btrfs_alloc_path();
2669 BUG_ON(!path);
20524f02 2670
5f39d397 2671 level = btrfs_header_level(root->node);
20524f02 2672 orig_level = level;
9f3a7427
CM
2673 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2674 path->nodes[level] = root->node;
f510cfec 2675 extent_buffer_get(root->node);
9f3a7427
CM
2676 path->slots[level] = 0;
2677 } else {
2678 struct btrfs_key key;
5f39d397
CM
2679 struct btrfs_disk_key found_key;
2680 struct extent_buffer *node;
6702ed49 2681
9f3a7427 2682 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6702ed49
CM
2683 level = root_item->drop_level;
2684 path->lowest_level = level;
9f3a7427 2685 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6702ed49 2686 if (wret < 0) {
9f3a7427
CM
2687 ret = wret;
2688 goto out;
2689 }
5f39d397
CM
2690 node = path->nodes[level];
2691 btrfs_node_key(node, &found_key, path->slots[level]);
2692 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2693 sizeof(found_key)));
7d9eb12c
CM
2694 /*
2695 * unlock our path, this is safe because only this
2696 * function is allowed to delete this snapshot
2697 */
925baedd
CM
2698 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
2699 if (path->nodes[i] && path->locks[i]) {
2700 path->locks[i] = 0;
2701 btrfs_tree_unlock(path->nodes[i]);
2702 }
2703 }
9f3a7427 2704 }
20524f02 2705 while(1) {
5caf2a00 2706 wret = walk_down_tree(trans, root, path, &level);
9aca1d51 2707 if (wret > 0)
20524f02 2708 break;
9aca1d51
CM
2709 if (wret < 0)
2710 ret = wret;
2711
5caf2a00 2712 wret = walk_up_tree(trans, root, path, &level);
9aca1d51 2713 if (wret > 0)
20524f02 2714 break;
9aca1d51
CM
2715 if (wret < 0)
2716 ret = wret;
e7a84565
CM
2717 if (trans->transaction->in_commit) {
2718 ret = -EAGAIN;
2719 break;
2720 }
18e35e0a 2721 atomic_inc(&root->fs_info->throttle_gen);
017e5369 2722 wake_up(&root->fs_info->transaction_throttle);
20524f02 2723 }
83e15a28 2724 for (i = 0; i <= orig_level; i++) {
5caf2a00 2725 if (path->nodes[i]) {
5f39d397 2726 free_extent_buffer(path->nodes[i]);
0f82731f 2727 path->nodes[i] = NULL;
83e15a28 2728 }
20524f02 2729 }
9f3a7427 2730out:
5caf2a00 2731 btrfs_free_path(path);
9aca1d51 2732 return ret;
20524f02 2733}
9078a3e1 2734
96b5179d 2735int btrfs_free_block_groups(struct btrfs_fs_info *info)
9078a3e1 2736{
96b5179d
CM
2737 u64 start;
2738 u64 end;
b97f9203 2739 u64 ptr;
9078a3e1 2740 int ret;
925baedd
CM
2741
2742 mutex_lock(&info->alloc_mutex);
9078a3e1 2743 while(1) {
96b5179d
CM
2744 ret = find_first_extent_bit(&info->block_group_cache, 0,
2745 &start, &end, (unsigned int)-1);
2746 if (ret)
9078a3e1 2747 break;
b97f9203
Y
2748 ret = get_state_private(&info->block_group_cache, start, &ptr);
2749 if (!ret)
2750 kfree((void *)(unsigned long)ptr);
96b5179d
CM
2751 clear_extent_bits(&info->block_group_cache, start,
2752 end, (unsigned int)-1, GFP_NOFS);
9078a3e1 2753 }
e37c9e69 2754 while(1) {
f510cfec
CM
2755 ret = find_first_extent_bit(&info->free_space_cache, 0,
2756 &start, &end, EXTENT_DIRTY);
2757 if (ret)
e37c9e69 2758 break;
f510cfec
CM
2759 clear_extent_dirty(&info->free_space_cache, start,
2760 end, GFP_NOFS);
e37c9e69 2761 }
925baedd 2762 mutex_unlock(&info->alloc_mutex);
be744175
CM
2763 return 0;
2764}
2765
8e7bf94f
CM
/*
 * Last index of a window of 'nr' entries beginning at 'start', capped
 * at 'last'.
 */
static unsigned long calc_ra(unsigned long start, unsigned long last,
			     unsigned long nr)
{
	unsigned long window_end = start + nr - 1;

	return min(last, window_end);
}
2771
98ed5174
CM
2772static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2773 u64 len)
edbd8d4e
CM
2774{
2775 u64 page_start;
2776 u64 page_end;
edbd8d4e 2777 unsigned long last_index;
edbd8d4e
CM
2778 unsigned long i;
2779 struct page *page;
d1310b2e 2780 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
4313b399 2781 struct file_ra_state *ra;
8e7bf94f
CM
2782 unsigned long total_read = 0;
2783 unsigned long ra_pages;
3eaa2885 2784 struct btrfs_ordered_extent *ordered;
a061fc8d 2785 struct btrfs_trans_handle *trans;
4313b399
CM
2786
2787 ra = kzalloc(sizeof(*ra), GFP_NOFS);
edbd8d4e
CM
2788
2789 mutex_lock(&inode->i_mutex);
4313b399 2790 i = start >> PAGE_CACHE_SHIFT;
edbd8d4e
CM
2791 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2792
8e7bf94f
CM
2793 ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages;
2794
4313b399 2795 file_ra_state_init(ra, inode->i_mapping);
edbd8d4e 2796
4313b399 2797 for (; i <= last_index; i++) {
8e7bf94f
CM
2798 if (total_read % ra_pages == 0) {
2799 btrfs_force_ra(inode->i_mapping, ra, NULL, i,
2800 calc_ra(i, last_index, ra_pages));
2801 }
2802 total_read++;
3eaa2885
CM
2803again:
2804 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
a061fc8d 2805 goto truncate_racing;
edbd8d4e 2806 page = grab_cache_page(inode->i_mapping, i);
a061fc8d 2807 if (!page) {
edbd8d4e 2808 goto out_unlock;
a061fc8d 2809 }
edbd8d4e
CM
2810 if (!PageUptodate(page)) {
2811 btrfs_readpage(NULL, page);
2812 lock_page(page);
2813 if (!PageUptodate(page)) {
2814 unlock_page(page);
2815 page_cache_release(page);
2816 goto out_unlock;
2817 }
2818 }
ec44a35c 2819 wait_on_page_writeback(page);
3eaa2885 2820
edbd8d4e
CM
2821 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2822 page_end = page_start + PAGE_CACHE_SIZE - 1;
d1310b2e 2823 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
edbd8d4e 2824
3eaa2885
CM
2825 ordered = btrfs_lookup_ordered_extent(inode, page_start);
2826 if (ordered) {
2827 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2828 unlock_page(page);
2829 page_cache_release(page);
2830 btrfs_start_ordered_extent(inode, ordered, 1);
2831 btrfs_put_ordered_extent(ordered);
2832 goto again;
2833 }
2834 set_page_extent_mapped(page);
2835
f87f057b
CM
2836 /*
2837 * make sure page_mkwrite is called for this page if userland
2838 * wants to change it from mmap
2839 */
2840 clear_page_dirty_for_io(page);
3eaa2885 2841
ea8c2819 2842 btrfs_set_extent_delalloc(inode, page_start, page_end);
a061fc8d 2843 set_page_dirty(page);
edbd8d4e 2844
d1310b2e 2845 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
edbd8d4e
CM
2846 unlock_page(page);
2847 page_cache_release(page);
2848 }
2849
2850out_unlock:
3eaa2885
CM
2851 /* we have to start the IO in order to get the ordered extents
2852 * instantiated. This allows the relocation to code to wait
2853 * for all the ordered extents to hit the disk.
2854 *
2855 * Otherwise, it would constantly loop over the same extents
2856 * because the old ones don't get deleted until the IO is
2857 * started
2858 */
2859 btrfs_fdatawrite_range(inode->i_mapping, start, start + len - 1,
2860 WB_SYNC_NONE);
ec44a35c 2861 kfree(ra);
a061fc8d
CM
2862 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
2863 if (trans) {
a061fc8d
CM
2864 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
2865 mark_inode_dirty(inode);
2866 }
edbd8d4e
CM
2867 mutex_unlock(&inode->i_mutex);
2868 return 0;
a061fc8d
CM
2869
2870truncate_racing:
2871 vmtruncate(inode, inode->i_size);
2872 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2873 total_read);
2874 goto out_unlock;
edbd8d4e
CM
2875}
2876
bf4ef679
CM
2877/*
2878 * The back references tell us which tree holds a ref on a block,
2879 * but it is possible for the tree root field in the reference to
2880 * reflect the original root before a snapshot was made. In this
2881 * case we should search through all the children of a given root
2882 * to find potential holders of references on a block.
2883 *
2884 * Instead, we do something a little less fancy and just search
2885 * all the roots for a given key/block combination.
2886 */
2887static int find_root_for_ref(struct btrfs_root *root,
2888 struct btrfs_path *path,
2889 struct btrfs_key *key0,
2890 int level,
2891 int file_key,
2892 struct btrfs_root **found_root,
2893 u64 bytenr)
2894{
2895 struct btrfs_key root_location;
2896 struct btrfs_root *cur_root = *found_root;
2897 struct btrfs_file_extent_item *file_extent;
2898 u64 root_search_start = BTRFS_FS_TREE_OBJECTID;
2899 u64 found_bytenr;
2900 int ret;
bf4ef679
CM
2901
2902 root_location.offset = (u64)-1;
2903 root_location.type = BTRFS_ROOT_ITEM_KEY;
2904 path->lowest_level = level;
2905 path->reada = 0;
2906 while(1) {
2907 ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0);
2908 found_bytenr = 0;
2909 if (ret == 0 && file_key) {
2910 struct extent_buffer *leaf = path->nodes[0];
2911 file_extent = btrfs_item_ptr(leaf, path->slots[0],
2912 struct btrfs_file_extent_item);
2913 if (btrfs_file_extent_type(leaf, file_extent) ==
2914 BTRFS_FILE_EXTENT_REG) {
2915 found_bytenr =
2916 btrfs_file_extent_disk_bytenr(leaf,
2917 file_extent);
2918 }
323da79c 2919 } else if (!file_key) {
bf4ef679
CM
2920 if (path->nodes[level])
2921 found_bytenr = path->nodes[level]->start;
2922 }
2923
bf4ef679
CM
2924 btrfs_release_path(cur_root, path);
2925
2926 if (found_bytenr == bytenr) {
2927 *found_root = cur_root;
2928 ret = 0;
2929 goto out;
2930 }
2931 ret = btrfs_search_root(root->fs_info->tree_root,
2932 root_search_start, &root_search_start);
2933 if (ret)
2934 break;
2935
2936 root_location.objectid = root_search_start;
2937 cur_root = btrfs_read_fs_root_no_name(root->fs_info,
2938 &root_location);
2939 if (!cur_root) {
2940 ret = 1;
2941 break;
2942 }
2943 }
2944out:
2945 path->lowest_level = 0;
2946 return ret;
2947}
2948
4313b399
CM
2949/*
2950 * note, this releases the path
2951 */
98ed5174 2952static int noinline relocate_one_reference(struct btrfs_root *extent_root,
edbd8d4e 2953 struct btrfs_path *path,
0ef3e66b
CM
2954 struct btrfs_key *extent_key,
2955 u64 *last_file_objectid,
2956 u64 *last_file_offset,
2957 u64 *last_file_root,
2958 u64 last_extent)
edbd8d4e
CM
2959{
2960 struct inode *inode;
2961 struct btrfs_root *found_root;
bf4ef679
CM
2962 struct btrfs_key root_location;
2963 struct btrfs_key found_key;
4313b399
CM
2964 struct btrfs_extent_ref *ref;
2965 u64 ref_root;
2966 u64 ref_gen;
2967 u64 ref_objectid;
2968 u64 ref_offset;
edbd8d4e 2969 int ret;
bf4ef679 2970 int level;
edbd8d4e 2971
7d9eb12c
CM
2972 WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
2973
4313b399
CM
2974 ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
2975 struct btrfs_extent_ref);
2976 ref_root = btrfs_ref_root(path->nodes[0], ref);
2977 ref_gen = btrfs_ref_generation(path->nodes[0], ref);
2978 ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
2979 ref_offset = btrfs_ref_offset(path->nodes[0], ref);
2980 btrfs_release_path(extent_root, path);
2981
bf4ef679 2982 root_location.objectid = ref_root;
edbd8d4e 2983 if (ref_gen == 0)
bf4ef679 2984 root_location.offset = 0;
edbd8d4e 2985 else
bf4ef679
CM
2986 root_location.offset = (u64)-1;
2987 root_location.type = BTRFS_ROOT_ITEM_KEY;
edbd8d4e
CM
2988
2989 found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
bf4ef679 2990 &root_location);
edbd8d4e 2991 BUG_ON(!found_root);
7d9eb12c 2992 mutex_unlock(&extent_root->fs_info->alloc_mutex);
edbd8d4e
CM
2993
2994 if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
bf4ef679
CM
2995 found_key.objectid = ref_objectid;
2996 found_key.type = BTRFS_EXTENT_DATA_KEY;
2997 found_key.offset = ref_offset;
2998 level = 0;
2999
0ef3e66b
CM
3000 if (last_extent == extent_key->objectid &&
3001 *last_file_objectid == ref_objectid &&
3002 *last_file_offset == ref_offset &&
3003 *last_file_root == ref_root)
3004 goto out;
3005
bf4ef679
CM
3006 ret = find_root_for_ref(extent_root, path, &found_key,
3007 level, 1, &found_root,
3008 extent_key->objectid);
3009
3010 if (ret)
3011 goto out;
3012
0ef3e66b
CM
3013 if (last_extent == extent_key->objectid &&
3014 *last_file_objectid == ref_objectid &&
3015 *last_file_offset == ref_offset &&
3016 *last_file_root == ref_root)
3017 goto out;
3018
edbd8d4e
CM
3019 inode = btrfs_iget_locked(extent_root->fs_info->sb,
3020 ref_objectid, found_root);
3021 if (inode->i_state & I_NEW) {
3022 /* the inode and parent dir are two different roots */
3023 BTRFS_I(inode)->root = found_root;
3024 BTRFS_I(inode)->location.objectid = ref_objectid;
3025 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
3026 BTRFS_I(inode)->location.offset = 0;
3027 btrfs_read_locked_inode(inode);
3028 unlock_new_inode(inode);
3029
3030 }
3031 /* this can happen if the reference is not against
3032 * the latest version of the tree root
3033 */
7d9eb12c 3034 if (is_bad_inode(inode))
edbd8d4e 3035 goto out;
7d9eb12c 3036
0ef3e66b
CM
3037 *last_file_objectid = inode->i_ino;
3038 *last_file_root = found_root->root_key.objectid;
3039 *last_file_offset = ref_offset;
3040
edbd8d4e 3041 relocate_inode_pages(inode, ref_offset, extent_key->offset);
edbd8d4e 3042 iput(inode);
edbd8d4e
CM
3043 } else {
3044 struct btrfs_trans_handle *trans;
edbd8d4e 3045 struct extent_buffer *eb;
7d9eb12c 3046 int needs_lock = 0;
edbd8d4e 3047
edbd8d4e 3048 eb = read_tree_block(found_root, extent_key->objectid,
ca7a79ad 3049 extent_key->offset, 0);
925baedd 3050 btrfs_tree_lock(eb);
edbd8d4e
CM
3051 level = btrfs_header_level(eb);
3052
3053 if (level == 0)
3054 btrfs_item_key_to_cpu(eb, &found_key, 0);
3055 else
3056 btrfs_node_key_to_cpu(eb, &found_key, 0);
3057
925baedd 3058 btrfs_tree_unlock(eb);
edbd8d4e
CM
3059 free_extent_buffer(eb);
3060
bf4ef679
CM
3061 ret = find_root_for_ref(extent_root, path, &found_key,
3062 level, 0, &found_root,
3063 extent_key->objectid);
3064
3065 if (ret)
3066 goto out;
3067
7d9eb12c
CM
3068 /*
3069 * right here almost anything could happen to our key,
3070 * but that's ok. The cow below will either relocate it
3071 * or someone else will have relocated it. Either way,
3072 * it is in a different spot than it was before and
3073 * we're happy.
3074 */
3075
bf4ef679
CM
3076 trans = btrfs_start_transaction(found_root, 1);
3077
7d9eb12c
CM
3078 if (found_root == extent_root->fs_info->extent_root ||
3079 found_root == extent_root->fs_info->chunk_root ||
3080 found_root == extent_root->fs_info->dev_root) {
3081 needs_lock = 1;
3082 mutex_lock(&extent_root->fs_info->alloc_mutex);
3083 }
3084
edbd8d4e 3085 path->lowest_level = level;
8f662a76 3086 path->reada = 2;
edbd8d4e
CM
3087 ret = btrfs_search_slot(trans, found_root, &found_key, path,
3088 0, 1);
3089 path->lowest_level = 0;
edbd8d4e 3090 btrfs_release_path(found_root, path);
7d9eb12c 3091
0ef3e66b
CM
3092 if (found_root == found_root->fs_info->extent_root)
3093 btrfs_extent_post_op(trans, found_root);
7d9eb12c
CM
3094 if (needs_lock)
3095 mutex_unlock(&extent_root->fs_info->alloc_mutex);
3096
edbd8d4e 3097 btrfs_end_transaction(trans, found_root);
edbd8d4e 3098
7d9eb12c 3099 }
edbd8d4e 3100out:
7d9eb12c 3101 mutex_lock(&extent_root->fs_info->alloc_mutex);
edbd8d4e
CM
3102 return 0;
3103}
3104
a061fc8d
CM
3105static int noinline del_extent_zero(struct btrfs_root *extent_root,
3106 struct btrfs_path *path,
3107 struct btrfs_key *extent_key)
3108{
3109 int ret;
3110 struct btrfs_trans_handle *trans;
3111
3112 trans = btrfs_start_transaction(extent_root, 1);
3113 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
3114 if (ret > 0) {
3115 ret = -EIO;
3116 goto out;
3117 }
3118 if (ret < 0)
3119 goto out;
3120 ret = btrfs_del_item(trans, extent_root, path);
3121out:
3122 btrfs_end_transaction(trans, extent_root);
3123 return ret;
3124}
3125
98ed5174
CM
3126static int noinline relocate_one_extent(struct btrfs_root *extent_root,
3127 struct btrfs_path *path,
3128 struct btrfs_key *extent_key)
edbd8d4e
CM
3129{
3130 struct btrfs_key key;
3131 struct btrfs_key found_key;
edbd8d4e 3132 struct extent_buffer *leaf;
0ef3e66b
CM
3133 u64 last_file_objectid = 0;
3134 u64 last_file_root = 0;
3135 u64 last_file_offset = (u64)-1;
3136 u64 last_extent = 0;
edbd8d4e
CM
3137 u32 nritems;
3138 u32 item_size;
3139 int ret = 0;
3140
a061fc8d
CM
3141 if (extent_key->objectid == 0) {
3142 ret = del_extent_zero(extent_root, path, extent_key);
3143 goto out;
3144 }
edbd8d4e
CM
3145 key.objectid = extent_key->objectid;
3146 key.type = BTRFS_EXTENT_REF_KEY;
3147 key.offset = 0;
3148
3149 while(1) {
3150 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
3151
edbd8d4e
CM
3152 if (ret < 0)
3153 goto out;
3154
3155 ret = 0;
3156 leaf = path->nodes[0];
3157 nritems = btrfs_header_nritems(leaf);
a061fc8d
CM
3158 if (path->slots[0] == nritems) {
3159 ret = btrfs_next_leaf(extent_root, path);
3160 if (ret > 0) {
3161 ret = 0;
3162 goto out;
3163 }
3164 if (ret < 0)
3165 goto out;
bf4ef679 3166 leaf = path->nodes[0];
a061fc8d 3167 }
edbd8d4e
CM
3168
3169 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
a061fc8d 3170 if (found_key.objectid != extent_key->objectid) {
edbd8d4e 3171 break;
a061fc8d 3172 }
edbd8d4e 3173
a061fc8d 3174 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
edbd8d4e 3175 break;
a061fc8d 3176 }
edbd8d4e
CM
3177
3178 key.offset = found_key.offset + 1;
3179 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3180
0ef3e66b
CM
3181 ret = relocate_one_reference(extent_root, path, extent_key,
3182 &last_file_objectid,
3183 &last_file_offset,
3184 &last_file_root, last_extent);
edbd8d4e
CM
3185 if (ret)
3186 goto out;
0ef3e66b 3187 last_extent = extent_key->objectid;
edbd8d4e
CM
3188 }
3189 ret = 0;
3190out:
3191 btrfs_release_path(extent_root, path);
3192 return ret;
3193}
3194
ec44a35c
CM
3195static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
3196{
3197 u64 num_devices;
3198 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
3199 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
3200
a061fc8d 3201 num_devices = root->fs_info->fs_devices->num_devices;
ec44a35c
CM
3202 if (num_devices == 1) {
3203 stripped |= BTRFS_BLOCK_GROUP_DUP;
3204 stripped = flags & ~stripped;
3205
3206 /* turn raid0 into single device chunks */
3207 if (flags & BTRFS_BLOCK_GROUP_RAID0)
3208 return stripped;
3209
3210 /* turn mirroring into duplication */
3211 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
3212 BTRFS_BLOCK_GROUP_RAID10))
3213 return stripped | BTRFS_BLOCK_GROUP_DUP;
3214 return flags;
3215 } else {
3216 /* they already had raid on here, just return */
ec44a35c
CM
3217 if (flags & stripped)
3218 return flags;
3219
3220 stripped |= BTRFS_BLOCK_GROUP_DUP;
3221 stripped = flags & ~stripped;
3222
3223 /* switch duplicated blocks with raid1 */
3224 if (flags & BTRFS_BLOCK_GROUP_DUP)
3225 return stripped | BTRFS_BLOCK_GROUP_RAID1;
3226
3227 /* turn single device chunks into raid0 */
3228 return stripped | BTRFS_BLOCK_GROUP_RAID0;
3229 }
3230 return flags;
3231}
3232
0ef3e66b
CM
3233int __alloc_chunk_for_shrink(struct btrfs_root *root,
3234 struct btrfs_block_group_cache *shrink_block_group,
3235 int force)
3236{
3237 struct btrfs_trans_handle *trans;
3238 u64 new_alloc_flags;
3239 u64 calc;
3240
c286ac48 3241 spin_lock(&shrink_block_group->lock);
0ef3e66b 3242 if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
c286ac48 3243 spin_unlock(&shrink_block_group->lock);
7d9eb12c 3244 mutex_unlock(&root->fs_info->alloc_mutex);
c286ac48 3245
0ef3e66b 3246 trans = btrfs_start_transaction(root, 1);
7d9eb12c 3247 mutex_lock(&root->fs_info->alloc_mutex);
c286ac48 3248 spin_lock(&shrink_block_group->lock);
7d9eb12c 3249
0ef3e66b
CM
3250 new_alloc_flags = update_block_group_flags(root,
3251 shrink_block_group->flags);
3252 if (new_alloc_flags != shrink_block_group->flags) {
3253 calc =
3254 btrfs_block_group_used(&shrink_block_group->item);
3255 } else {
3256 calc = shrink_block_group->key.offset;
3257 }
c286ac48
CM
3258 spin_unlock(&shrink_block_group->lock);
3259
0ef3e66b
CM
3260 do_chunk_alloc(trans, root->fs_info->extent_root,
3261 calc + 2 * 1024 * 1024, new_alloc_flags, force);
7d9eb12c
CM
3262
3263 mutex_unlock(&root->fs_info->alloc_mutex);
0ef3e66b 3264 btrfs_end_transaction(trans, root);
7d9eb12c 3265 mutex_lock(&root->fs_info->alloc_mutex);
c286ac48
CM
3266 } else
3267 spin_unlock(&shrink_block_group->lock);
0ef3e66b
CM
3268 return 0;
3269}
3270
8f18cf13 3271int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
edbd8d4e
CM
3272{
3273 struct btrfs_trans_handle *trans;
3274 struct btrfs_root *tree_root = root->fs_info->tree_root;
3275 struct btrfs_path *path;
3276 u64 cur_byte;
3277 u64 total_found;
8f18cf13
CM
3278 u64 shrink_last_byte;
3279 struct btrfs_block_group_cache *shrink_block_group;
edbd8d4e 3280 struct btrfs_fs_info *info = root->fs_info;
edbd8d4e 3281 struct btrfs_key key;
73e48b27 3282 struct btrfs_key found_key;
edbd8d4e
CM
3283 struct extent_buffer *leaf;
3284 u32 nritems;
3285 int ret;
a061fc8d 3286 int progress;
edbd8d4e 3287
925baedd 3288 mutex_lock(&root->fs_info->alloc_mutex);
8f18cf13
CM
3289 shrink_block_group = btrfs_lookup_block_group(root->fs_info,
3290 shrink_start);
3291 BUG_ON(!shrink_block_group);
3292
0ef3e66b
CM
3293 shrink_last_byte = shrink_block_group->key.objectid +
3294 shrink_block_group->key.offset;
8f18cf13
CM
3295
3296 shrink_block_group->space_info->total_bytes -=
3297 shrink_block_group->key.offset;
edbd8d4e
CM
3298 path = btrfs_alloc_path();
3299 root = root->fs_info->extent_root;
8f662a76 3300 path->reada = 2;
edbd8d4e 3301
323da79c
CM
3302 printk("btrfs relocating block group %llu flags %llu\n",
3303 (unsigned long long)shrink_start,
3304 (unsigned long long)shrink_block_group->flags);
3305
0ef3e66b
CM
3306 __alloc_chunk_for_shrink(root, shrink_block_group, 1);
3307
edbd8d4e 3308again:
323da79c 3309
8f18cf13
CM
3310 shrink_block_group->ro = 1;
3311
edbd8d4e 3312 total_found = 0;
a061fc8d 3313 progress = 0;
8f18cf13 3314 key.objectid = shrink_start;
edbd8d4e
CM
3315 key.offset = 0;
3316 key.type = 0;
73e48b27 3317 cur_byte = key.objectid;
4313b399 3318
ea8c2819
CM
3319 mutex_unlock(&root->fs_info->alloc_mutex);
3320
3321 btrfs_start_delalloc_inodes(root);
7ea394f1 3322 btrfs_wait_ordered_extents(tree_root, 0);
ea8c2819
CM
3323
3324 mutex_lock(&root->fs_info->alloc_mutex);
3325
73e48b27
Y
3326 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3327 if (ret < 0)
3328 goto out;
3329
0b86a832 3330 ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
73e48b27
Y
3331 if (ret < 0)
3332 goto out;
8f18cf13 3333
73e48b27
Y
3334 if (ret == 0) {
3335 leaf = path->nodes[0];
3336 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
8f18cf13
CM
3337 if (found_key.objectid + found_key.offset > shrink_start &&
3338 found_key.objectid < shrink_last_byte) {
73e48b27
Y
3339 cur_byte = found_key.objectid;
3340 key.objectid = cur_byte;
3341 }
3342 }
3343 btrfs_release_path(root, path);
3344
3345 while(1) {
edbd8d4e
CM
3346 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3347 if (ret < 0)
3348 goto out;
73e48b27 3349
7d9eb12c 3350next:
edbd8d4e 3351 leaf = path->nodes[0];
73e48b27 3352 nritems = btrfs_header_nritems(leaf);
73e48b27
Y
3353 if (path->slots[0] >= nritems) {
3354 ret = btrfs_next_leaf(root, path);
3355 if (ret < 0)
3356 goto out;
3357 if (ret == 1) {
3358 ret = 0;
3359 break;
edbd8d4e 3360 }
73e48b27
Y
3361 leaf = path->nodes[0];
3362 nritems = btrfs_header_nritems(leaf);
edbd8d4e 3363 }
73e48b27
Y
3364
3365 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
725c8463 3366
8f18cf13
CM
3367 if (found_key.objectid >= shrink_last_byte)
3368 break;
3369
725c8463
CM
3370 if (progress && need_resched()) {
3371 memcpy(&key, &found_key, sizeof(key));
725c8463 3372 cond_resched();
725c8463
CM
3373 btrfs_release_path(root, path);
3374 btrfs_search_slot(NULL, root, &key, path, 0, 0);
3375 progress = 0;
3376 goto next;
3377 }
3378 progress = 1;
3379
73e48b27
Y
3380 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
3381 found_key.objectid + found_key.offset <= cur_byte) {
0ef3e66b
CM
3382 memcpy(&key, &found_key, sizeof(key));
3383 key.offset++;
edbd8d4e 3384 path->slots[0]++;
edbd8d4e
CM
3385 goto next;
3386 }
73e48b27 3387
edbd8d4e
CM
3388 total_found++;
3389 cur_byte = found_key.objectid + found_key.offset;
3390 key.objectid = cur_byte;
3391 btrfs_release_path(root, path);
3392 ret = relocate_one_extent(root, path, &found_key);
0ef3e66b 3393 __alloc_chunk_for_shrink(root, shrink_block_group, 0);
edbd8d4e
CM
3394 }
3395
3396 btrfs_release_path(root, path);
3397
3398 if (total_found > 0) {
323da79c
CM
3399 printk("btrfs relocate found %llu last extent was %llu\n",
3400 (unsigned long long)total_found,
3401 (unsigned long long)found_key.objectid);
7d9eb12c 3402 mutex_unlock(&root->fs_info->alloc_mutex);
edbd8d4e
CM
3403 trans = btrfs_start_transaction(tree_root, 1);
3404 btrfs_commit_transaction(trans, tree_root);
3405
edbd8d4e 3406 btrfs_clean_old_snapshots(tree_root);
edbd8d4e 3407
ea8c2819 3408 btrfs_start_delalloc_inodes(root);
7ea394f1 3409 btrfs_wait_ordered_extents(tree_root, 0);
3eaa2885 3410
edbd8d4e
CM
3411 trans = btrfs_start_transaction(tree_root, 1);
3412 btrfs_commit_transaction(trans, tree_root);
7d9eb12c 3413 mutex_lock(&root->fs_info->alloc_mutex);
edbd8d4e
CM
3414 goto again;
3415 }
3416
8f18cf13
CM
3417 /*
3418 * we've freed all the extents, now remove the block
3419 * group item from the tree
3420 */
7d9eb12c
CM
3421 mutex_unlock(&root->fs_info->alloc_mutex);
3422
edbd8d4e 3423 trans = btrfs_start_transaction(root, 1);
c286ac48 3424
7d9eb12c 3425 mutex_lock(&root->fs_info->alloc_mutex);
8f18cf13 3426 memcpy(&key, &shrink_block_group->key, sizeof(key));
1372f8e6 3427
8f18cf13
CM
3428 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3429 if (ret > 0)
3430 ret = -EIO;
8e8a1e31
JB
3431 if (ret < 0) {
3432 btrfs_end_transaction(trans, root);
8f18cf13 3433 goto out;
8e8a1e31 3434 }
73e48b27 3435
0ef3e66b
CM
3436 clear_extent_bits(&info->block_group_cache, key.objectid,
3437 key.objectid + key.offset - 1,
8f18cf13 3438 (unsigned int)-1, GFP_NOFS);
edbd8d4e 3439
0ef3e66b
CM
3440
3441 clear_extent_bits(&info->free_space_cache,
3442 key.objectid, key.objectid + key.offset - 1,
3443 (unsigned int)-1, GFP_NOFS);
3444
d7a029a8 3445 /*
0ef3e66b
CM
3446 memset(shrink_block_group, 0, sizeof(*shrink_block_group));
3447 kfree(shrink_block_group);
d7a029a8 3448 */
0ef3e66b 3449
8f18cf13 3450 btrfs_del_item(trans, root, path);
7d9eb12c
CM
3451 btrfs_release_path(root, path);
3452 mutex_unlock(&root->fs_info->alloc_mutex);
edbd8d4e 3453 btrfs_commit_transaction(trans, root);
0ef3e66b 3454
7d9eb12c
CM
3455 mutex_lock(&root->fs_info->alloc_mutex);
3456
0ef3e66b
CM
3457 /* the code to unpin extents might set a few bits in the free
3458 * space cache for this range again
3459 */
3460 clear_extent_bits(&info->free_space_cache,
3461 key.objectid, key.objectid + key.offset - 1,
3462 (unsigned int)-1, GFP_NOFS);
edbd8d4e
CM
3463out:
3464 btrfs_free_path(path);
925baedd 3465 mutex_unlock(&root->fs_info->alloc_mutex);
edbd8d4e
CM
3466 return ret;
3467}
3468
0b86a832
CM
3469int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
3470 struct btrfs_key *key)
3471{
925baedd 3472 int ret = 0;
0b86a832
CM
3473 struct btrfs_key found_key;
3474 struct extent_buffer *leaf;
3475 int slot;
edbd8d4e 3476
0b86a832
CM
3477 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
3478 if (ret < 0)
925baedd
CM
3479 goto out;
3480
0b86a832
CM
3481 while(1) {
3482 slot = path->slots[0];
edbd8d4e 3483 leaf = path->nodes[0];
0b86a832
CM
3484 if (slot >= btrfs_header_nritems(leaf)) {
3485 ret = btrfs_next_leaf(root, path);
3486 if (ret == 0)
3487 continue;
3488 if (ret < 0)
925baedd 3489 goto out;
0b86a832 3490 break;
edbd8d4e 3491 }
0b86a832 3492 btrfs_item_key_to_cpu(leaf, &found_key, slot);
edbd8d4e 3493
0b86a832 3494 if (found_key.objectid >= key->objectid &&
925baedd
CM
3495 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3496 ret = 0;
3497 goto out;
3498 }
0b86a832 3499 path->slots[0]++;
edbd8d4e 3500 }
0b86a832 3501 ret = -ENOENT;
925baedd 3502out:
0b86a832 3503 return ret;
edbd8d4e
CM
3504}
3505
9078a3e1
CM
3506int btrfs_read_block_groups(struct btrfs_root *root)
3507{
3508 struct btrfs_path *path;
3509 int ret;
96b5179d 3510 int bit;
9078a3e1 3511 struct btrfs_block_group_cache *cache;
be744175 3512 struct btrfs_fs_info *info = root->fs_info;
6324fbf3 3513 struct btrfs_space_info *space_info;
d1310b2e 3514 struct extent_io_tree *block_group_cache;
9078a3e1
CM
3515 struct btrfs_key key;
3516 struct btrfs_key found_key;
5f39d397 3517 struct extent_buffer *leaf;
96b5179d
CM
3518
3519 block_group_cache = &info->block_group_cache;
be744175 3520 root = info->extent_root;
9078a3e1 3521 key.objectid = 0;
0b86a832 3522 key.offset = 0;
9078a3e1 3523 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
9078a3e1
CM
3524 path = btrfs_alloc_path();
3525 if (!path)
3526 return -ENOMEM;
3527
925baedd 3528 mutex_lock(&root->fs_info->alloc_mutex);
9078a3e1 3529 while(1) {
0b86a832
CM
3530 ret = find_first_block_group(root, path, &key);
3531 if (ret > 0) {
3532 ret = 0;
3533 goto error;
9078a3e1 3534 }
0b86a832
CM
3535 if (ret != 0)
3536 goto error;
3537
5f39d397
CM
3538 leaf = path->nodes[0];
3539 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
8f18cf13 3540 cache = kzalloc(sizeof(*cache), GFP_NOFS);
9078a3e1 3541 if (!cache) {
0b86a832 3542 ret = -ENOMEM;
9078a3e1
CM
3543 break;
3544 }
3e1ad54f 3545
c286ac48 3546 spin_lock_init(&cache->lock);
5f39d397
CM
3547 read_extent_buffer(leaf, &cache->item,
3548 btrfs_item_ptr_offset(leaf, path->slots[0]),
3549 sizeof(cache->item));
9078a3e1 3550 memcpy(&cache->key, &found_key, sizeof(found_key));
0b86a832 3551
9078a3e1
CM
3552 key.objectid = found_key.objectid + found_key.offset;
3553 btrfs_release_path(root, path);
0b86a832
CM
3554 cache->flags = btrfs_block_group_flags(&cache->item);
3555 bit = 0;
3556 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
96b5179d 3557 bit = BLOCK_GROUP_DATA;
0b86a832
CM
3558 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
3559 bit = BLOCK_GROUP_SYSTEM;
3560 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
96b5179d 3561 bit = BLOCK_GROUP_METADATA;
31f3c99b 3562 }
8790d502 3563 set_avail_alloc_bits(info, cache->flags);
96b5179d 3564
6324fbf3
CM
3565 ret = update_space_info(info, cache->flags, found_key.offset,
3566 btrfs_block_group_used(&cache->item),
3567 &space_info);
3568 BUG_ON(ret);
3569 cache->space_info = space_info;
3570
96b5179d
CM
3571 /* use EXTENT_LOCKED to prevent merging */
3572 set_extent_bits(block_group_cache, found_key.objectid,
3573 found_key.objectid + found_key.offset - 1,
c286ac48 3574 EXTENT_LOCKED, GFP_NOFS);
96b5179d 3575 set_state_private(block_group_cache, found_key.objectid,
ae2f5411 3576 (unsigned long)cache);
c286ac48
CM
3577 set_extent_bits(block_group_cache, found_key.objectid,
3578 found_key.objectid + found_key.offset - 1,
3579 bit | EXTENT_LOCKED, GFP_NOFS);
9078a3e1 3580 if (key.objectid >=
db94535d 3581 btrfs_super_total_bytes(&info->super_copy))
9078a3e1
CM
3582 break;
3583 }
0b86a832
CM
3584 ret = 0;
3585error:
9078a3e1 3586 btrfs_free_path(path);
925baedd 3587 mutex_unlock(&root->fs_info->alloc_mutex);
0b86a832 3588 return ret;
9078a3e1 3589}
6324fbf3
CM
3590
3591int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3592 struct btrfs_root *root, u64 bytes_used,
e17cade2 3593 u64 type, u64 chunk_objectid, u64 chunk_offset,
6324fbf3
CM
3594 u64 size)
3595{
3596 int ret;
3597 int bit = 0;
3598 struct btrfs_root *extent_root;
3599 struct btrfs_block_group_cache *cache;
3600 struct extent_io_tree *block_group_cache;
3601
7d9eb12c 3602 WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
6324fbf3
CM
3603 extent_root = root->fs_info->extent_root;
3604 block_group_cache = &root->fs_info->block_group_cache;
3605
8f18cf13 3606 cache = kzalloc(sizeof(*cache), GFP_NOFS);
6324fbf3 3607 BUG_ON(!cache);
e17cade2 3608 cache->key.objectid = chunk_offset;
6324fbf3 3609 cache->key.offset = size;
c286ac48 3610 spin_lock_init(&cache->lock);
6324fbf3 3611 btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
0ef3e66b 3612
6324fbf3 3613 btrfs_set_block_group_used(&cache->item, bytes_used);
6324fbf3
CM
3614 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
3615 cache->flags = type;
3616 btrfs_set_block_group_flags(&cache->item, type);
3617
3618 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
3619 &cache->space_info);
3620 BUG_ON(ret);
3621
d18a2c44 3622 bit = block_group_state_bits(type);
e17cade2
CM
3623 set_extent_bits(block_group_cache, chunk_offset,
3624 chunk_offset + size - 1,
c286ac48 3625 EXTENT_LOCKED, GFP_NOFS);
e17cade2
CM
3626 set_state_private(block_group_cache, chunk_offset,
3627 (unsigned long)cache);
c286ac48
CM
3628 set_extent_bits(block_group_cache, chunk_offset,
3629 chunk_offset + size - 1,
3630 bit | EXTENT_LOCKED, GFP_NOFS);
3631
6324fbf3
CM
3632 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3633 sizeof(cache->item));
3634 BUG_ON(ret);
3635
3636 finish_current_insert(trans, extent_root);
3637 ret = del_pending_extents(trans, extent_root);
3638 BUG_ON(ret);
d18a2c44 3639 set_avail_alloc_bits(extent_root->fs_info, type);
925baedd 3640
6324fbf3
CM
3641 return 0;
3642}