Btrfs: Add readahead to the online shrinker, and a mount -o alloc_start= for testing
[linux-2.6-block.git] / fs / btrfs / extent-tree.c
CommitLineData
6cbd5570
CM
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
ec6b910f 19#include <linux/sched.h>
7bb86316 20#include <linux/crc32c.h>
edbd8d4e 21#include <linux/pagemap.h>
74493f7a 22#include "hash.h"
fec577fb
CM
23#include "ctree.h"
24#include "disk-io.h"
25#include "print-tree.h"
e089f05c 26#include "transaction.h"
fec577fb 27
96b5179d
CM
/*
 * Block group state is kept as extent-state bits on
 * fs_info->block_group_cache.  These aliases name the generic EXTENT_* bits
 * this file uses to mark a group as holding data, holding metadata, or
 * needing its item written back.
 */
28#define BLOCK_GROUP_DATA EXTENT_WRITEBACK
29#define BLOCK_GROUP_METADATA EXTENT_UPTODATE
30#define BLOCK_GROUP_DIRTY EXTENT_DIRTY
31
e089f05c
CM
/*
 * Forward declarations.  Both helpers are called as a pair after the extent
 * tree has been modified, e.g. from btrfs_inc_extent_ref(),
 * btrfs_extent_post_op() and write_one_cache_group().
 */
32static int finish_current_insert(struct btrfs_trans_handle *trans, struct
33 btrfs_root *extent_root);
e20d96d6
CM
34static int del_pending_extents(struct btrfs_trans_handle *trans, struct
35 btrfs_root *extent_root);
fec577fb 36
e37c9e69
CM
37static int cache_block_group(struct btrfs_root *root,
38 struct btrfs_block_group_cache *block_group)
39{
40 struct btrfs_path *path;
41 int ret;
42 struct btrfs_key key;
5f39d397 43 struct extent_buffer *leaf;
f510cfec 44 struct extent_map_tree *free_space_cache;
e37c9e69 45 int slot;
e37c9e69
CM
46 u64 last = 0;
47 u64 hole_size;
7d7d6068 48 u64 first_free;
e37c9e69
CM
49 int found = 0;
50
00f5c795
CM
51 if (!block_group)
52 return 0;
53
e37c9e69 54 root = root->fs_info->extent_root;
f510cfec 55 free_space_cache = &root->fs_info->free_space_cache;
e37c9e69
CM
56
57 if (block_group->cached)
58 return 0;
f510cfec 59
e37c9e69
CM
60 path = btrfs_alloc_path();
61 if (!path)
62 return -ENOMEM;
7d7d6068 63
2cc58cf2 64 path->reada = 2;
7d7d6068 65 first_free = block_group->key.objectid;
e37c9e69 66 key.objectid = block_group->key.objectid;
e37c9e69 67 key.offset = 0;
7d7d6068 68
e37c9e69
CM
69 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
70 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7d7d6068 71
e37c9e69
CM
72 if (ret < 0)
73 return ret;
7d7d6068 74
e37c9e69
CM
75 if (ret && path->slots[0] > 0)
76 path->slots[0]--;
7d7d6068 77
e37c9e69 78 while(1) {
5f39d397 79 leaf = path->nodes[0];
e37c9e69 80 slot = path->slots[0];
5f39d397 81 if (slot >= btrfs_header_nritems(leaf)) {
e37c9e69 82 ret = btrfs_next_leaf(root, path);
54aa1f4d
CM
83 if (ret < 0)
84 goto err;
de428b63 85 if (ret == 0) {
e37c9e69 86 continue;
de428b63 87 } else {
e37c9e69
CM
88 break;
89 }
90 }
7d7d6068 91
5f39d397 92 btrfs_item_key_to_cpu(leaf, &key, slot);
7d7d6068 93 if (key.objectid < block_group->key.objectid) {
7bb86316
CM
94 if (btrfs_key_type(&key) != BTRFS_EXTENT_REF_KEY &&
95 key.objectid + key.offset > first_free)
7d7d6068
Y
96 first_free = key.objectid + key.offset;
97 goto next;
98 }
99
e37c9e69
CM
100 if (key.objectid >= block_group->key.objectid +
101 block_group->key.offset) {
e37c9e69
CM
102 break;
103 }
7d7d6068 104
e37c9e69
CM
105 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
106 if (!found) {
7d7d6068 107 last = first_free;
e37c9e69 108 found = 1;
e37c9e69 109 }
f510cfec
CM
110 if (key.objectid > last) {
111 hole_size = key.objectid - last;
112 set_extent_dirty(free_space_cache, last,
113 last + hole_size - 1,
114 GFP_NOFS);
7d7d6068
Y
115 }
116 last = key.objectid + key.offset;
e37c9e69 117 }
7d7d6068 118next:
e37c9e69
CM
119 path->slots[0]++;
120 }
121
7d7d6068
Y
122 if (!found)
123 last = first_free;
124 if (block_group->key.objectid +
125 block_group->key.offset > last) {
126 hole_size = block_group->key.objectid +
127 block_group->key.offset - last;
f510cfec
CM
128 set_extent_dirty(free_space_cache, last,
129 last + hole_size - 1, GFP_NOFS);
7d7d6068 130 }
e37c9e69 131 block_group->cached = 1;
54aa1f4d 132err:
e37c9e69
CM
133 btrfs_free_path(path);
134 return 0;
135}
136
5276aeda
CM
137struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
138 btrfs_fs_info *info,
db94535d 139 u64 bytenr)
be744175 140{
96b5179d
CM
141 struct extent_map_tree *block_group_cache;
142 struct btrfs_block_group_cache *block_group = NULL;
143 u64 ptr;
144 u64 start;
145 u64 end;
be744175
CM
146 int ret;
147
96b5179d
CM
148 block_group_cache = &info->block_group_cache;
149 ret = find_first_extent_bit(block_group_cache,
db94535d 150 bytenr, &start, &end,
96b5179d 151 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
be744175 152 if (ret) {
96b5179d 153 return NULL;
be744175 154 }
96b5179d
CM
155 ret = get_state_private(block_group_cache, start, &ptr);
156 if (ret)
157 return NULL;
158
ae2f5411 159 block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
5cf66426 160 if (block_group->key.objectid <= bytenr && bytenr <
96b5179d
CM
161 block_group->key.objectid + block_group->key.offset)
162 return block_group;
be744175
CM
163 return NULL;
164}
e37c9e69
CM
/*
 * Pick a starting byte for an allocation of 'num' bytes.
 *
 * Scans the EXTENT_DIRTY ranges of fs_info->free_space_cache, beginning at
 * max(search_start, start of *cache_ret), for a range of at least 'num'
 * bytes.  Unless 'data' is BTRFS_BLOCK_GROUP_MIXED the range must also end
 * inside the current block group.  When the current group has nothing
 * suitable the search moves on to another group (updating *cache_ret) and,
 * after running out of groups, wraps once back to search_start with 'data'
 * relaxed to BTRFS_BLOCK_GROUP_MIXED.
 *
 * Returns the start of a suitable free range, or search_start unchanged
 * when *cache_ret is NULL, caching a group fails, or no group is left.
 * 'full_scan' is not referenced by this function.
 */
165static u64 find_search_start(struct btrfs_root *root,
166 struct btrfs_block_group_cache **cache_ret,
5e5745dc
Y
167 u64 search_start, int num,
168 int data, int full_scan)
e37c9e69 169{
e37c9e69
CM
170 int ret;
171 struct btrfs_block_group_cache *cache = *cache_ret;
e19caa5f 172 u64 last;
f510cfec
CM
173 u64 start = 0;
174 u64 end = 0;
257d0ce3 175 u64 cache_miss = 0;
f84a8b36 176 int wrapped = 0;
e37c9e69 177
00f5c795 178 if (!cache) {
1a2b2ac7 179 goto out;
00f5c795 180 }
/* (re)start the scan in 'cache'; make sure its free space is cached first */
e37c9e69 181again:
54aa1f4d
CM
182 ret = cache_block_group(root, cache);
183 if (ret)
184 goto out;
f84a8b36 185
e19caa5f
CM
186 last = max(search_start, cache->key.objectid);
187
e37c9e69 188 while(1) {
f510cfec
CM
189 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
190 last, &start, &end, EXTENT_DIRTY);
e19caa5f 191 if (ret) {
/* no free range at or after 'last': remember where the lookup failed */
257d0ce3
CM
192 if (!cache_miss)
193 cache_miss = last;
e19caa5f
CM
194 goto new_group;
195 }
f510cfec
CM
196
197 start = max(last, start);
198 last = end + 1;
257d0ce3
CM
199 if (last - start < num) {
/* range too small; if it runs to the end of the group, retry from it later */
200 if (last == cache->key.objectid + cache->key.offset)
201 cache_miss = start;
f510cfec 202 continue;
257d0ce3
CM
203 }
204 if (data != BTRFS_BLOCK_GROUP_MIXED &&
5cf66426 205 start + num > cache->key.objectid + cache->key.offset)
e37c9e69 206 goto new_group;
f510cfec 207 return start;
e37c9e69
CM
208 }
/* fallback exit: search_start is returned whether or not a group is found */
209out:
1a2b2ac7
CM
210 cache = btrfs_lookup_block_group(root->fs_info, search_start);
211 if (!cache) {
212 printk("Unable to find block group for %Lu\n",
213 search_start);
214 WARN_ON(1);
215 return search_start;
216 }
1a5bc167 217 return search_start;
e37c9e69
CM
218
/* move to the group that follows the current one */
219new_group:
e19caa5f 220 last = cache->key.objectid + cache->key.offset;
f84a8b36 221wrapped:
e19caa5f 222 cache = btrfs_lookup_block_group(root->fs_info, last);
e37c9e69 223 if (!cache) {
/* also entered by goto from below when btrfs_find_block_group() fails */
0e4de584 224no_cache:
f84a8b36
CM
225 if (!wrapped) {
226 wrapped = 1;
227 last = search_start;
228 data = BTRFS_BLOCK_GROUP_MIXED;
229 goto wrapped;
230 }
1a2b2ac7 231 goto out;
e37c9e69 232 }
/* next group not cached yet: cache it, then resume from the recorded miss */
257d0ce3
CM
233 if (cache_miss && !cache->cached) {
234 cache_block_group(root, cache);
235 last = cache_miss;
257d0ce3
CM
236 cache = btrfs_lookup_block_group(root->fs_info, last);
237 }
1a2b2ac7 238 cache = btrfs_find_block_group(root, cache, last, data, 0);
0e4de584
CM
239 if (!cache)
240 goto no_cache;
e37c9e69 241 *cache_ret = cache;
257d0ce3 242 cache_miss = 0;
e37c9e69
CM
243 goto again;
244}
245
84f54cfa
CM
246static u64 div_factor(u64 num, int factor)
247{
257d0ce3
CM
248 if (factor == 10)
249 return num;
84f54cfa
CM
250 num *= factor;
251 do_div(num, 10);
252 return num;
253}
254
31f3c99b
CM
255struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
256 struct btrfs_block_group_cache
be744175 257 *hint, u64 search_start,
de428b63 258 int data, int owner)
cd1bc465 259{
96b5179d
CM
260 struct btrfs_block_group_cache *cache;
261 struct extent_map_tree *block_group_cache;
31f3c99b 262 struct btrfs_block_group_cache *found_group = NULL;
cd1bc465
CM
263 struct btrfs_fs_info *info = root->fs_info;
264 u64 used;
31f3c99b
CM
265 u64 last = 0;
266 u64 hint_last;
96b5179d
CM
267 u64 start;
268 u64 end;
269 u64 free_check;
270 u64 ptr;
271 int bit;
cd1bc465 272 int ret;
31f3c99b 273 int full_search = 0;
de428b63 274 int factor = 8;
1e2677e0 275 int data_swap = 0;
de428b63 276
96b5179d
CM
277 block_group_cache = &info->block_group_cache;
278
de428b63 279 if (!owner)
f84a8b36 280 factor = 8;
be744175 281
257d0ce3 282 if (data == BTRFS_BLOCK_GROUP_MIXED) {
f84a8b36 283 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
257d0ce3
CM
284 factor = 10;
285 } else if (data)
96b5179d
CM
286 bit = BLOCK_GROUP_DATA;
287 else
288 bit = BLOCK_GROUP_METADATA;
be744175
CM
289
290 if (search_start) {
291 struct btrfs_block_group_cache *shint;
5276aeda 292 shint = btrfs_lookup_block_group(info, search_start);
f84a8b36
CM
293 if (shint && (shint->data == data ||
294 shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
be744175 295 used = btrfs_block_group_used(&shint->item);
324ae4df
Y
296 if (used + shint->pinned <
297 div_factor(shint->key.offset, factor)) {
be744175
CM
298 return shint;
299 }
300 }
301 }
f84a8b36
CM
302 if (hint && (hint->data == data ||
303 hint->data == BTRFS_BLOCK_GROUP_MIXED)) {
31f3c99b 304 used = btrfs_block_group_used(&hint->item);
324ae4df
Y
305 if (used + hint->pinned <
306 div_factor(hint->key.offset, factor)) {
31f3c99b
CM
307 return hint;
308 }
e19caa5f 309 last = hint->key.objectid + hint->key.offset;
31f3c99b
CM
310 hint_last = last;
311 } else {
e37c9e69
CM
312 if (hint)
313 hint_last = max(hint->key.objectid, search_start);
314 else
315 hint_last = search_start;
316
317 last = hint_last;
31f3c99b 318 }
31f3c99b 319again:
cd1bc465 320 while(1) {
96b5179d
CM
321 ret = find_first_extent_bit(block_group_cache, last,
322 &start, &end, bit);
323 if (ret)
cd1bc465 324 break;
96b5179d
CM
325
326 ret = get_state_private(block_group_cache, start, &ptr);
327 if (ret)
328 break;
329
ae2f5411 330 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
96b5179d
CM
331 last = cache->key.objectid + cache->key.offset;
332 used = btrfs_block_group_used(&cache->item);
333
334 if (full_search)
335 free_check = cache->key.offset;
336 else
337 free_check = div_factor(cache->key.offset, factor);
324ae4df 338 if (used + cache->pinned < free_check) {
96b5179d
CM
339 found_group = cache;
340 goto found;
cd1bc465 341 }
de428b63 342 cond_resched();
cd1bc465 343 }
31f3c99b 344 if (!full_search) {
be744175 345 last = search_start;
31f3c99b
CM
346 full_search = 1;
347 goto again;
348 }
1e2677e0 349 if (!data_swap) {
1e2677e0 350 data_swap = 1;
96b5179d 351 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
1e2677e0
CM
352 last = search_start;
353 goto again;
354 }
be744175 355found:
31f3c99b 356 return found_group;
cd1bc465
CM
357}
358
7bb86316 359static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
74493f7a
CM
360 u64 owner, u64 owner_offset)
361{
362 u32 high_crc = ~(u32)0;
363 u32 low_crc = ~(u32)0;
364 __le64 lenum;
365
366 lenum = cpu_to_le64(root_objectid);
367 high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
7bb86316
CM
368 lenum = cpu_to_le64(ref_generation);
369 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
74493f7a 370
7bb86316 371#if 0
74493f7a
CM
372 lenum = cpu_to_le64(owner);
373 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
74493f7a
CM
374 lenum = cpu_to_le64(owner_offset);
375 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
7bb86316 376#endif
74493f7a
CM
377 return ((u64)high_crc << 32) | (u64)low_crc;
378}
379
7bb86316
CM
380static int match_extent_ref(struct extent_buffer *leaf,
381 struct btrfs_extent_ref *disk_ref,
382 struct btrfs_extent_ref *cpu_ref)
383{
384 int ret;
385 int len;
386
387 if (cpu_ref->objectid)
388 len = sizeof(*cpu_ref);
389 else
390 len = 2 * sizeof(u64);
391 ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
392 len);
393 return ret == 0;
394}
395
396static int lookup_extent_backref(struct btrfs_trans_handle *trans,
397 struct btrfs_root *root,
398 struct btrfs_path *path, u64 bytenr,
399 u64 root_objectid, u64 ref_generation,
400 u64 owner, u64 owner_offset, int del)
74493f7a
CM
401{
402 u64 hash;
403 struct btrfs_key key;
7bb86316 404 struct btrfs_key found_key;
74493f7a 405 struct btrfs_extent_ref ref;
7bb86316
CM
406 struct extent_buffer *leaf;
407 struct btrfs_extent_ref *disk_ref;
408 int ret;
409 int ret2;
410
411 btrfs_set_stack_ref_root(&ref, root_objectid);
412 btrfs_set_stack_ref_generation(&ref, ref_generation);
413 btrfs_set_stack_ref_objectid(&ref, owner);
414 btrfs_set_stack_ref_offset(&ref, owner_offset);
415
416 hash = hash_extent_ref(root_objectid, ref_generation, owner,
417 owner_offset);
418 key.offset = hash;
419 key.objectid = bytenr;
420 key.type = BTRFS_EXTENT_REF_KEY;
421
422 while (1) {
423 ret = btrfs_search_slot(trans, root, &key, path,
424 del ? -1 : 0, del);
425 if (ret < 0)
426 goto out;
427 leaf = path->nodes[0];
428 if (ret != 0) {
429 u32 nritems = btrfs_header_nritems(leaf);
430 if (path->slots[0] >= nritems) {
431 ret2 = btrfs_next_leaf(root, path);
432 if (ret2)
433 goto out;
434 leaf = path->nodes[0];
435 }
436 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
437 if (found_key.objectid != bytenr ||
438 found_key.type != BTRFS_EXTENT_REF_KEY)
439 goto out;
440 key.offset = found_key.offset;
441 if (del) {
442 btrfs_release_path(root, path);
443 continue;
444 }
445 }
446 disk_ref = btrfs_item_ptr(path->nodes[0],
447 path->slots[0],
448 struct btrfs_extent_ref);
449 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
450 ret = 0;
451 goto out;
452 }
453 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
454 key.offset = found_key.offset + 1;
455 btrfs_release_path(root, path);
456 }
457out:
458 return ret;
459}
460
d8d5f3e1
CM
461/*
462 * Back reference rules. Back refs have three main goals:
463 *
464 * 1) differentiate between all holders of references to an extent so that
465 * when a reference is dropped we can make sure it was a valid reference
466 * before freeing the extent.
467 *
468 * 2) Provide enough information to quickly find the holders of an extent
469 * if we notice a given block is corrupted or bad.
470 *
471 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
472 * maintenance. This is actually the same as #2, but with a slightly
473 * different use case.
474 *
475 * File extents can be referenced by:
476 *
477 * - multiple snapshots, subvolumes, or different generations in one subvol
478 * - different files inside a single subvolume (in theory, not implemented yet)
479 * - different offsets inside a file (bookend extents in file.c)
480 *
481 * The extent ref structure has fields for:
482 *
483 * - Objectid of the subvolume root
484 * - Generation number of the tree holding the reference
485 * - objectid of the file holding the reference
486 * - offset in the file corresponding to the key holding the reference
487 *
488 * When a file extent is allocated the fields are filled in:
489 * (root_key.objectid, trans->transid, inode objectid, offset in file)
490 *
491 * When a leaf is cow'd new references are added for every file extent found
492 * in the leaf. It looks the same as the create case, but trans->transid
493 * will be different when the block is cow'd.
494 *
495 * (root_key.objectid, trans->transid, inode objectid, offset in file)
496 *
497 * When a file extent is removed either during snapshot deletion or file
498 * truncation, the corresponding back reference is found
499 * by searching for:
500 *
501 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
502 * inode objectid, offset in file)
503 *
504 * Btree extents can be referenced by:
505 *
506 * - Different subvolumes
507 * - Different generations of the same subvolume
508 *
509 * Storing sufficient information for a full reverse mapping of a btree
510 * block would require storing the lowest key of the block in the backref,
511 * and it would require updating that lowest key either before write out or
512 * every time it changed. Instead, the objectid of the lowest key is stored
513 * along with the level of the tree block. This provides a hint
514 * about where in the btree the block can be found. Searches through the
515 * btree only need to look for a pointer to that block, so they stop one
516 * level higher than the level recorded in the backref.
517 *
518 * Some btrees do not do reference counting on their extents. These
519 * include the extent tree and the tree of tree roots. Backrefs for these
520 * trees always have a generation of zero.
521 *
522 * When a tree block is created, back references are inserted:
523 *
f6dbff55 524 * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
d8d5f3e1
CM
525 *
526 * When a tree block is cow'd in a reference counted root,
527 * new back references are added for all the blocks it points to.
528 * These are of the form (trans->transid will have increased since creation):
529 *
f6dbff55 530 * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
d8d5f3e1
CM
531 *
532 * Because the lowest_key_objectid and the level are just hints
533 * they are not used when backrefs are deleted. When a backref is deleted:
534 *
535 * if backref was for a tree root:
536 * root_objectid = root->root_key.objectid
537 * else
538 * root_objectid = btrfs_header_owner(parent)
539 *
540 * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
541 *
542 * Back Reference Key hashing:
543 *
544 * Back references have four fields, each 64 bits long. Unfortunately, these
545 * have to be hashed into a single 64 bit number that is placed into the key offset.
546 * The key objectid corresponds to the first byte in the extent, and the
547 * key type is set to BTRFS_EXTENT_REF_KEY
548 */
7bb86316
CM
549int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
550 struct btrfs_root *root,
551 struct btrfs_path *path, u64 bytenr,
552 u64 root_objectid, u64 ref_generation,
553 u64 owner, u64 owner_offset)
554{
555 u64 hash;
556 struct btrfs_key key;
557 struct btrfs_extent_ref ref;
558 struct btrfs_extent_ref *disk_ref;
74493f7a
CM
559 int ret;
560
561 btrfs_set_stack_ref_root(&ref, root_objectid);
7bb86316 562 btrfs_set_stack_ref_generation(&ref, ref_generation);
74493f7a
CM
563 btrfs_set_stack_ref_objectid(&ref, owner);
564 btrfs_set_stack_ref_offset(&ref, owner_offset);
565
7bb86316
CM
566 hash = hash_extent_ref(root_objectid, ref_generation, owner,
567 owner_offset);
74493f7a
CM
568 key.offset = hash;
569 key.objectid = bytenr;
570 key.type = BTRFS_EXTENT_REF_KEY;
571
572 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
573 while (ret == -EEXIST) {
7bb86316
CM
574 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
575 struct btrfs_extent_ref);
576 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
577 goto out;
578 key.offset++;
579 btrfs_release_path(root, path);
580 ret = btrfs_insert_empty_item(trans, root, path, &key,
581 sizeof(ref));
74493f7a 582 }
7bb86316
CM
583 if (ret)
584 goto out;
585 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
586 struct btrfs_extent_ref);
587 write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
588 sizeof(ref));
589 btrfs_mark_buffer_dirty(path->nodes[0]);
590out:
591 btrfs_release_path(root, path);
592 return ret;
74493f7a
CM
593}
594
b18c6685
CM
595int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
596 struct btrfs_root *root,
74493f7a 597 u64 bytenr, u64 num_bytes,
7bb86316 598 u64 root_objectid, u64 ref_generation,
74493f7a 599 u64 owner, u64 owner_offset)
02217ed2 600{
5caf2a00 601 struct btrfs_path *path;
02217ed2 602 int ret;
e2fa7227 603 struct btrfs_key key;
5f39d397 604 struct extent_buffer *l;
234b63a0 605 struct btrfs_extent_item *item;
cf27e1ee 606 u32 refs;
037e6390 607
db94535d 608 WARN_ON(num_bytes < root->sectorsize);
5caf2a00 609 path = btrfs_alloc_path();
54aa1f4d
CM
610 if (!path)
611 return -ENOMEM;
26b8003f 612
db94535d 613 key.objectid = bytenr;
62e2749e 614 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
db94535d 615 key.offset = num_bytes;
5caf2a00 616 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
9f5fae2f 617 0, 1);
54aa1f4d
CM
618 if (ret < 0)
619 return ret;
a429e513 620 if (ret != 0) {
a28ec197 621 BUG();
a429e513 622 }
02217ed2 623 BUG_ON(ret != 0);
5f39d397 624 l = path->nodes[0];
5caf2a00 625 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
5f39d397
CM
626 refs = btrfs_extent_refs(l, item);
627 btrfs_set_extent_refs(l, item, refs + 1);
5caf2a00 628 btrfs_mark_buffer_dirty(path->nodes[0]);
a28ec197 629
5caf2a00 630 btrfs_release_path(root->fs_info->extent_root, path);
7bb86316
CM
631
632 ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
633 path, bytenr, root_objectid,
634 ref_generation, owner, owner_offset);
635 BUG_ON(ret);
9f5fae2f 636 finish_current_insert(trans, root->fs_info->extent_root);
e20d96d6 637 del_pending_extents(trans, root->fs_info->extent_root);
74493f7a
CM
638
639 btrfs_free_path(path);
02217ed2
CM
640 return 0;
641}
642
e9d0b13b
CM
643int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
644 struct btrfs_root *root)
645{
646 finish_current_insert(trans, root->fs_info->extent_root);
647 del_pending_extents(trans, root->fs_info->extent_root);
648 return 0;
649}
650
b18c6685 651static int lookup_extent_ref(struct btrfs_trans_handle *trans,
db94535d
CM
652 struct btrfs_root *root, u64 bytenr,
653 u64 num_bytes, u32 *refs)
a28ec197 654{
5caf2a00 655 struct btrfs_path *path;
a28ec197 656 int ret;
e2fa7227 657 struct btrfs_key key;
5f39d397 658 struct extent_buffer *l;
234b63a0 659 struct btrfs_extent_item *item;
5caf2a00 660
db94535d 661 WARN_ON(num_bytes < root->sectorsize);
5caf2a00 662 path = btrfs_alloc_path();
db94535d
CM
663 key.objectid = bytenr;
664 key.offset = num_bytes;
62e2749e 665 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
5caf2a00 666 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
9f5fae2f 667 0, 0);
54aa1f4d
CM
668 if (ret < 0)
669 goto out;
5f39d397
CM
670 if (ret != 0) {
671 btrfs_print_leaf(root, path->nodes[0]);
db94535d 672 printk("failed to find block number %Lu\n", bytenr);
a28ec197 673 BUG();
5f39d397
CM
674 }
675 l = path->nodes[0];
5caf2a00 676 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
5f39d397 677 *refs = btrfs_extent_refs(l, item);
54aa1f4d 678out:
5caf2a00 679 btrfs_free_path(path);
a28ec197
CM
680 return 0;
681}
682
be20aa9d
CM
683u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
684 struct btrfs_path *count_path,
685 u64 first_extent)
686{
687 struct btrfs_root *extent_root = root->fs_info->extent_root;
688 struct btrfs_path *path;
689 u64 bytenr;
690 u64 found_objectid;
691 u64 root_objectid = 0;
692 u32 total_count = 0;
693 u32 cur_count;
694 u32 refs;
695 u32 nritems;
696 int ret;
697 struct btrfs_key key;
698 struct btrfs_key found_key;
699 struct extent_buffer *l;
700 struct btrfs_extent_item *item;
701 struct btrfs_extent_ref *ref_item;
702 int level = -1;
703
704 path = btrfs_alloc_path();
705again:
706 if (level == -1)
707 bytenr = first_extent;
708 else
709 bytenr = count_path->nodes[level]->start;
710
711 cur_count = 0;
712 key.objectid = bytenr;
713 key.offset = 0;
714
715 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
716 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
717 if (ret < 0)
718 goto out;
719 BUG_ON(ret == 0);
720
721 l = path->nodes[0];
722 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
723
724 if (found_key.objectid != bytenr ||
725 found_key.type != BTRFS_EXTENT_ITEM_KEY) {
726 goto out;
727 }
728
729 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
730 refs = btrfs_extent_refs(l, item);
731 while (1) {
732 nritems = btrfs_header_nritems(l);
733 if (path->slots[0] >= nritems) {
734 ret = btrfs_next_leaf(extent_root, path);
735 if (ret == 0)
736 continue;
737 break;
738 }
739 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
740 if (found_key.objectid != bytenr)
741 break;
742 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
743 path->slots[0]++;
744 continue;
745 }
746
747 cur_count++;
748 ref_item = btrfs_item_ptr(l, path->slots[0],
749 struct btrfs_extent_ref);
750 found_objectid = btrfs_ref_root(l, ref_item);
751
752 if (found_objectid != root_objectid)
753 total_count++;
754
755 if (total_count > 1)
756 goto out;
757
758 if (root_objectid == 0)
759 root_objectid = found_objectid;
760
761 path->slots[0]++;
762 }
763 if (cur_count == 0) {
764 total_count = 0;
765 goto out;
766 }
767 if (total_count > 1)
768 goto out;
769 if (level >= 0 && root->node == count_path->nodes[level])
770 goto out;
771 level++;
772 btrfs_release_path(root, path);
773 goto again;
774
775out:
776 btrfs_free_path(path);
777 return total_count;
778
779}
780
c5739bba 781int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
7bb86316 782 struct btrfs_root *root, u64 owner_objectid)
c5739bba 783{
7bb86316
CM
784 u64 generation;
785 u64 key_objectid;
786 u64 level;
787 u32 nritems;
788 struct btrfs_disk_key disk_key;
789
790 level = btrfs_header_level(root->node);
791 generation = trans->transid;
792 nritems = btrfs_header_nritems(root->node);
793 if (nritems > 0) {
794 if (level == 0)
795 btrfs_item_key(root->node, &disk_key, 0);
796 else
797 btrfs_node_key(root->node, &disk_key, 0);
798 key_objectid = btrfs_disk_key_objectid(&disk_key);
799 } else {
800 key_objectid = 0;
801 }
db94535d 802 return btrfs_inc_extent_ref(trans, root, root->node->start,
7bb86316 803 root->node->len, owner_objectid,
f6dbff55 804 generation, level, key_objectid);
c5739bba
CM
805}
806
e089f05c 807int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
5f39d397 808 struct extent_buffer *buf)
02217ed2 809{
db94535d 810 u64 bytenr;
5f39d397
CM
811 u32 nritems;
812 struct btrfs_key key;
6407bf6d 813 struct btrfs_file_extent_item *fi;
02217ed2 814 int i;
db94535d 815 int level;
6407bf6d 816 int ret;
54aa1f4d 817 int faili;
a28ec197 818
3768f368 819 if (!root->ref_cows)
a28ec197 820 return 0;
5f39d397 821
db94535d 822 level = btrfs_header_level(buf);
5f39d397
CM
823 nritems = btrfs_header_nritems(buf);
824 for (i = 0; i < nritems; i++) {
db94535d
CM
825 if (level == 0) {
826 u64 disk_bytenr;
5f39d397
CM
827 btrfs_item_key_to_cpu(buf, &key, i);
828 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
6407bf6d 829 continue;
5f39d397 830 fi = btrfs_item_ptr(buf, i,
6407bf6d 831 struct btrfs_file_extent_item);
5f39d397 832 if (btrfs_file_extent_type(buf, fi) ==
236454df
CM
833 BTRFS_FILE_EXTENT_INLINE)
834 continue;
db94535d
CM
835 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
836 if (disk_bytenr == 0)
3a686375 837 continue;
db94535d 838 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
7bb86316
CM
839 btrfs_file_extent_disk_num_bytes(buf, fi),
840 root->root_key.objectid, trans->transid,
841 key.objectid, key.offset);
54aa1f4d
CM
842 if (ret) {
843 faili = i;
844 goto fail;
845 }
6407bf6d 846 } else {
db94535d 847 bytenr = btrfs_node_blockptr(buf, i);
6caab489 848 btrfs_node_key_to_cpu(buf, &key, i);
db94535d 849 ret = btrfs_inc_extent_ref(trans, root, bytenr,
7bb86316
CM
850 btrfs_level_size(root, level - 1),
851 root->root_key.objectid,
f6dbff55
CM
852 trans->transid,
853 level - 1, key.objectid);
54aa1f4d
CM
854 if (ret) {
855 faili = i;
856 goto fail;
857 }
6407bf6d 858 }
02217ed2
CM
859 }
860 return 0;
54aa1f4d 861fail:
ccd467d6 862 WARN_ON(1);
7bb86316 863#if 0
54aa1f4d 864 for (i =0; i < faili; i++) {
db94535d
CM
865 if (level == 0) {
866 u64 disk_bytenr;
5f39d397
CM
867 btrfs_item_key_to_cpu(buf, &key, i);
868 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
54aa1f4d 869 continue;
5f39d397 870 fi = btrfs_item_ptr(buf, i,
54aa1f4d 871 struct btrfs_file_extent_item);
5f39d397 872 if (btrfs_file_extent_type(buf, fi) ==
54aa1f4d
CM
873 BTRFS_FILE_EXTENT_INLINE)
874 continue;
db94535d
CM
875 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
876 if (disk_bytenr == 0)
54aa1f4d 877 continue;
db94535d
CM
878 err = btrfs_free_extent(trans, root, disk_bytenr,
879 btrfs_file_extent_disk_num_bytes(buf,
5f39d397 880 fi), 0);
54aa1f4d
CM
881 BUG_ON(err);
882 } else {
db94535d
CM
883 bytenr = btrfs_node_blockptr(buf, i);
884 err = btrfs_free_extent(trans, root, bytenr,
885 btrfs_level_size(root, level - 1), 0);
54aa1f4d
CM
886 BUG_ON(err);
887 }
888 }
7bb86316 889#endif
54aa1f4d 890 return ret;
02217ed2
CM
891}
892
9078a3e1
CM
893static int write_one_cache_group(struct btrfs_trans_handle *trans,
894 struct btrfs_root *root,
895 struct btrfs_path *path,
896 struct btrfs_block_group_cache *cache)
897{
898 int ret;
899 int pending_ret;
900 struct btrfs_root *extent_root = root->fs_info->extent_root;
5f39d397
CM
901 unsigned long bi;
902 struct extent_buffer *leaf;
9078a3e1 903
9078a3e1 904 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
54aa1f4d
CM
905 if (ret < 0)
906 goto fail;
9078a3e1 907 BUG_ON(ret);
5f39d397
CM
908
909 leaf = path->nodes[0];
910 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
911 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
912 btrfs_mark_buffer_dirty(leaf);
9078a3e1 913 btrfs_release_path(extent_root, path);
54aa1f4d 914fail:
9078a3e1
CM
915 finish_current_insert(trans, extent_root);
916 pending_ret = del_pending_extents(trans, extent_root);
917 if (ret)
918 return ret;
919 if (pending_ret)
920 return pending_ret;
921 return 0;
922
923}
924
96b5179d
CM
925int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
926 struct btrfs_root *root)
9078a3e1 927{
96b5179d
CM
928 struct extent_map_tree *block_group_cache;
929 struct btrfs_block_group_cache *cache;
9078a3e1
CM
930 int ret;
931 int err = 0;
932 int werr = 0;
9078a3e1 933 struct btrfs_path *path;
96b5179d
CM
934 u64 last = 0;
935 u64 start;
936 u64 end;
937 u64 ptr;
9078a3e1 938
96b5179d 939 block_group_cache = &root->fs_info->block_group_cache;
9078a3e1
CM
940 path = btrfs_alloc_path();
941 if (!path)
942 return -ENOMEM;
943
944 while(1) {
96b5179d
CM
945 ret = find_first_extent_bit(block_group_cache, last,
946 &start, &end, BLOCK_GROUP_DIRTY);
947 if (ret)
9078a3e1 948 break;
54aa1f4d 949
96b5179d
CM
950 last = end + 1;
951 ret = get_state_private(block_group_cache, start, &ptr);
952 if (ret)
953 break;
954
ae2f5411 955 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
96b5179d
CM
956 err = write_one_cache_group(trans, root,
957 path, cache);
958 /*
959 * if we fail to write the cache group, we want
960 * to keep it marked dirty in hopes that a later
961 * write will work
962 */
963 if (err) {
964 werr = err;
965 continue;
9078a3e1 966 }
96b5179d
CM
967 clear_extent_bits(block_group_cache, start, end,
968 BLOCK_GROUP_DIRTY, GFP_NOFS);
9078a3e1
CM
969 }
970 btrfs_free_path(path);
971 return werr;
972}
973
974static int update_block_group(struct btrfs_trans_handle *trans,
975 struct btrfs_root *root,
db94535d
CM
976 u64 bytenr, u64 num_bytes, int alloc,
977 int mark_free, int data)
9078a3e1
CM
978{
979 struct btrfs_block_group_cache *cache;
980 struct btrfs_fs_info *info = root->fs_info;
db94535d 981 u64 total = num_bytes;
9078a3e1 982 u64 old_val;
db94535d 983 u64 byte_in_group;
96b5179d
CM
984 u64 start;
985 u64 end;
3e1ad54f 986
9078a3e1 987 while(total) {
db94535d 988 cache = btrfs_lookup_block_group(info, bytenr);
3e1ad54f 989 if (!cache) {
9078a3e1 990 return -1;
cd1bc465 991 }
db94535d
CM
992 byte_in_group = bytenr - cache->key.objectid;
993 WARN_ON(byte_in_group > cache->key.offset);
96b5179d
CM
994 start = cache->key.objectid;
995 end = start + cache->key.offset - 1;
996 set_extent_bits(&info->block_group_cache, start, end,
997 BLOCK_GROUP_DIRTY, GFP_NOFS);
9078a3e1
CM
998
999 old_val = btrfs_block_group_used(&cache->item);
db94535d 1000 num_bytes = min(total, cache->key.offset - byte_in_group);
cd1bc465 1001 if (alloc) {
1e2677e0 1002 if (cache->data != data &&
84f54cfa 1003 old_val < (cache->key.offset >> 1)) {
96b5179d
CM
1004 int bit_to_clear;
1005 int bit_to_set;
96b5179d 1006 cache->data = data;
1e2677e0 1007 if (data) {
b97f9203
Y
1008 bit_to_clear = BLOCK_GROUP_METADATA;
1009 bit_to_set = BLOCK_GROUP_DATA;
f84a8b36
CM
1010 cache->item.flags &=
1011 ~BTRFS_BLOCK_GROUP_MIXED;
1e2677e0
CM
1012 cache->item.flags |=
1013 BTRFS_BLOCK_GROUP_DATA;
1014 } else {
b97f9203
Y
1015 bit_to_clear = BLOCK_GROUP_DATA;
1016 bit_to_set = BLOCK_GROUP_METADATA;
f84a8b36
CM
1017 cache->item.flags &=
1018 ~BTRFS_BLOCK_GROUP_MIXED;
1e2677e0
CM
1019 cache->item.flags &=
1020 ~BTRFS_BLOCK_GROUP_DATA;
1021 }
96b5179d
CM
1022 clear_extent_bits(&info->block_group_cache,
1023 start, end, bit_to_clear,
1024 GFP_NOFS);
1025 set_extent_bits(&info->block_group_cache,
1026 start, end, bit_to_set,
1027 GFP_NOFS);
f84a8b36
CM
1028 } else if (cache->data != data &&
1029 cache->data != BTRFS_BLOCK_GROUP_MIXED) {
1030 cache->data = BTRFS_BLOCK_GROUP_MIXED;
1031 set_extent_bits(&info->block_group_cache,
1032 start, end,
1033 BLOCK_GROUP_DATA |
1034 BLOCK_GROUP_METADATA,
1035 GFP_NOFS);
1e2677e0 1036 }
db94535d 1037 old_val += num_bytes;
cd1bc465 1038 } else {
db94535d 1039 old_val -= num_bytes;
f510cfec
CM
1040 if (mark_free) {
1041 set_extent_dirty(&info->free_space_cache,
db94535d 1042 bytenr, bytenr + num_bytes - 1,
f510cfec 1043 GFP_NOFS);
e37c9e69 1044 }
cd1bc465 1045 }
9078a3e1 1046 btrfs_set_block_group_used(&cache->item, old_val);
db94535d
CM
1047 total -= num_bytes;
1048 bytenr += num_bytes;
9078a3e1
CM
1049 }
1050 return 0;
1051}
324ae4df
Y
1052static int update_pinned_extents(struct btrfs_root *root,
1053 u64 bytenr, u64 num, int pin)
1054{
1055 u64 len;
1056 struct btrfs_block_group_cache *cache;
1057 struct btrfs_fs_info *fs_info = root->fs_info;
1058
1059 if (pin) {
1060 set_extent_dirty(&fs_info->pinned_extents,
1061 bytenr, bytenr + num - 1, GFP_NOFS);
1062 } else {
1063 clear_extent_dirty(&fs_info->pinned_extents,
1064 bytenr, bytenr + num - 1, GFP_NOFS);
1065 }
1066 while (num > 0) {
1067 cache = btrfs_lookup_block_group(fs_info, bytenr);
1068 WARN_ON(!cache);
1069 len = min(num, cache->key.offset -
1070 (bytenr - cache->key.objectid));
1071 if (pin) {
1072 cache->pinned += len;
1073 fs_info->total_pinned += len;
1074 } else {
1075 cache->pinned -= len;
1076 fs_info->total_pinned -= len;
1077 }
1078 bytenr += len;
1079 num -= len;
1080 }
1081 return 0;
1082}
9078a3e1 1083
1a5bc167 1084int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
ccd467d6 1085{
ccd467d6 1086 u64 last = 0;
1a5bc167
CM
1087 u64 start;
1088 u64 end;
1089 struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
ccd467d6 1090 int ret;
ccd467d6
CM
1091
1092 while(1) {
1a5bc167
CM
1093 ret = find_first_extent_bit(pinned_extents, last,
1094 &start, &end, EXTENT_DIRTY);
1095 if (ret)
ccd467d6 1096 break;
1a5bc167
CM
1097 set_extent_dirty(copy, start, end, GFP_NOFS);
1098 last = end + 1;
ccd467d6
CM
1099 }
1100 return 0;
1101}
1102
1103int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1104 struct btrfs_root *root,
1a5bc167 1105 struct extent_map_tree *unpin)
a28ec197 1106{
1a5bc167
CM
1107 u64 start;
1108 u64 end;
a28ec197 1109 int ret;
f510cfec 1110 struct extent_map_tree *free_space_cache;
f510cfec 1111 free_space_cache = &root->fs_info->free_space_cache;
a28ec197
CM
1112
1113 while(1) {
1a5bc167
CM
1114 ret = find_first_extent_bit(unpin, 0, &start, &end,
1115 EXTENT_DIRTY);
1116 if (ret)
a28ec197 1117 break;
324ae4df 1118 update_pinned_extents(root, start, end + 1 - start, 0);
1a5bc167
CM
1119 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1120 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
a28ec197
CM
1121 }
1122 return 0;
1123}
1124
e089f05c
CM
1125static int finish_current_insert(struct btrfs_trans_handle *trans, struct
1126 btrfs_root *extent_root)
037e6390 1127{
7bb86316
CM
1128 u64 start;
1129 u64 end;
1130 struct btrfs_fs_info *info = extent_root->fs_info;
d8d5f3e1 1131 struct extent_buffer *eb;
7bb86316 1132 struct btrfs_path *path;
e2fa7227 1133 struct btrfs_key ins;
d8d5f3e1 1134 struct btrfs_disk_key first;
234b63a0 1135 struct btrfs_extent_item extent_item;
037e6390 1136 int ret;
d8d5f3e1 1137 int level;
1a5bc167 1138 int err = 0;
037e6390 1139
5f39d397 1140 btrfs_set_stack_extent_refs(&extent_item, 1);
62e2749e 1141 btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
7bb86316 1142 path = btrfs_alloc_path();
037e6390 1143
26b8003f 1144 while(1) {
1a5bc167
CM
1145 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1146 &end, EXTENT_LOCKED);
1147 if (ret)
26b8003f
CM
1148 break;
1149
1a5bc167
CM
1150 ins.objectid = start;
1151 ins.offset = end + 1 - start;
1152 err = btrfs_insert_item(trans, extent_root, &ins,
1153 &extent_item, sizeof(extent_item));
1154 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1155 GFP_NOFS);
d8d5f3e1
CM
1156 eb = read_tree_block(extent_root, ins.objectid, ins.offset);
1157 level = btrfs_header_level(eb);
1158 if (level == 0) {
1159 btrfs_item_key(eb, &first, 0);
1160 } else {
1161 btrfs_node_key(eb, &first, 0);
1162 }
7bb86316
CM
1163 err = btrfs_insert_extent_backref(trans, extent_root, path,
1164 start, extent_root->root_key.objectid,
f6dbff55
CM
1165 0, level,
1166 btrfs_disk_key_objectid(&first));
7bb86316 1167 BUG_ON(err);
d8d5f3e1 1168 free_extent_buffer(eb);
037e6390 1169 }
7bb86316 1170 btrfs_free_path(path);
037e6390
CM
1171 return 0;
1172}
1173
db94535d
CM
1174static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1175 int pending)
e20d96d6 1176{
1a5bc167 1177 int err = 0;
5f39d397 1178 struct extent_buffer *buf;
8ef97622 1179
f4b9aa8d 1180 if (!pending) {
db94535d 1181 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
5f39d397
CM
1182 if (buf) {
1183 if (btrfs_buffer_uptodate(buf)) {
2c90e5d6
CM
1184 u64 transid =
1185 root->fs_info->running_transaction->transid;
5f39d397
CM
1186 if (btrfs_header_generation(buf) == transid) {
1187 free_extent_buffer(buf);
c549228f 1188 return 1;
2c90e5d6 1189 }
f4b9aa8d 1190 }
5f39d397 1191 free_extent_buffer(buf);
8ef97622 1192 }
324ae4df 1193 update_pinned_extents(root, bytenr, num_bytes, 1);
f4b9aa8d 1194 } else {
1a5bc167 1195 set_extent_bits(&root->fs_info->pending_del,
db94535d
CM
1196 bytenr, bytenr + num_bytes - 1,
1197 EXTENT_LOCKED, GFP_NOFS);
f4b9aa8d 1198 }
be744175 1199 BUG_ON(err < 0);
e20d96d6
CM
1200 return 0;
1201}
1202
fec577fb 1203/*
a28ec197 1204 * remove an extent from the root, returns 0 on success
fec577fb 1205 */
e089f05c 1206static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
7bb86316
CM
1207 *root, u64 bytenr, u64 num_bytes,
1208 u64 root_objectid, u64 ref_generation,
1209 u64 owner_objectid, u64 owner_offset, int pin,
e37c9e69 1210 int mark_free)
a28ec197 1211{
5caf2a00 1212 struct btrfs_path *path;
e2fa7227 1213 struct btrfs_key key;
1261ec42
CM
1214 struct btrfs_fs_info *info = root->fs_info;
1215 struct btrfs_root *extent_root = info->extent_root;
5f39d397 1216 struct extent_buffer *leaf;
a28ec197 1217 int ret;
234b63a0 1218 struct btrfs_extent_item *ei;
cf27e1ee 1219 u32 refs;
037e6390 1220
db94535d 1221 key.objectid = bytenr;
62e2749e 1222 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
db94535d 1223 key.offset = num_bytes;
a28ec197 1224
5caf2a00 1225 path = btrfs_alloc_path();
54aa1f4d
CM
1226 if (!path)
1227 return -ENOMEM;
5f26f772 1228
7bb86316
CM
1229 ret = lookup_extent_backref(trans, extent_root, path,
1230 bytenr, root_objectid,
1231 ref_generation,
1232 owner_objectid, owner_offset, 1);
1233 if (ret == 0) {
1234 ret = btrfs_del_item(trans, extent_root, path);
1235 } else {
1236 btrfs_print_leaf(extent_root, path->nodes[0]);
1237 WARN_ON(1);
1238 printk("Unable to find ref byte nr %Lu root %Lu "
1239 " gen %Lu owner %Lu offset %Lu\n", bytenr,
1240 root_objectid, ref_generation, owner_objectid,
1241 owner_offset);
1242 }
1243 btrfs_release_path(extent_root, path);
54aa1f4d
CM
1244 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1245 if (ret < 0)
1246 return ret;
1247 BUG_ON(ret);
5f39d397
CM
1248
1249 leaf = path->nodes[0];
1250 ei = btrfs_item_ptr(leaf, path->slots[0],
123abc88 1251 struct btrfs_extent_item);
5f39d397
CM
1252 refs = btrfs_extent_refs(leaf, ei);
1253 BUG_ON(refs == 0);
1254 refs -= 1;
1255 btrfs_set_extent_refs(leaf, ei, refs);
1256 btrfs_mark_buffer_dirty(leaf);
1257
cf27e1ee 1258 if (refs == 0) {
db94535d
CM
1259 u64 super_used;
1260 u64 root_used;
78fae27e
CM
1261
1262 if (pin) {
db94535d 1263 ret = pin_down_bytes(root, bytenr, num_bytes, 0);
c549228f
Y
1264 if (ret > 0)
1265 mark_free = 1;
1266 BUG_ON(ret < 0);
78fae27e
CM
1267 }
1268
58176a96 1269 /* block accounting for super block */
db94535d
CM
1270 super_used = btrfs_super_bytes_used(&info->super_copy);
1271 btrfs_set_super_bytes_used(&info->super_copy,
1272 super_used - num_bytes);
58176a96
JB
1273
1274 /* block accounting for root item */
db94535d 1275 root_used = btrfs_root_used(&root->root_item);
5f39d397 1276 btrfs_set_root_used(&root->root_item,
db94535d 1277 root_used - num_bytes);
58176a96 1278
5caf2a00 1279 ret = btrfs_del_item(trans, extent_root, path);
54aa1f4d
CM
1280 if (ret) {
1281 return ret;
1282 }
db94535d 1283 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1e2677e0 1284 mark_free, 0);
9078a3e1 1285 BUG_ON(ret);
a28ec197 1286 }
5caf2a00 1287 btrfs_free_path(path);
e089f05c 1288 finish_current_insert(trans, extent_root);
a28ec197
CM
1289 return ret;
1290}
1291
a28ec197
CM
1292/*
1293 * find all the blocks marked as pending in the radix tree and remove
1294 * them from the extent map
1295 */
e089f05c
CM
1296static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1297 btrfs_root *extent_root)
a28ec197
CM
1298{
1299 int ret;
e20d96d6 1300 int err = 0;
1a5bc167
CM
1301 u64 start;
1302 u64 end;
1303 struct extent_map_tree *pending_del;
1304 struct extent_map_tree *pinned_extents;
8ef97622 1305
1a5bc167
CM
1306 pending_del = &extent_root->fs_info->pending_del;
1307 pinned_extents = &extent_root->fs_info->pinned_extents;
a28ec197
CM
1308
1309 while(1) {
1a5bc167
CM
1310 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1311 EXTENT_LOCKED);
1312 if (ret)
a28ec197 1313 break;
324ae4df 1314 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1a5bc167
CM
1315 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1316 GFP_NOFS);
1317 ret = __free_extent(trans, extent_root,
7bb86316
CM
1318 start, end + 1 - start,
1319 extent_root->root_key.objectid,
1320 0, 0, 0, 0, 0);
1a5bc167
CM
1321 if (ret)
1322 err = ret;
fec577fb 1323 }
e20d96d6 1324 return err;
fec577fb
CM
1325}
1326
1327/*
1328 * remove an extent from the root, returns 0 on success
1329 */
e089f05c 1330int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
7bb86316
CM
1331 *root, u64 bytenr, u64 num_bytes,
1332 u64 root_objectid, u64 ref_generation,
1333 u64 owner_objectid, u64 owner_offset, int pin)
fec577fb 1334{
9f5fae2f 1335 struct btrfs_root *extent_root = root->fs_info->extent_root;
fec577fb
CM
1336 int pending_ret;
1337 int ret;
a28ec197 1338
db94535d 1339 WARN_ON(num_bytes < root->sectorsize);
7bb86316
CM
1340 if (!root->ref_cows)
1341 ref_generation = 0;
1342
fec577fb 1343 if (root == extent_root) {
db94535d 1344 pin_down_bytes(root, bytenr, num_bytes, 1);
fec577fb
CM
1345 return 0;
1346 }
7bb86316
CM
1347 ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1348 ref_generation, owner_objectid, owner_offset,
1349 pin, pin == 0);
e20d96d6 1350 pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
fec577fb
CM
1351 return ret ? ret : pending_ret;
1352}
1353
87ee04eb
CM
1354static u64 stripe_align(struct btrfs_root *root, u64 val)
1355{
1356 u64 mask = ((u64)root->stripesize - 1);
1357 u64 ret = (val + mask) & ~mask;
1358 return ret;
1359}
1360
fec577fb
CM
1361/*
1362 * walks the btree of allocated extents and find a hole of a given size.
1363 * The key ins is changed to record the hole:
1364 * ins->objectid == block start
62e2749e 1365 * ins->flags = BTRFS_EXTENT_ITEM_KEY
fec577fb
CM
1366 * ins->offset == number of blocks
1367 * Any available blocks before search_start are skipped.
1368 */
e089f05c 1369static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
db94535d
CM
1370 *orig_root, u64 num_bytes, u64 empty_size,
1371 u64 search_start, u64 search_end, u64 hint_byte,
f2654de4
CM
1372 struct btrfs_key *ins, u64 exclude_start,
1373 u64 exclude_nr, int data)
fec577fb 1374{
5caf2a00 1375 struct btrfs_path *path;
e2fa7227 1376 struct btrfs_key key;
fec577fb 1377 u64 hole_size = 0;
87ee04eb
CM
1378 u64 aligned;
1379 int ret;
fec577fb 1380 int slot = 0;
db94535d 1381 u64 last_byte = 0;
be744175 1382 u64 orig_search_start = search_start;
fec577fb 1383 int start_found;
5f39d397 1384 struct extent_buffer *l;
9f5fae2f 1385 struct btrfs_root * root = orig_root->fs_info->extent_root;
f2458e1d 1386 struct btrfs_fs_info *info = root->fs_info;
db94535d 1387 u64 total_needed = num_bytes;
e20d96d6 1388 int level;
be08c1b9 1389 struct btrfs_block_group_cache *block_group;
be744175 1390 int full_scan = 0;
fbdc762b 1391 int wrapped = 0;
f84a8b36 1392 u64 cached_start;
fec577fb 1393
db94535d 1394 WARN_ON(num_bytes < root->sectorsize);
b1a4d965
CM
1395 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1396
5f39d397
CM
1397 level = btrfs_header_level(root->node);
1398
015a739c 1399 if (num_bytes >= 32 * 1024 * 1024 && hint_byte) {
257d0ce3
CM
1400 data = BTRFS_BLOCK_GROUP_MIXED;
1401 }
1402
3e1ad54f 1403 if (search_end == (u64)-1)
db94535d
CM
1404 search_end = btrfs_super_total_bytes(&info->super_copy);
1405 if (hint_byte) {
1406 block_group = btrfs_lookup_block_group(info, hint_byte);
1a2b2ac7
CM
1407 if (!block_group)
1408 hint_byte = search_start;
be744175 1409 block_group = btrfs_find_block_group(root, block_group,
db94535d 1410 hint_byte, data, 1);
be744175
CM
1411 } else {
1412 block_group = btrfs_find_block_group(root,
1a2b2ac7
CM
1413 trans->block_group,
1414 search_start, data, 1);
be744175
CM
1415 }
1416
6702ed49 1417 total_needed += empty_size;
e011599b 1418 path = btrfs_alloc_path();
be744175 1419check_failed:
70b043f0
CM
1420 if (!block_group) {
1421 block_group = btrfs_lookup_block_group(info, search_start);
1422 if (!block_group)
1423 block_group = btrfs_lookup_block_group(info,
1424 orig_search_start);
1425 }
5e5745dc
Y
1426 search_start = find_search_start(root, &block_group, search_start,
1427 total_needed, data, full_scan);
87ee04eb 1428 search_start = stripe_align(root, search_start);
f84a8b36 1429 cached_start = search_start;
5caf2a00 1430 btrfs_init_path(path);
fec577fb
CM
1431 ins->objectid = search_start;
1432 ins->offset = 0;
fec577fb 1433 start_found = 0;
2cc58cf2 1434 path->reada = 2;
e37c9e69 1435
5caf2a00 1436 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
0f70abe2
CM
1437 if (ret < 0)
1438 goto error;
aa5d6bed 1439
e37c9e69 1440 if (path->slots[0] > 0) {
5caf2a00 1441 path->slots[0]--;
e37c9e69
CM
1442 }
1443
5f39d397
CM
1444 l = path->nodes[0];
1445 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1446
e37c9e69 1447 /*
7bb86316 1448 * walk backwards to find the first extent item key
e37c9e69 1449 */
7bb86316
CM
1450 while(btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
1451 if (path->slots[0] == 0) {
1452 ret = btrfs_prev_leaf(root, path);
1453 if (ret != 0) {
1454 ret = btrfs_search_slot(trans, root, ins,
1455 path, 0, 0);
1456 if (ret < 0)
1457 goto error;
1458 if (path->slots[0] > 0)
1459 path->slots[0]--;
1460 break;
1461 }
1462 } else {
e37c9e69
CM
1463 path->slots[0]--;
1464 }
7bb86316
CM
1465 l = path->nodes[0];
1466 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
e37c9e69 1467 }
fec577fb 1468 while (1) {
5f39d397 1469 l = path->nodes[0];
5caf2a00 1470 slot = path->slots[0];
5f39d397 1471 if (slot >= btrfs_header_nritems(l)) {
5caf2a00 1472 ret = btrfs_next_leaf(root, path);
fec577fb
CM
1473 if (ret == 0)
1474 continue;
0f70abe2
CM
1475 if (ret < 0)
1476 goto error;
e19caa5f
CM
1477
1478 search_start = max(search_start,
1479 block_group->key.objectid);
fec577fb 1480 if (!start_found) {
87ee04eb
CM
1481 aligned = stripe_align(root, search_start);
1482 ins->objectid = aligned;
1483 if (aligned >= search_end) {
1484 ret = -ENOSPC;
1485 goto error;
1486 }
1487 ins->offset = search_end - aligned;
fec577fb
CM
1488 start_found = 1;
1489 goto check_pending;
1490 }
87ee04eb
CM
1491 ins->objectid = stripe_align(root,
1492 last_byte > search_start ?
1493 last_byte : search_start);
1494 if (search_end <= ins->objectid) {
1495 ret = -ENOSPC;
1496 goto error;
1497 }
3e1ad54f 1498 ins->offset = search_end - ins->objectid;
e19caa5f 1499 BUG_ON(ins->objectid >= search_end);
fec577fb
CM
1500 goto check_pending;
1501 }
5f39d397 1502 btrfs_item_key_to_cpu(l, &key, slot);
96b5179d 1503
db94535d 1504 if (key.objectid >= search_start && key.objectid > last_byte &&
e37c9e69 1505 start_found) {
db94535d
CM
1506 if (last_byte < search_start)
1507 last_byte = search_start;
87ee04eb
CM
1508 aligned = stripe_align(root, last_byte);
1509 hole_size = key.objectid - aligned;
1510 if (key.objectid > aligned && hole_size >= num_bytes) {
1511 ins->objectid = aligned;
e37c9e69
CM
1512 ins->offset = hole_size;
1513 goto check_pending;
0579da42 1514 }
fec577fb 1515 }
96b5179d 1516 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
7bb86316
CM
1517 if (!start_found && btrfs_key_type(&key) ==
1518 BTRFS_BLOCK_GROUP_ITEM_KEY) {
db94535d 1519 last_byte = key.objectid;
96b5179d
CM
1520 start_found = 1;
1521 }
e37c9e69 1522 goto next;
96b5179d
CM
1523 }
1524
e37c9e69 1525
0579da42 1526 start_found = 1;
db94535d 1527 last_byte = key.objectid + key.offset;
f510cfec 1528
257d0ce3
CM
1529 if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
1530 last_byte >= block_group->key.objectid +
be744175
CM
1531 block_group->key.offset) {
1532 btrfs_release_path(root, path);
1533 search_start = block_group->key.objectid +
e19caa5f 1534 block_group->key.offset;
be744175
CM
1535 goto new_group;
1536 }
9078a3e1 1537next:
5caf2a00 1538 path->slots[0]++;
de428b63 1539 cond_resched();
fec577fb 1540 }
fec577fb
CM
1541check_pending:
1542 /* we have to make sure we didn't find an extent that has already
1543 * been allocated by the map tree or the original allocation
1544 */
5caf2a00 1545 btrfs_release_path(root, path);
fec577fb 1546 BUG_ON(ins->objectid < search_start);
e37c9e69 1547
db94535d 1548 if (ins->objectid + num_bytes >= search_end)
cf67582b 1549 goto enospc;
257d0ce3 1550 if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
5cf66426 1551 ins->objectid + num_bytes > block_group->
e19caa5f
CM
1552 key.objectid + block_group->key.offset) {
1553 search_start = block_group->key.objectid +
1554 block_group->key.offset;
1555 goto new_group;
1556 }
1a5bc167 1557 if (test_range_bit(&info->extent_ins, ins->objectid,
db94535d
CM
1558 ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1559 search_start = ins->objectid + num_bytes;
1a5bc167
CM
1560 goto new_group;
1561 }
1562 if (test_range_bit(&info->pinned_extents, ins->objectid,
db94535d
CM
1563 ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1564 search_start = ins->objectid + num_bytes;
1a5bc167 1565 goto new_group;
fec577fb 1566 }
db94535d 1567 if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
f2654de4
CM
1568 ins->objectid < exclude_start + exclude_nr)) {
1569 search_start = exclude_start + exclude_nr;
1570 goto new_group;
1571 }
e37c9e69 1572 if (!data) {
5276aeda 1573 block_group = btrfs_lookup_block_group(info, ins->objectid);
26b8003f
CM
1574 if (block_group)
1575 trans->block_group = block_group;
f2458e1d 1576 }
db94535d 1577 ins->offset = num_bytes;
5caf2a00 1578 btrfs_free_path(path);
fec577fb 1579 return 0;
be744175
CM
1580
1581new_group:
db94535d 1582 if (search_start + num_bytes >= search_end) {
cf67582b 1583enospc:
be744175 1584 search_start = orig_search_start;
fbdc762b
CM
1585 if (full_scan) {
1586 ret = -ENOSPC;
1587 goto error;
1588 }
6702ed49
CM
1589 if (wrapped) {
1590 if (!full_scan)
1591 total_needed -= empty_size;
fbdc762b 1592 full_scan = 1;
1a2b2ac7 1593 data = BTRFS_BLOCK_GROUP_MIXED;
6702ed49 1594 } else
fbdc762b 1595 wrapped = 1;
be744175 1596 }
5276aeda 1597 block_group = btrfs_lookup_block_group(info, search_start);
fbdc762b 1598 cond_resched();
1a2b2ac7
CM
1599 block_group = btrfs_find_block_group(root, block_group,
1600 search_start, data, 0);
be744175
CM
1601 goto check_failed;
1602
0f70abe2 1603error:
5caf2a00
CM
1604 btrfs_release_path(root, path);
1605 btrfs_free_path(path);
0f70abe2 1606 return ret;
fec577fb 1607}
fec577fb
CM
1608/*
1609 * finds a free extent and does all the dirty work required for allocation
1610 * returns the key for the extent through ins, and a tree buffer for
1611 * the first block of the extent through buf.
1612 *
1613 * returns 0 if everything worked, non-zero otherwise.
1614 */
4d775673 1615int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
7bb86316
CM
1616 struct btrfs_root *root,
1617 u64 num_bytes, u64 root_objectid, u64 ref_generation,
1618 u64 owner, u64 owner_offset,
1619 u64 empty_size, u64 hint_byte,
be08c1b9 1620 u64 search_end, struct btrfs_key *ins, int data)
fec577fb
CM
1621{
1622 int ret;
1623 int pending_ret;
db94535d 1624 u64 super_used, root_used;
fbdc762b 1625 u64 search_start = 0;
edbd8d4e 1626 u64 new_hint;
1261ec42
CM
1627 struct btrfs_fs_info *info = root->fs_info;
1628 struct btrfs_root *extent_root = info->extent_root;
234b63a0 1629 struct btrfs_extent_item extent_item;
7bb86316 1630 struct btrfs_path *path;
037e6390 1631
5f39d397 1632 btrfs_set_stack_extent_refs(&extent_item, 1);
8f662a76
CM
1633
1634 new_hint = max(hint_byte, root->fs_info->alloc_start);
edbd8d4e
CM
1635 if (new_hint < btrfs_super_total_bytes(&info->super_copy))
1636 hint_byte = new_hint;
8f662a76 1637
db94535d
CM
1638 WARN_ON(num_bytes < root->sectorsize);
1639 ret = find_free_extent(trans, root, num_bytes, empty_size,
1640 search_start, search_end, hint_byte, ins,
26b8003f
CM
1641 trans->alloc_exclude_start,
1642 trans->alloc_exclude_nr, data);
ccd467d6 1643 BUG_ON(ret);
f2654de4
CM
1644 if (ret)
1645 return ret;
fec577fb 1646
58176a96 1647 /* block accounting for super block */
db94535d
CM
1648 super_used = btrfs_super_bytes_used(&info->super_copy);
1649 btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
26b8003f 1650
58176a96 1651 /* block accounting for root item */
db94535d
CM
1652 root_used = btrfs_root_used(&root->root_item);
1653 btrfs_set_root_used(&root->root_item, root_used + num_bytes);
58176a96 1654
f510cfec
CM
1655 clear_extent_dirty(&root->fs_info->free_space_cache,
1656 ins->objectid, ins->objectid + ins->offset - 1,
1657 GFP_NOFS);
1658
26b8003f 1659 if (root == extent_root) {
1a5bc167
CM
1660 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1661 ins->objectid + ins->offset - 1,
1662 EXTENT_LOCKED, GFP_NOFS);
e19caa5f 1663 WARN_ON(data == 1);
26b8003f
CM
1664 goto update_block;
1665 }
1666
1667 WARN_ON(trans->alloc_exclude_nr);
1668 trans->alloc_exclude_start = ins->objectid;
1669 trans->alloc_exclude_nr = ins->offset;
e089f05c
CM
1670 ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1671 sizeof(extent_item));
037e6390 1672
26b8003f
CM
1673 trans->alloc_exclude_start = 0;
1674 trans->alloc_exclude_nr = 0;
7bb86316
CM
1675 BUG_ON(ret);
1676
1677 path = btrfs_alloc_path();
1678 BUG_ON(!path);
1679 ret = btrfs_insert_extent_backref(trans, extent_root, path,
1680 ins->objectid, root_objectid,
1681 ref_generation, owner, owner_offset);
26b8003f 1682
ccd467d6 1683 BUG_ON(ret);
7bb86316 1684 btrfs_free_path(path);
e089f05c 1685 finish_current_insert(trans, extent_root);
e20d96d6 1686 pending_ret = del_pending_extents(trans, extent_root);
f510cfec 1687
e37c9e69 1688 if (ret) {
037e6390 1689 return ret;
e37c9e69
CM
1690 }
1691 if (pending_ret) {
037e6390 1692 return pending_ret;
e37c9e69 1693 }
26b8003f
CM
1694
1695update_block:
1e2677e0
CM
1696 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1697 data);
fabb5681 1698 BUG_ON(ret);
037e6390 1699 return 0;
fec577fb
CM
1700}
1701
1702/*
1703 * helper function to allocate a block for a given tree
1704 * returns the tree buffer or NULL.
1705 */
5f39d397 1706struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
db94535d 1707 struct btrfs_root *root,
7bb86316
CM
1708 u32 blocksize,
1709 u64 root_objectid, u64 hint,
1710 u64 empty_size)
1711{
1712 u64 ref_generation;
1713
1714 if (root->ref_cows)
1715 ref_generation = trans->transid;
1716 else
1717 ref_generation = 0;
1718
1719
1720 return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
1721 ref_generation, 0, 0, hint, empty_size);
1722}
1723
1724/*
1725 * helper function to allocate a block for a given tree
1726 * returns the tree buffer or NULL.
1727 */
1728struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1729 struct btrfs_root *root,
1730 u32 blocksize,
1731 u64 root_objectid,
1732 u64 ref_generation,
1733 u64 first_objectid,
1734 int level,
1735 u64 hint,
5f39d397 1736 u64 empty_size)
fec577fb 1737{
e2fa7227 1738 struct btrfs_key ins;
fec577fb 1739 int ret;
5f39d397 1740 struct extent_buffer *buf;
fec577fb 1741
7bb86316
CM
1742 ret = btrfs_alloc_extent(trans, root, blocksize,
1743 root_objectid, ref_generation,
f6dbff55 1744 level, first_objectid, empty_size, hint,
db94535d 1745 (u64)-1, &ins, 0);
fec577fb 1746 if (ret) {
54aa1f4d
CM
1747 BUG_ON(ret > 0);
1748 return ERR_PTR(ret);
fec577fb 1749 }
db94535d 1750 buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
54aa1f4d 1751 if (!buf) {
7bb86316
CM
1752 btrfs_free_extent(trans, root, ins.objectid, blocksize,
1753 root->root_key.objectid, ref_generation,
1754 0, 0, 0);
54aa1f4d
CM
1755 return ERR_PTR(-ENOMEM);
1756 }
5f39d397
CM
1757 btrfs_set_buffer_uptodate(buf);
1758 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
1759 buf->start + buf->len - 1, GFP_NOFS);
19c00ddc
CM
1760 set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
1761 buf->start, buf->start + buf->len - 1,
1762 EXTENT_CSUM, GFP_NOFS);
1763 buf->flags |= EXTENT_CSUM;
6b80053d 1764 btrfs_set_buffer_defrag(buf);
d3c2fdcf 1765 trans->blocks_used++;
fec577fb
CM
1766 return buf;
1767}
a28ec197 1768
6407bf6d 1769static int drop_leaf_ref(struct btrfs_trans_handle *trans,
5f39d397 1770 struct btrfs_root *root, struct extent_buffer *leaf)
6407bf6d 1771{
7bb86316
CM
1772 u64 leaf_owner;
1773 u64 leaf_generation;
5f39d397 1774 struct btrfs_key key;
6407bf6d
CM
1775 struct btrfs_file_extent_item *fi;
1776 int i;
1777 int nritems;
1778 int ret;
1779
5f39d397
CM
1780 BUG_ON(!btrfs_is_leaf(leaf));
1781 nritems = btrfs_header_nritems(leaf);
7bb86316
CM
1782 leaf_owner = btrfs_header_owner(leaf);
1783 leaf_generation = btrfs_header_generation(leaf);
1784
6407bf6d 1785 for (i = 0; i < nritems; i++) {
db94535d 1786 u64 disk_bytenr;
5f39d397
CM
1787
1788 btrfs_item_key_to_cpu(leaf, &key, i);
1789 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
6407bf6d
CM
1790 continue;
1791 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
5f39d397
CM
1792 if (btrfs_file_extent_type(leaf, fi) ==
1793 BTRFS_FILE_EXTENT_INLINE)
236454df 1794 continue;
6407bf6d
CM
1795 /*
1796 * FIXME make sure to insert a trans record that
1797 * repeats the snapshot del on crash
1798 */
db94535d
CM
1799 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1800 if (disk_bytenr == 0)
3a686375 1801 continue;
db94535d 1802 ret = btrfs_free_extent(trans, root, disk_bytenr,
7bb86316
CM
1803 btrfs_file_extent_disk_num_bytes(leaf, fi),
1804 leaf_owner, leaf_generation,
1805 key.objectid, key.offset, 0);
6407bf6d
CM
1806 BUG_ON(ret);
1807 }
1808 return 0;
1809}
1810
e011599b 1811static void reada_walk_down(struct btrfs_root *root,
5f39d397 1812 struct extent_buffer *node)
e011599b
CM
1813{
1814 int i;
1815 u32 nritems;
db94535d 1816 u64 bytenr;
e011599b
CM
1817 int ret;
1818 u32 refs;
db94535d
CM
1819 int level;
1820 u32 blocksize;
e011599b 1821
5f39d397 1822 nritems = btrfs_header_nritems(node);
db94535d 1823 level = btrfs_header_level(node);
e011599b 1824 for (i = 0; i < nritems; i++) {
db94535d
CM
1825 bytenr = btrfs_node_blockptr(node, i);
1826 blocksize = btrfs_level_size(root, level - 1);
1827 ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs);
e011599b
CM
1828 BUG_ON(ret);
1829 if (refs != 1)
1830 continue;
409eb95d 1831 mutex_unlock(&root->fs_info->fs_mutex);
db94535d 1832 ret = readahead_tree_block(root, bytenr, blocksize);
409eb95d
CM
1833 cond_resched();
1834 mutex_lock(&root->fs_info->fs_mutex);
e011599b
CM
1835 if (ret)
1836 break;
1837 }
1838}
1839
9aca1d51
CM
1840/*
1841 * helper function for drop_snapshot, this walks down the tree dropping ref
1842 * counts as it goes.
1843 */
e089f05c
CM
1844static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1845 *root, struct btrfs_path *path, int *level)
20524f02 1846{
7bb86316
CM
1847 u64 root_owner;
1848 u64 root_gen;
1849 u64 bytenr;
5f39d397
CM
1850 struct extent_buffer *next;
1851 struct extent_buffer *cur;
7bb86316 1852 struct extent_buffer *parent;
db94535d 1853 u32 blocksize;
20524f02
CM
1854 int ret;
1855 u32 refs;
1856
5caf2a00
CM
1857 WARN_ON(*level < 0);
1858 WARN_ON(*level >= BTRFS_MAX_LEVEL);
5f39d397 1859 ret = lookup_extent_ref(trans, root,
db94535d
CM
1860 path->nodes[*level]->start,
1861 path->nodes[*level]->len, &refs);
20524f02
CM
1862 BUG_ON(ret);
1863 if (refs > 1)
1864 goto out;
e011599b 1865
9aca1d51
CM
1866 /*
1867 * walk down to the last node level and free all the leaves
1868 */
6407bf6d 1869 while(*level >= 0) {
5caf2a00
CM
1870 WARN_ON(*level < 0);
1871 WARN_ON(*level >= BTRFS_MAX_LEVEL);
20524f02 1872 cur = path->nodes[*level];
e011599b
CM
1873
1874 if (*level > 0 && path->slots[*level] == 0)
5f39d397 1875 reada_walk_down(root, cur);
e011599b 1876
5f39d397 1877 if (btrfs_header_level(cur) != *level)
2c90e5d6 1878 WARN_ON(1);
e011599b 1879
7518a238 1880 if (path->slots[*level] >=
5f39d397 1881 btrfs_header_nritems(cur))
20524f02 1882 break;
6407bf6d
CM
1883 if (*level == 0) {
1884 ret = drop_leaf_ref(trans, root, cur);
1885 BUG_ON(ret);
1886 break;
1887 }
db94535d
CM
1888 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1889 blocksize = btrfs_level_size(root, *level - 1);
1890 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
6407bf6d
CM
1891 BUG_ON(ret);
1892 if (refs != 1) {
7bb86316
CM
1893 parent = path->nodes[*level];
1894 root_owner = btrfs_header_owner(parent);
1895 root_gen = btrfs_header_generation(parent);
20524f02 1896 path->slots[*level]++;
db94535d 1897 ret = btrfs_free_extent(trans, root, bytenr,
7bb86316
CM
1898 blocksize, root_owner,
1899 root_gen, 0, 0, 1);
20524f02
CM
1900 BUG_ON(ret);
1901 continue;
1902 }
db94535d 1903 next = btrfs_find_tree_block(root, bytenr, blocksize);
5f39d397
CM
1904 if (!next || !btrfs_buffer_uptodate(next)) {
1905 free_extent_buffer(next);
e9d0b13b 1906 mutex_unlock(&root->fs_info->fs_mutex);
db94535d 1907 next = read_tree_block(root, bytenr, blocksize);
e9d0b13b
CM
1908 mutex_lock(&root->fs_info->fs_mutex);
1909
1910 /* we dropped the lock, check one more time */
db94535d
CM
1911 ret = lookup_extent_ref(trans, root, bytenr,
1912 blocksize, &refs);
e9d0b13b
CM
1913 BUG_ON(ret);
1914 if (refs != 1) {
7bb86316
CM
1915 parent = path->nodes[*level];
1916 root_owner = btrfs_header_owner(parent);
1917 root_gen = btrfs_header_generation(parent);
1918
e9d0b13b 1919 path->slots[*level]++;
5f39d397 1920 free_extent_buffer(next);
7bb86316
CM
1921 ret = btrfs_free_extent(trans, root, bytenr,
1922 blocksize,
1923 root_owner,
1924 root_gen, 0, 0, 1);
e9d0b13b
CM
1925 BUG_ON(ret);
1926 continue;
1927 }
1928 }
5caf2a00 1929 WARN_ON(*level <= 0);
83e15a28 1930 if (path->nodes[*level-1])
5f39d397 1931 free_extent_buffer(path->nodes[*level-1]);
20524f02 1932 path->nodes[*level-1] = next;
5f39d397 1933 *level = btrfs_header_level(next);
20524f02
CM
1934 path->slots[*level] = 0;
1935 }
1936out:
5caf2a00
CM
1937 WARN_ON(*level < 0);
1938 WARN_ON(*level >= BTRFS_MAX_LEVEL);
7bb86316
CM
1939
1940 if (path->nodes[*level] == root->node) {
1941 root_owner = root->root_key.objectid;
1942 parent = path->nodes[*level];
1943 } else {
1944 parent = path->nodes[*level + 1];
1945 root_owner = btrfs_header_owner(parent);
1946 }
1947
1948 root_gen = btrfs_header_generation(parent);
db94535d 1949 ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
7bb86316
CM
1950 path->nodes[*level]->len,
1951 root_owner, root_gen, 0, 0, 1);
5f39d397 1952 free_extent_buffer(path->nodes[*level]);
20524f02
CM
1953 path->nodes[*level] = NULL;
1954 *level += 1;
1955 BUG_ON(ret);
1956 return 0;
1957}
1958
9aca1d51
CM
1959/*
1960 * helper for dropping snapshots. This walks back up the tree in the path
1961 * to find the first node higher up where we haven't yet gone through
1962 * all the slots
1963 */
e089f05c
CM
1964static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1965 *root, struct btrfs_path *path, int *level)
20524f02 1966{
7bb86316
CM
1967 u64 root_owner;
1968 u64 root_gen;
1969 struct btrfs_root_item *root_item = &root->root_item;
20524f02
CM
1970 int i;
1971 int slot;
1972 int ret;
9f3a7427 1973
234b63a0 1974 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
20524f02 1975 slot = path->slots[i];
5f39d397
CM
1976 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
1977 struct extent_buffer *node;
1978 struct btrfs_disk_key disk_key;
1979 node = path->nodes[i];
20524f02
CM
1980 path->slots[i]++;
1981 *level = i;
9f3a7427 1982 WARN_ON(*level == 0);
5f39d397 1983 btrfs_node_key(node, &disk_key, path->slots[i]);
9f3a7427 1984 memcpy(&root_item->drop_progress,
5f39d397 1985 &disk_key, sizeof(disk_key));
9f3a7427 1986 root_item->drop_level = i;
20524f02
CM
1987 return 0;
1988 } else {
7bb86316
CM
1989 if (path->nodes[*level] == root->node) {
1990 root_owner = root->root_key.objectid;
1991 root_gen =
1992 btrfs_header_generation(path->nodes[*level]);
1993 } else {
1994 struct extent_buffer *node;
1995 node = path->nodes[*level + 1];
1996 root_owner = btrfs_header_owner(node);
1997 root_gen = btrfs_header_generation(node);
1998 }
e089f05c 1999 ret = btrfs_free_extent(trans, root,
db94535d 2000 path->nodes[*level]->start,
7bb86316
CM
2001 path->nodes[*level]->len,
2002 root_owner, root_gen, 0, 0, 1);
6407bf6d 2003 BUG_ON(ret);
5f39d397 2004 free_extent_buffer(path->nodes[*level]);
83e15a28 2005 path->nodes[*level] = NULL;
20524f02 2006 *level = i + 1;
20524f02
CM
2007 }
2008 }
2009 return 1;
2010}
2011
9aca1d51
CM
2012/*
2013 * drop the reference count on the tree rooted at 'snap'. This traverses
2014 * the tree freeing any blocks that have a ref count of zero after being
2015 * decremented.
2016 */
e089f05c 2017int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
9f3a7427 2018 *root)
20524f02 2019{
3768f368 2020 int ret = 0;
9aca1d51 2021 int wret;
20524f02 2022 int level;
5caf2a00 2023 struct btrfs_path *path;
20524f02
CM
2024 int i;
2025 int orig_level;
9f3a7427 2026 struct btrfs_root_item *root_item = &root->root_item;
20524f02 2027
5caf2a00
CM
2028 path = btrfs_alloc_path();
2029 BUG_ON(!path);
20524f02 2030
5f39d397 2031 level = btrfs_header_level(root->node);
20524f02 2032 orig_level = level;
9f3a7427
CM
2033 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2034 path->nodes[level] = root->node;
f510cfec 2035 extent_buffer_get(root->node);
9f3a7427
CM
2036 path->slots[level] = 0;
2037 } else {
2038 struct btrfs_key key;
5f39d397
CM
2039 struct btrfs_disk_key found_key;
2040 struct extent_buffer *node;
6702ed49 2041
9f3a7427 2042 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6702ed49
CM
2043 level = root_item->drop_level;
2044 path->lowest_level = level;
9f3a7427 2045 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6702ed49 2046 if (wret < 0) {
9f3a7427
CM
2047 ret = wret;
2048 goto out;
2049 }
5f39d397
CM
2050 node = path->nodes[level];
2051 btrfs_node_key(node, &found_key, path->slots[level]);
2052 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2053 sizeof(found_key)));
9f3a7427 2054 }
20524f02 2055 while(1) {
5caf2a00 2056 wret = walk_down_tree(trans, root, path, &level);
9aca1d51 2057 if (wret > 0)
20524f02 2058 break;
9aca1d51
CM
2059 if (wret < 0)
2060 ret = wret;
2061
5caf2a00 2062 wret = walk_up_tree(trans, root, path, &level);
9aca1d51 2063 if (wret > 0)
20524f02 2064 break;
9aca1d51
CM
2065 if (wret < 0)
2066 ret = wret;
409eb95d 2067 ret = -EAGAIN;
409eb95d 2068 break;
20524f02 2069 }
83e15a28 2070 for (i = 0; i <= orig_level; i++) {
5caf2a00 2071 if (path->nodes[i]) {
5f39d397 2072 free_extent_buffer(path->nodes[i]);
0f82731f 2073 path->nodes[i] = NULL;
83e15a28 2074 }
20524f02 2075 }
9f3a7427 2076out:
5caf2a00 2077 btrfs_free_path(path);
9aca1d51 2078 return ret;
20524f02 2079}
9078a3e1 2080
96b5179d 2081int btrfs_free_block_groups(struct btrfs_fs_info *info)
9078a3e1 2082{
96b5179d
CM
2083 u64 start;
2084 u64 end;
b97f9203 2085 u64 ptr;
9078a3e1 2086 int ret;
9078a3e1 2087 while(1) {
96b5179d
CM
2088 ret = find_first_extent_bit(&info->block_group_cache, 0,
2089 &start, &end, (unsigned int)-1);
2090 if (ret)
9078a3e1 2091 break;
b97f9203
Y
2092 ret = get_state_private(&info->block_group_cache, start, &ptr);
2093 if (!ret)
2094 kfree((void *)(unsigned long)ptr);
96b5179d
CM
2095 clear_extent_bits(&info->block_group_cache, start,
2096 end, (unsigned int)-1, GFP_NOFS);
9078a3e1 2097 }
e37c9e69 2098 while(1) {
f510cfec
CM
2099 ret = find_first_extent_bit(&info->free_space_cache, 0,
2100 &start, &end, EXTENT_DIRTY);
2101 if (ret)
e37c9e69 2102 break;
f510cfec
CM
2103 clear_extent_dirty(&info->free_space_cache, start,
2104 end, GFP_NOFS);
e37c9e69 2105 }
be744175
CM
2106 return 0;
2107}
2108
edbd8d4e
CM
2109static int relocate_inode_pages(struct inode *inode, u64 start, u64 len)
2110{
2111 u64 page_start;
2112 u64 page_end;
2113 u64 delalloc_start;
2114 u64 existing_delalloc;
2115 unsigned long last_index;
2116 unsigned long first_index;
2117 unsigned long i;
2118 struct page *page;
2119 struct btrfs_root *root = BTRFS_I(inode)->root;
2120 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2121 struct file_ra_state ra;
2122
2123 mutex_lock(&inode->i_mutex);
2124 first_index = start >> PAGE_CACHE_SHIFT;
2125 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2126
2127 memset(&ra, 0, sizeof(ra));
2128 file_ra_state_init(&ra, inode->i_mapping);
2129 btrfs_force_ra(inode->i_mapping, &ra, NULL, first_index, last_index);
2130
2131 for (i = first_index; i <= last_index; i++) {
2132 page = grab_cache_page(inode->i_mapping, i);
2133 if (!page)
2134 goto out_unlock;
2135 if (!PageUptodate(page)) {
2136 btrfs_readpage(NULL, page);
2137 lock_page(page);
2138 if (!PageUptodate(page)) {
2139 unlock_page(page);
2140 page_cache_release(page);
2141 goto out_unlock;
2142 }
2143 }
2144 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2145 page_end = page_start + PAGE_CACHE_SIZE - 1;
2146
2147 lock_extent(em_tree, page_start, page_end, GFP_NOFS);
2148
2149 delalloc_start = page_start;
2150 existing_delalloc =
2151 count_range_bits(&BTRFS_I(inode)->extent_tree,
2152 &delalloc_start, page_end,
2153 PAGE_CACHE_SIZE, EXTENT_DELALLOC);
2154
2155 set_extent_delalloc(em_tree, page_start,
2156 page_end, GFP_NOFS);
2157
2158 spin_lock(&root->fs_info->delalloc_lock);
2159 root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE -
2160 existing_delalloc;
2161 spin_unlock(&root->fs_info->delalloc_lock);
2162
2163 unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
2164 set_page_dirty(page);
2165 unlock_page(page);
2166 page_cache_release(page);
2167 }
2168
2169out_unlock:
2170 mutex_unlock(&inode->i_mutex);
2171 return 0;
2172}
2173
2174static int relocate_one_reference(struct btrfs_root *extent_root,
2175 struct btrfs_path *path,
2176 struct btrfs_key *extent_key,
2177 u64 ref_root, u64 ref_gen, u64 ref_objectid,
2178 u64 ref_offset)
2179{
2180 struct inode *inode;
2181 struct btrfs_root *found_root;
2182 struct btrfs_key root_location;
2183 int ret;
2184
2185 root_location.objectid = ref_root;
2186 if (ref_gen == 0)
2187 root_location.offset = 0;
2188 else
2189 root_location.offset = (u64)-1;
2190 root_location.type = BTRFS_ROOT_ITEM_KEY;
2191
2192 found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
2193 &root_location);
2194 BUG_ON(!found_root);
2195
2196 if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2197 mutex_unlock(&extent_root->fs_info->fs_mutex);
2198 inode = btrfs_iget_locked(extent_root->fs_info->sb,
2199 ref_objectid, found_root);
2200 if (inode->i_state & I_NEW) {
2201 /* the inode and parent dir are two different roots */
2202 BTRFS_I(inode)->root = found_root;
2203 BTRFS_I(inode)->location.objectid = ref_objectid;
2204 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
2205 BTRFS_I(inode)->location.offset = 0;
2206 btrfs_read_locked_inode(inode);
2207 unlock_new_inode(inode);
2208
2209 }
2210 /* this can happen if the reference is not against
2211 * the latest version of the tree root
2212 */
2213 if (is_bad_inode(inode)) {
2214 mutex_lock(&extent_root->fs_info->fs_mutex);
2215 goto out;
2216 }
2217 relocate_inode_pages(inode, ref_offset, extent_key->offset);
2218 /* FIXME, data=ordered will help get rid of this */
2219 filemap_fdatawrite(inode->i_mapping);
2220 iput(inode);
2221 mutex_lock(&extent_root->fs_info->fs_mutex);
2222 } else {
2223 struct btrfs_trans_handle *trans;
2224 struct btrfs_key found_key;
2225 struct extent_buffer *eb;
2226 int level;
2227 int i;
2228
2229 trans = btrfs_start_transaction(found_root, 1);
2230 eb = read_tree_block(found_root, extent_key->objectid,
2231 extent_key->offset);
2232 level = btrfs_header_level(eb);
2233
2234 if (level == 0)
2235 btrfs_item_key_to_cpu(eb, &found_key, 0);
2236 else
2237 btrfs_node_key_to_cpu(eb, &found_key, 0);
2238
2239 free_extent_buffer(eb);
2240
2241 path->lowest_level = level;
8f662a76 2242 path->reada = 2;
edbd8d4e
CM
2243 ret = btrfs_search_slot(trans, found_root, &found_key, path,
2244 0, 1);
2245 path->lowest_level = 0;
2246 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2247 if (!path->nodes[i])
2248 break;
2249 free_extent_buffer(path->nodes[i]);
2250 path->nodes[i] = NULL;
2251 }
2252 btrfs_release_path(found_root, path);
2253 btrfs_end_transaction(trans, found_root);
2254 }
2255
2256out:
2257 return 0;
2258}
2259
2260static int relocate_one_extent(struct btrfs_root *extent_root,
2261 struct btrfs_path *path,
2262 struct btrfs_key *extent_key)
2263{
2264 struct btrfs_key key;
2265 struct btrfs_key found_key;
2266 struct btrfs_extent_ref *ref;
2267 struct extent_buffer *leaf;
2268 u64 ref_root;
2269 u64 ref_gen;
2270 u64 ref_objectid;
2271 u64 ref_offset;
2272 u32 nritems;
2273 u32 item_size;
2274 int ret = 0;
2275
2276 key.objectid = extent_key->objectid;
2277 key.type = BTRFS_EXTENT_REF_KEY;
2278 key.offset = 0;
2279
2280 while(1) {
2281 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2282
2283 BUG_ON(ret == 0);
2284
2285 if (ret < 0)
2286 goto out;
2287
2288 ret = 0;
2289 leaf = path->nodes[0];
2290 nritems = btrfs_header_nritems(leaf);
2291 if (path->slots[0] == nritems)
2292 goto out;
2293
2294 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2295 if (found_key.objectid != extent_key->objectid)
2296 break;
2297
2298 if (found_key.type != BTRFS_EXTENT_REF_KEY)
2299 break;
2300
2301 key.offset = found_key.offset + 1;
2302 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2303
2304 ref = btrfs_item_ptr(leaf, path->slots[0],
2305 struct btrfs_extent_ref);
2306 ref_root = btrfs_ref_root(leaf, ref);
2307 ref_gen = btrfs_ref_generation(leaf, ref);
2308 ref_objectid = btrfs_ref_objectid(leaf, ref);
2309 ref_offset = btrfs_ref_offset(leaf, ref);
2310 btrfs_release_path(extent_root, path);
2311
2312 ret = relocate_one_reference(extent_root, path,
2313 extent_key, ref_root, ref_gen,
2314 ref_objectid, ref_offset);
2315 if (ret)
2316 goto out;
2317 }
2318 ret = 0;
2319out:
2320 btrfs_release_path(extent_root, path);
2321 return ret;
2322}
2323
2324static int find_overlapping_extent(struct btrfs_root *root,
2325 struct btrfs_path *path, u64 new_size)
2326{
2327 struct btrfs_key found_key;
2328 struct extent_buffer *leaf;
2329 int ret;
2330
2331 while(1) {
2332 if (path->slots[0] == 0) {
2333 ret = btrfs_prev_leaf(root, path);
2334 if (ret == 1) {
2335 return 1;
2336 }
2337 if (ret < 0)
2338 return ret;
2339 } else {
2340 path->slots[0]--;
2341 }
2342 leaf = path->nodes[0];
2343 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2344 if (found_key.type == BTRFS_EXTENT_ITEM_KEY) {
2345 if (found_key.objectid + found_key.offset > new_size)
2346 return 0;
2347 else
2348 return 1;
2349 }
2350 }
2351 return 1;
2352}
2353
2354int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
2355{
2356 struct btrfs_trans_handle *trans;
2357 struct btrfs_root *tree_root = root->fs_info->tree_root;
2358 struct btrfs_path *path;
2359 u64 cur_byte;
2360 u64 total_found;
2361 u64 ptr;
2362 struct btrfs_fs_info *info = root->fs_info;
2363 struct extent_map_tree *block_group_cache;
2364 struct btrfs_key key;
2365 struct btrfs_key found_key = { 0, 0, 0 };
2366 struct extent_buffer *leaf;
2367 u32 nritems;
2368 int ret;
2369 int slot;
2370
2371 btrfs_set_super_total_bytes(&info->super_copy, new_size);
2372 block_group_cache = &info->block_group_cache;
2373 path = btrfs_alloc_path();
2374 root = root->fs_info->extent_root;
8f662a76 2375 path->reada = 2;
edbd8d4e
CM
2376
2377again:
2378 total_found = 0;
2379 key.objectid = new_size;
2380 cur_byte = key.objectid;
2381 key.offset = 0;
2382 key.type = 0;
2383 while(1) {
2384 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2385 if (ret < 0)
2386 goto out;
2387next:
2388 leaf = path->nodes[0];
2389 if (key.objectid == new_size - 1) {
2390 ret = find_overlapping_extent(root, path, new_size);
2391 if (ret != 0) {
2392 btrfs_release_path(root, path);
2393 ret = btrfs_search_slot(NULL, root, &key,
2394 path, 0, 0);
2395 if (ret < 0)
2396 goto out;
2397 }
2398 }
2399 nritems = btrfs_header_nritems(leaf);
2400 ret = 0;
2401 slot = path->slots[0];
2402 if (slot < nritems)
2403 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2404 if (slot == nritems ||
2405 btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY) {
2406 path->slots[0]++;
2407 if (path->slots[0] >= nritems) {
2408 ret = btrfs_next_leaf(root, path);
2409 if (ret < 0)
2410 goto out;
2411 if (ret == 1) {
2412 ret = 0;
2413 break;
2414 }
2415 }
2416 goto next;
2417 }
2418 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2419 if (found_key.objectid + found_key.offset <= cur_byte)
2420 continue;
2421 total_found++;
2422 cur_byte = found_key.objectid + found_key.offset;
2423 key.objectid = cur_byte;
2424 btrfs_release_path(root, path);
2425 ret = relocate_one_extent(root, path, &found_key);
2426 }
2427
2428 btrfs_release_path(root, path);
2429
2430 if (total_found > 0) {
2431 trans = btrfs_start_transaction(tree_root, 1);
2432 btrfs_commit_transaction(trans, tree_root);
2433
2434 mutex_unlock(&root->fs_info->fs_mutex);
2435 btrfs_clean_old_snapshots(tree_root);
2436 mutex_lock(&root->fs_info->fs_mutex);
2437
2438 trans = btrfs_start_transaction(tree_root, 1);
2439 btrfs_commit_transaction(trans, tree_root);
2440 goto again;
2441 }
2442
2443 trans = btrfs_start_transaction(root, 1);
2444 key.objectid = new_size;
2445 key.offset = 0;
2446 key.type = 0;
2447 while(1) {
2448 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2449 if (ret < 0)
2450 goto out;
2451bg_next:
2452 leaf = path->nodes[0];
2453 nritems = btrfs_header_nritems(leaf);
2454 ret = 0;
2455 slot = path->slots[0];
2456 if (slot < nritems)
2457 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2458 if (slot == nritems ||
2459 btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) {
2460 if (slot < nritems) {
2461 printk("shrinker found key %Lu %u %Lu\n",
2462 found_key.objectid, found_key.type,
2463 found_key.offset);
2464 path->slots[0]++;
2465 }
2466 if (path->slots[0] >= nritems) {
2467 ret = btrfs_next_leaf(root, path);
2468 if (ret < 0)
2469 break;
2470 if (ret == 1) {
2471 ret = 0;
2472 break;
2473 }
2474 }
2475 goto bg_next;
2476 }
2477 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2478 ret = get_state_private(&info->block_group_cache,
2479 found_key.objectid, &ptr);
2480 if (!ret)
2481 kfree((void *)(unsigned long)ptr);
2482
2483 clear_extent_bits(&info->block_group_cache, found_key.objectid,
2484 found_key.objectid + found_key.offset - 1,
2485 (unsigned int)-1, GFP_NOFS);
2486
2487 key.objectid = found_key.objectid + 1;
2488 btrfs_del_item(trans, root, path);
2489 btrfs_release_path(root, path);
2490 }
2491 clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
2492 GFP_NOFS);
2493 btrfs_commit_transaction(trans, root);
2494out:
2495 btrfs_free_path(path);
2496 return ret;
2497}
2498
2499int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
2500 struct btrfs_root *root, u64 new_size)
2501{
2502 struct btrfs_path *path;
2503 u64 nr = 0;
2504 u64 cur_byte;
2505 u64 old_size;
2506 struct btrfs_block_group_cache *cache;
2507 struct btrfs_block_group_item *item;
2508 struct btrfs_fs_info *info = root->fs_info;
2509 struct extent_map_tree *block_group_cache;
2510 struct btrfs_key key;
2511 struct extent_buffer *leaf;
2512 int ret;
2513 int bit;
2514
2515 old_size = btrfs_super_total_bytes(&info->super_copy);
2516 block_group_cache = &info->block_group_cache;
2517
2518 root = info->extent_root;
2519
2520 cache = btrfs_lookup_block_group(root->fs_info, old_size - 1);
2521
2522 cur_byte = cache->key.objectid + cache->key.offset;
2523 if (cur_byte >= new_size)
2524 goto set_size;
2525
2526 key.offset = BTRFS_BLOCK_GROUP_SIZE;
2527 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2528
2529 path = btrfs_alloc_path();
2530 if (!path)
2531 return -ENOMEM;
2532
2533 while(cur_byte < new_size) {
2534 key.objectid = cur_byte;
2535 ret = btrfs_insert_empty_item(trans, root, path, &key,
2536 sizeof(struct btrfs_block_group_item));
2537 BUG_ON(ret);
2538 leaf = path->nodes[0];
2539 item = btrfs_item_ptr(leaf, path->slots[0],
2540 struct btrfs_block_group_item);
2541
2542 btrfs_set_disk_block_group_used(leaf, item, 0);
2543 if (nr % 3) {
2544 btrfs_set_disk_block_group_flags(leaf, item,
2545 BTRFS_BLOCK_GROUP_DATA);
2546 } else {
2547 btrfs_set_disk_block_group_flags(leaf, item, 0);
2548 }
2549 nr++;
2550
2551 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2552 BUG_ON(!cache);
2553
2554 read_extent_buffer(leaf, &cache->item, (unsigned long)item,
2555 sizeof(cache->item));
2556
2557 memcpy(&cache->key, &key, sizeof(key));
2558 cache->cached = 0;
2559 cache->pinned = 0;
2560 cur_byte = key.objectid + key.offset;
2561 btrfs_release_path(root, path);
2562
2563 if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
2564 bit = BLOCK_GROUP_DATA;
2565 cache->data = BTRFS_BLOCK_GROUP_DATA;
2566 } else {
2567 bit = BLOCK_GROUP_METADATA;
2568 cache->data = 0;
2569 }
2570
2571 /* use EXTENT_LOCKED to prevent merging */
2572 set_extent_bits(block_group_cache, key.objectid,
2573 key.objectid + key.offset - 1,
2574 bit | EXTENT_LOCKED, GFP_NOFS);
2575 set_state_private(block_group_cache, key.objectid,
2576 (unsigned long)cache);
2577 }
2578 btrfs_free_path(path);
2579set_size:
2580 btrfs_set_super_total_bytes(&info->super_copy, new_size);
2581 return 0;
2582}
2583
9078a3e1
CM
2584int btrfs_read_block_groups(struct btrfs_root *root)
2585{
2586 struct btrfs_path *path;
2587 int ret;
2588 int err = 0;
96b5179d 2589 int bit;
9078a3e1 2590 struct btrfs_block_group_cache *cache;
be744175 2591 struct btrfs_fs_info *info = root->fs_info;
96b5179d 2592 struct extent_map_tree *block_group_cache;
9078a3e1
CM
2593 struct btrfs_key key;
2594 struct btrfs_key found_key;
5f39d397 2595 struct extent_buffer *leaf;
96b5179d
CM
2596
2597 block_group_cache = &info->block_group_cache;
9078a3e1 2598
be744175 2599 root = info->extent_root;
9078a3e1 2600 key.objectid = 0;
db94535d 2601 key.offset = BTRFS_BLOCK_GROUP_SIZE;
9078a3e1
CM
2602 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2603
2604 path = btrfs_alloc_path();
2605 if (!path)
2606 return -ENOMEM;
2607
2608 while(1) {
be744175 2609 ret = btrfs_search_slot(NULL, info->extent_root,
9078a3e1
CM
2610 &key, path, 0, 0);
2611 if (ret != 0) {
2612 err = ret;
2613 break;
2614 }
5f39d397
CM
2615 leaf = path->nodes[0];
2616 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9078a3e1
CM
2617 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2618 if (!cache) {
2619 err = -1;
2620 break;
2621 }
3e1ad54f 2622
5f39d397
CM
2623 read_extent_buffer(leaf, &cache->item,
2624 btrfs_item_ptr_offset(leaf, path->slots[0]),
2625 sizeof(cache->item));
9078a3e1 2626 memcpy(&cache->key, &found_key, sizeof(found_key));
e37c9e69 2627 cache->cached = 0;
324ae4df 2628 cache->pinned = 0;
9078a3e1
CM
2629 key.objectid = found_key.objectid + found_key.offset;
2630 btrfs_release_path(root, path);
5f39d397 2631
f84a8b36
CM
2632 if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) {
2633 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
2634 cache->data = BTRFS_BLOCK_GROUP_MIXED;
2635 } else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
96b5179d 2636 bit = BLOCK_GROUP_DATA;
f84a8b36 2637 cache->data = BTRFS_BLOCK_GROUP_DATA;
96b5179d
CM
2638 } else {
2639 bit = BLOCK_GROUP_METADATA;
2640 cache->data = 0;
31f3c99b 2641 }
96b5179d
CM
2642
2643 /* use EXTENT_LOCKED to prevent merging */
2644 set_extent_bits(block_group_cache, found_key.objectid,
2645 found_key.objectid + found_key.offset - 1,
2646 bit | EXTENT_LOCKED, GFP_NOFS);
2647 set_state_private(block_group_cache, found_key.objectid,
ae2f5411 2648 (unsigned long)cache);
96b5179d 2649
9078a3e1 2650 if (key.objectid >=
db94535d 2651 btrfs_super_total_bytes(&info->super_copy))
9078a3e1
CM
2652 break;
2653 }
2654
2655 btrfs_free_path(path);
2656 return 0;
2657}