2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
19 #include <linux/highmem.h>
22 #include "transaction.h"
23 #include "print-tree.h"
/*
 * Forward declarations of static helpers that are called earlier in the
 * file than they are defined.  split_node(), push_node_left() and
 * balance_node_right() are defined further down; the definitions of
 * split_leaf() and del_ptr() are not part of the visible listing.
 */
25 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
26 *root, struct btrfs_path *path, int level);
27 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
28 *root, struct btrfs_key *ins_key,
29 struct btrfs_path *path, int data_size);
30 static int push_node_left(struct btrfs_trans_handle *trans,
31 struct btrfs_root *root, struct extent_buffer *dst,
32 struct extent_buffer *src);
33 static int balance_node_right(struct btrfs_trans_handle *trans,
34 struct btrfs_root *root,
35 struct extent_buffer *dst_buf,
36 struct extent_buffer *src_buf);
37 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
38 struct btrfs_path *path, int level, int slot);
/* Put path @p into its initial state by zeroing the whole structure. */
40 inline void btrfs_init_path(struct btrfs_path *p)
42 memset(p, 0, sizeof(*p));
/*
 * Allocate a btrfs_path from btrfs_path_cachep (GFP_NOFS) and initialise it
 * with btrfs_init_path().
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this listing -- confirm against the full source.
 */
45 struct btrfs_path *btrfs_alloc_path(void)
47 struct btrfs_path *path;
48 path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
50 btrfs_init_path(path);
/*
 * Release everything held by path @p and hand the structure back to
 * btrfs_path_cachep.  @p must not be used after this call.
 */
56 void btrfs_free_path(struct btrfs_path *p)
/* root is passed as NULL; the visible body of btrfs_release_path() never reads it */
58 btrfs_release_path(NULL, p);
59 kmem_cache_free(btrfs_path_cachep, p);
/*
 * Drop the extent buffers stored in p->nodes[] and zero the path so it can
 * be reused for another search.  @root is not referenced by the visible code.
 * NOTE(review): the lines between the loop header and free_extent_buffer()
 * are missing from this listing (presumably a check for an empty slot) --
 * confirm against the full source.
 */
62 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
65 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
68 free_extent_buffer(p->nodes[i]);
/* same reset as btrfs_init_path(): clears both nodes[] and slots[] */
70 memset(p, 0, sizeof(*p));
/*
 * Copy tree block @buf into a freshly allocated block and relink the tree
 * to the copy.
 *
 * @buf:          block to copy
 * @parent:       node holding the pointer to @buf (used when @buf is not
 *                root->node)
 * @parent_slot:  slot in @parent whose block pointer is redirected
 * @cow_ret:      out parameter for the copy
 * @search_start: passed to btrfs_alloc_free_block() as the allocation hint
 * @empty_size:   passed through to btrfs_alloc_free_block()
 *
 * NOTE(review): several lines are missing from this listing, including the
 * error checks, the store into *cow_ret, the assignment of root->node and
 * the return statements -- confirm details against the full source.
 */
73 static int __btrfs_cow_block(struct btrfs_trans_handle *trans,
74 struct btrfs_root *root,
75 struct extent_buffer *buf,
76 struct extent_buffer *parent, int parent_slot,
77 struct extent_buffer **cow_ret,
78 u64 search_start, u64 empty_size)
80 struct extent_buffer *cow;
82 int different_trans = 0;
/* warn if a reference-counted root is modified by a transaction other than root->last_trans */
84 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
86 cow = btrfs_alloc_free_block(trans, root, search_start, empty_size);
/* both blocks are compared against root->sectorsize; the consequence is not visible here */
90 if (buf->len != root->sectorsize || cow->len != root->sectorsize)
/* duplicate the whole block, then stamp the header fields that must differ in the copy */
93 copy_extent_buffer(cow, buf, 0, 0, cow->len);
94 btrfs_set_header_blocknr(cow, extent_buffer_blocknr(cow));
95 btrfs_set_header_generation(cow, trans->transid);
96 btrfs_set_header_owner(cow, root->root_key.objectid);
/* a block can never carry a generation newer than the running transaction */
98 WARN_ON(btrfs_header_generation(buf) > trans->transid);
/* buf was written by a different transaction: btrfs_inc_ref() is called on it */
99 if (btrfs_header_generation(buf) != trans->transid) {
101 ret = btrfs_inc_ref(trans, root, buf);
/* presumably the same-transaction branch -- TODO confirm, the else line is missing */
105 clean_tree_block(trans, root, buf);
/* case 1: the block being copied is the root of the tree */
108 if (buf == root->node) {
110 extent_buffer_get(cow);
/* the old block's extent is only freed when it is not the committed root */
111 if (buf != root->commit_root) {
112 btrfs_free_extent(trans, root,
113 extent_buffer_blocknr(buf), 1, 1);
115 free_extent_buffer(buf);
/* case 2 (presumably the else branch): point the parent slot at the copy */
117 btrfs_set_node_blockptr(parent, parent_slot,
118 extent_buffer_blocknr(cow));
119 btrfs_mark_buffer_dirty(parent);
120 WARN_ON(btrfs_header_generation(parent) != trans->transid);
121 btrfs_free_extent(trans, root, extent_buffer_blocknr(buf),1,1);
123 free_extent_buffer(buf);
124 btrfs_mark_buffer_dirty(cow);
/*
 * Public entry point for copying a tree block: runs transaction sanity
 * checks and then delegates to __btrfs_cow_block() with empty_size 0.
 *
 * @buf:         block to copy
 * @parent:      node that points at @buf
 * @parent_slot: slot of @buf inside @parent
 * @cow_ret:     out parameter forwarded to __btrfs_cow_block()
 *
 * Returns whatever __btrfs_cow_block() returns on the path that reaches it.
 * NOTE(review): the bodies that follow the printk() calls and the
 * generation == transid test are missing from this listing.
 */
129 int btrfs_cow_block(struct btrfs_trans_handle *trans,
130 struct btrfs_root *root, struct extent_buffer *buf,
131 struct extent_buffer *parent, int parent_slot,
132 struct extent_buffer **cow_ret)
/* the handle must belong to the transaction that is currently running */
135 if (trans->transaction != root->fs_info->running_transaction) {
136 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
137 root->fs_info->running_transaction->transid);
/* and its id must match the filesystem-wide generation */
140 if (trans->transid != root->fs_info->generation) {
141 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
142 root->fs_info->generation);
/* block already carries this transaction's generation (handling not visible here) */
145 if (btrfs_header_generation(buf) == trans->transid) {
/* allocation hint: the old block number with its low 16 bits cleared */
150 search_start = extent_buffer_blocknr(buf) & ~((u64)65535);
151 return __btrfs_cow_block(trans, root, buf, parent,
152 parent_slot, cow_ret, search_start, 0);
/*
 * Test whether two block numbers are different but less than 8 apart, in
 * either direction.
 * NOTE(review): the return statements are missing from this listing;
 * callers use the result as a boolean, so presumably non-zero means
 * "close" -- confirm.
 */
155 static int close_blocks(u64 blocknr, u64 other)
/* other lies after blocknr, within 8 blocks */
157 if (blocknr < other && other - blocknr < 8)
/* other lies before blocknr, within 8 blocks */
159 if (blocknr > other && blocknr - other < 8)
/*
 * Inspect leaf @eb and test the defrag flag plus the key type of its first,
 * last and middle items against BTRFS_DIR_ITEM_KEY.
 * NOTE(review): the visible code refers to `bh`, which is neither a
 * parameter nor a visible local, and it accesses the leaf through a
 * struct btrfs_leaf pointer instead of the extent_buffer accessors used
 * elsewhere in this file.  This looks like code left over from an older
 * buffer API (possibly compiled out) -- confirm.  The return statements are
 * also missing from this listing.
 */
165 static int should_defrag_leaf(struct extent_buffer *eb)
168 struct btrfs_leaf *leaf = btrfs_buffer_leaf(eb);
169 struct btrfs_disk_key *key;
172 if (buffer_defrag(bh))
175 nritems = btrfs_header_nritems(&leaf->header);
/* first item */
179 key = &leaf->items[0].key;
180 if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY)
/* last item */
183 key = &leaf->items[nritems-1].key;
184 if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY)
/* middle item */
187 key = &leaf->items[nritems/2].key;
188 if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY)
/*
 * Walk the block pointers of node @parent and re-copy children through
 * __btrfs_cow_block(), using the previously allocated block as the next
 * allocation hint.
 *
 * @parent:     node whose children are examined
 * @cache_only: not referenced by the visible lines
 * @last_ret:   in: initial allocation hint; out: block number of the most
 *              recently written copy
 *
 * NOTE(review): the locals are declared as extent_buffer pointers (cur_eb,
 * tmp_eb) but the body uses cur_bh/tmp_bh and buffer-flag helpers
 * (buffer_uptodate, buffer_locked, bh_blocknr).  This looks like code left
 * over from an older buffer API, possibly compiled out -- confirm.  Many
 * lines, including all continue/return statements, are missing here.
 */
195 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
196 struct btrfs_root *root, struct extent_buffer *parent,
197 int cache_only, u64 *last_ret)
201 struct btrfs_node *parent_node;
202 struct extent_buffer *cur_eb;
203 struct extent_buffer *tmp_eb;
205 u64 search_start = *last_ret;
/* same transaction sanity checks as btrfs_cow_block() */
215 if (trans->transaction != root->fs_info->running_transaction) {
216 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
217 root->fs_info->running_transaction->transid);
220 if (trans->transid != root->fs_info->generation) {
221 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
222 root->fs_info->generation);
225 if (buffer_defrag_done(parent))
228 parent_node = btrfs_buffer_node(parent);
229 parent_nritems = btrfs_header_nritems(&parent_node->header);
230 parent_level = btrfs_header_level(&parent_node->header);
233 end_slot = parent_nritems;
235 if (parent_nritems == 1)
238 for (i = start_slot; i < end_slot; i++) {
240 blocknr = btrfs_node_blockptr(parent_node, i);
242 last_block = blocknr;
/* compare against the previous slot's block ... */
244 other = btrfs_node_blockptr(parent_node, i - 1);
245 close = close_blocks(blocknr, other);
/* ... and, if still close and not on the last slot, against the next one */
247 if (close && i < end_slot - 1) {
248 other = btrfs_node_blockptr(parent_node, i + 1);
249 close = close_blocks(blocknr, other);
252 last_block = blocknr;
256 cur_bh = btrfs_find_tree_block(root, blocknr);
/* level-1 parents have leaf children, which are judged by should_defrag_leaf() */
257 if (!cur_bh || !buffer_uptodate(cur_bh) ||
258 buffer_locked(cur_bh) ||
259 (parent_level != 1 && !buffer_defrag(cur_bh)) ||
260 (parent_level == 1 && !should_defrag_leaf(cur_bh))) {
265 if (!cur_bh || !buffer_uptodate(cur_bh) ||
266 buffer_locked(cur_bh)) {
268 cur_bh = read_tree_block(root, blocknr);
/* no hint yet: start from last_block with its low 16 bits cleared */
271 if (search_start == 0)
272 search_start = last_block & ~((u64)65535);
/* empty_size argument is capped at 8 and shrinks near the end of the node */
274 err = __btrfs_cow_block(trans, root, cur_bh, parent, i,
275 &tmp_bh, search_start,
276 min(8, end_slot - i));
/* the next allocation starts from the block that was just written */
281 search_start = bh_blocknr(tmp_bh);
282 *last_ret = search_start;
283 if (parent_level == 1)
284 clear_buffer_defrag(tmp_bh);
285 set_buffer_defrag_done(tmp_bh);
293 * The leaf data grows from end-to-front in the node.
294 * this returns the address of the start of the last item,
295 * which is the stop of the leaf data stack
/*
 * Return the offset at which the data area of @leaf currently ends: the
 * offset of the last item, or BTRFS_LEAF_DATA_SIZE(root) on the other path.
 * NOTE(review): the condition guarding the first return is missing from
 * this listing (presumably nr == 0, i.e. an empty leaf) -- confirm.
 */
297 static inline unsigned int leaf_data_end(struct btrfs_root *root,
298 struct extent_buffer *leaf)
300 u32 nr = btrfs_header_nritems(leaf);
302 return BTRFS_LEAF_DATA_SIZE(root);
/* the offset of the last item is the lowest used byte of the data area */
303 return btrfs_item_offset_nr(leaf, nr - 1);
307 * compare two keys in a memcmp fashion
/*
 * Three-way comparison of on-disk key @disk against CPU-order key @k2.
 * The disk key is first converted with btrfs_disk_key_to_cpu(); fields are
 * then compared in the order objectid, type, offset, so objectid is the
 * most significant.
 * NOTE(review): the return statements are missing from this listing;
 * callers test the result with <= 0 and >= 0, so presumably the result is
 * positive, negative or zero like memcmp() -- confirm.
 */
309 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
313 btrfs_disk_key_to_cpu(&k1, disk);
315 if (k1.objectid > k2->objectid)
317 if (k1.objectid < k2->objectid)
319 if (k1.type > k2->type)
321 if (k1.type < k2->type)
323 if (k1.offset > k2->offset)
325 if (k1.offset < k2->offset)
/*
 * Consistency check for the interior node at path->nodes[level].  Every
 * violation triggers BUG_ON().  Checked invariants:
 *  - the node is not empty and holds at most BTRFS_NODEPTRS_PER_BLOCK keys
 *  - the parent's key and block pointer for this node match the node's
 *    first key and its header block number
 *  - the key at the current slot sorts strictly between its neighbours
 * NOTE(review): the guards around the parent check and the slot - 1 check
 * are missing from this listing, as is the return statement.
 */
330 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
333 struct extent_buffer *parent = NULL;
334 struct extent_buffer *node = path->nodes[level];
335 struct btrfs_disk_key parent_key;
336 struct btrfs_disk_key node_key;
339 struct btrfs_key cpukey;
340 u32 nritems = btrfs_header_nritems(node);
342 if (path->nodes[level + 1])
343 parent = path->nodes[level + 1];
345 slot = path->slots[level];
346 BUG_ON(nritems == 0);
/* parent linkage: key in the parent slot must equal our first key */
348 parent_slot = path->slots[level + 1];
349 btrfs_node_key(parent, &parent_key, parent_slot);
350 btrfs_node_key(node, &node_key, 0);
351 BUG_ON(memcmp(&parent_key, &node_key,
352 sizeof(struct btrfs_disk_key)));
353 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
354 btrfs_header_blocknr(node));
356 BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
/* key at slot must be greater than the key before it */
358 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
359 btrfs_node_key(node, &node_key, slot);
360 BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
/* and smaller than the key after it, when there is one */
362 if (slot < nritems - 1) {
363 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
364 btrfs_node_key(node, &node_key, slot);
365 BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
/*
 * Consistency check for the leaf at path->nodes[level].  Verifies the
 * parent linkage, the key ordering and the layout of the item data area;
 * on a layout problem the leaf is dumped with btrfs_print_leaf() and a
 * diagnostic is printed.
 * Note that the slot examined is always path->slots[0], independent of
 * @level.
 * NOTE(review): the statements following each printk() and several guard
 * conditions are missing from this listing.
 */
370 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
373 struct extent_buffer *leaf = path->nodes[level];
374 struct extent_buffer *parent = NULL;
376 struct btrfs_key cpukey;
377 struct btrfs_disk_key parent_key;
378 struct btrfs_disk_key leaf_key;
379 int slot = path->slots[0];
381 u32 nritems = btrfs_header_nritems(leaf);
383 if (path->nodes[level + 1])
384 parent = path->nodes[level + 1];
/* parent linkage: parent key and block pointer must describe this leaf */
390 parent_slot = path->slots[level + 1];
391 btrfs_node_key(parent, &parent_key, parent_slot);
392 btrfs_item_key(leaf, &leaf_key, 0);
394 BUG_ON(memcmp(&parent_key, &leaf_key,
395 sizeof(struct btrfs_disk_key)));
396 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
397 btrfs_header_blocknr(leaf));
/* full scan; the nritems > 1 test keeps the unsigned nritems - 2 from wrapping for 0 or 1 items */
400 for (i = 0; nritems > 1 && i < nritems - 2; i++) {
401 btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
402 btrfs_item_key(leaf, &leaf_key, i);
/* keys must be strictly increasing */
403 if (comp_keys(&leaf_key, &cpukey) >= 0) {
404 btrfs_print_leaf(root, leaf);
405 printk("slot %d offset bad key\n", i);
/* data of item i must start exactly where the data of item i + 1 ends */
408 if (btrfs_item_offset_nr(leaf, i) !=
409 btrfs_item_end_nr(leaf, i + 1)) {
410 btrfs_print_leaf(root, leaf);
411 printk("slot %d offset bad\n", i);
/* item data is expected to end at BTRFS_LEAF_DATA_SIZE (guard for this test not visible) */
415 if (btrfs_item_offset_nr(leaf, i) +
416 btrfs_item_size_nr(leaf, i) !=
417 BTRFS_LEAF_DATA_SIZE(root)) {
418 btrfs_print_leaf(root, leaf);
419 printk("slot %d first offset bad\n", i);
/* sanity limit on the size of the last item (hard-coded 4096) */
425 if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
426 btrfs_print_leaf(root, leaf);
427 printk("slot %d bad size \n", nritems - 1);
/* targeted checks around the path's slot: compare with the previous item */
432 if (slot != 0 && slot < nritems - 1) {
433 btrfs_item_key(leaf, &leaf_key, slot);
434 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
435 if (comp_keys(&leaf_key, &cpukey) <= 0) {
436 btrfs_print_leaf(root, leaf);
437 printk("slot %d offset bad key\n", slot);
440 if (btrfs_item_offset_nr(leaf, slot - 1) !=
441 btrfs_item_end_nr(leaf, slot)) {
442 btrfs_print_leaf(root, leaf);
443 printk("slot %d offset bad\n", slot);
/* ... and with the next item */
447 if (slot < nritems - 1) {
448 btrfs_item_key(leaf, &leaf_key, slot);
449 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
450 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
451 if (btrfs_item_offset_nr(leaf, slot) !=
452 btrfs_item_end_nr(leaf, slot + 1)) {
453 btrfs_print_leaf(root, leaf);
454 printk("slot %d offset bad\n", slot);
/* the first item's data must end exactly at the end of the data area */
458 BUG_ON(btrfs_item_offset_nr(leaf, 0) +
459 btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
/*
 * Sanity-check the block at path->nodes[level]: compare the fsid stored in
 * its header with root->fs_info->fsid, print diagnostics on a mismatch, and
 * then run check_leaf() or check_node() on it.
 * NOTE(review): the condition selecting between check_leaf() and
 * check_node() is missing from this listing (presumably level == 0) --
 * confirm.
 */
463 static int check_block(struct btrfs_root *root, struct btrfs_path *path,
466 struct extent_buffer *buf = path->nodes[level];
467 char fsid[BTRFS_FSID_SIZE];
/* copy the header fsid out of the extent buffer into a local array */
469 read_extent_buffer(buf, fsid, (unsigned long)btrfs_header_fsid(buf),
472 if (memcmp(fsid, root->fs_info->fsid, BTRFS_FSID_SIZE)) {
474 printk("warning bad block %Lu\n", buf->start);
475 if (!btrfs_buffer_uptodate(buf)) {
/* dump expected:found byte pairs of the two fsids */
478 for (i = 0; i < BTRFS_FSID_SIZE; i++) {
479 printk("%x:%x ", root->fs_info->fsid[i], fsid[i]);
485 return check_leaf(root, path, level);
486 return check_node(root, path, level);
490 * search for key in the extent_buffer. The items start at offset p,
491 * and they are item_size apart. There are 'max' items in p.
493 * the slot in the array is returned via slot, and it points to
494 * the place where you would insert key if it is not found in
497 * slot may point to max if the key is bigger than all of the keys
/*
 * Binary search over fixed-size records stored inside extent buffer @eb.
 *
 * @eb:        buffer holding the records
 * @p:         byte offset of the first record inside @eb
 * @item_size: distance in bytes between consecutive records
 * @key:       key to look for
 *
 * Each record is expected to begin with a struct btrfs_disk_key, which is
 * compared against @key with comp_keys().  Keys are read through a mapping
 * of the extent buffer that is reused across probes; a key that does not fit
 * inside the mapping is copied out with read_extent_buffer() instead.
 * NOTE(review): the remaining parameters, the loop header, the updates of
 * low/high and the return statements are missing from this listing.
 */
499 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
500 int item_size, struct btrfs_key *key,
507 struct btrfs_disk_key *tmp;
508 struct btrfs_disk_key unaligned;
509 unsigned long offset;
510 char *map_token = NULL;
512 unsigned long map_start = 0;
513 unsigned long map_len = 0;
/* probe the record halfway between low and high */
516 mid = (low + high) / 2;
517 offset = p + mid * item_size;
/* (re)map when nothing is mapped yet or the key is not fully inside the mapped window */
519 if (!map_token || offset < map_start ||
520 (offset + sizeof(struct btrfs_disk_key)) >
521 map_start + map_len) {
523 unmap_extent_buffer(eb, map_token, KM_USER0);
524 map_extent_buffer(eb, offset, &map_token, &kaddr,
525 &map_start, &map_len, KM_USER0);
/* key still runs past the end of the mapping: copy it into the local buffer */
528 if (offset + sizeof(struct btrfs_disk_key) >
529 map_start + map_len) {
530 unmap_extent_buffer(eb, map_token, KM_USER0);
531 read_extent_buffer(eb, &unaligned,
532 offset, sizeof(unaligned));
/* otherwise the key is addressed directly inside the mapping */
536 tmp = (struct btrfs_disk_key *)(kaddr + offset -
539 ret = comp_keys(tmp, key);
/* the mapping is dropped before leaving the function */
547 unmap_extent_buffer(eb, map_token, KM_USER0);
553 unmap_extent_buffer(eb, map_token, KM_USER0);
558 * simple bin_search frontend that does the right thing for
/*
 * Search block @eb for @key, choosing the record layout that matches the
 * block: struct btrfs_item records starting at btrfs_leaf.items, or
 * struct btrfs_key_ptr records starting at btrfs_node.ptrs.
 * The number of records is taken from the block header.
 * NOTE(review): the condition selecting between the two calls is missing
 * from this listing (presumably level == 0 for the leaf layout) -- confirm.
 */
561 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
562 int level, int *slot)
/* leaf layout */
565 return generic_bin_search(eb,
566 offsetof(struct btrfs_leaf, items),
567 sizeof(struct btrfs_item),
568 key, btrfs_header_nritems(eb),
/* node layout */
571 return generic_bin_search(eb,
572 offsetof(struct btrfs_node, ptrs),
573 sizeof(struct btrfs_key_ptr),
574 key, btrfs_header_nritems(eb),
/*
 * Read the child block that @parent points to at @slot and return its
 * extent buffer (obtained from read_tree_block()).
 * NOTE(review): the statements taken when @slot is out of range are missing
 * from this listing; callers test the result against NULL, so presumably
 * NULL is returned -- confirm.
 */
580 static struct extent_buffer *read_node_slot(struct btrfs_root *root,
581 struct extent_buffer *parent, int slot)
/* slot beyond the last pointer in the parent */
585 if (slot >= btrfs_header_nritems(parent))
587 return read_tree_block(root, btrfs_node_blockptr(parent, slot));
/*
 * Rebalance the node at path->nodes[level] against its left and right
 * siblings.  Pointers are pushed into the left sibling, the right sibling is
 * drained into the middle node, nodes that end up empty are removed from the
 * parent and their extents freed, and the path is fixed up to keep pointing
 * at the block pointer it referenced on entry (orig_ptr).
 * NOTE(review): many control-flow lines (guards, gotos, labels, returns) are
 * missing from this listing; comments below describe only the visible
 * statements.
 */
590 static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
591 *root, struct btrfs_path *path, int level)
593 struct extent_buffer *right = NULL;
594 struct extent_buffer *mid;
595 struct extent_buffer *left = NULL;
596 struct extent_buffer *parent = NULL;
600 int orig_slot = path->slots[level];
601 int err_on_enospc = 0;
607 mid = path->nodes[level];
/* remember which child the path pointed at so it can be verified at the end */
608 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
610 if (level < BTRFS_MAX_LEVEL - 1)
611 parent = path->nodes[level + 1];
/*
 * NOTE(review): this read of slots[level + 1] appears not to be covered by
 * the level bound check above -- confirm the array has room for it.
 */
612 pslot = path->slots[level + 1];
615 * deal with the case where there is only one pointer in the root
616 * by promoting the node below to a root
619 struct extent_buffer *child;
620 u64 blocknr = extent_buffer_blocknr(mid);
622 if (btrfs_header_nritems(mid) != 1)
625 /* promote the child to a root */
626 child = read_node_slot(root, mid, 0);
629 path->nodes[level] = NULL;
630 clean_tree_block(trans, root, mid);
631 wait_on_tree_block_writeback(root, mid);
632 /* once for the path */
633 free_extent_buffer(mid);
634 /* once for the root ptr */
635 free_extent_buffer(mid);
636 return btrfs_free_extent(trans, root, blocknr, 1, 1);
/* node is more than a quarter full (consequence not visible here) */
638 if (btrfs_header_nritems(mid) >
639 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
642 if (btrfs_header_nritems(mid) < 2)
/* fetch both siblings and copy them via btrfs_cow_block() before modifying them */
645 left = read_node_slot(root, parent, pslot - 1);
647 wret = btrfs_cow_block(trans, root, left,
648 parent, pslot - 1, &left);
654 right = read_node_slot(root, parent, pslot + 1);
656 wret = btrfs_cow_block(trans, root, right,
657 parent, pslot + 1, &right);
664 /* first, try to make some room in the middle buffer */
/* orig_slot is re-expressed relative to the start of left */
666 orig_slot += btrfs_header_nritems(left);
667 wret = push_node_left(trans, root, left, mid);
670 if (btrfs_header_nritems(mid) < 2)
675 * then try to empty the right most buffer into the middle
678 wret = push_node_left(trans, root, mid, right);
/* -ENOSPC is tolerated here; other negative results are treated differently */
679 if (wret < 0 && wret != -ENOSPC)
/* right sibling fully drained: unlink it from the parent and free its extent */
681 if (btrfs_header_nritems(right) == 0) {
682 u64 blocknr = extent_buffer_blocknr(right);
683 clean_tree_block(trans, root, right);
684 wait_on_tree_block_writeback(root, right);
685 free_extent_buffer(right);
687 wret = del_ptr(trans, root, path, level + 1, pslot +
691 wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
/* right still has items: refresh its key in the parent */
695 struct btrfs_disk_key right_key;
696 btrfs_node_key(right, &right_key, 0);
697 btrfs_set_node_key(parent, &right_key, pslot + 1);
698 btrfs_mark_buffer_dirty(parent);
701 if (btrfs_header_nritems(mid) == 1) {
703 * we're not allowed to leave a node with one item in the
704 * tree during a delete. A deletion from lower in the tree
705 * could try to delete the only pointer in this node.
706 * So, pull some keys from the left.
707 * There has to be a left pointer at this point because
708 * otherwise we would have pulled some pointers from the
712 wret = balance_node_right(trans, root, mid, left);
719 if (btrfs_header_nritems(mid) == 0) {
720 /* we've managed to empty the middle node, drop it */
721 u64 blocknr = extent_buffer_blocknr(mid);
722 clean_tree_block(trans, root, mid);
723 wait_on_tree_block_writeback(root, mid);
724 free_extent_buffer(mid);
726 wret = del_ptr(trans, root, path, level + 1, pslot);
729 wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
733 /* update the parent key to reflect our changes */
734 struct btrfs_disk_key mid_key;
735 btrfs_node_key(mid, &mid_key, 0);
736 btrfs_set_node_key(parent, &mid_key, pslot);
737 btrfs_mark_buffer_dirty(parent);
740 /* update the path */
/* the original slot now lives in left: switch the path over to it */
742 if (btrfs_header_nritems(left) > orig_slot) {
743 extent_buffer_get(left);
744 path->nodes[level] = left;
745 path->slots[level + 1] -= 1;
746 path->slots[level] = orig_slot;
748 free_extent_buffer(mid);
/* otherwise stay in mid and undo the earlier offset by left's item count */
750 orig_slot -= btrfs_header_nritems(left);
751 path->slots[level] = orig_slot;
754 /* double check we haven't messed things up */
755 check_block(root, path, level);
757 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
/* drop the references taken on the siblings */
761 free_extent_buffer(right);
763 free_extent_buffer(left);
767 /* returns zero if the push worked, non-zero otherwise */
/*
 * Try to make room in the node at path->nodes[level] before an insert by
 * moving pointers into a sibling: first into the left sibling with
 * push_node_left(), then into the right sibling with balance_node_right().
 * After a successful push the parent key is refreshed and the path is moved
 * to whichever node now holds the original slot.
 * NOTE(review): guards, else branches and return statements are missing
 * from this listing; comments below describe only the visible statements.
 */
768 static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
769 struct btrfs_root *root,
770 struct btrfs_path *path, int level)
772 struct extent_buffer *right = NULL;
773 struct extent_buffer *mid;
774 struct extent_buffer *left = NULL;
775 struct extent_buffer *parent = NULL;
779 int orig_slot = path->slots[level];
785 mid = path->nodes[level];
786 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
788 if (level < BTRFS_MAX_LEVEL - 1)
789 parent = path->nodes[level + 1];
790 pslot = path->slots[level + 1];
795 left = read_node_slot(root, parent, pslot - 1);
797 /* first, try to make some room in the middle buffer */
800 left_nr = btrfs_header_nritems(left);
/* left sibling has at most one free slot: pushing into it is not attempted */
801 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
804 ret = btrfs_cow_block(trans, root, left, parent,
809 wret = push_node_left(trans, root,
/* push succeeded: mid's first key changed, so update the parent */
816 struct btrfs_disk_key disk_key;
817 orig_slot += left_nr;
818 btrfs_node_key(mid, &disk_key, 0);
819 btrfs_set_node_key(parent, &disk_key, pslot);
820 btrfs_mark_buffer_dirty(parent);
/* original slot moved into left: the path takes over the reference held on left */
821 if (btrfs_header_nritems(left) > orig_slot) {
822 path->nodes[level] = left;
823 path->slots[level + 1] -= 1;
824 path->slots[level] = orig_slot;
825 free_extent_buffer(mid);
828 btrfs_header_nritems(left);
829 path->slots[level] = orig_slot;
830 free_extent_buffer(left);
832 check_node(root, path, level);
835 free_extent_buffer(left);
837 right= read_node_slot(root, parent, pslot + 1);
840 * then try to empty the right most buffer into the middle
844 right_nr = btrfs_header_nritems(right);
/* same fullness test for the right sibling */
845 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
848 ret = btrfs_cow_block(trans, root, right,
854 wret = balance_node_right(trans, root,
/* push succeeded: right's first key changed, so update the parent */
861 struct btrfs_disk_key disk_key;
863 btrfs_node_key(right, &disk_key, 0);
864 btrfs_set_node_key(parent, &disk_key, pslot + 1);
865 btrfs_mark_buffer_dirty(parent);
/* original slot is now beyond the end of mid, i.e. it moved into right */
867 if (btrfs_header_nritems(mid) <= orig_slot) {
868 path->nodes[level] = right;
869 path->slots[level + 1] += 1;
870 path->slots[level] = orig_slot -
871 btrfs_header_nritems(mid);
872 free_extent_buffer(mid);
874 free_extent_buffer(right);
876 check_node(root, path, level);
879 free_extent_buffer(right);
881 check_node(root, path, level);
886 * readahead one full node of leaves
/*
 * Issue readahead for blocks referenced by the node at path->nodes[level],
 * starting from @slot.  The block numbers from @slot to the end of the node
 * are collected in a bit radix tree, pulled back out in batches of up to
 * ARRAY_SIZE(gang), and each one that close_blocks() considers near the
 * current cluster position is passed to readahead_tree_block().
 * path->reada selects the direction and, when it is 1, limits the amount
 * read.
 * NOTE(review): the remaining parameters, early returns and loop headers are
 * missing from this listing.
 */
888 static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
891 struct extent_buffer *node;
899 int direction = path->reada;
900 struct radix_tree_root found;
901 unsigned long gang[8];
902 struct extent_buffer *eb;
907 if (!path->nodes[level])
910 node = path->nodes[level];
911 search = btrfs_node_blockptr(node, slot);
/* look the target block up first; the reference obtained is dropped again */
912 eb = btrfs_find_tree_block(root, search);
914 free_extent_buffer(eb);
/* record every block pointer from slot to the end of the node */
918 init_bit_radix(&found);
919 nritems = btrfs_header_nritems(node);
920 for (i = slot; i < nritems; i++) {
921 blocknr = btrfs_node_blockptr(node, i);
922 set_radix_bit(&found, blocknr);
/* the cluster starts 4 blocks to one side of the target, depending on direction */
925 cluster_start = search - 4;
/* unsigned wrap-around check for search < 4 */
926 if (cluster_start > search)
929 cluster_start = search + 4;
931 ret = find_first_radix_bit(&found, gang, 0, ARRAY_SIZE(gang));
934 for (i = 0; i < ret; i++) {
936 clear_radix_bit(&found, blocknr);
/* with reada == 1 the readahead is capped once more than 16 blocks were read */
937 if (path->reada == 1 && nread > 16)
939 if (close_blocks(cluster_start, blocknr)) {
940 readahead_tree_block(root, blocknr);
/* the cluster position follows the last block that was read ahead */
942 cluster_start = blocknr;
948 * look for key in the tree. path is filled in with nodes along the way
949 * if key is found, we return zero and you can find the item in the leaf
950 * level of the path (level 0)
952 * If the key isn't found, the path points to the slot where it should
953 * be inserted, and 1 is returned. If there are other errors during the
954 * search a negative error number is returned.
956 * if ins_len > 0, nodes and leaves will be split as we walk down the
957 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
/*
 * Descend the tree of @root looking for @key and record the visited blocks
 * and slots in path @p.  On the way down blocks may be copied with
 * btrfs_cow_block(), full nodes split (ins_len > 0) or nodes rebalanced
 * (ins_len < 0).  The caller is expected to hold fs_info->fs_mutex and to
 * pass a path whose nodes[0] is NULL; both are checked with WARN_ON().
 * NOTE(review): the tail of the parameter list, the loop structure, error
 * handling and return statements are missing from this listing.
 */
960 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
961 *root, struct btrfs_key *key, struct btrfs_path *p, int
964 struct extent_buffer *b;
969 int should_reada = p->reada;
972 lowest_level = p->lowest_level;
/* stopping above the leaves is not expected together with inserts or deletes */
973 WARN_ON(lowest_level && ins_len);
974 WARN_ON(p->nodes[0] != NULL);
975 WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
978 extent_buffer_get(b);
980 level = btrfs_header_level(b);
983 wret = btrfs_cow_block(trans, root, b,
988 free_extent_buffer(b);
/* inserting or deleting without copying the blocks first is a bug */
992 BUG_ON(!cow && ins_len);
993 if (level != btrfs_header_level(b))
995 level = btrfs_header_level(b);
997 ret = check_block(root, p, level);
1000 ret = bin_search(b, key, level, &slot);
/* the condition "not an exact match and not the first slot" (its effect is not visible here) */
1002 if (ret && slot > 0)
1004 p->slots[level] = slot;
/* node has at most one free pointer left: split before descending */
1005 if (ins_len > 0 && btrfs_header_nritems(b) >=
1006 BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1007 int sret = split_node(trans, root, p, level);
/* split_node() may have moved the path to a different node */
1011 b = p->nodes[level];
1012 slot = p->slots[level];
1013 } else if (ins_len < 0) {
1014 int sret = balance_level(trans, root, p,
1018 b = p->nodes[level];
1021 slot = p->slots[level];
1022 BUG_ON(btrfs_header_nritems(b) == 1);
1024 /* this is only true while dropping a snapshot */
1025 if (level == lowest_level)
1027 blocknr = btrfs_node_blockptr(b, slot);
1029 reada_for_search(root, p, level, slot);
/* step down to the child referenced by the chosen slot */
1030 b = read_tree_block(root, btrfs_node_blockptr(b, slot));
1032 p->slots[level] = slot;
/* leaf lacks room for the new item header plus ins_len bytes of data */
1033 if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
1034 sizeof(struct btrfs_item) + ins_len) {
1035 int sret = split_leaf(trans, root, key,
1048 * adjust the pointers going up the tree, starting at level
1049 * making sure the right key of each node points to 'key'.
1050 * This is used after shifting pointers to the left, so it stops
1051 * fixing up pointers when a given leaf/node is not in slot 0 of the
1054 * If this fails to write a tree block, it returns -1, but continues
1055 * fixing up the blocks in ram so the tree is consistent.
/*
 * Walk the path upwards from @level and store @key into the slot recorded
 * for each visited level, marking every touched block dirty.
 * NOTE(review): the assignment of `t`, the statement taken when a level has
 * no node, the stop condition and the return statement are missing from
 * this listing.
 */
1057 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1058 struct btrfs_root *root, struct btrfs_path *path,
1059 struct btrfs_disk_key *key, int level)
1063 struct extent_buffer *t;
1065 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1066 int tslot = path->slots[i];
/* no node recorded at this level of the path */
1067 if (!path->nodes[i])
1070 btrfs_set_node_key(t, key, tslot);
1071 btrfs_mark_buffer_dirty(path->nodes[i]);
1079 * try to push data from one node into the next node left in the
1082 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1083 * error, and > 0 if there was no room in the left hand block.
/*
 * Move key pointers from the front of @src to the end of @dst.  The number
 * moved is the free space in @dst, capped at the number of items in @src, so
 * @src may end up empty.  Both buffers are marked dirty.
 * NOTE(review): the return statements are missing from this listing.
 */
1085 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
1086 *root, struct extent_buffer *dst,
1087 struct extent_buffer *src)
1094 src_nritems = btrfs_header_nritems(src);
1095 dst_nritems = btrfs_header_nritems(dst);
/* free pointer slots remaining in dst */
1096 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
/* dst is full */
1098 if (push_items <= 0) {
1102 if (src_nritems < push_items)
1103 push_items = src_nritems;
/* append the first push_items pointers of src after dst's last pointer */
1105 copy_extent_buffer(dst, src,
1106 btrfs_node_key_ptr_offset(dst_nritems),
1107 btrfs_node_key_ptr_offset(0),
1108 push_items * sizeof(struct btrfs_key_ptr));
/* slide what remains in src down to slot 0 */
1110 if (push_items < src_nritems) {
1111 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1112 btrfs_node_key_ptr_offset(push_items),
1113 (src_nritems - push_items) *
1114 sizeof(struct btrfs_key_ptr));
1116 btrfs_set_header_nritems(src, src_nritems - push_items);
1117 btrfs_set_header_nritems(dst, dst_nritems + push_items);
1118 btrfs_mark_buffer_dirty(src);
1119 btrfs_mark_buffer_dirty(dst);
1124 * try to push data from one node into the next node right in the
1127 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1128 * error, and > 0 if there was no room in the right hand block.
1130 * this will only push up to 1/2 the contents of the left node over
/*
 * Move key pointers from the end of @src to the front of @dst.  The number
 * moved is limited both by the free space in @dst and by
 * src_nritems / 2 + 1.  Both buffers are marked dirty.
 * NOTE(review): the return statements and the length argument of the
 * memmove are missing from this listing.
 */
1132 static int balance_node_right(struct btrfs_trans_handle *trans,
1133 struct btrfs_root *root,
1134 struct extent_buffer *dst,
1135 struct extent_buffer *src)
1143 src_nritems = btrfs_header_nritems(src);
1144 dst_nritems = btrfs_header_nritems(dst);
/* free pointer slots remaining in dst */
1145 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1146 if (push_items <= 0)
1149 max_push = src_nritems / 2 + 1;
1150 /* don't try to empty the node */
1151 if (max_push >= src_nritems)
1154 if (max_push < push_items)
1155 push_items = max_push;
/* open a gap of push_items slots at the front of dst */
1157 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1158 btrfs_node_key_ptr_offset(0),
1160 sizeof(struct btrfs_key_ptr));
/* fill the gap with the last push_items pointers of src */
1162 copy_extent_buffer(dst, src,
1163 btrfs_node_key_ptr_offset(0),
1164 btrfs_node_key_ptr_offset(src_nritems - push_items),
1165 push_items * sizeof(struct btrfs_key_ptr));
1167 btrfs_set_header_nritems(src, src_nritems - push_items);
1168 btrfs_set_header_nritems(dst, dst_nritems + push_items);
1170 btrfs_mark_buffer_dirty(src);
1171 btrfs_mark_buffer_dirty(dst);
1176 * helper function to insert a new root level in the tree.
1177 * A new node is allocated, and a single item is inserted to
1178 * point to the existing root
1180 * returns zero on success or < 0 on failure.
/*
 * Add a new top level to the tree: allocate a zeroed node at @level whose
 * single pointer refers to the current root (path->nodes[level - 1]), and
 * store it in the path at @level, slot 0.
 * Preconditions, enforced with BUG_ON(): the path has no node at @level yet
 * and the node below it is root->node.
 * NOTE(review): the allocation error check, the condition choosing between
 * btrfs_item_key() and btrfs_node_key(), the assignment of root->node and
 * the return statement are missing from this listing.
 */
1182 static int insert_new_root(struct btrfs_trans_handle *trans,
1183 struct btrfs_root *root,
1184 struct btrfs_path *path, int level)
1186 struct extent_buffer *lower;
1187 struct extent_buffer *c;
1188 struct btrfs_disk_key lower_key;
1190 BUG_ON(path->nodes[level]);
1191 BUG_ON(path->nodes[level-1] != root->node);
/* the old root's block number is passed as the allocation hint */
1193 c = btrfs_alloc_free_block(trans, root,
1194 extent_buffer_blocknr(root->node), 0);
/* start from an all-zero block and fill in the header */
1197 memset_extent_buffer(c, 0, 0, root->nodesize);
1198 btrfs_set_header_nritems(c, 1);
1199 btrfs_set_header_level(c, level);
1200 btrfs_set_header_blocknr(c, extent_buffer_blocknr(c));
1201 btrfs_set_header_generation(c, trans->transid);
1202 btrfs_set_header_owner(c, root->root_key.objectid);
1203 lower = path->nodes[level-1];
1205 write_extent_buffer(c, root->fs_info->fsid,
1206 (unsigned long)btrfs_header_fsid(c),
/* first key of the old root, read as an item key or a node key (selector not visible) */
1209 btrfs_item_key(lower, &lower_key, 0);
1211 btrfs_node_key(lower, &lower_key, 0);
1212 btrfs_set_node_key(c, &lower_key, 0);
1213 btrfs_set_node_blockptr(c, 0, extent_buffer_blocknr(lower));
1215 btrfs_mark_buffer_dirty(c);
1217 /* the super has an extra ref to root->node */
1218 free_extent_buffer(root->node);
/* extra reference for the pointer stored in the path */
1220 extent_buffer_get(c);
1221 path->nodes[level] = c;
1222 path->slots[level] = 0;
1227 * worker function to insert a single pointer in a node.
1228 * the node should have enough room for the pointer already
1230 * slot and level indicate where you want the key to go, and
1231 * blocknr is the block the key points to.
1233 * returns zero on success and < 0 on any error
/*
 * Insert one key/block pointer pair into the node at path->nodes[level].
 *
 * @key:     key to store
 * @blocknr: block number the key points to
 * @slot:    position at which the pair is inserted
 * @level:   level of the node inside @path
 *
 * Existing pointers from @slot onwards are shifted up by one, the item count
 * is incremented and the node is marked dirty.
 * NOTE(review): the statement taken when the node is already full and the
 * return statement are missing from this listing.
 */
1235 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1236 *root, struct btrfs_path *path, struct btrfs_disk_key
1237 *key, u64 blocknr, int slot, int level)
1239 struct extent_buffer *lower;
1242 BUG_ON(!path->nodes[level]);
1243 lower = path->nodes[level];
1244 nritems = btrfs_header_nritems(lower);
/* node is full */
1247 if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
/* inserting in the middle: shift the tail up by one slot */
1249 if (slot != nritems) {
1250 memmove_extent_buffer(lower,
1251 btrfs_node_key_ptr_offset(slot + 1),
1252 btrfs_node_key_ptr_offset(slot),
1253 (nritems - slot) * sizeof(struct btrfs_key_ptr));
1255 btrfs_set_node_key(lower, key, slot);
1256 btrfs_set_node_blockptr(lower, slot, blocknr);
1257 btrfs_set_header_nritems(lower, nritems + 1);
1258 btrfs_mark_buffer_dirty(lower);
1259 check_node(root, path, level);
1264 * split the node at the specified level in path in two.
1265 * The path is corrected to point to the appropriate node after the split
1267 * Before splitting this tries to make some room in the node by pushing
1268 * left and right, if either one works, it returns right away.
1270 * returns 0 on success and < 0 on failure
/*
 * Split the node at path->nodes[level].  A new block receives the pointers
 * from index (c_nritems + 1) / 2 onwards, a pointer to it is inserted into
 * the parent right after the original node, and the path is moved to the
 * new block when the current slot falls into the moved half.
 * When the node is the root, a new root level is created first; otherwise
 * pushing pointers into the siblings is tried before splitting.
 * NOTE(review): error checks, else branches and return statements are
 * missing from this listing.
 */
1272 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1273 *root, struct btrfs_path *path, int level)
1275 struct extent_buffer *c;
1276 struct extent_buffer *split;
1277 struct btrfs_disk_key disk_key;
1283 c = path->nodes[level];
1284 if (c == root->node) {
1285 /* trying to split the root, lets make a new one */
1286 ret = insert_new_root(trans, root, path, level + 1);
1290 ret = push_nodes_for_insert(trans, root, path, level);
/* the push may have moved the path to a sibling */
1291 c = path->nodes[level];
/* push worked and the node now has more than one free slot */
1292 if (!ret && btrfs_header_nritems(c) <
1293 BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
1299 c_nritems = btrfs_header_nritems(c);
1300 split = btrfs_alloc_free_block(trans, root,
1301 extent_buffer_blocknr(c), 0);
1303 return PTR_ERR(split);
/* the new block inherits flags and level from c and gets its own identity */
1305 btrfs_set_header_flags(split, btrfs_header_flags(c));
1306 btrfs_set_header_level(split, btrfs_header_level(c));
1307 btrfs_set_header_blocknr(split, extent_buffer_blocknr(split));
1308 btrfs_set_header_generation(split, trans->transid);
1309 btrfs_set_header_owner(split, root->root_key.objectid);
1310 write_extent_buffer(split, root->fs_info->fsid,
1311 (unsigned long)btrfs_header_fsid(split),
/* split point rounds up, so c keeps the extra pointer for odd counts */
1314 mid = (c_nritems + 1) / 2;
1316 copy_extent_buffer(split, c,
1317 btrfs_node_key_ptr_offset(0),
1318 btrfs_node_key_ptr_offset(mid),
1319 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1320 btrfs_set_header_nritems(split, c_nritems - mid);
1321 btrfs_set_header_nritems(c, mid);
1324 btrfs_mark_buffer_dirty(c);
1325 btrfs_mark_buffer_dirty(split);
/* link the new block into the parent, in the slot after c */
1327 btrfs_node_key(split, &disk_key, 0);
1328 wret = insert_ptr(trans, root, path, &disk_key,
1329 extent_buffer_blocknr(split),
1330 path->slots[level + 1] + 1,
/* current slot lies in the moved half: switch the path to the new block */
1335 if (path->slots[level] >= mid) {
1336 path->slots[level] -= mid;
1337 free_extent_buffer(c);
1338 path->nodes[level] = split;
1339 path->slots[level + 1] += 1;
1341 free_extent_buffer(split);
1347 * how many bytes are required to store the items in a leaf. start
1348 * and nr indicate which items in the leaf to check. This totals up the
1349 * space used both by the item structs and the item data
/*
 * Compute the bytes occupied by @nr items of leaf @l starting at item
 * @start: the item data between the end of item @start and the offset of
 * the last counted item, plus one struct btrfs_item header per item.
 * The last counted item is clamped to the last item present in the leaf.
 * NOTE(review): the early exit (presumably for nr == 0) and the return
 * statement are missing from this listing.
 */
1351 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1354 int nritems = btrfs_header_nritems(l);
1355 int end = min(nritems, start + nr) - 1;
/* data grows towards lower offsets, so item `start` has the highest end offset */
1359 data_len = btrfs_item_end_nr(l, start);
1360 data_len = data_len - btrfs_item_offset_nr(l, end);
1361 data_len += sizeof(struct btrfs_item) * nr;
1362 WARN_ON(data_len < 0);
1367 * The space between the end of the leaf items and
1368 * the start of the leaf data. IOW, how much room
1369 * the leaf has left for both items and data
/*
 * Compute the free space of @leaf as BTRFS_LEAF_DATA_SIZE(root) minus the
 * space used by all of its items (headers and data, see leaf_space_used()).
 * NOTE(review): the condition guarding the diagnostic printk (presumably a
 * negative result) and the return statement are missing from this listing.
 */
1371 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
1373 int nritems = btrfs_header_nritems(leaf);
1375 ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
1377 printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
1378 ret, BTRFS_LEAF_DATA_SIZE(root),
1379 leaf_space_used(leaf, 0, nritems), nritems);
1385 * push some data in the path leaf to the right, trying to free up at
1386 * least data_size bytes. returns zero if the push worked, nonzero otherwise
1388 * returns 1 if the push failed because the other node didn't have enough
1389 * room, 0 if everything worked out and < 0 if there were major errors.
/*
 * Moves items from the tail of the leaf at path->nodes[0] into its right
 * sibling, i.e. the block referenced by slot + 1 of path->nodes[1].
 *
 * trans, root: handed to btrfs_cow_block and the leaf helpers.
 * path:        nodes[0]/slots[0] identify the leaf and the slot being
 *              tracked; nodes[1]/slots[1] identify the parent. When the
 *              tracked slot is among the items moved, the path is switched
 *              over to the right sibling.
 * data_size:   bytes of item data the caller wants room for; the checks
 *              below add sizeof(struct btrfs_item) on top of it.
 */
1391 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1392 *root, struct btrfs_path *path, int data_size)
1394 struct extent_buffer *left = path->nodes[0];
1395 struct extent_buffer *right;
1396 struct extent_buffer *upper;
1397 struct btrfs_disk_key disk_key;
1403 struct btrfs_item *item;
1409 slot = path->slots[1];
/* no parent node means the leaf has no sibling to push into */
1410 if (!path->nodes[1]) {
1413 upper = path->nodes[1];
/* the leaf is already the last pointer in upper: nothing to its right */
1414 if (slot >= btrfs_header_nritems(upper) - 1)
/* NOTE(review): the buffer returned by read_tree_block is used right away; no check of the result is apparent */
1417 right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1));
1418 free_space = btrfs_leaf_free_space(root, right);
/*
 * NOTE(review): the right-hand side has type size_t because of sizeof; if
 * free_space is a signed int (TODO confirm its declaration) a negative
 * value converts to a large unsigned one and passes this check.
 */
1419 if (free_space < data_size + sizeof(struct btrfs_item)) {
1420 free_extent_buffer(right);
1424 /* cow and double check */
1425 ret = btrfs_cow_block(trans, root, right, upper,
1428 free_extent_buffer(right);
/* free space is measured again on the buffer in hand after the COW */
1431 free_space = btrfs_leaf_free_space(root, right);
1432 if (free_space < data_size + sizeof(struct btrfs_item)) {
1433 free_extent_buffer(right);
1437 left_nritems = btrfs_header_nritems(left);
1438 if (left_nritems == 0) {
1439 free_extent_buffer(right);
/*
 * Walk left's items from the last one down to index 1 (item 0 is never a
 * candidate), adding each item's data size plus header to push_space.
 * Reaching the tracked slot also accounts for data_size plus one header,
 * i.e. the space the caller is asking for.
 */
1443 for (i = left_nritems - 1; i >= 1; i--) {
1444 item = btrfs_item_nr(left, i);
1445 if (path->slots[0] == i)
1446 push_space += data_size + sizeof(*item);
1447 if (btrfs_item_size(left, item) + sizeof(*item) + push_space >
1451 push_space += btrfs_item_size(left, item) + sizeof(*item);
1454 if (push_items == 0) {
1455 free_extent_buffer(right);
1459 if (push_items == left_nritems)
1462 /* push left to right */
1463 right_nritems = btrfs_header_nritems(right);
/* data bytes owned by the items being moved: from left's data end up to the end of the first moved item */
1464 push_space = btrfs_item_end_nr(left, left_nritems - push_items);
1465 push_space -= leaf_data_end(root, left);
1467 /* make room in the right data area */
1468 data_end = leaf_data_end(root, right);
1469 memmove_extent_buffer(right,
1470 btrfs_leaf_data(right) + data_end - push_space,
1471 btrfs_leaf_data(right) + data_end,
1472 BTRFS_LEAF_DATA_SIZE(root) - data_end);
1474 /* copy from the left data area */
1475 copy_extent_buffer(right, left, btrfs_leaf_data(right) +
1476 BTRFS_LEAF_DATA_SIZE(root) - push_space,
1477 btrfs_leaf_data(left) + leaf_data_end(root, left),
/* slide right's existing item headers up by push_items slots */
1480 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1481 btrfs_item_nr_offset(0),
1482 right_nritems * sizeof(struct btrfs_item));
1484 /* copy the items from left to right */
1485 copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1486 btrfs_item_nr_offset(left_nritems - push_items),
1487 push_items * sizeof(struct btrfs_item));
1489 /* update the item pointers */
1490 right_nritems += push_items;
1491 btrfs_set_header_nritems(right, right_nritems);
/* restack every offset in right, working down from the top of the data area by item size */
1492 push_space = BTRFS_LEAF_DATA_SIZE(root);
1493 for (i = 0; i < right_nritems; i++) {
1494 item = btrfs_item_nr(right, i);
1495 btrfs_set_item_offset(right, item, push_space -
1496 btrfs_item_size(right, item));
1497 push_space = btrfs_item_offset(right, item);
1499 left_nritems -= push_items;
1500 btrfs_set_header_nritems(left, left_nritems);
1502 btrfs_mark_buffer_dirty(left);
1503 btrfs_mark_buffer_dirty(right);
/* right now starts with a different item, so the key stored for it in upper is rewritten */
1505 btrfs_item_key(right, &disk_key, 0);
1506 btrfs_set_node_key(upper, &disk_key, slot + 1);
1507 btrfs_mark_buffer_dirty(upper);
1509 /* then fixup the leaf pointer in the path */
/* the tracked slot moved into right: the path takes over the reference to right and drops the one on left */
1510 if (path->slots[0] >= left_nritems) {
1511 path->slots[0] -= left_nritems;
1512 free_extent_buffer(path->nodes[0]);
1513 path->nodes[0] = right;
1514 path->slots[1] += 1;
/* presumably the alternative branch: the path stays on left, so the reference on right is dropped */
1516 free_extent_buffer(right);
1519 check_node(root, path, 1);
1523 * push some data in the path leaf to the left, trying to free up at
1524 * least data_size bytes. returns zero if the push worked, nonzero otherwise
/*
 * Moves items from the head of the leaf at path->nodes[0] into its left
 * sibling (the COW call below addresses that sibling as slot - 1 of
 * path->nodes[1]).
 *
 * trans, root: handed to btrfs_cow_block, fixup_low_keys and the leaf
 *              helpers.
 * path:        nodes[0]/slots[0] identify the leaf and the slot being
 *              tracked; when that slot is among the items moved, the path
 *              is switched over to the left sibling.
 * data_size:   bytes of item data the caller wants room for; the checks
 *              below add sizeof(struct btrfs_item) on top of it.
 */
1526 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1527 *root, struct btrfs_path *path, int data_size)
1529 struct btrfs_disk_key disk_key;
1530 struct extent_buffer *right = path->nodes[0];
1531 struct extent_buffer *left;
1537 struct btrfs_item *item;
1538 u32 old_left_nritems;
1543 slot = path->slots[1];
/* no parent node means the leaf has no sibling to push into */
1546 if (!path->nodes[1])
/* NOTE(review): the buffer returned by read_tree_block is used right away; no check of the result is apparent */
1549 left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1],
1551 free_space = btrfs_leaf_free_space(root, left);
/*
 * NOTE(review): the right-hand side has type size_t because of sizeof; if
 * free_space is a signed int (TODO confirm its declaration) a negative
 * value converts to a large unsigned one and passes this check.
 */
1552 if (free_space < data_size + sizeof(struct btrfs_item)) {
1553 free_extent_buffer(left);
1557 /* cow and double check */
1558 ret = btrfs_cow_block(trans, root, left,
1559 path->nodes[1], slot - 1, &left);
1561 /* we hit -ENOSPC, but it isn't fatal here */
1562 free_extent_buffer(left);
/* free space is measured again on the buffer handed back through &left */
1565 free_space = btrfs_leaf_free_space(root, left);
1566 if (free_space < data_size + sizeof(struct btrfs_item)) {
1567 free_extent_buffer(left);
1571 right_nritems = btrfs_header_nritems(right);
1572 if (right_nritems == 0) {
1573 free_extent_buffer(left);
/*
 * Walk right's items from index 0 up to the one before last (the last item
 * is never a candidate), adding each item's data size plus header to
 * push_space. Reaching the tracked slot also accounts for data_size plus
 * one header, i.e. the space the caller is asking for.
 */
1577 for (i = 0; i < right_nritems - 1; i++) {
1578 item = btrfs_item_nr(right, i);
1579 if (path->slots[0] == i)
1580 push_space += data_size + sizeof(*item);
1581 if (btrfs_item_size(right, item) + sizeof(*item) + push_space >
1585 push_space += btrfs_item_size(right, item) + sizeof(*item);
1587 if (push_items == 0) {
1588 free_extent_buffer(left);
1591 if (push_items == btrfs_header_nritems(right))
1594 /* push data from right to left */
/* append the first push_items item headers of right after left's existing headers */
1595 copy_extent_buffer(left, right,
1596 btrfs_item_nr_offset(btrfs_header_nritems(left)),
1597 btrfs_item_nr_offset(0),
1598 push_items * sizeof(struct btrfs_item));
/* data bytes owned by those items: from the offset of the last moved item to the top of the data area */
1600 push_space = BTRFS_LEAF_DATA_SIZE(root) -
1601 btrfs_item_offset_nr(right, push_items -1);
/* place that data directly below left's current data end */
1603 copy_extent_buffer(left, right, btrfs_leaf_data(left) +
1604 leaf_data_end(root, left) - push_space,
1605 btrfs_leaf_data(right) +
1606 btrfs_item_offset_nr(right, push_items - 1),
1608 old_left_nritems = btrfs_header_nritems(left);
/* NOTE(review): old_left_nritems is declared u32 above, so this condition can never be true */
1609 BUG_ON(old_left_nritems < 0);
/*
 * Rebase the offsets of the appended items: each is lowered by the number
 * of data bytes left held before the push (top of the data area minus the
 * offset of left's previous last item).
 * NOTE(review): if left held no items, old_left_nritems - 1 wraps; confirm
 * that an empty left leaf cannot reach this point.
 */
1611 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1613 item = btrfs_item_nr(left, i);
1614 ioff = btrfs_item_offset(left, item);
1615 btrfs_set_item_offset(left, item,
1616 ioff - (BTRFS_LEAF_DATA_SIZE(root) -
1617 btrfs_item_offset_nr(left, old_left_nritems - 1)));
1619 btrfs_set_header_nritems(left, old_left_nritems + push_items);
1621 /* fixup right node */
/* data bytes that stay in right; they are moved up against the top of the data area */
1622 push_space = btrfs_item_offset_nr(right, push_items - 1) -
1623 leaf_data_end(root, right);
1624 memmove_extent_buffer(right, btrfs_leaf_data(right) +
1625 BTRFS_LEAF_DATA_SIZE(root) - push_space,
1626 btrfs_leaf_data(right) +
1627 leaf_data_end(root, right), push_space);
/* slide the remaining item headers down to slot 0 */
1629 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1630 btrfs_item_nr_offset(push_items),
1631 (btrfs_header_nritems(right) - push_items) *
1632 sizeof(struct btrfs_item));
1634 right_nritems = btrfs_header_nritems(right) - push_items;
1635 btrfs_set_header_nritems(right, right_nritems);
/* restack every offset in right, working down from the top of the data area by item size */
1636 push_space = BTRFS_LEAF_DATA_SIZE(root);
1638 for (i = 0; i < right_nritems; i++) {
1639 item = btrfs_item_nr(right, i);
1640 btrfs_set_item_offset(right, item, push_space -
1641 btrfs_item_size(right, item));
1642 push_space = btrfs_item_offset(right, item);
1645 btrfs_mark_buffer_dirty(left);
1646 btrfs_mark_buffer_dirty(right);
/* right now starts with a different item, so its new first key is handed to fixup_low_keys starting at level 1 */
1648 btrfs_item_key(right, &disk_key, 0);
1649 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1653 /* then fixup the leaf pointer in the path */
/* the tracked slot moved into left: the path takes over the reference to left and drops the one on right */
1654 if (path->slots[0] < push_items) {
1655 path->slots[0] += old_left_nritems;
1656 free_extent_buffer(path->nodes[0]);
1657 path->nodes[0] = left;
1658 path->slots[1] -= 1;
/* presumably the alternative branch: the path stays on right, whose items all shifted down by push_items */
1660 free_extent_buffer(left);
1661 path->slots[0] -= push_items;
1663 BUG_ON(path->slots[0] < 0);
1665 check_node(root, path, 1);
1670 * split the path's leaf in two, making sure there is at least data_size
1671 * available for the resulting leaf level of the path.
1673 * returns 0 if all went well and < 0 on failure.
/*
 * Makes room in the leaf addressed by path for an item of data_size bytes
 * (plus one struct btrfs_item header), first by pushing items into the
 * neighbouring leaves and, failing that, by allocating a new right-hand
 * leaf and moving the items from index mid onward into it.
 *
 * trans, root: handed to the allocation, push and pointer-insert helpers.
 * ins_key:     key about to be inserted; it becomes the separator key when
 *              an empty leaf is linked in for the new item alone.
 * path:        nodes[0]/slots[0] identify the leaf and insertion slot; the
 *              path is re-pointed at the new leaf where the code below says
 *              so.
 * data_size:   bytes of item data the caller is about to insert.
 *
 * Allocation failures are reported through PTR_ERR(right).
 */
1675 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1676 *root, struct btrfs_key *ins_key,
1677 struct btrfs_path *path, int data_size)
1679 struct extent_buffer *l;
1683 struct extent_buffer *right;
1684 int space_needed = data_size + sizeof(struct btrfs_item);
1690 int double_split = 0;
1691 struct btrfs_disk_key disk_key;
1693 /* first try to make some room by pushing left and right */
1694 wret = push_leaf_left(trans, root, path, data_size);
1698 wret = push_leaf_right(trans, root, path, data_size);
1704 /* did the pushes work? */
/*
 * NOTE(review): btrfs_leaf_free_space returns int but the right-hand side
 * is size_t, so the comparison is unsigned; a negative free-space value
 * would compare as very large and satisfy this test.
 */
1705 if (btrfs_leaf_free_space(root, l) >=
1706 sizeof(struct btrfs_item) + data_size)
/* the leaf has no parent yet: a new root is inserted at level 1 */
1709 if (!path->nodes[1]) {
1710 ret = insert_new_root(trans, root, path, 1);
1714 slot = path->slots[0];
1715 nritems = btrfs_header_nritems(l);
/* split point; with an odd item count the extra item stays in l */
1716 mid = (nritems + 1)/ 2;
1718 right = btrfs_alloc_free_block(trans, root,
1719 extent_buffer_blocknr(l), 0);
1721 return PTR_ERR(right);
/* initialise the new block's header: zeroed, then blocknr, generation, owner, level 0 and fsid */
1723 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
1724 btrfs_set_header_blocknr(right, extent_buffer_blocknr(right));
1725 btrfs_set_header_generation(right, trans->transid);
1726 btrfs_set_header_owner(right, root->root_key.objectid);
1727 btrfs_set_header_level(right, 0);
1728 write_extent_buffer(right, root->fs_info->fsid,
1729 (unsigned long)btrfs_header_fsid(right),
/* the items from mid onward plus the new item would not fit in one leaf */
1734 leaf_space_used(l, mid, nritems - mid) + space_needed >
1735 BTRFS_LEAF_DATA_SIZE(root)) {
/* insertion point is past the last item: link right in as an empty leaf keyed by ins_key and move the path onto it */
1736 if (slot >= nritems) {
1737 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1738 btrfs_set_header_nritems(right, 0);
1739 wret = insert_ptr(trans, root, path,
1741 extent_buffer_blocknr(right),
1742 path->slots[1] + 1, 1);
1745 free_extent_buffer(path->nodes[0]);
1746 path->nodes[0] = right;
1748 path->slots[1] += 1;
/* the items up to and including index mid plus the new item would not fit in one leaf */
1755 if (leaf_space_used(l, 0, mid + 1) + space_needed >
1756 BTRFS_LEAF_DATA_SIZE(root)) {
/* link right in as an empty leaf keyed by ins_key and move the path onto it */
1758 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1759 btrfs_set_header_nritems(right, 0);
1760 wret = insert_ptr(trans, root, path,
1762 extent_buffer_blocknr(right),
1766 free_extent_buffer(path->nodes[0]);
1767 path->nodes[0] = right;
/* the leaf sits in slot 0 of its parent, so the new low key is handed to fixup_low_keys */
1769 if (path->slots[1] == 0) {
1770 wret = fixup_low_keys(trans, root,
1771 path, &disk_key, 1);
/* regular split: from here on nritems counts the items that move into right */
1781 nritems = nritems - mid;
1782 btrfs_set_header_nritems(right, nritems);
/* data bytes owned by items mid..end of l */
1783 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
/* item headers first ... */
1785 copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
1786 btrfs_item_nr_offset(mid),
1787 nritems * sizeof(struct btrfs_item));
/* ... then their data, placed against the top of right's data area */
1789 copy_extent_buffer(right, l,
1790 btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1791 data_copy_size, btrfs_leaf_data(l) +
1792 leaf_data_end(root, l), data_copy_size);
/* distance the copied data moved upward; every copied offset is raised by it */
1794 rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1795 btrfs_item_end_nr(l, mid);
1797 for (i = 0; i < nritems; i++) {
1798 struct btrfs_item *item = btrfs_item_nr(right, i);
1799 u32 ioff = btrfs_item_offset(right, item);
1800 btrfs_set_item_offset(right, item, ioff + rt_data_off);
1803 btrfs_set_header_nritems(l, mid);
/* link right into the parent one slot after l, keyed by right's first item */
1805 btrfs_item_key(right, &disk_key, 0);
1806 wret = insert_ptr(trans, root, path, &disk_key,
1807 extent_buffer_blocknr(right), path->slots[1] + 1, 1);
1811 btrfs_mark_buffer_dirty(right);
1812 btrfs_mark_buffer_dirty(l);
1813 BUG_ON(path->slots[0] != slot);
/* presumably taken when the insertion slot landed in right: the path moves onto right and the slot is rebased by mid */
1816 free_extent_buffer(path->nodes[0]);
1817 path->nodes[0] = right;
1818 path->slots[0] -= mid;
1819 path->slots[1] += 1;
/* presumably the alternative branch: the path stays on l, so the reference on right is dropped */
1821 free_extent_buffer(right);
1823 BUG_ON(path->slots[0] < 0);
1824 check_node(root, path, 1);
1825 check_leaf(root, path, 0);
/*
 * Second allocation: another empty leaf keyed by ins_key is linked in and
 * the path is moved onto it.
 * NOTE(review): presumably reached only when double_split was set above;
 * confirm against the guarding condition.
 */
1830 right = btrfs_alloc_free_block(trans, root,
1831 extent_buffer_blocknr(l), 0);
1833 return PTR_ERR(right);
1835 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
1836 btrfs_set_header_blocknr(right, extent_buffer_blocknr(right));
1837 btrfs_set_header_generation(right, trans->transid);
1838 btrfs_set_header_owner(right, root->root_key.objectid);
1839 btrfs_set_header_level(right, 0);
1840 write_extent_buffer(right, root->fs_info->fsid,
1841 (unsigned long)btrfs_header_fsid(right),
1844 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1845 btrfs_set_header_nritems(right, 0);
1846 wret = insert_ptr(trans, root, path,
1848 extent_buffer_blocknr(right),
1852 if (path->slots[1] == 0) {
1853 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1857 free_extent_buffer(path->nodes[0]);
1858 path->nodes[0] = right;
1860 check_node(root, path, 1);
1861 check_leaf(root, path, 0);
/*
 * Shrinks the item at path->slots[0] of the leaf path->nodes[0] to
 * new_size bytes. The first new_size bytes of the item's data are kept;
 * they and the data of all following items are moved up by the number of
 * bytes removed. new_size must be strictly smaller than the current size
 * (enforced by BUG_ON).
 */
1865 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
1866 struct btrfs_root *root,
1867 struct btrfs_path *path,
1873 struct extent_buffer *leaf;
1874 struct btrfs_item *item;
1876 unsigned int data_end;
1877 unsigned int old_data_start;
1878 unsigned int old_size;
1879 unsigned int size_diff;
1882 slot_orig = path->slots[0];
1883 leaf = path->nodes[0];
1885 nritems = btrfs_header_nritems(leaf);
1886 data_end = leaf_data_end(root, leaf);
1888 slot = path->slots[0];
1889 old_data_start = btrfs_item_offset_nr(leaf, slot);
1890 old_size = btrfs_item_size_nr(leaf, slot);
1891 BUG_ON(old_size <= new_size);
/* number of bytes being removed from the item */
1892 size_diff = old_size - new_size;
1895 BUG_ON(slot >= nritems);
1898 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1900 /* first correct the data pointers */
/* the truncated item and every item after it start size_diff bytes higher */
1901 for (i = slot; i < nritems; i++) {
1903 item = btrfs_item_nr(leaf, i);
1904 ioff = btrfs_item_offset(leaf, item);
1905 btrfs_set_item_offset(leaf, item, ioff + size_diff);
1907 /* shift the data */
/* moves the bytes from data_end up to old_data_start + new_size upward by size_diff */
1908 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
1909 data_end + size_diff, btrfs_leaf_data(leaf) +
1910 data_end, old_data_start + new_size - data_end);
1912 item = btrfs_item_nr(leaf, slot);
1913 btrfs_set_item_size(leaf, item, new_size);
1914 btrfs_mark_buffer_dirty(leaf);
/* sanity check: the leaf is dumped if its free space went negative */
1917 if (btrfs_leaf_free_space(root, leaf) < 0) {
1918 btrfs_print_leaf(root, leaf);
1921 check_leaf(root, path, 0);
/*
 * Grows the item at path->slots[0] of the leaf path->nodes[0] by data_size
 * bytes. The item's data and the data of all following items are moved
 * down by data_size, so the new bytes open up at the high-offset end of
 * the item's data. The leaf is dumped when it lacks the free space.
 */
1925 int btrfs_extend_item(struct btrfs_trans_handle *trans,
1926 struct btrfs_root *root, struct btrfs_path *path,
1932 struct extent_buffer *leaf;
1933 struct btrfs_item *item;
1935 unsigned int data_end;
1936 unsigned int old_data;
1937 unsigned int old_size;
1940 slot_orig = path->slots[0];
1941 leaf = path->nodes[0];
1943 nritems = btrfs_header_nritems(leaf);
1944 data_end = leaf_data_end(root, leaf);
/* NOTE(review): if data_size is an unsigned type this comparison is unsigned and a negative free-space value would not trigger it; TODO confirm its declaration */
1946 if (btrfs_leaf_free_space(root, leaf) < data_size) {
1947 btrfs_print_leaf(root, leaf);
1950 slot = path->slots[0];
/* end of the item's current data; everything below this offset gets shifted */
1951 old_data = btrfs_item_end_nr(leaf, slot);
1954 BUG_ON(slot >= nritems);
1957 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1959 /* first correct the data pointers */
/* the extended item and every item after it start data_size bytes lower */
1960 for (i = slot; i < nritems; i++) {
1962 item = btrfs_item_nr(leaf, i);
1963 ioff = btrfs_item_offset(leaf, item);
1964 btrfs_set_item_offset(leaf, item, ioff - data_size);
1967 /* shift the data */
/* moves the bytes from data_end up to old_data downward by data_size */
1968 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
1969 data_end - data_size, btrfs_leaf_data(leaf) +
1970 data_end, old_data - data_end);
/* NOTE(review): data_end does not appear to be read again after this assignment; confirm */
1972 data_end = old_data;
1973 old_size = btrfs_item_size_nr(leaf, slot);
1974 item = btrfs_item_nr(leaf, slot);
1975 btrfs_set_item_size(leaf, item, old_size + data_size);
1976 btrfs_mark_buffer_dirty(leaf);
/* sanity check: the leaf is dumped if its free space went negative */
1979 if (btrfs_leaf_free_space(root, leaf) < 0) {
1980 btrfs_print_leaf(root, leaf);
1983 check_leaf(root, path, 0);
1988 * Given a key and a data size, insert an empty item into the tree.
1989 * This does all the path init required, making room in the tree if needed.
/*
 * Reserves an item of data_size bytes under cpu_key without writing any
 * item data: it looks up the insertion slot with btrfs_search_slot, opens
 * a gap in both the item header array and the data area, and fills in the
 * new item's key, offset and size. The caller locates the item afterwards
 * through path->nodes[0] and path->slots[0].
 *
 * trans, root: handed to btrfs_search_slot and fixup_low_keys.
 * path:        filled in by btrfs_search_slot.
 * cpu_key:     key of the new item, converted to disk format for storage.
 * data_size:   size in bytes of the data area to reserve.
 */
1991 int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
1992 struct btrfs_root *root,
1993 struct btrfs_path *path,
1994 struct btrfs_key *cpu_key, u32 data_size)
1996 struct extent_buffer *leaf;
1997 struct btrfs_item *item;
2002 unsigned int data_end;
2003 struct btrfs_disk_key disk_key;
2005 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2007 /* create a root if there isn't one */
/* data_size is passed as the fifth argument and 1 as the sixth */
2011 ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
2018 slot_orig = path->slots[0];
2019 leaf = path->nodes[0];
2021 nritems = btrfs_header_nritems(leaf);
2022 data_end = leaf_data_end(root, leaf);
/*
 * NOTE(review): btrfs_leaf_free_space returns int while the right-hand
 * side is unsigned (sizeof plus a u32), so the comparison is unsigned; a
 * negative free-space value would compare as very large and slip through.
 */
2024 if (btrfs_leaf_free_space(root, leaf) <
2025 sizeof(struct btrfs_item) + data_size) {
2029 slot = path->slots[0];
/* inserting before existing items: their headers and data have to move out of the way */
2032 if (slot != nritems) {
2034 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
/* the end of the displaced item lies below the leaf's data end: dump the leaf for diagnosis */
2036 if (old_data < data_end) {
2037 btrfs_print_leaf(root, leaf);
2038 printk("slot %d old_data %d data_end %d\n",
2039 slot, old_data, data_end);
2043 * item0..itemN ... dataN.offset..dataN.size .. data0.size
2045 /* first correct the data pointers */
2046 for (i = slot; i < nritems; i++) {
2048 item = btrfs_item_nr(leaf, i);
2049 ioff = btrfs_item_offset(leaf, item);
2050 btrfs_set_item_offset(leaf, item, ioff - data_size);
2053 /* shift the items */
2054 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
2055 btrfs_item_nr_offset(slot),
2056 (nritems - slot) * sizeof(struct btrfs_item));
2058 /* shift the data */
2059 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2060 data_end - data_size, btrfs_leaf_data(leaf) +
2061 data_end, old_data - data_end);
/* the gap just opened ends at old_data; the new item is placed below that offset */
2062 data_end = old_data;
2065 /* setup the item for the new data */
2066 btrfs_set_item_key(leaf, &disk_key, slot);
2067 item = btrfs_item_nr(leaf, slot);
2068 btrfs_set_item_offset(leaf, item, data_end - data_size);
2069 btrfs_set_item_size(leaf, item, data_size);
2070 btrfs_set_header_nritems(leaf, nritems + 1);
2071 btrfs_mark_buffer_dirty(leaf);
/* NOTE(review): presumably done only when the new item became the leaf's first item; confirm against the guarding condition */
2075 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
/* sanity check: the leaf is dumped if its free space went negative */
2077 if (btrfs_leaf_free_space(root, leaf) < 0) {
2078 btrfs_print_leaf(root, leaf);
2081 check_leaf(root, path, 0);
2087 * Given a key and some data, insert an item into the tree.
2088 * This does all the path init required, making room in the tree if needed.
/*
 * Convenience wrapper that inserts a complete item: it allocates a
 * temporary path, reserves the item with btrfs_insert_empty_item, copies
 * data_size bytes from data into the reserved item and frees the path.
 *
 * trans, root: handed to btrfs_insert_empty_item.
 * cpu_key:     key of the new item.
 * data:        source buffer for the item's contents.
 */
2090 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2091 *root, struct btrfs_key *cpu_key, void *data, u32
2095 struct btrfs_path *path;
2096 struct extent_buffer *leaf;
2099 path = btrfs_alloc_path();
2101 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
/* NOTE(review): presumably the copy below runs only when the insert succeeded; confirm against the guarding condition */
2103 leaf = path->nodes[0];
/* offset of the reserved item's data inside the leaf buffer */
2104 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2105 write_extent_buffer(leaf, data, ptr, data_size);
2106 btrfs_mark_buffer_dirty(leaf);
2108 btrfs_free_path(path);
2113 * delete the pointer from a given node.
2115 * If the delete empties a node, the node is removed from the tree,
2116 * continuing all the way to the root if required. The root is converted into
2117 * a leaf if all the nodes are emptied.
/*
 * Removes the key/pointer pair at index slot from the node held in
 * path->nodes[level], closing the gap in the key-pointer array and
 * rewriting the node's item count.
 *
 * trans, root: handed to fixup_low_keys.
 * path:        supplies the node to edit at the given level.
 * level:       level in path of the node to edit.
 * slot:        index of the pointer to drop.
 */
2119 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2120 struct btrfs_path *path, int level, int slot)
2122 struct extent_buffer *parent = path->nodes[level];
2127 nritems = btrfs_header_nritems(parent);
/* unless the last pointer is being removed, slide the following pointers down one slot */
2128 if (slot != nritems -1) {
2129 memmove_extent_buffer(parent,
2130 btrfs_node_key_ptr_offset(slot),
2131 btrfs_node_key_ptr_offset(slot + 1),
2132 sizeof(struct btrfs_key_ptr) *
2133 (nritems - slot - 1));
/* NOTE(review): nritems is presumably decremented before being stored here; confirm */
2136 btrfs_set_header_nritems(parent, nritems);
/* the root node lost its last pointer; it must be at level 1 at this point */
2137 if (nritems == 0 && parent == root->node) {
2138 BUG_ON(btrfs_header_level(root->node) != 1);
2139 /* just turn the root into a leaf and break */
2140 btrfs_set_header_level(root->node, 0);
/* the node's first key changed, so it is handed to fixup_low_keys starting one level up */
2141 } else if (slot == 0) {
2142 struct btrfs_disk_key disk_key;
2144 btrfs_node_key(parent, &disk_key, 0);
2145 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
2149 btrfs_mark_buffer_dirty(parent);
2154 * delete the item at the leaf level in path. If that empties
2155 * the leaf, remove it from the tree
/*
 * Deletes the item at path->slots[0] from the leaf path->nodes[0]: the
 * data of the following items is moved up by the deleted item's size,
 * their offsets are raised to match, and the item header array is closed
 * up. Afterwards the code below handles an emptied leaf (del_ptr on the
 * parent plus btrfs_free_extent on the block) and, when the leaf uses less
 * than a third of BTRFS_LEAF_DATA_SIZE(root), tries push_leaf_left and
 * push_leaf_right with a data_size of 1 to drain it; a leaf left with no
 * items after that is unlinked and freed as well. An extra reference is
 * taken on the leaf with extent_buffer_get before the pushes and released
 * with free_extent_buffer afterwards. A push result that is negative and
 * not -ENOSPC is singled out by the checks after each push.
 *
 * trans, root: handed to the helpers called below.
 * path:        identifies the leaf and item to delete; the push helpers may
 *              re-point it at a sibling leaf.
 */
2157 int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2158 struct btrfs_path *path)
2161 struct extent_buffer *leaf;
2162 struct btrfs_item *item;
2169 leaf = path->nodes[0];
2170 slot = path->slots[0];
/* offset and size of the doomed item's data */
2171 doff = btrfs_item_offset_nr(leaf, slot);
2172 dsize = btrfs_item_size_nr(leaf, slot);
2173 nritems = btrfs_header_nritems(leaf);
/* when the last item is deleted nothing needs to move */
2175 if (slot != nritems - 1) {
2177 int data_end = leaf_data_end(root, leaf);
/* close the hole in the data area */
2179 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2181 btrfs_leaf_data(leaf) + data_end,
/* every item after the deleted one now starts dsize bytes higher */
2184 for (i = slot + 1; i < nritems; i++) {
2186 item = btrfs_item_nr(leaf, i);
2187 ioff = btrfs_item_offset(leaf, item);
2188 btrfs_set_item_offset(leaf, item, ioff + dsize);
/* close the hole in the item header array */
2190 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2191 btrfs_item_nr_offset(slot + 1),
2192 sizeof(struct btrfs_item) *
2193 (nritems - slot - 1));
/* NOTE(review): the local nritems is presumably decremented as well before the uses below; confirm */
2195 btrfs_set_header_nritems(leaf, nritems - 1);
2198 /* delete the leaf if we've emptied it */
/* an emptied root leaf stays in place; any other emptied leaf is unlinked from its parent and its block freed */
2200 if (leaf == root->node) {
2201 btrfs_set_header_level(leaf, 0);
2203 clean_tree_block(trans, root, leaf);
2204 wait_on_tree_block_writeback(root, leaf);
2205 wret = del_ptr(trans, root, path, 1, path->slots[1]);
2208 wret = btrfs_free_extent(trans, root,
2209 extent_buffer_blocknr(leaf),
/* the leaf still holds items: measure how full it is */
2215 int used = leaf_space_used(leaf, 0, nritems);
/* NOTE(review): presumably done only when the first item was the one deleted; confirm against the guarding condition */
2217 struct btrfs_disk_key disk_key;
2219 btrfs_item_key(leaf, &disk_key, 0);
2220 wret = fixup_low_keys(trans, root, path,
2226 /* delete the leaf if it is mostly empty */
2227 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
2228 /* push_leaf_left fixes the path.
2229 * make sure the path still points to our leaf
2230 * for possible call to del_ptr below
2232 slot = path->slots[1];
2233 extent_buffer_get(leaf);
2235 wret = push_leaf_left(trans, root, path, 1);
2236 if (wret < 0 && wret != -ENOSPC)
2239 if (path->nodes[0] == leaf &&
2240 btrfs_header_nritems(leaf)) {
2241 wret = push_leaf_right(trans, root, path, 1);
2242 if (wret < 0 && wret != -ENOSPC)
2246 if (btrfs_header_nritems(leaf) == 0) {
2247 u64 blocknr = extent_buffer_blocknr(leaf);
2249 clean_tree_block(trans, root, leaf);
2250 wait_on_tree_block_writeback(root, leaf);
2252 wret = del_ptr(trans, root, path, 1, slot);
2256 free_extent_buffer(leaf);
2257 wret = btrfs_free_extent(trans, root, blocknr,
2262 btrfs_mark_buffer_dirty(leaf);
2263 free_extent_buffer(leaf);
2266 btrfs_mark_buffer_dirty(leaf);
2273 * walk up the tree as far as required to find the next leaf.
2274 * returns 0 if it found something or 1 if there are no greater leaves.
2275 * returns < 0 on io errors.
2277 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2282 struct extent_buffer *c;
2283 struct extent_buffer *next = NULL;
2285 while(level < BTRFS_MAX_LEVEL) {
2286 if (!path->nodes[level])
2289 slot = path->slots[level] + 1;
2290 c = path->nodes[level];
2291 if (slot >= btrfs_header_nritems(c)) {
2296 blocknr = btrfs_node_blockptr(c, slot);
2298 free_extent_buffer(next);
2301 reada_for_search(root, path, level, slot);
2303 next = read_tree_block(root, blocknr);
2306 path->slots[level] = slot;
2309 c = path->nodes[level];
2310 free_extent_buffer(c);
2311 path->nodes[level] = next;
2312 path->slots[level] = 0;
2316 reada_for_search(root, path, level, 0);
2317 next = read_tree_block(root, btrfs_node_blockptr(next, 0));