Btrfs: add GPLv2
[linux-2.6-block.git] / fs / btrfs / ctree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/module.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "transaction.h"
23
24 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
25                       *root, struct btrfs_path *path, int level);
26 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
27                       *root, struct btrfs_key *ins_key,
28                       struct btrfs_path *path, int data_size);
29 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
30                           *root, struct buffer_head *dst, struct buffer_head
31                           *src);
32 static int balance_node_right(struct btrfs_trans_handle *trans, struct
33                               btrfs_root *root, struct buffer_head *dst_buf,
34                               struct buffer_head *src_buf);
35 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
36                    struct btrfs_path *path, int level, int slot);
37
38 inline void btrfs_init_path(struct btrfs_path *p)
39 {
40         memset(p, 0, sizeof(*p));
41 }
42
43 struct btrfs_path *btrfs_alloc_path(void)
44 {
45         struct btrfs_path *path;
46         path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
47         if (path)
48                 btrfs_init_path(path);
49         return path;
50 }
51
52 void btrfs_free_path(struct btrfs_path *p)
53 {
54         btrfs_release_path(NULL, p);
55         kmem_cache_free(btrfs_path_cachep, p);
56 }
57
58 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
59 {
60         int i;
61         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
62                 if (!p->nodes[i])
63                         break;
64                 btrfs_block_release(root, p->nodes[i]);
65         }
66         memset(p, 0, sizeof(*p));
67 }
68
69 static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
70                            *root, struct buffer_head *buf, struct buffer_head
71                            *parent, int parent_slot, struct buffer_head
72                            **cow_ret)
73 {
74         struct buffer_head *cow;
75         struct btrfs_node *cow_node;
76
77         if (btrfs_header_generation(btrfs_buffer_header(buf)) ==
78                                     trans->transid) {
79                 *cow_ret = buf;
80                 return 0;
81         }
82         cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr);
83         cow_node = btrfs_buffer_node(cow);
84         if (buf->b_size != root->blocksize || cow->b_size != root->blocksize)
85                 WARN_ON(1);
86         memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize);
87         btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow));
88         btrfs_set_header_generation(&cow_node->header, trans->transid);
89         btrfs_set_header_owner(&cow_node->header, root->root_key.objectid);
90         btrfs_inc_ref(trans, root, buf);
91         if (buf == root->node) {
92                 root->node = cow;
93                 get_bh(cow);
94                 if (buf != root->commit_root) {
95                         btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
96                 }
97                 btrfs_block_release(root, buf);
98         } else {
99                 btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot,
100                                         bh_blocknr(cow));
101                 btrfs_mark_buffer_dirty(parent);
102                 btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
103         }
104         btrfs_block_release(root, buf);
105         mark_buffer_dirty(cow);
106         *cow_ret = cow;
107         return 0;
108 }
109
110 /*
111  * The leaf data grows from end-to-front in the node.
112  * this returns the address of the start of the last item,
113  * which is the stop of the leaf data stack
114  */
115 static inline unsigned int leaf_data_end(struct btrfs_root *root,
116                                          struct btrfs_leaf *leaf)
117 {
118         u32 nr = btrfs_header_nritems(&leaf->header);
119         if (nr == 0)
120                 return BTRFS_LEAF_DATA_SIZE(root);
121         return btrfs_item_offset(leaf->items + nr - 1);
122 }
123
124 /*
125  * compare two keys in a memcmp fashion
126  */
127 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
128 {
129         struct btrfs_key k1;
130
131         btrfs_disk_key_to_cpu(&k1, disk);
132
133         if (k1.objectid > k2->objectid)
134                 return 1;
135         if (k1.objectid < k2->objectid)
136                 return -1;
137         if (k1.flags > k2->flags)
138                 return 1;
139         if (k1.flags < k2->flags)
140                 return -1;
141         if (k1.offset > k2->offset)
142                 return 1;
143         if (k1.offset < k2->offset)
144                 return -1;
145         return 0;
146 }
147
148 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
149                       int level)
150 {
151         struct btrfs_node *parent = NULL;
152         struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]);
153         int parent_slot;
154         int slot;
155         struct btrfs_key cpukey;
156         u32 nritems = btrfs_header_nritems(&node->header);
157
158         if (path->nodes[level + 1])
159                 parent = btrfs_buffer_node(path->nodes[level + 1]);
160         parent_slot = path->slots[level + 1];
161         slot = path->slots[level];
162         BUG_ON(nritems == 0);
163         if (parent) {
164                 struct btrfs_disk_key *parent_key;
165                 parent_key = &parent->ptrs[parent_slot].key;
166                 BUG_ON(memcmp(parent_key, &node->ptrs[0].key,
167                               sizeof(struct btrfs_disk_key)));
168                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
169                        btrfs_header_blocknr(&node->header));
170         }
171         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
172         if (slot != 0) {
173                 btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot - 1].key);
174                 BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) <= 0);
175         }
176         if (slot < nritems - 1) {
177                 btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot + 1].key);
178                 BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) >= 0);
179         }
180         return 0;
181 }
182
183 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
184                       int level)
185 {
186         struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]);
187         struct btrfs_node *parent = NULL;
188         int parent_slot;
189         int slot = path->slots[0];
190         struct btrfs_key cpukey;
191
192         u32 nritems = btrfs_header_nritems(&leaf->header);
193
194         if (path->nodes[level + 1])
195                 parent = btrfs_buffer_node(path->nodes[level + 1]);
196         parent_slot = path->slots[level + 1];
197         BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
198
199         if (nritems == 0)
200                 return 0;
201
202         if (parent) {
203                 struct btrfs_disk_key *parent_key;
204                 parent_key = &parent->ptrs[parent_slot].key;
205                 BUG_ON(memcmp(parent_key, &leaf->items[0].key,
206                        sizeof(struct btrfs_disk_key)));
207                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
208                        btrfs_header_blocknr(&leaf->header));
209         }
210         if (slot != 0) {
211                 btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot - 1].key);
212                 BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) <= 0);
213                 BUG_ON(btrfs_item_offset(leaf->items + slot - 1) !=
214                         btrfs_item_end(leaf->items + slot));
215         }
216         if (slot < nritems - 1) {
217                 btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot + 1].key);
218                 BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) >= 0);
219                 BUG_ON(btrfs_item_offset(leaf->items + slot) !=
220                         btrfs_item_end(leaf->items + slot + 1));
221         }
222         BUG_ON(btrfs_item_offset(leaf->items) +
223                btrfs_item_size(leaf->items) != BTRFS_LEAF_DATA_SIZE(root));
224         return 0;
225 }
226
227 static int check_block(struct btrfs_root *root, struct btrfs_path *path,
228                         int level)
229 {
230         struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]);
231         if (memcmp(node->header.fsid, root->fs_info->disk_super->fsid,
232                    sizeof(node->header.fsid)))
233                 BUG();
234         if (level == 0)
235                 return check_leaf(root, path, level);
236         return check_node(root, path, level);
237 }
238
239 /*
240  * search for key in the array p.  items p are item_size apart
241  * and there are 'max' items in p
242  * the slot in the array is returned via slot, and it points to
243  * the place where you would insert key if it is not found in
244  * the array.
245  *
246  * slot may point to max if the key is bigger than all of the keys
247  */
248 static int generic_bin_search(char *p, int item_size, struct btrfs_key *key,
249                        int max, int *slot)
250 {
251         int low = 0;
252         int high = max;
253         int mid;
254         int ret;
255         struct btrfs_disk_key *tmp;
256
257         while(low < high) {
258                 mid = (low + high) / 2;
259                 tmp = (struct btrfs_disk_key *)(p + mid * item_size);
260                 ret = comp_keys(tmp, key);
261
262                 if (ret < 0)
263                         low = mid + 1;
264                 else if (ret > 0)
265                         high = mid;
266                 else {
267                         *slot = mid;
268                         return 0;
269                 }
270         }
271         *slot = low;
272         return 1;
273 }
274
275 /*
276  * simple bin_search frontend that does the right thing for
277  * leaves vs nodes
278  */
279 static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot)
280 {
281         if (btrfs_is_leaf(c)) {
282                 struct btrfs_leaf *l = (struct btrfs_leaf *)c;
283                 return generic_bin_search((void *)l->items,
284                                           sizeof(struct btrfs_item),
285                                           key, btrfs_header_nritems(&c->header),
286                                           slot);
287         } else {
288                 return generic_bin_search((void *)c->ptrs,
289                                           sizeof(struct btrfs_key_ptr),
290                                           key, btrfs_header_nritems(&c->header),
291                                           slot);
292         }
293         return -1;
294 }
295
296 static struct buffer_head *read_node_slot(struct btrfs_root *root,
297                                    struct buffer_head *parent_buf,
298                                    int slot)
299 {
300         struct btrfs_node *node = btrfs_buffer_node(parent_buf);
301         if (slot < 0)
302                 return NULL;
303         if (slot >= btrfs_header_nritems(&node->header))
304                 return NULL;
305         return read_tree_block(root, btrfs_node_blockptr(node, slot));
306 }
307
308 static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
309                          *root, struct btrfs_path *path, int level)
310 {
311         struct buffer_head *right_buf;
312         struct buffer_head *mid_buf;
313         struct buffer_head *left_buf;
314         struct buffer_head *parent_buf = NULL;
315         struct btrfs_node *right = NULL;
316         struct btrfs_node *mid;
317         struct btrfs_node *left = NULL;
318         struct btrfs_node *parent = NULL;
319         int ret = 0;
320         int wret;
321         int pslot;
322         int orig_slot = path->slots[level];
323         u64 orig_ptr;
324
325         if (level == 0)
326                 return 0;
327
328         mid_buf = path->nodes[level];
329         mid = btrfs_buffer_node(mid_buf);
330         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
331
332         if (level < BTRFS_MAX_LEVEL - 1)
333                 parent_buf = path->nodes[level + 1];
334         pslot = path->slots[level + 1];
335
336         /*
337          * deal with the case where there is only one pointer in the root
338          * by promoting the node below to a root
339          */
340         if (!parent_buf) {
341                 struct buffer_head *child;
342                 u64 blocknr = bh_blocknr(mid_buf);
343
344                 if (btrfs_header_nritems(&mid->header) != 1)
345                         return 0;
346
347                 /* promote the child to a root */
348                 child = read_node_slot(root, mid_buf, 0);
349                 BUG_ON(!child);
350                 root->node = child;
351                 path->nodes[level] = NULL;
352                 clean_tree_block(trans, root, mid_buf);
353                 wait_on_buffer(mid_buf);
354                 /* once for the path */
355                 btrfs_block_release(root, mid_buf);
356                 /* once for the root ptr */
357                 btrfs_block_release(root, mid_buf);
358                 return btrfs_free_extent(trans, root, blocknr, 1, 1);
359         }
360         parent = btrfs_buffer_node(parent_buf);
361
362         if (btrfs_header_nritems(&mid->header) >
363             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
364                 return 0;
365
366         left_buf = read_node_slot(root, parent_buf, pslot - 1);
367         right_buf = read_node_slot(root, parent_buf, pslot + 1);
368
369         /* first, try to make some room in the middle buffer */
370         if (left_buf) {
371                 btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1,
372                                 &left_buf);
373                 left = btrfs_buffer_node(left_buf);
374                 orig_slot += btrfs_header_nritems(&left->header);
375                 wret = push_node_left(trans, root, left_buf, mid_buf);
376                 if (wret < 0)
377                         ret = wret;
378         }
379
380         /*
381          * then try to empty the right most buffer into the middle
382          */
383         if (right_buf) {
384                 btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1,
385                                 &right_buf);
386                 right = btrfs_buffer_node(right_buf);
387                 wret = push_node_left(trans, root, mid_buf, right_buf);
388                 if (wret < 0)
389                         ret = wret;
390                 if (btrfs_header_nritems(&right->header) == 0) {
391                         u64 blocknr = bh_blocknr(right_buf);
392                         clean_tree_block(trans, root, right_buf);
393                         wait_on_buffer(right_buf);
394                         btrfs_block_release(root, right_buf);
395                         right_buf = NULL;
396                         right = NULL;
397                         wret = del_ptr(trans, root, path, level + 1, pslot +
398                                        1);
399                         if (wret)
400                                 ret = wret;
401                         wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
402                         if (wret)
403                                 ret = wret;
404                 } else {
405                         btrfs_memcpy(root, parent,
406                                      &parent->ptrs[pslot + 1].key,
407                                      &right->ptrs[0].key,
408                                      sizeof(struct btrfs_disk_key));
409                         btrfs_mark_buffer_dirty(parent_buf);
410                 }
411         }
412         if (btrfs_header_nritems(&mid->header) == 1) {
413                 /*
414                  * we're not allowed to leave a node with one item in the
415                  * tree during a delete.  A deletion from lower in the tree
416                  * could try to delete the only pointer in this node.
417                  * So, pull some keys from the left.
418                  * There has to be a left pointer at this point because
419                  * otherwise we would have pulled some pointers from the
420                  * right
421                  */
422                 BUG_ON(!left_buf);
423                 wret = balance_node_right(trans, root, mid_buf, left_buf);
424                 if (wret < 0)
425                         ret = wret;
426                 BUG_ON(wret == 1);
427         }
428         if (btrfs_header_nritems(&mid->header) == 0) {
429                 /* we've managed to empty the middle node, drop it */
430                 u64 blocknr = bh_blocknr(mid_buf);
431                 clean_tree_block(trans, root, mid_buf);
432                 wait_on_buffer(mid_buf);
433                 btrfs_block_release(root, mid_buf);
434                 mid_buf = NULL;
435                 mid = NULL;
436                 wret = del_ptr(trans, root, path, level + 1, pslot);
437                 if (wret)
438                         ret = wret;
439                 wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
440                 if (wret)
441                         ret = wret;
442         } else {
443                 /* update the parent key to reflect our changes */
444                 btrfs_memcpy(root, parent,
445                              &parent->ptrs[pslot].key, &mid->ptrs[0].key,
446                              sizeof(struct btrfs_disk_key));
447                 btrfs_mark_buffer_dirty(parent_buf);
448         }
449
450         /* update the path */
451         if (left_buf) {
452                 if (btrfs_header_nritems(&left->header) > orig_slot) {
453                         get_bh(left_buf);
454                         path->nodes[level] = left_buf;
455                         path->slots[level + 1] -= 1;
456                         path->slots[level] = orig_slot;
457                         if (mid_buf)
458                                 btrfs_block_release(root, mid_buf);
459                 } else {
460                         orig_slot -= btrfs_header_nritems(&left->header);
461                         path->slots[level] = orig_slot;
462                 }
463         }
464         /* double check we haven't messed things up */
465         check_block(root, path, level);
466         if (orig_ptr !=
467             btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]),
468                                 path->slots[level]))
469                 BUG();
470
471         if (right_buf)
472                 btrfs_block_release(root, right_buf);
473         if (left_buf)
474                 btrfs_block_release(root, left_buf);
475         return ret;
476 }
477
478 /* returns zero if the push worked, non-zero otherwise */
479 static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
480                                 struct btrfs_root *root,
481                                 struct btrfs_path *path, int level)
482 {
483         struct buffer_head *right_buf;
484         struct buffer_head *mid_buf;
485         struct buffer_head *left_buf;
486         struct buffer_head *parent_buf = NULL;
487         struct btrfs_node *right = NULL;
488         struct btrfs_node *mid;
489         struct btrfs_node *left = NULL;
490         struct btrfs_node *parent = NULL;
491         int ret = 0;
492         int wret;
493         int pslot;
494         int orig_slot = path->slots[level];
495         u64 orig_ptr;
496
497         if (level == 0)
498                 return 1;
499
500         mid_buf = path->nodes[level];
501         mid = btrfs_buffer_node(mid_buf);
502         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
503
504         if (level < BTRFS_MAX_LEVEL - 1)
505                 parent_buf = path->nodes[level + 1];
506         pslot = path->slots[level + 1];
507
508         if (!parent_buf)
509                 return 1;
510         parent = btrfs_buffer_node(parent_buf);
511
512         left_buf = read_node_slot(root, parent_buf, pslot - 1);
513
514         /* first, try to make some room in the middle buffer */
515         if (left_buf) {
516                 u32 left_nr;
517                 left = btrfs_buffer_node(left_buf);
518                 left_nr = btrfs_header_nritems(&left->header);
519                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
520                         wret = 1;
521                 } else {
522                         btrfs_cow_block(trans, root, left_buf, parent_buf,
523                                         pslot - 1, &left_buf);
524                         left = btrfs_buffer_node(left_buf);
525                         wret = push_node_left(trans, root, left_buf, mid_buf);
526                 }
527                 if (wret < 0)
528                         ret = wret;
529                 if (wret == 0) {
530                         orig_slot += left_nr;
531                         btrfs_memcpy(root, parent,
532                                      &parent->ptrs[pslot].key,
533                                      &mid->ptrs[0].key,
534                                      sizeof(struct btrfs_disk_key));
535                         btrfs_mark_buffer_dirty(parent_buf);
536                         if (btrfs_header_nritems(&left->header) > orig_slot) {
537                                 path->nodes[level] = left_buf;
538                                 path->slots[level + 1] -= 1;
539                                 path->slots[level] = orig_slot;
540                                 btrfs_block_release(root, mid_buf);
541                         } else {
542                                 orig_slot -=
543                                         btrfs_header_nritems(&left->header);
544                                 path->slots[level] = orig_slot;
545                                 btrfs_block_release(root, left_buf);
546                         }
547                         check_node(root, path, level);
548                         return 0;
549                 }
550                 btrfs_block_release(root, left_buf);
551         }
552         right_buf = read_node_slot(root, parent_buf, pslot + 1);
553
554         /*
555          * then try to empty the right most buffer into the middle
556          */
557         if (right_buf) {
558                 u32 right_nr;
559                 right = btrfs_buffer_node(right_buf);
560                 right_nr = btrfs_header_nritems(&right->header);
561                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
562                         wret = 1;
563                 } else {
564                         btrfs_cow_block(trans, root, right_buf,
565                                         parent_buf, pslot + 1, &right_buf);
566                         right = btrfs_buffer_node(right_buf);
567                         wret = balance_node_right(trans, root,
568                                                   right_buf, mid_buf);
569                 }
570                 if (wret < 0)
571                         ret = wret;
572                 if (wret == 0) {
573                         btrfs_memcpy(root, parent,
574                                      &parent->ptrs[pslot + 1].key,
575                                      &right->ptrs[0].key,
576                                      sizeof(struct btrfs_disk_key));
577                         btrfs_mark_buffer_dirty(parent_buf);
578                         if (btrfs_header_nritems(&mid->header) <= orig_slot) {
579                                 path->nodes[level] = right_buf;
580                                 path->slots[level + 1] += 1;
581                                 path->slots[level] = orig_slot -
582                                         btrfs_header_nritems(&mid->header);
583                                 btrfs_block_release(root, mid_buf);
584                         } else {
585                                 btrfs_block_release(root, right_buf);
586                         }
587                         check_node(root, path, level);
588                         return 0;
589                 }
590                 btrfs_block_release(root, right_buf);
591         }
592         check_node(root, path, level);
593         return 1;
594 }
595
596 /*
597  * look for key in the tree.  path is filled in with nodes along the way
598  * if key is found, we return zero and you can find the item in the leaf
599  * level of the path (level 0)
600  *
601  * If the key isn't found, the path points to the slot where it should
602  * be inserted, and 1 is returned.  If there are other errors during the
603  * search a negative error number is returned.
604  *
605  * if ins_len > 0, nodes and leaves will be split as we walk down the
606  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
607  * possible)
608  */
609 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
610                       *root, struct btrfs_key *key, struct btrfs_path *p, int
611                       ins_len, int cow)
612 {
613         struct buffer_head *b;
614         struct buffer_head *cow_buf;
615         struct btrfs_node *c;
616         int slot;
617         int ret;
618         int level;
619
620         WARN_ON(p->nodes[0] != NULL);
621         WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
622 again:
623         b = root->node;
624         get_bh(b);
625         while (b) {
626                 c = btrfs_buffer_node(b);
627                 level = btrfs_header_level(&c->header);
628                 if (cow) {
629                         int wret;
630                         wret = btrfs_cow_block(trans, root, b,
631                                                p->nodes[level + 1],
632                                                p->slots[level + 1],
633                                                &cow_buf);
634                         b = cow_buf;
635                         c = btrfs_buffer_node(b);
636                 }
637                 BUG_ON(!cow && ins_len);
638                 if (level != btrfs_header_level(&c->header))
639                         WARN_ON(1);
640                 level = btrfs_header_level(&c->header);
641                 p->nodes[level] = b;
642                 ret = check_block(root, p, level);
643                 if (ret)
644                         return -1;
645                 ret = bin_search(c, key, &slot);
646                 if (!btrfs_is_leaf(c)) {
647                         if (ret && slot > 0)
648                                 slot -= 1;
649                         p->slots[level] = slot;
650                         if (ins_len > 0 && btrfs_header_nritems(&c->header) >=
651                             BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
652                                 int sret = split_node(trans, root, p, level);
653                                 BUG_ON(sret > 0);
654                                 if (sret)
655                                         return sret;
656                                 b = p->nodes[level];
657                                 c = btrfs_buffer_node(b);
658                                 slot = p->slots[level];
659                         } else if (ins_len < 0) {
660                                 int sret = balance_level(trans, root, p,
661                                                          level);
662                                 if (sret)
663                                         return sret;
664                                 b = p->nodes[level];
665                                 if (!b)
666                                         goto again;
667                                 c = btrfs_buffer_node(b);
668                                 slot = p->slots[level];
669                                 BUG_ON(btrfs_header_nritems(&c->header) == 1);
670                         }
671                         b = read_tree_block(root, btrfs_node_blockptr(c, slot));
672                 } else {
673                         struct btrfs_leaf *l = (struct btrfs_leaf *)c;
674                         p->slots[level] = slot;
675                         if (ins_len > 0 && btrfs_leaf_free_space(root, l) <
676                             sizeof(struct btrfs_item) + ins_len) {
677                                 int sret = split_leaf(trans, root, key,
678                                                       p, ins_len);
679                                 BUG_ON(sret > 0);
680                                 if (sret)
681                                         return sret;
682                         }
683                         return ret;
684                 }
685         }
686         return 1;
687 }
688
689 /*
690  * adjust the pointers going up the tree, starting at level
691  * making sure the right key of each node is points to 'key'.
692  * This is used after shifting pointers to the left, so it stops
693  * fixing up pointers when a given leaf/node is not in slot 0 of the
694  * higher levels
695  *
696  * If this fails to write a tree block, it returns -1, but continues
697  * fixing up the blocks in ram so the tree is consistent.
698  */
699 static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root
700                           *root, struct btrfs_path *path, struct btrfs_disk_key
701                           *key, int level)
702 {
703         int i;
704         int ret = 0;
705         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
706                 struct btrfs_node *t;
707                 int tslot = path->slots[i];
708                 if (!path->nodes[i])
709                         break;
710                 t = btrfs_buffer_node(path->nodes[i]);
711                 btrfs_memcpy(root, t, &t->ptrs[tslot].key, key, sizeof(*key));
712                 btrfs_mark_buffer_dirty(path->nodes[i]);
713                 if (tslot != 0)
714                         break;
715         }
716         return ret;
717 }
718
719 /*
720  * try to push data from one node into the next node left in the
721  * tree.
722  *
723  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
724  * error, and > 0 if there was no room in the left hand block.
725  */
726 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
727                           *root, struct buffer_head *dst_buf, struct
728                           buffer_head *src_buf)
729 {
730         struct btrfs_node *src = btrfs_buffer_node(src_buf);
731         struct btrfs_node *dst = btrfs_buffer_node(dst_buf);
732         int push_items = 0;
733         int src_nritems;
734         int dst_nritems;
735         int ret = 0;
736
737         src_nritems = btrfs_header_nritems(&src->header);
738         dst_nritems = btrfs_header_nritems(&dst->header);
739         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
740         if (push_items <= 0) {
741                 return 1;
742         }
743
744         if (src_nritems < push_items)
745                 push_items = src_nritems;
746
747         btrfs_memcpy(root, dst, dst->ptrs + dst_nritems, src->ptrs,
748                      push_items * sizeof(struct btrfs_key_ptr));
749         if (push_items < src_nritems) {
750                 btrfs_memmove(root, src, src->ptrs, src->ptrs + push_items,
751                         (src_nritems - push_items) *
752                         sizeof(struct btrfs_key_ptr));
753         }
754         btrfs_set_header_nritems(&src->header, src_nritems - push_items);
755         btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
756         btrfs_mark_buffer_dirty(src_buf);
757         btrfs_mark_buffer_dirty(dst_buf);
758         return ret;
759 }
760
761 /*
762  * try to push data from one node into the next node right in the
763  * tree.
764  *
765  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
766  * error, and > 0 if there was no room in the right hand block.
767  *
768  * this will  only push up to 1/2 the contents of the left node over
769  */
770 static int balance_node_right(struct btrfs_trans_handle *trans, struct
771                               btrfs_root *root, struct buffer_head *dst_buf,
772                               struct buffer_head *src_buf)
773 {
774         struct btrfs_node *src = btrfs_buffer_node(src_buf);
775         struct btrfs_node *dst = btrfs_buffer_node(dst_buf);
776         int push_items = 0;
777         int max_push;
778         int src_nritems;
779         int dst_nritems;
780         int ret = 0;
781
782         src_nritems = btrfs_header_nritems(&src->header);
783         dst_nritems = btrfs_header_nritems(&dst->header);
784         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
785         if (push_items <= 0) {
786                 return 1;
787         }
788
789         max_push = src_nritems / 2 + 1;
790         /* don't try to empty the node */
791         if (max_push > src_nritems)
792                 return 1;
793         if (max_push < push_items)
794                 push_items = max_push;
795
796         btrfs_memmove(root, dst, dst->ptrs + push_items, dst->ptrs,
797                       dst_nritems * sizeof(struct btrfs_key_ptr));
798
799         btrfs_memcpy(root, dst, dst->ptrs,
800                      src->ptrs + src_nritems - push_items,
801                      push_items * sizeof(struct btrfs_key_ptr));
802
803         btrfs_set_header_nritems(&src->header, src_nritems - push_items);
804         btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
805
806         btrfs_mark_buffer_dirty(src_buf);
807         btrfs_mark_buffer_dirty(dst_buf);
808         return ret;
809 }
810
811 /*
812  * helper function to insert a new root level in the tree.
813  * A new node is allocated, and a single item is inserted to
814  * point to the existing root
815  *
816  * returns zero on success or < 0 on failure.
817  */
818 static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root
819                            *root, struct btrfs_path *path, int level)
820 {
821         struct buffer_head *t;
822         struct btrfs_node *lower;
823         struct btrfs_node *c;
824         struct btrfs_disk_key *lower_key;
825
826         BUG_ON(path->nodes[level]);
827         BUG_ON(path->nodes[level-1] != root->node);
828
829         t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr);
830         c = btrfs_buffer_node(t);
831         memset(c, 0, root->blocksize);
832         btrfs_set_header_nritems(&c->header, 1);
833         btrfs_set_header_level(&c->header, level);
834         btrfs_set_header_blocknr(&c->header, bh_blocknr(t));
835         btrfs_set_header_generation(&c->header, trans->transid);
836         btrfs_set_header_owner(&c->header, root->root_key.objectid);
837         lower = btrfs_buffer_node(path->nodes[level-1]);
838         memcpy(c->header.fsid, root->fs_info->disk_super->fsid,
839                sizeof(c->header.fsid));
840         if (btrfs_is_leaf(lower))
841                 lower_key = &((struct btrfs_leaf *)lower)->items[0].key;
842         else
843                 lower_key = &lower->ptrs[0].key;
844         btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key,
845                      sizeof(struct btrfs_disk_key));
846         btrfs_set_node_blockptr(c, 0, bh_blocknr(path->nodes[level - 1]));
847
848         btrfs_mark_buffer_dirty(t);
849
850         /* the super has an extra ref to root->node */
851         btrfs_block_release(root, root->node);
852         root->node = t;
853         get_bh(t);
854         path->nodes[level] = t;
855         path->slots[level] = 0;
856         return 0;
857 }
858
859 /*
860  * worker function to insert a single pointer in a node.
861  * the node should have enough room for the pointer already
862  *
863  * slot and level indicate where you want the key to go, and
864  * blocknr is the block the key points to.
865  *
866  * returns zero on success and < 0 on any error
867  */
868 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
869                       *root, struct btrfs_path *path, struct btrfs_disk_key
870                       *key, u64 blocknr, int slot, int level)
871 {
872         struct btrfs_node *lower;
873         int nritems;
874
875         BUG_ON(!path->nodes[level]);
876         lower = btrfs_buffer_node(path->nodes[level]);
877         nritems = btrfs_header_nritems(&lower->header);
878         if (slot > nritems)
879                 BUG();
880         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
881                 BUG();
882         if (slot != nritems) {
883                 btrfs_memmove(root, lower, lower->ptrs + slot + 1,
884                               lower->ptrs + slot,
885                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
886         }
887         btrfs_memcpy(root, lower, &lower->ptrs[slot].key,
888                      key, sizeof(struct btrfs_disk_key));
889         btrfs_set_node_blockptr(lower, slot, blocknr);
890         btrfs_set_header_nritems(&lower->header, nritems + 1);
891         btrfs_mark_buffer_dirty(path->nodes[level]);
892         check_node(root, path, level);
893         return 0;
894 }
895
896 /*
897  * split the node at the specified level in path in two.
898  * The path is corrected to point to the appropriate node after the split
899  *
900  * Before splitting this tries to make some room in the node by pushing
901  * left and right, if either one works, it returns right away.
902  *
903  * returns 0 on success and < 0 on failure
904  */
905 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
906                       *root, struct btrfs_path *path, int level)
907 {
908         struct buffer_head *t;
909         struct btrfs_node *c;
910         struct buffer_head *split_buffer;
911         struct btrfs_node *split;
912         int mid;
913         int ret;
914         int wret;
915         u32 c_nritems;
916
917         t = path->nodes[level];
918         c = btrfs_buffer_node(t);
919         if (t == root->node) {
920                 /* trying to split the root, lets make a new one */
921                 ret = insert_new_root(trans, root, path, level + 1);
922                 if (ret)
923                         return ret;
924         } else {
925                 ret = push_nodes_for_insert(trans, root, path, level);
926                 t = path->nodes[level];
927                 c = btrfs_buffer_node(t);
928                 if (!ret &&
929                     btrfs_header_nritems(&c->header) <
930                     BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
931                         return 0;
932         }
933
934         c_nritems = btrfs_header_nritems(&c->header);
935         split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr);
936         split = btrfs_buffer_node(split_buffer);
937         btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header));
938         btrfs_set_header_level(&split->header, btrfs_header_level(&c->header));
939         btrfs_set_header_blocknr(&split->header, bh_blocknr(split_buffer));
940         btrfs_set_header_generation(&split->header, trans->transid);
941         btrfs_set_header_owner(&split->header, root->root_key.objectid);
942         memcpy(split->header.fsid, root->fs_info->disk_super->fsid,
943                sizeof(split->header.fsid));
944         mid = (c_nritems + 1) / 2;
945         btrfs_memcpy(root, split, split->ptrs, c->ptrs + mid,
946                      (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
947         btrfs_set_header_nritems(&split->header, c_nritems - mid);
948         btrfs_set_header_nritems(&c->header, mid);
949         ret = 0;
950
951         btrfs_mark_buffer_dirty(t);
952         btrfs_mark_buffer_dirty(split_buffer);
953         wret = insert_ptr(trans, root, path, &split->ptrs[0].key,
954                           bh_blocknr(split_buffer), path->slots[level + 1] + 1,
955                           level + 1);
956         if (wret)
957                 ret = wret;
958
959         if (path->slots[level] >= mid) {
960                 path->slots[level] -= mid;
961                 btrfs_block_release(root, t);
962                 path->nodes[level] = split_buffer;
963                 path->slots[level + 1] += 1;
964         } else {
965                 btrfs_block_release(root, split_buffer);
966         }
967         return ret;
968 }
969
970 /*
971  * how many bytes are required to store the items in a leaf.  start
972  * and nr indicate which items in the leaf to check.  This totals up the
973  * space used both by the item structs and the item data
974  */
975 static int leaf_space_used(struct btrfs_leaf *l, int start, int nr)
976 {
977         int data_len;
978         int nritems = btrfs_header_nritems(&l->header);
979         int end = min(nritems, start + nr) - 1;
980
981         if (!nr)
982                 return 0;
983         data_len = btrfs_item_end(l->items + start);
984         data_len = data_len - btrfs_item_offset(l->items + end);
985         data_len += sizeof(struct btrfs_item) * nr;
986         WARN_ON(data_len < 0);
987         return data_len;
988 }
989
990 /*
991  * The space between the end of the leaf items and
992  * the start of the leaf data.  IOW, how much room
993  * the leaf has left for both items and data
994  */
995 int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf)
996 {
997         int nritems = btrfs_header_nritems(&leaf->header);
998         return BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
999 }
1000
1001 /*
1002  * push some data in the path leaf to the right, trying to free up at
1003  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1004  *
1005  * returns 1 if the push failed because the other node didn't have enough
1006  * room, 0 if everything worked out and < 0 if there were major errors.
1007  */
1008 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1009                            *root, struct btrfs_path *path, int data_size)
1010 {
1011         struct buffer_head *left_buf = path->nodes[0];
1012         struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf);
1013         struct btrfs_leaf *right;
1014         struct buffer_head *right_buf;
1015         struct buffer_head *upper;
1016         struct btrfs_node *upper_node;
1017         int slot;
1018         int i;
1019         int free_space;
1020         int push_space = 0;
1021         int push_items = 0;
1022         struct btrfs_item *item;
1023         u32 left_nritems;
1024         u32 right_nritems;
1025
1026         slot = path->slots[1];
1027         if (!path->nodes[1]) {
1028                 return 1;
1029         }
1030         upper = path->nodes[1];
1031         upper_node = btrfs_buffer_node(upper);
1032         if (slot >= btrfs_header_nritems(&upper_node->header) - 1) {
1033                 return 1;
1034         }
1035         right_buf = read_tree_block(root,
1036                     btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1));
1037         right = btrfs_buffer_leaf(right_buf);
1038         free_space = btrfs_leaf_free_space(root, right);
1039         if (free_space < data_size + sizeof(struct btrfs_item)) {
1040                 btrfs_block_release(root, right_buf);
1041                 return 1;
1042         }
1043         /* cow and double check */
1044         btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf);
1045         right = btrfs_buffer_leaf(right_buf);
1046         free_space = btrfs_leaf_free_space(root, right);
1047         if (free_space < data_size + sizeof(struct btrfs_item)) {
1048                 btrfs_block_release(root, right_buf);
1049                 return 1;
1050         }
1051
1052         left_nritems = btrfs_header_nritems(&left->header);
1053         if (left_nritems == 0) {
1054                 btrfs_block_release(root, right_buf);
1055                 return 1;
1056         }
1057         for (i = left_nritems - 1; i >= 1; i--) {
1058                 item = left->items + i;
1059                 if (path->slots[0] == i)
1060                         push_space += data_size + sizeof(*item);
1061                 if (btrfs_item_size(item) + sizeof(*item) + push_space >
1062                     free_space)
1063                         break;
1064                 push_items++;
1065                 push_space += btrfs_item_size(item) + sizeof(*item);
1066         }
1067         if (push_items == 0) {
1068                 btrfs_block_release(root, right_buf);
1069                 return 1;
1070         }
1071         if (push_items == left_nritems)
1072                 WARN_ON(1);
1073         right_nritems = btrfs_header_nritems(&right->header);
1074         /* push left to right */
1075         push_space = btrfs_item_end(left->items + left_nritems - push_items);
1076         push_space -= leaf_data_end(root, left);
1077         /* make room in the right data area */
1078         btrfs_memmove(root, right, btrfs_leaf_data(right) +
1079                       leaf_data_end(root, right) - push_space,
1080                       btrfs_leaf_data(right) +
1081                       leaf_data_end(root, right), BTRFS_LEAF_DATA_SIZE(root) -
1082                       leaf_data_end(root, right));
1083         /* copy from the left data area */
1084         btrfs_memcpy(root, right, btrfs_leaf_data(right) +
1085                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
1086                      btrfs_leaf_data(left) + leaf_data_end(root, left),
1087                      push_space);
1088         btrfs_memmove(root, right, right->items + push_items, right->items,
1089                 right_nritems * sizeof(struct btrfs_item));
1090         /* copy the items from left to right */
1091         btrfs_memcpy(root, right, right->items, left->items +
1092                      left_nritems - push_items,
1093                      push_items * sizeof(struct btrfs_item));
1094
1095         /* update the item pointers */
1096         right_nritems += push_items;
1097         btrfs_set_header_nritems(&right->header, right_nritems);
1098         push_space = BTRFS_LEAF_DATA_SIZE(root);
1099         for (i = 0; i < right_nritems; i++) {
1100                 btrfs_set_item_offset(right->items + i, push_space -
1101                                       btrfs_item_size(right->items + i));
1102                 push_space = btrfs_item_offset(right->items + i);
1103         }
1104         left_nritems -= push_items;
1105         btrfs_set_header_nritems(&left->header, left_nritems);
1106
1107         btrfs_mark_buffer_dirty(left_buf);
1108         btrfs_mark_buffer_dirty(right_buf);
1109
1110         btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key,
1111                 &right->items[0].key, sizeof(struct btrfs_disk_key));
1112         btrfs_mark_buffer_dirty(upper);
1113
1114         /* then fixup the leaf pointer in the path */
1115         if (path->slots[0] >= left_nritems) {
1116                 path->slots[0] -= left_nritems;
1117                 btrfs_block_release(root, path->nodes[0]);
1118                 path->nodes[0] = right_buf;
1119                 path->slots[1] += 1;
1120         } else {
1121                 btrfs_block_release(root, right_buf);
1122         }
1123         if (path->nodes[1])
1124                 check_node(root, path, 1);
1125         return 0;
1126 }
1127 /*
1128  * push some data in the path leaf to the left, trying to free up at
1129  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1130  */
1131 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1132                           *root, struct btrfs_path *path, int data_size)
1133 {
1134         struct buffer_head *right_buf = path->nodes[0];
1135         struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf);
1136         struct buffer_head *t;
1137         struct btrfs_leaf *left;
1138         int slot;
1139         int i;
1140         int free_space;
1141         int push_space = 0;
1142         int push_items = 0;
1143         struct btrfs_item *item;
1144         u32 old_left_nritems;
1145         int ret = 0;
1146         int wret;
1147
1148         slot = path->slots[1];
1149         if (slot == 0) {
1150                 return 1;
1151         }
1152         if (!path->nodes[1]) {
1153                 return 1;
1154         }
1155         t = read_tree_block(root,
1156             btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1));
1157         left = btrfs_buffer_leaf(t);
1158         free_space = btrfs_leaf_free_space(root, left);
1159         if (free_space < data_size + sizeof(struct btrfs_item)) {
1160                 btrfs_block_release(root, t);
1161                 return 1;
1162         }
1163
1164         /* cow and double check */
1165         btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t);
1166         left = btrfs_buffer_leaf(t);
1167         free_space = btrfs_leaf_free_space(root, left);
1168         if (free_space < data_size + sizeof(struct btrfs_item)) {
1169                 btrfs_block_release(root, t);
1170                 return 1;
1171         }
1172
1173         if (btrfs_header_nritems(&right->header) == 0) {
1174                 btrfs_block_release(root, t);
1175                 return 1;
1176         }
1177
1178         for (i = 0; i < btrfs_header_nritems(&right->header) - 1; i++) {
1179                 item = right->items + i;
1180                 if (path->slots[0] == i)
1181                         push_space += data_size + sizeof(*item);
1182                 if (btrfs_item_size(item) + sizeof(*item) + push_space >
1183                     free_space)
1184                         break;
1185                 push_items++;
1186                 push_space += btrfs_item_size(item) + sizeof(*item);
1187         }
1188         if (push_items == 0) {
1189                 btrfs_block_release(root, t);
1190                 return 1;
1191         }
1192         if (push_items == btrfs_header_nritems(&right->header))
1193                 WARN_ON(1);
1194         /* push data from right to left */
1195         btrfs_memcpy(root, left, left->items +
1196                      btrfs_header_nritems(&left->header),
1197                      right->items, push_items * sizeof(struct btrfs_item));
1198         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1199                      btrfs_item_offset(right->items + push_items -1);
1200         btrfs_memcpy(root, left, btrfs_leaf_data(left) +
1201                      leaf_data_end(root, left) - push_space,
1202                      btrfs_leaf_data(right) +
1203                      btrfs_item_offset(right->items + push_items - 1),
1204                      push_space);
1205         old_left_nritems = btrfs_header_nritems(&left->header);
1206         BUG_ON(old_left_nritems < 0);
1207
1208         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1209                 u32 ioff = btrfs_item_offset(left->items + i);
1210                 btrfs_set_item_offset(left->items + i, ioff -
1211                                      (BTRFS_LEAF_DATA_SIZE(root) -
1212                                       btrfs_item_offset(left->items +
1213                                                         old_left_nritems - 1)));
1214         }
1215         btrfs_set_header_nritems(&left->header, old_left_nritems + push_items);
1216
1217         /* fixup right node */
1218         push_space = btrfs_item_offset(right->items + push_items - 1) -
1219                      leaf_data_end(root, right);
1220         btrfs_memmove(root, right, btrfs_leaf_data(right) +
1221                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
1222                       btrfs_leaf_data(right) +
1223                       leaf_data_end(root, right), push_space);
1224         btrfs_memmove(root, right, right->items, right->items + push_items,
1225                 (btrfs_header_nritems(&right->header) - push_items) *
1226                 sizeof(struct btrfs_item));
1227         btrfs_set_header_nritems(&right->header,
1228                                  btrfs_header_nritems(&right->header) -
1229                                  push_items);
1230         push_space = BTRFS_LEAF_DATA_SIZE(root);
1231
1232         for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
1233                 btrfs_set_item_offset(right->items + i, push_space -
1234                                       btrfs_item_size(right->items + i));
1235                 push_space = btrfs_item_offset(right->items + i);
1236         }
1237
1238         btrfs_mark_buffer_dirty(t);
1239         btrfs_mark_buffer_dirty(right_buf);
1240
1241         wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1);
1242         if (wret)
1243                 ret = wret;
1244
1245         /* then fixup the leaf pointer in the path */
1246         if (path->slots[0] < push_items) {
1247                 path->slots[0] += old_left_nritems;
1248                 btrfs_block_release(root, path->nodes[0]);
1249                 path->nodes[0] = t;
1250                 path->slots[1] -= 1;
1251         } else {
1252                 btrfs_block_release(root, t);
1253                 path->slots[0] -= push_items;
1254         }
1255         BUG_ON(path->slots[0] < 0);
1256         if (path->nodes[1])
1257                 check_node(root, path, 1);
1258         return ret;
1259 }
1260
1261 /*
1262  * split the path's leaf in two, making sure there is at least data_size
1263  * available for the resulting leaf level of the path.
1264  *
1265  * returns 0 if all went well and < 0 on failure.
1266  */
1267 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1268                       *root, struct btrfs_key *ins_key,
1269                       struct btrfs_path *path, int data_size)
1270 {
1271         struct buffer_head *l_buf;
1272         struct btrfs_leaf *l;
1273         u32 nritems;
1274         int mid;
1275         int slot;
1276         struct btrfs_leaf *right;
1277         struct buffer_head *right_buffer;
1278         int space_needed = data_size + sizeof(struct btrfs_item);
1279         int data_copy_size;
1280         int rt_data_off;
1281         int i;
1282         int ret = 0;
1283         int wret;
1284         int double_split = 0;
1285         struct btrfs_disk_key disk_key;
1286
1287         /* first try to make some room by pushing left and right */
1288         wret = push_leaf_left(trans, root, path, data_size);
1289         if (wret < 0)
1290                 return wret;
1291         if (wret) {
1292                 wret = push_leaf_right(trans, root, path, data_size);
1293                 if (wret < 0)
1294                         return wret;
1295         }
1296         l_buf = path->nodes[0];
1297         l = btrfs_buffer_leaf(l_buf);
1298
1299         /* did the pushes work? */
1300         if (btrfs_leaf_free_space(root, l) >=
1301             sizeof(struct btrfs_item) + data_size)
1302                 return 0;
1303
1304         if (!path->nodes[1]) {
1305                 ret = insert_new_root(trans, root, path, 1);
1306                 if (ret)
1307                         return ret;
1308         }
1309         slot = path->slots[0];
1310         nritems = btrfs_header_nritems(&l->header);
1311         mid = (nritems + 1)/ 2;
1312         right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr);
1313         BUG_ON(!right_buffer);
1314         right = btrfs_buffer_leaf(right_buffer);
1315         memset(&right->header, 0, sizeof(right->header));
1316         btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer));
1317         btrfs_set_header_generation(&right->header, trans->transid);
1318         btrfs_set_header_owner(&right->header, root->root_key.objectid);
1319         btrfs_set_header_level(&right->header, 0);
1320         memcpy(right->header.fsid, root->fs_info->disk_super->fsid,
1321                sizeof(right->header.fsid));
1322         if (mid <= slot) {
1323                 if (nritems == 1 ||
1324                     leaf_space_used(l, mid, nritems - mid) + space_needed >
1325                         BTRFS_LEAF_DATA_SIZE(root)) {
1326                         if (slot >= nritems) {
1327                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1328                                 btrfs_set_header_nritems(&right->header, 0);
1329                                 wret = insert_ptr(trans, root, path,
1330                                                   &disk_key,
1331                                                   bh_blocknr(right_buffer),
1332                                                   path->slots[1] + 1, 1);
1333                                 if (wret)
1334                                         ret = wret;
1335                                 btrfs_block_release(root, path->nodes[0]);
1336                                 path->nodes[0] = right_buffer;
1337                                 path->slots[0] = 0;
1338                                 path->slots[1] += 1;
1339                                 return ret;
1340                         }
1341                         mid = slot;
1342                         double_split = 1;
1343                 }
1344         } else {
1345                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
1346                         BTRFS_LEAF_DATA_SIZE(root)) {
1347                         if (slot == 0) {
1348                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1349                                 btrfs_set_header_nritems(&right->header, 0);
1350                                 wret = insert_ptr(trans, root, path,
1351                                                   &disk_key,
1352                                                   bh_blocknr(right_buffer),
1353                                                   path->slots[1], 1);
1354                                 if (wret)
1355                                         ret = wret;
1356                                 btrfs_block_release(root, path->nodes[0]);
1357                                 path->nodes[0] = right_buffer;
1358                                 path->slots[0] = 0;
1359                                 if (path->slots[1] == 0) {
1360                                         wret = fixup_low_keys(trans, root,
1361                                                    path, &disk_key, 1);
1362                                         if (wret)
1363                                                 ret = wret;
1364                                 }
1365                                 return ret;
1366                         }
1367                         mid = slot;
1368                         double_split = 1;
1369                 }
1370         }
1371         btrfs_set_header_nritems(&right->header, nritems - mid);
1372         data_copy_size = btrfs_item_end(l->items + mid) -
1373                          leaf_data_end(root, l);
1374         btrfs_memcpy(root, right, right->items, l->items + mid,
1375                      (nritems - mid) * sizeof(struct btrfs_item));
1376         btrfs_memcpy(root, right,
1377                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1378                      data_copy_size, btrfs_leaf_data(l) +
1379                      leaf_data_end(root, l), data_copy_size);
1380         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1381                       btrfs_item_end(l->items + mid);
1382
1383         for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
1384                 u32 ioff = btrfs_item_offset(right->items + i);
1385                 btrfs_set_item_offset(right->items + i, ioff + rt_data_off);
1386         }
1387
1388         btrfs_set_header_nritems(&l->header, mid);
1389         ret = 0;
1390         wret = insert_ptr(trans, root, path, &right->items[0].key,
1391                           bh_blocknr(right_buffer), path->slots[1] + 1, 1);
1392         if (wret)
1393                 ret = wret;
1394         btrfs_mark_buffer_dirty(right_buffer);
1395         btrfs_mark_buffer_dirty(l_buf);
1396         BUG_ON(path->slots[0] != slot);
1397         if (mid <= slot) {
1398                 btrfs_block_release(root, path->nodes[0]);
1399                 path->nodes[0] = right_buffer;
1400                 path->slots[0] -= mid;
1401                 path->slots[1] += 1;
1402         } else
1403                 btrfs_block_release(root, right_buffer);
1404         BUG_ON(path->slots[0] < 0);
1405         check_node(root, path, 1);
1406
1407         if (!double_split)
1408                 return ret;
1409         right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr);
1410         BUG_ON(!right_buffer);
1411         right = btrfs_buffer_leaf(right_buffer);
1412         memset(&right->header, 0, sizeof(right->header));
1413         btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer));
1414         btrfs_set_header_generation(&right->header, trans->transid);
1415         btrfs_set_header_owner(&right->header, root->root_key.objectid);
1416         btrfs_set_header_level(&right->header, 0);
1417         memcpy(right->header.fsid, root->fs_info->disk_super->fsid,
1418                sizeof(right->header.fsid));
1419         btrfs_cpu_key_to_disk(&disk_key, ins_key);
1420         btrfs_set_header_nritems(&right->header, 0);
1421         wret = insert_ptr(trans, root, path,
1422                           &disk_key,
1423                           bh_blocknr(right_buffer),
1424                           path->slots[1], 1);
1425         if (wret)
1426                 ret = wret;
1427         if (path->slots[1] == 0) {
1428                 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1429                 if (wret)
1430                         ret = wret;
1431         }
1432         btrfs_block_release(root, path->nodes[0]);
1433         path->nodes[0] = right_buffer;
1434         path->slots[0] = 0;
1435         check_node(root, path, 1);
1436         check_leaf(root, path, 0);
1437         return ret;
1438 }
1439
1440 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
1441                         struct btrfs_root *root,
1442                         struct btrfs_path *path,
1443                         u32 new_size)
1444 {
1445         int ret = 0;
1446         int slot;
1447         int slot_orig;
1448         struct btrfs_leaf *leaf;
1449         struct buffer_head *leaf_buf;
1450         u32 nritems;
1451         unsigned int data_end;
1452         unsigned int old_data_start;
1453         unsigned int old_size;
1454         unsigned int size_diff;
1455         int i;
1456
1457         slot_orig = path->slots[0];
1458         leaf_buf = path->nodes[0];
1459         leaf = btrfs_buffer_leaf(leaf_buf);
1460
1461         nritems = btrfs_header_nritems(&leaf->header);
1462         data_end = leaf_data_end(root, leaf);
1463
1464         slot = path->slots[0];
1465         old_data_start = btrfs_item_offset(leaf->items + slot);
1466         old_size = btrfs_item_size(leaf->items + slot);
1467         BUG_ON(old_size <= new_size);
1468         size_diff = old_size - new_size;
1469
1470         BUG_ON(slot < 0);
1471         BUG_ON(slot >= nritems);
1472
1473         /*
1474          * item0..itemN ... dataN.offset..dataN.size .. data0.size
1475          */
1476         /* first correct the data pointers */
1477         for (i = slot; i < nritems; i++) {
1478                 u32 ioff = btrfs_item_offset(leaf->items + i);
1479                 btrfs_set_item_offset(leaf->items + i,
1480                                       ioff + size_diff);
1481         }
1482         /* shift the data */
1483         btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
1484                       data_end + size_diff, btrfs_leaf_data(leaf) +
1485                       data_end, old_data_start + new_size - data_end);
1486         btrfs_set_item_size(leaf->items + slot, new_size);
1487         btrfs_mark_buffer_dirty(leaf_buf);
1488
1489         ret = 0;
1490         if (btrfs_leaf_free_space(root, leaf) < 0)
1491                 BUG();
1492         check_leaf(root, path, 0);
1493         return ret;
1494 }
1495
1496 int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
1497                       *root, struct btrfs_path *path, u32 data_size)
1498 {
1499         int ret = 0;
1500         int slot;
1501         int slot_orig;
1502         struct btrfs_leaf *leaf;
1503         struct buffer_head *leaf_buf;
1504         u32 nritems;
1505         unsigned int data_end;
1506         unsigned int old_data;
1507         unsigned int old_size;
1508         int i;
1509
1510         slot_orig = path->slots[0];
1511         leaf_buf = path->nodes[0];
1512         leaf = btrfs_buffer_leaf(leaf_buf);
1513
1514         nritems = btrfs_header_nritems(&leaf->header);
1515         data_end = leaf_data_end(root, leaf);
1516
1517         if (btrfs_leaf_free_space(root, leaf) < data_size)
1518                 BUG();
1519         slot = path->slots[0];
1520         old_data = btrfs_item_end(leaf->items + slot);
1521
1522         BUG_ON(slot < 0);
1523         BUG_ON(slot >= nritems);
1524
1525         /*
1526          * item0..itemN ... dataN.offset..dataN.size .. data0.size
1527          */
1528         /* first correct the data pointers */
1529         for (i = slot; i < nritems; i++) {
1530                 u32 ioff = btrfs_item_offset(leaf->items + i);
1531                 btrfs_set_item_offset(leaf->items + i,
1532                                       ioff - data_size);
1533         }
1534         /* shift the data */
1535         btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
1536                       data_end - data_size, btrfs_leaf_data(leaf) +
1537                       data_end, old_data - data_end);
1538         data_end = old_data;
1539         old_size = btrfs_item_size(leaf->items + slot);
1540         btrfs_set_item_size(leaf->items + slot, old_size + data_size);
1541         btrfs_mark_buffer_dirty(leaf_buf);
1542
1543         ret = 0;
1544         if (btrfs_leaf_free_space(root, leaf) < 0)
1545                 BUG();
1546         check_leaf(root, path, 0);
1547         return ret;
1548 }
1549
1550 /*
1551  * Given a key and some data, insert an item into the tree.
1552  * This does all the path init required, making room in the tree if needed.
1553  */
1554 int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root
1555                             *root, struct btrfs_path *path, struct btrfs_key
1556                             *cpu_key, u32 data_size)
1557 {
1558         int ret = 0;
1559         int slot;
1560         int slot_orig;
1561         struct btrfs_leaf *leaf;
1562         struct buffer_head *leaf_buf;
1563         u32 nritems;
1564         unsigned int data_end;
1565         struct btrfs_disk_key disk_key;
1566
1567         btrfs_cpu_key_to_disk(&disk_key, cpu_key);
1568
1569         /* create a root if there isn't one */
1570         if (!root->node)
1571                 BUG();
1572         ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
1573         if (ret == 0) {
1574                 return -EEXIST;
1575         }
1576         if (ret < 0)
1577                 goto out;
1578
1579         slot_orig = path->slots[0];
1580         leaf_buf = path->nodes[0];
1581         leaf = btrfs_buffer_leaf(leaf_buf);
1582
1583         nritems = btrfs_header_nritems(&leaf->header);
1584         data_end = leaf_data_end(root, leaf);
1585
1586         if (btrfs_leaf_free_space(root, leaf) <
1587             sizeof(struct btrfs_item) + data_size) {
1588                 BUG();
1589         }
1590         slot = path->slots[0];
1591         BUG_ON(slot < 0);
1592         if (slot != nritems) {
1593                 int i;
1594                 unsigned int old_data = btrfs_item_end(leaf->items + slot);
1595
1596                 /*
1597                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
1598                  */
1599                 /* first correct the data pointers */
1600                 for (i = slot; i < nritems; i++) {
1601                         u32 ioff = btrfs_item_offset(leaf->items + i);
1602                         btrfs_set_item_offset(leaf->items + i,
1603                                               ioff - data_size);
1604                 }
1605
1606                 /* shift the items */
1607                 btrfs_memmove(root, leaf, leaf->items + slot + 1,
1608                               leaf->items + slot,
1609                               (nritems - slot) * sizeof(struct btrfs_item));
1610
1611                 /* shift the data */
1612                 btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
1613                               data_end - data_size, btrfs_leaf_data(leaf) +
1614                               data_end, old_data - data_end);
1615                 data_end = old_data;
1616         }
1617         /* setup the item for the new data */
1618         btrfs_memcpy(root, leaf, &leaf->items[slot].key, &disk_key,
1619                      sizeof(struct btrfs_disk_key));
1620         btrfs_set_item_offset(leaf->items + slot, data_end - data_size);
1621         btrfs_set_item_size(leaf->items + slot, data_size);
1622         btrfs_set_header_nritems(&leaf->header, nritems + 1);
1623         btrfs_mark_buffer_dirty(leaf_buf);
1624
1625         ret = 0;
1626         if (slot == 0)
1627                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
1628
1629         if (btrfs_leaf_free_space(root, leaf) < 0)
1630                 BUG();
1631         check_leaf(root, path, 0);
1632 out:
1633         return ret;
1634 }
1635
1636 /*
1637  * Given a key and some data, insert an item into the tree.
1638  * This does all the path init required, making room in the tree if needed.
1639  */
1640 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
1641                       *root, struct btrfs_key *cpu_key, void *data, u32
1642                       data_size)
1643 {
1644         int ret = 0;
1645         struct btrfs_path *path;
1646         u8 *ptr;
1647
1648         path = btrfs_alloc_path();
1649         BUG_ON(!path);
1650         btrfs_init_path(path);
1651         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
1652         if (!ret) {
1653                 ptr = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1654                                      path->slots[0], u8);
1655                 btrfs_memcpy(root, path->nodes[0]->b_data,
1656                              ptr, data, data_size);
1657                 btrfs_mark_buffer_dirty(path->nodes[0]);
1658         }
1659         btrfs_release_path(root, path);
1660         btrfs_free_path(path);
1661         return ret;
1662 }
1663
1664 /*
1665  * delete the pointer from a given node.
1666  *
1667  * If the delete empties a node, the node is removed from the tree,
1668  * continuing all the way the root if required.  The root is converted into
1669  * a leaf if all the nodes are emptied.
1670  */
1671 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1672                    struct btrfs_path *path, int level, int slot)
1673 {
1674         struct btrfs_node *node;
1675         struct buffer_head *parent = path->nodes[level];
1676         u32 nritems;
1677         int ret = 0;
1678         int wret;
1679
1680         node = btrfs_buffer_node(parent);
1681         nritems = btrfs_header_nritems(&node->header);
1682         if (slot != nritems -1) {
1683                 btrfs_memmove(root, node, node->ptrs + slot,
1684                               node->ptrs + slot + 1,
1685                               sizeof(struct btrfs_key_ptr) *
1686                               (nritems - slot - 1));
1687         }
1688         nritems--;
1689         btrfs_set_header_nritems(&node->header, nritems);
1690         if (nritems == 0 && parent == root->node) {
1691                 struct btrfs_header *header = btrfs_buffer_header(root->node);
1692                 BUG_ON(btrfs_header_level(header) != 1);
1693                 /* just turn the root into a leaf and break */
1694                 btrfs_set_header_level(header, 0);
1695         } else if (slot == 0) {
1696                 wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key,
1697                                       level + 1);
1698                 if (wret)
1699                         ret = wret;
1700         }
1701         btrfs_mark_buffer_dirty(parent);
1702         return ret;
1703 }
1704
1705 /*
1706  * delete the item at the leaf level in path.  If that empties
1707  * the leaf, remove it from the tree
1708  */
1709 int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1710                    struct btrfs_path *path)
1711 {
1712         int slot;
1713         struct btrfs_leaf *leaf;
1714         struct buffer_head *leaf_buf;
1715         int doff;
1716         int dsize;
1717         int ret = 0;
1718         int wret;
1719         u32 nritems;
1720
1721         leaf_buf = path->nodes[0];
1722         leaf = btrfs_buffer_leaf(leaf_buf);
1723         slot = path->slots[0];
1724         doff = btrfs_item_offset(leaf->items + slot);
1725         dsize = btrfs_item_size(leaf->items + slot);
1726         nritems = btrfs_header_nritems(&leaf->header);
1727
1728         if (slot != nritems - 1) {
1729                 int i;
1730                 int data_end = leaf_data_end(root, leaf);
1731                 btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
1732                               data_end + dsize,
1733                               btrfs_leaf_data(leaf) + data_end,
1734                               doff - data_end);
1735                 for (i = slot + 1; i < nritems; i++) {
1736                         u32 ioff = btrfs_item_offset(leaf->items + i);
1737                         btrfs_set_item_offset(leaf->items + i, ioff + dsize);
1738                 }
1739                 btrfs_memmove(root, leaf, leaf->items + slot,
1740                               leaf->items + slot + 1,
1741                               sizeof(struct btrfs_item) *
1742                               (nritems - slot - 1));
1743         }
1744         btrfs_set_header_nritems(&leaf->header, nritems - 1);
1745         nritems--;
1746         /* delete the leaf if we've emptied it */
1747         if (nritems == 0) {
1748                 if (leaf_buf == root->node) {
1749                         btrfs_set_header_level(&leaf->header, 0);
1750                 } else {
1751                         clean_tree_block(trans, root, leaf_buf);
1752                         wait_on_buffer(leaf_buf);
1753                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
1754                         if (wret)
1755                                 ret = wret;
1756                         wret = btrfs_free_extent(trans, root,
1757                                                  bh_blocknr(leaf_buf), 1, 1);
1758                         if (wret)
1759                                 ret = wret;
1760                 }
1761         } else {
1762                 int used = leaf_space_used(leaf, 0, nritems);
1763                 if (slot == 0) {
1764                         wret = fixup_low_keys(trans, root, path,
1765                                               &leaf->items[0].key, 1);
1766                         if (wret)
1767                                 ret = wret;
1768                 }
1769
1770                 /* delete the leaf if it is mostly empty */
1771                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
1772                         /* push_leaf_left fixes the path.
1773                          * make sure the path still points to our leaf
1774                          * for possible call to del_ptr below
1775                          */
1776                         slot = path->slots[1];
1777                         get_bh(leaf_buf);
1778                         wret = push_leaf_left(trans, root, path, 1);
1779                         if (wret < 0)
1780                                 ret = wret;
1781                         if (path->nodes[0] == leaf_buf &&
1782                             btrfs_header_nritems(&leaf->header)) {
1783                                 wret = push_leaf_right(trans, root, path, 1);
1784                                 if (wret < 0)
1785                                         ret = wret;
1786                         }
1787                         if (btrfs_header_nritems(&leaf->header) == 0) {
1788                                 u64 blocknr = bh_blocknr(leaf_buf);
1789                                 clean_tree_block(trans, root, leaf_buf);
1790                                 wait_on_buffer(leaf_buf);
1791                                 wret = del_ptr(trans, root, path, 1, slot);
1792                                 if (wret)
1793                                         ret = wret;
1794                                 btrfs_block_release(root, leaf_buf);
1795                                 wret = btrfs_free_extent(trans, root, blocknr,
1796                                                          1, 1);
1797                                 if (wret)
1798                                         ret = wret;
1799                         } else {
1800                                 btrfs_mark_buffer_dirty(leaf_buf);
1801                                 btrfs_block_release(root, leaf_buf);
1802                         }
1803                 } else {
1804                         btrfs_mark_buffer_dirty(leaf_buf);
1805                 }
1806         }
1807         return ret;
1808 }
1809
1810 /*
1811  * walk up the tree as far as required to find the next leaf.
1812  * returns 0 if it found something or 1 if there are no greater leaves.
1813  * returns < 0 on io errors.
1814  */
1815 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
1816 {
1817         int slot;
1818         int level = 1;
1819         u64 blocknr;
1820         struct buffer_head *c;
1821         struct btrfs_node *c_node;
1822         struct buffer_head *next = NULL;
1823
1824         while(level < BTRFS_MAX_LEVEL) {
1825                 if (!path->nodes[level])
1826                         return 1;
1827                 slot = path->slots[level] + 1;
1828                 c = path->nodes[level];
1829                 c_node = btrfs_buffer_node(c);
1830                 if (slot >= btrfs_header_nritems(&c_node->header)) {
1831                         level++;
1832                         continue;
1833                 }
1834                 blocknr = btrfs_node_blockptr(c_node, slot);
1835                 if (next)
1836                         btrfs_block_release(root, next);
1837                 next = read_tree_block(root, blocknr);
1838                 break;
1839         }
1840         path->slots[level] = slot;
1841         while(1) {
1842                 level--;
1843                 c = path->nodes[level];
1844                 btrfs_block_release(root, c);
1845                 path->nodes[level] = next;
1846                 path->slots[level] = 0;
1847                 if (!level)
1848                         break;
1849                 next = read_tree_block(root,
1850                        btrfs_node_blockptr(btrfs_buffer_node(next), 0));
1851         }
1852         return 0;
1853 }