Change btrfs_map_block to return a structure with mappings for all stripes
[linux-2.6-block.git] / fs / btrfs / ctree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "transaction.h"
23 #include "print-tree.h"
24
/*
 * Forward declarations of static helpers whose definitions are not in this
 * part of the file.  push_node_left() and del_ptr() are called from
 * balance_level() further down.
 */
25 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
26                       *root, struct btrfs_path *path, int level);
27 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
28                       *root, struct btrfs_key *ins_key,
29                       struct btrfs_path *path, int data_size, int extend);
30 static int push_node_left(struct btrfs_trans_handle *trans,
31                           struct btrfs_root *root, struct extent_buffer *dst,
32                           struct extent_buffer *src);
33 static int balance_node_right(struct btrfs_trans_handle *trans,
34                               struct btrfs_root *root,
35                               struct extent_buffer *dst_buf,
36                               struct extent_buffer *src_buf);
37 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
38                    struct btrfs_path *path, int level, int slot);
39
40 inline void btrfs_init_path(struct btrfs_path *p)
41 {
42         memset(p, 0, sizeof(*p));
43 }
44
45 struct btrfs_path *btrfs_alloc_path(void)
46 {
47         struct btrfs_path *path;
48         path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
49         if (path) {
50                 btrfs_init_path(path);
51                 path->reada = 1;
52         }
53         return path;
54 }
55
56 void btrfs_free_path(struct btrfs_path *p)
57 {
58         btrfs_release_path(NULL, p);
59         kmem_cache_free(btrfs_path_cachep, p);
60 }
61
62 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
63 {
64         int i;
65         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
66                 if (!p->nodes[i])
67                         break;
68                 free_extent_buffer(p->nodes[i]);
69         }
70         memset(p, 0, sizeof(*p));
71 }
72
73 static void add_root_to_dirty_list(struct btrfs_root *root)
74 {
75         if (root->track_dirty && list_empty(&root->dirty_list)) {
76                 list_add(&root->dirty_list,
77                          &root->fs_info->dirty_cowonly_roots);
78         }
79 }
80
81 int btrfs_copy_root(struct btrfs_trans_handle *trans,
82                       struct btrfs_root *root,
83                       struct extent_buffer *buf,
84                       struct extent_buffer **cow_ret, u64 new_root_objectid)
85 {
86         struct extent_buffer *cow;
87         u32 nritems;
88         int ret = 0;
89         int level;
90         struct btrfs_key first_key;
91         struct btrfs_root *new_root;
92
93         new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
94         if (!new_root)
95                 return -ENOMEM;
96
97         memcpy(new_root, root, sizeof(*new_root));
98         new_root->root_key.objectid = new_root_objectid;
99
100         WARN_ON(root->ref_cows && trans->transid !=
101                 root->fs_info->running_transaction->transid);
102         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
103
104         level = btrfs_header_level(buf);
105         nritems = btrfs_header_nritems(buf);
106         if (nritems) {
107                 if (level == 0)
108                         btrfs_item_key_to_cpu(buf, &first_key, 0);
109                 else
110                         btrfs_node_key_to_cpu(buf, &first_key, 0);
111         } else {
112                 first_key.objectid = 0;
113         }
114         cow = __btrfs_alloc_free_block(trans, new_root, buf->len,
115                                        new_root_objectid,
116                                        trans->transid, first_key.objectid,
117                                        level, buf->start, 0);
118         if (IS_ERR(cow)) {
119                 kfree(new_root);
120                 return PTR_ERR(cow);
121         }
122
123         copy_extent_buffer(cow, buf, 0, 0, cow->len);
124         btrfs_set_header_bytenr(cow, cow->start);
125         btrfs_set_header_generation(cow, trans->transid);
126         btrfs_set_header_owner(cow, new_root_objectid);
127         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
128
129         WARN_ON(btrfs_header_generation(buf) > trans->transid);
130         ret = btrfs_inc_ref(trans, new_root, buf);
131         kfree(new_root);
132
133         if (ret)
134                 return ret;
135
136         btrfs_mark_buffer_dirty(cow);
137         *cow_ret = cow;
138         return 0;
139 }
140
141 int __btrfs_cow_block(struct btrfs_trans_handle *trans,
142                              struct btrfs_root *root,
143                              struct extent_buffer *buf,
144                              struct extent_buffer *parent, int parent_slot,
145                              struct extent_buffer **cow_ret,
146                              u64 search_start, u64 empty_size)
147 {
148         u64 root_gen;
149         struct extent_buffer *cow;
150         u32 nritems;
151         int ret = 0;
152         int different_trans = 0;
153         int level;
154         struct btrfs_key first_key;
155
156         if (root->ref_cows) {
157                 root_gen = trans->transid;
158         } else {
159                 root_gen = 0;
160         }
161         if (!(buf->flags & EXTENT_CSUM))
162                 WARN_ON(1);
163
164         WARN_ON(root->ref_cows && trans->transid !=
165                 root->fs_info->running_transaction->transid);
166         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
167
168         level = btrfs_header_level(buf);
169         nritems = btrfs_header_nritems(buf);
170         if (nritems) {
171                 if (level == 0)
172                         btrfs_item_key_to_cpu(buf, &first_key, 0);
173                 else
174                         btrfs_node_key_to_cpu(buf, &first_key, 0);
175         } else {
176                 first_key.objectid = 0;
177         }
178         cow = __btrfs_alloc_free_block(trans, root, buf->len,
179                                      root->root_key.objectid,
180                                      root_gen, first_key.objectid, level,
181                                      search_start, empty_size);
182         if (IS_ERR(cow))
183                 return PTR_ERR(cow);
184
185         copy_extent_buffer(cow, buf, 0, 0, cow->len);
186         btrfs_set_header_bytenr(cow, cow->start);
187         btrfs_set_header_generation(cow, trans->transid);
188         btrfs_set_header_owner(cow, root->root_key.objectid);
189         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
190
191         WARN_ON(btrfs_header_generation(buf) > trans->transid);
192         if (btrfs_header_generation(buf) != trans->transid) {
193                 different_trans = 1;
194                 ret = btrfs_inc_ref(trans, root, buf);
195                 if (ret)
196                         return ret;
197         } else {
198                 clean_tree_block(trans, root, buf);
199         }
200
201         if (buf == root->node) {
202                 root_gen = btrfs_header_generation(buf);
203                 root->node = cow;
204                 extent_buffer_get(cow);
205                 if (buf != root->commit_root) {
206                         btrfs_free_extent(trans, root, buf->start,
207                                           buf->len, root->root_key.objectid,
208                                           root_gen, 0, 0, 1);
209                 }
210                 free_extent_buffer(buf);
211                 add_root_to_dirty_list(root);
212         } else {
213                 root_gen = btrfs_header_generation(parent);
214                 btrfs_set_node_blockptr(parent, parent_slot,
215                                         cow->start);
216                 WARN_ON(trans->transid == 0);
217                 btrfs_set_node_ptr_generation(parent, parent_slot,
218                                               trans->transid);
219                 btrfs_mark_buffer_dirty(parent);
220                 WARN_ON(btrfs_header_generation(parent) != trans->transid);
221                 btrfs_free_extent(trans, root, buf->start, buf->len,
222                                   btrfs_header_owner(parent), root_gen,
223                                   0, 0, 1);
224         }
225         free_extent_buffer(buf);
226         btrfs_mark_buffer_dirty(cow);
227         *cow_ret = cow;
228         return 0;
229 }
230
231 int btrfs_cow_block(struct btrfs_trans_handle *trans,
232                     struct btrfs_root *root, struct extent_buffer *buf,
233                     struct extent_buffer *parent, int parent_slot,
234                     struct extent_buffer **cow_ret)
235 {
236         u64 search_start;
237         u64 header_trans;
238         int ret;
239
240         if (trans->transaction != root->fs_info->running_transaction) {
241                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
242                        root->fs_info->running_transaction->transid);
243                 WARN_ON(1);
244         }
245         if (trans->transid != root->fs_info->generation) {
246                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
247                        root->fs_info->generation);
248                 WARN_ON(1);
249         }
250         if (!(buf->flags & EXTENT_CSUM))
251                 WARN_ON(1);
252
253         header_trans = btrfs_header_generation(buf);
254         spin_lock(&root->fs_info->hash_lock);
255         if (header_trans == trans->transid &&
256             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
257                 *cow_ret = buf;
258                 spin_unlock(&root->fs_info->hash_lock);
259                 return 0;
260         }
261         spin_unlock(&root->fs_info->hash_lock);
262         search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
263         ret = __btrfs_cow_block(trans, root, buf, parent,
264                                  parent_slot, cow_ret, search_start, 0);
265         return ret;
266 }
267
/*
 * Report whether two blocks of @blocksize bytes lie near each other.
 *
 * The distance measured is from the end of the lower block to the start of
 * the higher one; the blocks count as close when it is below 32768.
 * Identical start addresses are never close.  The arithmetic is unsigned,
 * so overlapping ranges wrap around to a huge distance and are reported as
 * not close.
 *
 * Returns 1 when close, 0 otherwise.
 */
static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
{
        const u64 max_gap = 32768;
        u64 gap;

        if (blocknr == other)
                return 0;

        if (blocknr < other)
                gap = other - (blocknr + blocksize);
        else
                gap = blocknr - (other + blocksize);

        return gap < max_gap;
}
276
277 /*
278  * compare two keys in a memcmp fashion
279  */
280 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
281 {
282         struct btrfs_key k1;
283
284         btrfs_disk_key_to_cpu(&k1, disk);
285
286         if (k1.objectid > k2->objectid)
287                 return 1;
288         if (k1.objectid < k2->objectid)
289                 return -1;
290         if (k1.type > k2->type)
291                 return 1;
292         if (k1.type < k2->type)
293                 return -1;
294         if (k1.offset > k2->offset)
295                 return 1;
296         if (k1.offset < k2->offset)
297                 return -1;
298         return 0;
299 }
300
301
/*
 * Re-allocate the children of @parent that are not laid out next to their
 * neighbours.
 *
 * Slots of @parent are visited from @start_slot.  Slots whose key sorts
 * before *@progress are skipped until the first key that does not.  A child
 * is left alone when close_blocks() reports it close to its neighbours;
 * every other child is passed to __btrfs_cow_block() with a search hint so
 * the copy is allocated near the previously handled block.
 *
 * @cache_only: when set, the function only works on parents at level 1 and
 *              skips any child that is not already in memory and uptodate.
 * @last_ret:   in: initial search hint (0 means "use the last block seen");
 *              out: start of the most recently allocated copy.
 *
 * Returns 0, or the error from __btrfs_cow_block(), which ends the walk.
 */
302 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
303                        struct btrfs_root *root, struct extent_buffer *parent,
304                        int start_slot, int cache_only, u64 *last_ret,
305                        struct btrfs_key *progress)
306 {
307         struct extent_buffer *cur;
308         struct extent_buffer *tmp;
309         u64 blocknr;
310         u64 search_start = *last_ret;
311         u64 last_block = 0;
312         u64 other;
313         u32 parent_nritems;
314         int end_slot;
315         int i;
316         int err = 0;
317         int parent_level;
318         int uptodate;
319         u32 blocksize;
320         int progress_passed = 0;
321         struct btrfs_disk_key disk_key;
322
323         parent_level = btrfs_header_level(parent);
324         if (cache_only && parent_level != 1)
325                 return 0;
326
            /* transaction mismatches are reported but not treated as fatal */
327         if (trans->transaction != root->fs_info->running_transaction) {
328                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
329                        root->fs_info->running_transaction->transid);
330                 WARN_ON(1);
331         }
332         if (trans->transid != root->fs_info->generation) {
333                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
334                        root->fs_info->generation);
335                 WARN_ON(1);
336         }
337
338         parent_nritems = btrfs_header_nritems(parent);
339         blocksize = btrfs_level_size(root, parent_level - 1);
340         end_slot = parent_nritems;
341
            /* a single child has no neighbours to be placed next to */
342         if (parent_nritems == 1)
343                 return 0;
344
345         for (i = start_slot; i < end_slot; i++) {
346                 int close = 1;
347
                    /* keep the parent mapped while its key pointers are read */
348                 if (!parent->map_token) {
349                         map_extent_buffer(parent,
350                                         btrfs_node_key_ptr_offset(i),
351                                         sizeof(struct btrfs_key_ptr),
352                                         &parent->map_token, &parent->kaddr,
353                                         &parent->map_start, &parent->map_len,
354                                         KM_USER1);
355                 }
356                 btrfs_node_key(parent, &disk_key, i);
357                 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
358                         continue;
359
360                 progress_passed = 1;
361                 blocknr = btrfs_node_blockptr(parent, i);
362                 if (last_block == 0)
363                         last_block = blocknr;
364
                    /*
                     * close stays 1 only if the block is close to the previous
                     * slot (or there is none) and, where checked, to the next.
                     */
365                 if (i > 0) {
366                         other = btrfs_node_blockptr(parent, i - 1);
367                         close = close_blocks(blocknr, other, blocksize);
368                 }
                    /*
                     * NOTE(review): the next-slot test is not made for the
                     * last two slots (i >= end_slot - 2) - confirm the bound.
                     */
369                 if (close && i < end_slot - 2) {
370                         other = btrfs_node_blockptr(parent, i + 1);
371                         close = close_blocks(blocknr, other, blocksize);
372                 }
373                 if (close) {
374                         last_block = blocknr;
375                         continue;
376                 }
                    /* drop the mapping before reading or COWing the child */
377                 if (parent->map_token) {
378                         unmap_extent_buffer(parent, parent->map_token,
379                                             KM_USER1);
380                         parent->map_token = NULL;
381                 }
382
383                 cur = btrfs_find_tree_block(root, blocknr, blocksize);
384                 if (cur)
385                         uptodate = btrfs_buffer_uptodate(cur);
386                 else
387                         uptodate = 0;
388                 if (!cur || !uptodate) {
389                         if (cache_only) {
                                    /* cur may be NULL here */
390                                 free_extent_buffer(cur);
391                                 continue;
392                         }
393                         if (!cur) {
                                    /*
                                     * NOTE(review): the result is used below
                                     * without a NULL check.
                                     */
394                                 cur = read_tree_block(root, blocknr,
395                                                          blocksize);
396                         } else if (!uptodate) {
397                                 btrfs_read_buffer(cur);
398                         }
399                 }
400                 if (search_start == 0)
401                         search_start = last_block;
402
403                 btrfs_verify_block_csum(root, cur);
                    /*
                     * empty_size passed to the allocator: room for the slots
                     * still to be visited, capped at 16 blocks
                     */
404                 err = __btrfs_cow_block(trans, root, cur, parent, i,
405                                         &tmp, search_start,
406                                         min(16 * blocksize,
407                                             (end_slot - i) * blocksize));
408                 if (err) {
                            /* on failure the reference on cur is still ours */
409                         free_extent_buffer(cur);
410                         break;
411                 }
                    /* the next copy is searched for from this one's start */
412                 search_start = tmp->start;
413                 last_block = tmp->start;
414                 *last_ret = search_start;
415                 if (parent_level == 1)
416                         btrfs_clear_buffer_defrag(tmp);
417                 free_extent_buffer(tmp);
418         }
419         if (parent->map_token) {
420                 unmap_extent_buffer(parent, parent->map_token,
421                                     KM_USER1);
422                 parent->map_token = NULL;
423         }
424         return err;
425 }
426
427 /*
428  * The leaf data grows from end-to-front in the node.
429  * this returns the address of the start of the last item,
430  * which is the stop of the leaf data stack
431  */
432 static inline unsigned int leaf_data_end(struct btrfs_root *root,
433                                          struct extent_buffer *leaf)
434 {
435         u32 nr = btrfs_header_nritems(leaf);
436         if (nr == 0)
437                 return BTRFS_LEAF_DATA_SIZE(root);
438         return btrfs_item_offset_nr(leaf, nr - 1);
439 }
440
441 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
442                       int level)
443 {
444         struct extent_buffer *parent = NULL;
445         struct extent_buffer *node = path->nodes[level];
446         struct btrfs_disk_key parent_key;
447         struct btrfs_disk_key node_key;
448         int parent_slot;
449         int slot;
450         struct btrfs_key cpukey;
451         u32 nritems = btrfs_header_nritems(node);
452
453         if (path->nodes[level + 1])
454                 parent = path->nodes[level + 1];
455
456         slot = path->slots[level];
457         BUG_ON(nritems == 0);
458         if (parent) {
459                 parent_slot = path->slots[level + 1];
460                 btrfs_node_key(parent, &parent_key, parent_slot);
461                 btrfs_node_key(node, &node_key, 0);
462                 BUG_ON(memcmp(&parent_key, &node_key,
463                               sizeof(struct btrfs_disk_key)));
464                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
465                        btrfs_header_bytenr(node));
466         }
467         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
468         if (slot != 0) {
469                 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
470                 btrfs_node_key(node, &node_key, slot);
471                 BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
472         }
473         if (slot < nritems - 1) {
474                 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
475                 btrfs_node_key(node, &node_key, slot);
476                 BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
477         }
478         return 0;
479 }
480
/*
 * Sanity-check the leaf at path->nodes[@level]; violations hit BUG_ON.
 *
 * An empty leaf passes immediately.  Otherwise:
 *  - if the path has a parent, the parent's key and block pointer at its
 *    slot must match the leaf's first key and header bytenr;
 *  - the item at the path's slot is compared with its neighbours: keys must
 *    be strictly increasing and each item's data must end exactly where the
 *    previous item's data begins;
 *  - the first item's data must end at BTRFS_LEAF_DATA_SIZE(root).
 *
 * The whole-leaf scan between #if 0 and #endif is compiled out.
 *
 * Always returns 0.
 */
481 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
482                       int level)
483 {
484         struct extent_buffer *leaf = path->nodes[level];
485         struct extent_buffer *parent = NULL;
486         int parent_slot;
487         struct btrfs_key cpukey;
488         struct btrfs_disk_key parent_key;
489         struct btrfs_disk_key leaf_key;
            /* NOTE(review): the slot is read from level 0, not from @level */
490         int slot = path->slots[0];
491
492         u32 nritems = btrfs_header_nritems(leaf);
493
494         if (path->nodes[level + 1])
495                 parent = path->nodes[level + 1];
496
497         if (nritems == 0)
498                 return 0;
499
500         if (parent) {
501                 parent_slot = path->slots[level + 1];
502                 btrfs_node_key(parent, &parent_key, parent_slot);
503                 btrfs_item_key(leaf, &leaf_key, 0);
504
505                 BUG_ON(memcmp(&parent_key, &leaf_key,
506                        sizeof(struct btrfs_disk_key)));
507                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
508                        btrfs_header_bytenr(leaf));
509         }
            /* disabled full scan; it uses a variable i that is not declared */
510 #if 0
511         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
512                 btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
513                 btrfs_item_key(leaf, &leaf_key, i);
514                 if (comp_keys(&leaf_key, &cpukey) >= 0) {
515                         btrfs_print_leaf(root, leaf);
516                         printk("slot %d offset bad key\n", i);
517                         BUG_ON(1);
518                 }
519                 if (btrfs_item_offset_nr(leaf, i) !=
520                         btrfs_item_end_nr(leaf, i + 1)) {
521                         btrfs_print_leaf(root, leaf);
522                         printk("slot %d offset bad\n", i);
523                         BUG_ON(1);
524                 }
525                 if (i == 0) {
526                         if (btrfs_item_offset_nr(leaf, i) +
527                                btrfs_item_size_nr(leaf, i) !=
528                                BTRFS_LEAF_DATA_SIZE(root)) {
529                                 btrfs_print_leaf(root, leaf);
530                                 printk("slot %d first offset bad\n", i);
531                                 BUG_ON(1);
532                         }
533                 }
534         }
535         if (nritems > 0) {
536                 if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
537                                 btrfs_print_leaf(root, leaf);
538                                 printk("slot %d bad size \n", nritems - 1);
539                                 BUG_ON(1);
540                 }
541         }
542 #endif
            /*
             * compare with the previous item; as written this is only done
             * when the slot also has a following item
             */
543         if (slot != 0 && slot < nritems - 1) {
544                 btrfs_item_key(leaf, &leaf_key, slot);
545                 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
546                 if (comp_keys(&leaf_key, &cpukey) <= 0) {
547                         btrfs_print_leaf(root, leaf);
548                         printk("slot %d offset bad key\n", slot);
549                         BUG_ON(1);
550                 }
551                 if (btrfs_item_offset_nr(leaf, slot - 1) !=
552                        btrfs_item_end_nr(leaf, slot)) {
553                         btrfs_print_leaf(root, leaf);
554                         printk("slot %d offset bad\n", slot);
555                         BUG_ON(1);
556                 }
557         }
            /* compare with the following item */
558         if (slot < nritems - 1) {
559                 btrfs_item_key(leaf, &leaf_key, slot);
560                 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
561                 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
562                 if (btrfs_item_offset_nr(leaf, slot) !=
563                         btrfs_item_end_nr(leaf, slot + 1)) {
564                         btrfs_print_leaf(root, leaf);
565                         printk("slot %d offset bad\n", slot);
566                         BUG_ON(1);
567                 }
568         }
            /* the first item's data must reach the end of the data area */
569         BUG_ON(btrfs_item_offset_nr(leaf, 0) +
570                btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
571         return 0;
572 }
573
/*
 * Entry point for the consistency checks on path->nodes[@level].
 *
 * As written this always returns 0: the unconditional return at the top
 * makes everything after it unreachable, so neither check_leaf() nor
 * check_node() is run through this function.  The fsid comparison is
 * additionally compiled out with #if 0.
 */
574 static int noinline check_block(struct btrfs_root *root,
575                                 struct btrfs_path *path, int level)
576 {
            /* checks disabled: nothing below this line executes */
577         return 0;
578 #if 0
579         struct extent_buffer *buf = path->nodes[level];
580
581         if (memcmp_extent_buffer(buf, root->fs_info->fsid,
582                                  (unsigned long)btrfs_header_fsid(buf),
583                                  BTRFS_FSID_SIZE)) {
584                 printk("warning bad block %Lu\n", buf->start);
585                 return 1;
586         }
587 #endif
            /* level 0 is checked as a leaf, any other level as a node */
588         if (level == 0)
589                 return check_leaf(root, path, level);
590         return check_node(root, path, level);
591 }
592
593 /*
594  * search for key in the extent_buffer.  The items start at offset p,
595  * and they are item_size apart.  There are 'max' items in p.
596  *
597  * the slot in the array is returned via slot, and it points to
598  * the place where you would insert key if it is not found in
599  * the array.
600  *
601  * slot may point to max if the key is bigger than all of the keys
602  */
/*
 * Returns 0 when an item with exactly this key is found (*slot is its
 * index) and 1 when it is not (*slot is the insertion position).
 *
 * Each probed key is read through a mapping of the extent buffer.  The
 * current mapping is reused while the key lies entirely inside it and is
 * replaced otherwise.  If map_extent_buffer() reports an error, the key is
 * copied into a local with read_extent_buffer() instead.
 */
603 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
604                               int item_size, struct btrfs_key *key,
605                               int max, int *slot)
606 {
607         int low = 0;
608         int high = max;
609         int mid;
610         int ret;
611         struct btrfs_disk_key *tmp = NULL;
612         struct btrfs_disk_key unaligned;
613         unsigned long offset;
614         char *map_token = NULL;
615         char *kaddr = NULL;
616         unsigned long map_start = 0;
617         unsigned long map_len = 0;
618         int err;
619
            /* the search range is [low, high) */
620         while(low < high) {
621                 mid = (low + high) / 2;
622                 offset = p + mid * item_size;
623
                    /* remap when the key is not fully inside the mapped window */
624                 if (!map_token || offset < map_start ||
625                     (offset + sizeof(struct btrfs_disk_key)) >
626                     map_start + map_len) {
627                         if (map_token) {
628                                 unmap_extent_buffer(eb, map_token, KM_USER0);
629                                 map_token = NULL;
630                         }
631                         err = map_extent_buffer(eb, offset,
632                                                 sizeof(struct btrfs_disk_key),
633                                                 &map_token, &kaddr,
634                                                 &map_start, &map_len, KM_USER0);
635
636                         if (!err) {
637                                 tmp = (struct btrfs_disk_key *)(kaddr + offset -
638                                                         map_start);
639                         } else {
                                    /* mapping failed: fall back to a copy of the key */
640                                 read_extent_buffer(eb, &unaligned,
641                                                    offset, sizeof(unaligned));
642                                 tmp = &unaligned;
643                         }
644
645                 } else {
646                         tmp = (struct btrfs_disk_key *)(kaddr + offset -
647                                                         map_start);
648                 }
649                 ret = comp_keys(tmp, key);
650
651                 if (ret < 0)
652                         low = mid + 1;
653                 else if (ret > 0)
654                         high = mid;
655                 else {
656                         *slot = mid;
657                         if (map_token)
658                                 unmap_extent_buffer(eb, map_token, KM_USER0);
659                         return 0;
660                 }
661         }
            /* not found: low is where the key would be inserted */
662         *slot = low;
663         if (map_token)
664                 unmap_extent_buffer(eb, map_token, KM_USER0);
665         return 1;
666 }
667
668 /*
669  * simple bin_search frontend that does the right thing for
670  * leaves vs nodes
671  */
672 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
673                       int level, int *slot)
674 {
675         if (level == 0) {
676                 return generic_bin_search(eb,
677                                           offsetof(struct btrfs_leaf, items),
678                                           sizeof(struct btrfs_item),
679                                           key, btrfs_header_nritems(eb),
680                                           slot);
681         } else {
682                 return generic_bin_search(eb,
683                                           offsetof(struct btrfs_node, ptrs),
684                                           sizeof(struct btrfs_key_ptr),
685                                           key, btrfs_header_nritems(eb),
686                                           slot);
687         }
688         return -1;
689 }
690
/*
 * Read the child block that slot @slot of @parent points to.
 *
 * Returns NULL when @slot is negative or not below the parent's item
 * count; otherwise returns whatever read_tree_block() returns for the
 * child's block pointer, using the block size of the level below @parent.
 */
static struct extent_buffer *read_node_slot(struct btrfs_root *root,
                                   struct extent_buffer *parent, int slot)
{
        int child_level;

        if (slot < 0 || slot >= btrfs_header_nritems(parent))
                return NULL;

        child_level = btrfs_header_level(parent) - 1;
        return read_tree_block(root, btrfs_node_blockptr(parent, slot),
                               btrfs_level_size(root, child_level));
}
701
702 static int balance_level(struct btrfs_trans_handle *trans,
703                          struct btrfs_root *root,
704                          struct btrfs_path *path, int level)
705 {
706         struct extent_buffer *right = NULL;
707         struct extent_buffer *mid;
708         struct extent_buffer *left = NULL;
709         struct extent_buffer *parent = NULL;
710         int ret = 0;
711         int wret;
712         int pslot;
713         int orig_slot = path->slots[level];
714         int err_on_enospc = 0;
715         u64 orig_ptr;
716
717         if (level == 0)
718                 return 0;
719
720         mid = path->nodes[level];
721         WARN_ON(btrfs_header_generation(mid) != trans->transid);
722
723         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
724
725         if (level < BTRFS_MAX_LEVEL - 1)
726                 parent = path->nodes[level + 1];
727         pslot = path->slots[level + 1];
728
729         /*
730          * deal with the case where there is only one pointer in the root
731          * by promoting the node below to a root
732          */
733         if (!parent) {
734                 struct extent_buffer *child;
735
736                 if (btrfs_header_nritems(mid) != 1)
737                         return 0;
738
739                 /* promote the child to a root */
740                 child = read_node_slot(root, mid, 0);
741                 BUG_ON(!child);
742                 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
743                 BUG_ON(ret);
744
745                 root->node = child;
746                 add_root_to_dirty_list(root);
747                 path->nodes[level] = NULL;
748                 clean_tree_block(trans, root, mid);
749                 wait_on_tree_block_writeback(root, mid);
750                 /* once for the path */
751                 free_extent_buffer(mid);
752                 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
753                                         root->root_key.objectid,
754                                         btrfs_header_generation(mid), 0, 0, 1);
755                 /* once for the root ptr */
756                 free_extent_buffer(mid);
757                 return ret;
758         }
759         if (btrfs_header_nritems(mid) >
760             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
761                 return 0;
762
763         if (btrfs_header_nritems(mid) < 2)
764                 err_on_enospc = 1;
765
766         left = read_node_slot(root, parent, pslot - 1);
767         if (left) {
768                 wret = btrfs_cow_block(trans, root, left,
769                                        parent, pslot - 1, &left);
770                 if (wret) {
771                         ret = wret;
772                         goto enospc;
773                 }
774         }
775         right = read_node_slot(root, parent, pslot + 1);
776         if (right) {
777                 wret = btrfs_cow_block(trans, root, right,
778                                        parent, pslot + 1, &right);
779                 if (wret) {
780                         ret = wret;
781                         goto enospc;
782                 }
783         }
784
785         /* first, try to make some room in the middle buffer */
786         if (left) {
787                 orig_slot += btrfs_header_nritems(left);
788                 wret = push_node_left(trans, root, left, mid);
789                 if (wret < 0)
790                         ret = wret;
791                 if (btrfs_header_nritems(mid) < 2)
792                         err_on_enospc = 1;
793         }
794
795         /*
796          * then try to empty the right most buffer into the middle
797          */
798         if (right) {
799                 wret = push_node_left(trans, root, mid, right);
800                 if (wret < 0 && wret != -ENOSPC)
801                         ret = wret;
802                 if (btrfs_header_nritems(right) == 0) {
803                         u64 bytenr = right->start;
804                         u64 generation = btrfs_header_generation(parent);
805                         u32 blocksize = right->len;
806
807                         clean_tree_block(trans, root, right);
808                         wait_on_tree_block_writeback(root, right);
809                         free_extent_buffer(right);
810                         right = NULL;
811                         wret = del_ptr(trans, root, path, level + 1, pslot +
812                                        1);
813                         if (wret)
814                                 ret = wret;
815                         wret = btrfs_free_extent(trans, root, bytenr,
816                                                  blocksize,
817                                                  btrfs_header_owner(parent),
818                                                  generation, 0, 0, 1);
819                         if (wret)
820                                 ret = wret;
821                 } else {
822                         struct btrfs_disk_key right_key;
823                         btrfs_node_key(right, &right_key, 0);
824                         btrfs_set_node_key(parent, &right_key, pslot + 1);
825                         btrfs_mark_buffer_dirty(parent);
826                 }
827         }
828         if (btrfs_header_nritems(mid) == 1) {
829                 /*
830                  * we're not allowed to leave a node with one item in the
831                  * tree during a delete.  A deletion from lower in the tree
832                  * could try to delete the only pointer in this node.
833                  * So, pull some keys from the left.
834                  * There has to be a left pointer at this point because
835                  * otherwise we would have pulled some pointers from the
836                  * right
837                  */
838                 BUG_ON(!left);
839                 wret = balance_node_right(trans, root, mid, left);
840                 if (wret < 0) {
841                         ret = wret;
842                         goto enospc;
843                 }
844                 BUG_ON(wret == 1);
845         }
846         if (btrfs_header_nritems(mid) == 0) {
847                 /* we've managed to empty the middle node, drop it */
848                 u64 root_gen = btrfs_header_generation(parent);
849                 u64 bytenr = mid->start;
850                 u32 blocksize = mid->len;
851                 clean_tree_block(trans, root, mid);
852                 wait_on_tree_block_writeback(root, mid);
853                 free_extent_buffer(mid);
854                 mid = NULL;
855                 wret = del_ptr(trans, root, path, level + 1, pslot);
856                 if (wret)
857                         ret = wret;
858                 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
859                                          btrfs_header_owner(parent),
860                                          root_gen, 0, 0, 1);
861                 if (wret)
862                         ret = wret;
863         } else {
864                 /* update the parent key to reflect our changes */
865                 struct btrfs_disk_key mid_key;
866                 btrfs_node_key(mid, &mid_key, 0);
867                 btrfs_set_node_key(parent, &mid_key, pslot);
868                 btrfs_mark_buffer_dirty(parent);
869         }
870
871         /* update the path */
872         if (left) {
873                 if (btrfs_header_nritems(left) > orig_slot) {
874                         extent_buffer_get(left);
875                         path->nodes[level] = left;
876                         path->slots[level + 1] -= 1;
877                         path->slots[level] = orig_slot;
878                         if (mid)
879                                 free_extent_buffer(mid);
880                 } else {
881                         orig_slot -= btrfs_header_nritems(left);
882                         path->slots[level] = orig_slot;
883                 }
884         }
885         /* double check we haven't messed things up */
886         check_block(root, path, level);
887         if (orig_ptr !=
888             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
889                 BUG();
890 enospc:
891         if (right)
892                 free_extent_buffer(right);
893         if (left)
894                 free_extent_buffer(left);
895         return ret;
896 }
897
898 /* returns zero if the push worked, non-zero otherwise */
899 static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
900                                           struct btrfs_root *root,
901                                           struct btrfs_path *path, int level)
902 {
903         struct extent_buffer *right = NULL;
904         struct extent_buffer *mid;
905         struct extent_buffer *left = NULL;
906         struct extent_buffer *parent = NULL;
907         int ret = 0;
908         int wret;
909         int pslot;
910         int orig_slot = path->slots[level];
911         u64 orig_ptr;
912
913         if (level == 0)
914                 return 1;
915
916         mid = path->nodes[level];
917         WARN_ON(btrfs_header_generation(mid) != trans->transid);
918         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
919
920         if (level < BTRFS_MAX_LEVEL - 1)
921                 parent = path->nodes[level + 1];
922         pslot = path->slots[level + 1];
923
924         if (!parent)
925                 return 1;
926
927         left = read_node_slot(root, parent, pslot - 1);
928
929         /* first, try to make some room in the middle buffer */
930         if (left) {
931                 u32 left_nr;
932                 left_nr = btrfs_header_nritems(left);
933                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
934                         wret = 1;
935                 } else {
936                         ret = btrfs_cow_block(trans, root, left, parent,
937                                               pslot - 1, &left);
938                         if (ret)
939                                 wret = 1;
940                         else {
941                                 wret = push_node_left(trans, root,
942                                                       left, mid);
943                         }
944                 }
945                 if (wret < 0)
946                         ret = wret;
947                 if (wret == 0) {
948                         struct btrfs_disk_key disk_key;
949                         orig_slot += left_nr;
950                         btrfs_node_key(mid, &disk_key, 0);
951                         btrfs_set_node_key(parent, &disk_key, pslot);
952                         btrfs_mark_buffer_dirty(parent);
953                         if (btrfs_header_nritems(left) > orig_slot) {
954                                 path->nodes[level] = left;
955                                 path->slots[level + 1] -= 1;
956                                 path->slots[level] = orig_slot;
957                                 free_extent_buffer(mid);
958                         } else {
959                                 orig_slot -=
960                                         btrfs_header_nritems(left);
961                                 path->slots[level] = orig_slot;
962                                 free_extent_buffer(left);
963                         }
964                         return 0;
965                 }
966                 free_extent_buffer(left);
967         }
968         right= read_node_slot(root, parent, pslot + 1);
969
970         /*
971          * then try to empty the right most buffer into the middle
972          */
973         if (right) {
974                 u32 right_nr;
975                 right_nr = btrfs_header_nritems(right);
976                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
977                         wret = 1;
978                 } else {
979                         ret = btrfs_cow_block(trans, root, right,
980                                               parent, pslot + 1,
981                                               &right);
982                         if (ret)
983                                 wret = 1;
984                         else {
985                                 wret = balance_node_right(trans, root,
986                                                           right, mid);
987                         }
988                 }
989                 if (wret < 0)
990                         ret = wret;
991                 if (wret == 0) {
992                         struct btrfs_disk_key disk_key;
993
994                         btrfs_node_key(right, &disk_key, 0);
995                         btrfs_set_node_key(parent, &disk_key, pslot + 1);
996                         btrfs_mark_buffer_dirty(parent);
997
998                         if (btrfs_header_nritems(mid) <= orig_slot) {
999                                 path->nodes[level] = right;
1000                                 path->slots[level + 1] += 1;
1001                                 path->slots[level] = orig_slot -
1002                                         btrfs_header_nritems(mid);
1003                                 free_extent_buffer(mid);
1004                         } else {
1005                                 free_extent_buffer(right);
1006                         }
1007                         return 0;
1008                 }
1009                 free_extent_buffer(right);
1010         }
1011         return 1;
1012 }
1013
1014 /*
1015  * readahead one full node of leaves
1016  */
1017 static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
1018                              int level, int slot, u64 objectid)
1019 {
1020         struct extent_buffer *node;
1021         struct btrfs_disk_key disk_key;
1022         u32 nritems;
1023         u64 search;
1024         u64 lowest_read;
1025         u64 highest_read;
1026         u64 nread = 0;
1027         int direction = path->reada;
1028         struct extent_buffer *eb;
1029         u32 nr;
1030         u32 blocksize;
1031         u32 nscan = 0;
1032
1033         if (level != 1)
1034                 return;
1035
1036         if (!path->nodes[level])
1037                 return;
1038
1039         node = path->nodes[level];
1040         search = btrfs_node_blockptr(node, slot);
1041         blocksize = btrfs_level_size(root, level - 1);
1042         eb = btrfs_find_tree_block(root, search, blocksize);
1043         if (eb) {
1044                 free_extent_buffer(eb);
1045                 return;
1046         }
1047
1048         highest_read = search;
1049         lowest_read = search;
1050
1051         nritems = btrfs_header_nritems(node);
1052         nr = slot;
1053         while(1) {
1054                 if (direction < 0) {
1055                         if (nr == 0)
1056                                 break;
1057                         nr--;
1058                 } else if (direction > 0) {
1059                         nr++;
1060                         if (nr >= nritems)
1061                                 break;
1062                 }
1063                 if (path->reada < 0 && objectid) {
1064                         btrfs_node_key(node, &disk_key, nr);
1065                         if (btrfs_disk_key_objectid(&disk_key) != objectid)
1066                                 break;
1067                 }
1068                 search = btrfs_node_blockptr(node, nr);
1069                 if ((search >= lowest_read && search <= highest_read) ||
1070                     (search < lowest_read && lowest_read - search <= 32768) ||
1071                     (search > highest_read && search - highest_read <= 32768)) {
1072                         readahead_tree_block(root, search, blocksize);
1073                         nread += blocksize;
1074                 }
1075                 nscan++;
1076                 if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
1077                         break;
1078                 if(nread > (1024 * 1024) || nscan > 128)
1079                         break;
1080
1081                 if (search < lowest_read)
1082                         lowest_read = search;
1083                 if (search > highest_read)
1084                         highest_read = search;
1085         }
1086 }
1087 /*
1088  * look for key in the tree.  path is filled in with nodes along the way
1089  * if key is found, we return zero and you can find the item in the leaf
1090  * level of the path (level 0)
1091  *
1092  * If the key isn't found, the path points to the slot where it should
1093  * be inserted, and 1 is returned.  If there are other errors during the
1094  * search a negative error number is returned.
1095  *
1096  * if ins_len > 0, nodes and leaves will be split as we walk down the
1097  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
1098  * possible)
1099  */
1100 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1101                       *root, struct btrfs_key *key, struct btrfs_path *p, int
1102                       ins_len, int cow)
1103 {
1104         struct extent_buffer *b;
1105         u64 bytenr;
1106         u64 ptr_gen;
1107         int slot;
1108         int ret;
1109         int level;
1110         int should_reada = p->reada;
1111         u8 lowest_level = 0;
1112
1113         lowest_level = p->lowest_level;
1114         WARN_ON(lowest_level && ins_len);
1115         WARN_ON(p->nodes[0] != NULL);
1116         WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
1117 again:
1118         b = root->node;
1119         extent_buffer_get(b);
1120         while (b) {
1121                 level = btrfs_header_level(b);
1122                 if (cow) {
1123                         int wret;
1124                         wret = btrfs_cow_block(trans, root, b,
1125                                                p->nodes[level + 1],
1126                                                p->slots[level + 1],
1127                                                &b);
1128                         if (wret) {
1129                                 free_extent_buffer(b);
1130                                 return wret;
1131                         }
1132                 }
1133                 BUG_ON(!cow && ins_len);
1134                 if (level != btrfs_header_level(b))
1135                         WARN_ON(1);
1136                 level = btrfs_header_level(b);
1137                 p->nodes[level] = b;
1138                 ret = check_block(root, p, level);
1139                 if (ret)
1140                         return -1;
1141                 ret = bin_search(b, key, level, &slot);
1142                 if (level != 0) {
1143                         if (ret && slot > 0)
1144                                 slot -= 1;
1145                         p->slots[level] = slot;
1146                         if (ins_len > 0 && btrfs_header_nritems(b) >=
1147                             BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1148                                 int sret = split_node(trans, root, p, level);
1149                                 BUG_ON(sret > 0);
1150                                 if (sret)
1151                                         return sret;
1152                                 b = p->nodes[level];
1153                                 slot = p->slots[level];
1154                         } else if (ins_len < 0) {
1155                                 int sret = balance_level(trans, root, p,
1156                                                          level);
1157                                 if (sret)
1158                                         return sret;
1159                                 b = p->nodes[level];
1160                                 if (!b) {
1161                                         btrfs_release_path(NULL, p);
1162                                         goto again;
1163                                 }
1164                                 slot = p->slots[level];
1165                                 BUG_ON(btrfs_header_nritems(b) == 1);
1166                         }
1167                         /* this is only true while dropping a snapshot */
1168                         if (level == lowest_level)
1169                                 break;
1170                         bytenr = btrfs_node_blockptr(b, slot);
1171                         ptr_gen = btrfs_node_ptr_generation(b, slot);
1172                         if (should_reada)
1173                                 reada_for_search(root, p, level, slot,
1174                                                  key->objectid);
1175                         b = read_tree_block(root, bytenr,
1176                                             btrfs_level_size(root, level - 1));
1177                         if (ptr_gen != btrfs_header_generation(b)) {
1178                                 printk("block %llu bad gen wanted %llu "
1179                                        "found %llu\n",
1180                                 (unsigned long long)b->start,
1181                                 (unsigned long long)ptr_gen,
1182                                 (unsigned long long)btrfs_header_generation(b));
1183                         }
1184                 } else {
1185                         p->slots[level] = slot;
1186                         if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
1187                             sizeof(struct btrfs_item) + ins_len) {
1188                                 int sret = split_leaf(trans, root, key,
1189                                                       p, ins_len, ret == 0);
1190                                 BUG_ON(sret > 0);
1191                                 if (sret)
1192                                         return sret;
1193                         }
1194                         return ret;
1195                 }
1196         }
1197         return 1;
1198 }
1199
1200 /*
1201  * adjust the pointers going up the tree, starting at level
1202  * making sure the right key of each node is points to 'key'.
1203  * This is used after shifting pointers to the left, so it stops
1204  * fixing up pointers when a given leaf/node is not in slot 0 of the
1205  * higher levels
1206  *
1207  * If this fails to write a tree block, it returns -1, but continues
1208  * fixing up the blocks in ram so the tree is consistent.
1209  */
1210 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1211                           struct btrfs_root *root, struct btrfs_path *path,
1212                           struct btrfs_disk_key *key, int level)
1213 {
1214         int i;
1215         int ret = 0;
1216         struct extent_buffer *t;
1217
1218         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1219                 int tslot = path->slots[i];
1220                 if (!path->nodes[i])
1221                         break;
1222                 t = path->nodes[i];
1223                 btrfs_set_node_key(t, key, tslot);
1224                 btrfs_mark_buffer_dirty(path->nodes[i]);
1225                 if (tslot != 0)
1226                         break;
1227         }
1228         return ret;
1229 }
1230
1231 /*
1232  * try to push data from one node into the next node left in the
1233  * tree.
1234  *
1235  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1236  * error, and > 0 if there was no room in the left hand block.
1237  */
1238 static int push_node_left(struct btrfs_trans_handle *trans,
1239                           struct btrfs_root *root, struct extent_buffer *dst,
1240                           struct extent_buffer *src)
1241 {
1242         int push_items = 0;
1243         int src_nritems;
1244         int dst_nritems;
1245         int ret = 0;
1246
1247         src_nritems = btrfs_header_nritems(src);
1248         dst_nritems = btrfs_header_nritems(dst);
1249         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1250         WARN_ON(btrfs_header_generation(src) != trans->transid);
1251         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1252
1253         if (push_items <= 0) {
1254                 return 1;
1255         }
1256
1257         if (src_nritems < push_items)
1258                 push_items = src_nritems;
1259
1260         copy_extent_buffer(dst, src,
1261                            btrfs_node_key_ptr_offset(dst_nritems),
1262                            btrfs_node_key_ptr_offset(0),
1263                            push_items * sizeof(struct btrfs_key_ptr));
1264
1265         if (push_items < src_nritems) {
1266                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1267                                       btrfs_node_key_ptr_offset(push_items),
1268                                       (src_nritems - push_items) *
1269                                       sizeof(struct btrfs_key_ptr));
1270         }
1271         btrfs_set_header_nritems(src, src_nritems - push_items);
1272         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1273         btrfs_mark_buffer_dirty(src);
1274         btrfs_mark_buffer_dirty(dst);
1275         return ret;
1276 }
1277
1278 /*
1279  * try to push data from one node into the next node right in the
1280  * tree.
1281  *
1282  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1283  * error, and > 0 if there was no room in the right hand block.
1284  *
1285  * this will  only push up to 1/2 the contents of the left node over
1286  */
1287 static int balance_node_right(struct btrfs_trans_handle *trans,
1288                               struct btrfs_root *root,
1289                               struct extent_buffer *dst,
1290                               struct extent_buffer *src)
1291 {
1292         int push_items = 0;
1293         int max_push;
1294         int src_nritems;
1295         int dst_nritems;
1296         int ret = 0;
1297
1298         WARN_ON(btrfs_header_generation(src) != trans->transid);
1299         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1300
1301         src_nritems = btrfs_header_nritems(src);
1302         dst_nritems = btrfs_header_nritems(dst);
1303         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1304         if (push_items <= 0)
1305                 return 1;
1306
1307         max_push = src_nritems / 2 + 1;
1308         /* don't try to empty the node */
1309         if (max_push >= src_nritems)
1310                 return 1;
1311
1312         if (max_push < push_items)
1313                 push_items = max_push;
1314
1315         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1316                                       btrfs_node_key_ptr_offset(0),
1317                                       (dst_nritems) *
1318                                       sizeof(struct btrfs_key_ptr));
1319
1320         copy_extent_buffer(dst, src,
1321                            btrfs_node_key_ptr_offset(0),
1322                            btrfs_node_key_ptr_offset(src_nritems - push_items),
1323                            push_items * sizeof(struct btrfs_key_ptr));
1324
1325         btrfs_set_header_nritems(src, src_nritems - push_items);
1326         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1327
1328         btrfs_mark_buffer_dirty(src);
1329         btrfs_mark_buffer_dirty(dst);
1330         return ret;
1331 }
1332
1333 /*
1334  * helper function to insert a new root level in the tree.
1335  * A new node is allocated, and a single item is inserted to
1336  * point to the existing root
1337  *
1338  * returns zero on success or < 0 on failure.
1339  */
1340 static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1341                            struct btrfs_root *root,
1342                            struct btrfs_path *path, int level)
1343 {
1344         u64 root_gen;
1345         u64 lower_gen;
1346         struct extent_buffer *lower;
1347         struct extent_buffer *c;
1348         struct btrfs_disk_key lower_key;
1349
1350         BUG_ON(path->nodes[level]);
1351         BUG_ON(path->nodes[level-1] != root->node);
1352
1353         if (root->ref_cows)
1354                 root_gen = trans->transid;
1355         else
1356                 root_gen = 0;
1357
1358         lower = path->nodes[level-1];
1359         if (level == 1)
1360                 btrfs_item_key(lower, &lower_key, 0);
1361         else
1362                 btrfs_node_key(lower, &lower_key, 0);
1363
1364         c = __btrfs_alloc_free_block(trans, root, root->nodesize,
1365                                    root->root_key.objectid,
1366                                    root_gen, lower_key.objectid, level,
1367                                    root->node->start, 0);
1368         if (IS_ERR(c))
1369                 return PTR_ERR(c);
1370         memset_extent_buffer(c, 0, 0, root->nodesize);
1371         btrfs_set_header_nritems(c, 1);
1372         btrfs_set_header_level(c, level);
1373         btrfs_set_header_bytenr(c, c->start);
1374         btrfs_set_header_generation(c, trans->transid);
1375         btrfs_set_header_owner(c, root->root_key.objectid);
1376
1377         write_extent_buffer(c, root->fs_info->fsid,
1378                             (unsigned long)btrfs_header_fsid(c),
1379                             BTRFS_FSID_SIZE);
1380         btrfs_set_node_key(c, &lower_key, 0);
1381         btrfs_set_node_blockptr(c, 0, lower->start);
1382         lower_gen = btrfs_header_generation(lower);
1383         WARN_ON(lower_gen == 0);
1384
1385         btrfs_set_node_ptr_generation(c, 0, lower_gen);
1386
1387         btrfs_mark_buffer_dirty(c);
1388
1389         /* the super has an extra ref to root->node */
1390         free_extent_buffer(root->node);
1391         root->node = c;
1392         add_root_to_dirty_list(root);
1393         extent_buffer_get(c);
1394         path->nodes[level] = c;
1395         path->slots[level] = 0;
1396
1397         if (root->ref_cows && lower_gen != trans->transid) {
1398                 struct btrfs_path *back_path = btrfs_alloc_path();
1399                 int ret;
1400                 ret = btrfs_insert_extent_backref(trans,
1401                                                   root->fs_info->extent_root,
1402                                                   path, lower->start,
1403                                                   root->root_key.objectid,
1404                                                   trans->transid, 0, 0);
1405                 BUG_ON(ret);
1406                 btrfs_free_path(back_path);
1407         }
1408         return 0;
1409 }
1410
1411 /*
1412  * worker function to insert a single pointer in a node.
1413  * the node should have enough room for the pointer already
1414  *
1415  * slot and level indicate where you want the key to go, and
1416  * blocknr is the block the key points to.
1417  *
1418  * returns zero on success and < 0 on any error
1419  */
1420 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1421                       *root, struct btrfs_path *path, struct btrfs_disk_key
1422                       *key, u64 bytenr, int slot, int level)
1423 {
1424         struct extent_buffer *lower;
1425         int nritems;
1426
1427         BUG_ON(!path->nodes[level]);
1428         lower = path->nodes[level];
1429         nritems = btrfs_header_nritems(lower);
1430         if (slot > nritems)
1431                 BUG();
1432         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
1433                 BUG();
1434         if (slot != nritems) {
1435                 memmove_extent_buffer(lower,
1436                               btrfs_node_key_ptr_offset(slot + 1),
1437                               btrfs_node_key_ptr_offset(slot),
1438                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
1439         }
1440         btrfs_set_node_key(lower, key, slot);
1441         btrfs_set_node_blockptr(lower, slot, bytenr);
1442         WARN_ON(trans->transid == 0);
1443         btrfs_set_node_ptr_generation(lower, slot, trans->transid);
1444         btrfs_set_header_nritems(lower, nritems + 1);
1445         btrfs_mark_buffer_dirty(lower);
1446         return 0;
1447 }
1448
1449 /*
1450  * split the node at the specified level in path in two.
1451  * The path is corrected to point to the appropriate node after the split
1452  *
1453  * Before splitting this tries to make some room in the node by pushing
1454  * left and right, if either one works, it returns right away.
1455  *
1456  * returns 0 on success and < 0 on failure
1457  */
1458 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1459                       *root, struct btrfs_path *path, int level)
1460 {
1461         u64 root_gen;
1462         struct extent_buffer *c;
1463         struct extent_buffer *split;
1464         struct btrfs_disk_key disk_key;
1465         int mid;
1466         int ret;
1467         int wret;
1468         u32 c_nritems;
1469
1470         c = path->nodes[level];
1471         WARN_ON(btrfs_header_generation(c) != trans->transid);
1472         if (c == root->node) {
1473                 /* trying to split the root, lets make a new one */
1474                 ret = insert_new_root(trans, root, path, level + 1);
1475                 if (ret)
1476                         return ret;
1477         } else {
1478                 ret = push_nodes_for_insert(trans, root, path, level);
1479                 c = path->nodes[level];
1480                 if (!ret && btrfs_header_nritems(c) <
1481                     BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
1482                         return 0;
1483                 if (ret < 0)
1484                         return ret;
1485         }
1486
1487         c_nritems = btrfs_header_nritems(c);
1488         if (root->ref_cows)
1489                 root_gen = trans->transid;
1490         else
1491                 root_gen = 0;
1492
1493         btrfs_node_key(c, &disk_key, 0);
1494         split = __btrfs_alloc_free_block(trans, root, root->nodesize,
1495                                          root->root_key.objectid,
1496                                          root_gen,
1497                                          btrfs_disk_key_objectid(&disk_key),
1498                                          level, c->start, 0);
1499         if (IS_ERR(split))
1500                 return PTR_ERR(split);
1501
1502         btrfs_set_header_flags(split, btrfs_header_flags(c));
1503         btrfs_set_header_level(split, btrfs_header_level(c));
1504         btrfs_set_header_bytenr(split, split->start);
1505         btrfs_set_header_generation(split, trans->transid);
1506         btrfs_set_header_owner(split, root->root_key.objectid);
1507         btrfs_set_header_flags(split, 0);
1508         write_extent_buffer(split, root->fs_info->fsid,
1509                             (unsigned long)btrfs_header_fsid(split),
1510                             BTRFS_FSID_SIZE);
1511
1512         mid = (c_nritems + 1) / 2;
1513
1514         copy_extent_buffer(split, c,
1515                            btrfs_node_key_ptr_offset(0),
1516                            btrfs_node_key_ptr_offset(mid),
1517                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1518         btrfs_set_header_nritems(split, c_nritems - mid);
1519         btrfs_set_header_nritems(c, mid);
1520         ret = 0;
1521
1522         btrfs_mark_buffer_dirty(c);
1523         btrfs_mark_buffer_dirty(split);
1524
1525         btrfs_node_key(split, &disk_key, 0);
1526         wret = insert_ptr(trans, root, path, &disk_key, split->start,
1527                           path->slots[level + 1] + 1,
1528                           level + 1);
1529         if (wret)
1530                 ret = wret;
1531
1532         if (path->slots[level] >= mid) {
1533                 path->slots[level] -= mid;
1534                 free_extent_buffer(c);
1535                 path->nodes[level] = split;
1536                 path->slots[level + 1] += 1;
1537         } else {
1538                 free_extent_buffer(split);
1539         }
1540         return ret;
1541 }
1542
1543 /*
1544  * how many bytes are required to store the items in a leaf.  start
1545  * and nr indicate which items in the leaf to check.  This totals up the
1546  * space used both by the item structs and the item data
      *
      * l:     leaf to inspect
      * start: index of the first item to count
      * nr:    number of items to count; 0 yields 0
      *
      * Returns the byte count.  A negative total triggers WARN_ON() and is
      * still returned.
1547  */
1548 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1549 {
1550         int data_len;
1551         int nritems = btrfs_header_nritems(l);
             /* index of the last item counted, clamped to the last item in the leaf */
1552         int end = min(nritems, start + nr) - 1;
1553
1554         if (!nr)
1555                 return 0;
             /*
              * Item data bytes: from the data offset of item 'end' up to the
              * end of the data of item 'start'.
              */
1556         data_len = btrfs_item_end_nr(l, start);
1557         data_len = data_len - btrfs_item_offset_nr(l, end);
             /*
              * Item header bytes.
              * NOTE(review): this uses nr as passed in, not the clamped count,
              * so a range running past nritems still charges nr headers --
              * confirm that callers never pass such a range.
              */
1558         data_len += sizeof(struct btrfs_item) * nr;
1559         WARN_ON(data_len < 0);
1560         return data_len;
1561 }
1562
1563 /*
1564  * The space between the end of the leaf items and
1565  * the start of the leaf data.  IOW, how much room
1566  * the leaf has left for both items and data
      *
      * Computed as BTRFS_LEAF_DATA_SIZE(root) minus leaf_space_used() over all
      * nritems items of the leaf.  A negative result is reported with printk()
      * and is still returned to the caller unchanged, so callers must be
      * prepared for values below zero.
1567  */
1568 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
1569 {
1570         int nritems = btrfs_header_nritems(leaf);
1571         int ret;
1572         ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
1573         if (ret < 0) {
                     /* diagnostic only; the used size is recomputed just for the message */
1574                 printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
1575                        ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
1576                        leaf_space_used(leaf, 0, nritems), nritems);
1577         }
1578         return ret;
1579 }
1580
1581 /*
1582  * push some data in the path leaf to the right, trying to free up at
1583  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1584  *
1585  * returns 1 if the push failed because the other node didn't have enough
1586  * room, 0 if everything worked out and < 0 if there were major errors.
      *
      * path->nodes[0] is the leaf being drained ("left"); the destination
      * ("right") is the child at path->slots[1] + 1 of path->nodes[1].
      *
      * empty: when zero, item 0 of the left leaf is never considered for
      * pushing; when nonzero, every item may be pushed.
      *
      * 1 is also returned when there is no parent node, when the leaf is the
      * last child of its parent, when the left leaf has no items, when
      * btrfs_cow_block() fails, and when not a single item fits.
      *
      * NOTE(review): no path in this function returns a negative value; the
      * btrfs_cow_block() error code is replaced by 1.  Confirm whether the
      * "< 0" part of the contract above is still intended.
      *
      * On success the path is updated to follow the slot if it moved into the
      * right leaf.
1587  */
1588 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1589                            *root, struct btrfs_path *path, int data_size,
1590                            int empty)
1591 {
1592         struct extent_buffer *left = path->nodes[0];
1593         struct extent_buffer *right;
1594         struct extent_buffer *upper;
1595         struct btrfs_disk_key disk_key;
1596         int slot;
1597         u32 i;
1598         int free_space;
1599         int push_space = 0;
1600         int push_items = 0;
1601         struct btrfs_item *item;
1602         u32 left_nritems;
1603         u32 nr;
1604         u32 right_nritems;
1605         u32 data_end;
1606         u32 this_item_size;
1607         int ret;
1608
1609         slot = path->slots[1];
             /* no parent: the leaf has no sibling to push into */
1610         if (!path->nodes[1]) {
1611                 return 1;
1612         }
1613         upper = path->nodes[1];
             /* already the last child of the parent: no right sibling */
1614         if (slot >= btrfs_header_nritems(upper) - 1)
1615                 return 1;
1616
             /*
              * NOTE(review): the result of read_tree_block() is used without a
              * check; whether it can fail (e.g. return NULL) is not visible
              * here -- confirm.
              */
1617         right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1),
1618                                 root->leafsize);
1619         free_space = btrfs_leaf_free_space(root, right);
             /*
              * NOTE(review): free_space and data_size are int but sizeof yields
              * size_t, so this comparison is carried out in an unsigned type.
              * A negative free_space (btrfs_leaf_free_space() can return one)
              * would compare as a huge value and pass this test -- confirm.
              */
1620         if (free_space < data_size + sizeof(struct btrfs_item)) {
1621                 free_extent_buffer(right);
1622                 return 1;
1623         }
1624
1625         /* cow and double check */
             /* 'right' is passed by address and may be replaced by the call */
1626         ret = btrfs_cow_block(trans, root, right, upper,
1627                               slot + 1, &right);
1628         if (ret) {
1629                 free_extent_buffer(right);
1630                 return 1;
1631         }
             /* same free-space test again, now against the (possibly new) buffer */
1632         free_space = btrfs_leaf_free_space(root, right);
1633         if (free_space < data_size + sizeof(struct btrfs_item)) {
1634                 free_extent_buffer(right);
1635                 return 1;
1636         }
1637
1638         left_nritems = btrfs_header_nritems(left);
1639         if (left_nritems == 0) {
1640                 free_extent_buffer(right);
1641                 return 1;
1642         }
1643
             /* nr is the lowest item index that may still be pushed */
1644         if (empty)
1645                 nr = 0;
1646         else
1647                 nr = 1;
1648
             /*
              * Walk the left leaf from its last item down towards nr and count
              * how many trailing items fit into free_space.  i is unsigned, so
              * the explicit "i == 0" break below is what ends the loop when
              * nr is 0.
              */
1649         i = left_nritems - 1;
1650         while (i >= nr) {
1651                 item = btrfs_item_nr(left, i);
1652
                     /*
                      * When passing the path's slot, also account for data_size
                      * plus one item header (presumably the room the caller
                      * wants at that position -- confirm against callers).
                      */
1653                 if (path->slots[0] == i)
1654                         push_space += data_size + sizeof(*item);
1655
                     /* set up a mapping for the item header if none is held */
1656                 if (!left->map_token) {
1657                         map_extent_buffer(left, (unsigned long)item,
1658                                         sizeof(struct btrfs_item),
1659                                         &left->map_token, &left->kaddr,
1660                                         &left->map_start, &left->map_len,
1661                                         KM_USER1);
1662                 }
1663
1664                 this_item_size = btrfs_item_size(left, item);
                     /* stop at the first item that no longer fits */
1665                 if (this_item_size + sizeof(*item) + push_space > free_space)
1666                         break;
1667                 push_items++;
1668                 push_space += this_item_size + sizeof(*item);
1669                 if (i == 0)
1670                         break;
1671                 i--;
1672         }
1673         if (left->map_token) {
1674                 unmap_extent_buffer(left, left->map_token, KM_USER1);
1675                 left->map_token = NULL;
1676         }
1677
1678         if (push_items == 0) {
1679                 free_extent_buffer(right);
1680                 return 1;
1681         }
1682
             /* pushing every item although the caller did not ask to empty the leaf */
1683         if (!empty && push_items == left_nritems)
1684                 WARN_ON(1);
1685
1686         /* push left to right */
1687         right_nritems = btrfs_header_nritems(right);
1688
             /*
              * From here on push_space is the number of item data bytes to
              * move: from the left leaf's data end up to the end of the data
              * of the first pushed item.
              */
1689         push_space = btrfs_item_end_nr(left, left_nritems - push_items);
1690         push_space -= leaf_data_end(root, left);
1691
1692         /* make room in the right data area */
             /* existing data in 'right' slides towards lower offsets by push_space */
1693         data_end = leaf_data_end(root, right);
1694         memmove_extent_buffer(right,
1695                               btrfs_leaf_data(right) + data_end - push_space,
1696                               btrfs_leaf_data(right) + data_end,
1697                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
1698
1699         /* copy from the left data area */
             /* the pushed data lands at the very top of the right data area */
1700         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
1701                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
1702                      btrfs_leaf_data(left) + leaf_data_end(root, left),
1703                      push_space);
1704
             /* shift the existing item headers of 'right' up by push_items slots */
1705         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1706                               btrfs_item_nr_offset(0),
1707                               right_nritems * sizeof(struct btrfs_item));
1708
1709         /* copy the items from left to right */
1710         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1711                    btrfs_item_nr_offset(left_nritems - push_items),
1712                    push_items * sizeof(struct btrfs_item));
1713
1714         /* update the item pointers */
             /*
              * Every item in 'right' gets its data offset recomputed from its
              * size, starting at BTRFS_LEAF_DATA_SIZE(root) and walking down.
              */
1715         right_nritems += push_items;
1716         btrfs_set_header_nritems(right, right_nritems);
1717         push_space = BTRFS_LEAF_DATA_SIZE(root);
1718         for (i = 0; i < right_nritems; i++) {
1719                 item = btrfs_item_nr(right, i);
1720                 if (!right->map_token) {
1721                         map_extent_buffer(right, (unsigned long)item,
1722                                         sizeof(struct btrfs_item),
1723                                         &right->map_token, &right->kaddr,
1724                                         &right->map_start, &right->map_len,
1725                                         KM_USER1);
1726                 }
1727                 push_space -= btrfs_item_size(right, item);
1728                 btrfs_set_item_offset(right, item, push_space);
1729         }
1730
1731         if (right->map_token) {
1732                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1733                 right->map_token = NULL;
1734         }
             /* the left leaf simply drops its trailing push_items items */
1735         left_nritems -= push_items;
1736         btrfs_set_header_nritems(left, left_nritems);
1737
             /* the left leaf is only marked dirty while it still holds items */
1738         if (left_nritems)
1739                 btrfs_mark_buffer_dirty(left);
1740         btrfs_mark_buffer_dirty(right);
1741
             /* the parent's key for the right leaf must match its new first item */
1742         btrfs_item_key(right, &disk_key, 0);
1743         btrfs_set_node_key(upper, &disk_key, slot + 1);
1744         btrfs_mark_buffer_dirty(upper);
1745
1746         /* then fixup the leaf pointer in the path */
             /*
              * If the path's slot was among the pushed items, the path switches
              * to 'right' and drops its reference on the old leaf; otherwise
              * the reference on 'right' is dropped.
              */
1747         if (path->slots[0] >= left_nritems) {
1748                 path->slots[0] -= left_nritems;
1749                 free_extent_buffer(path->nodes[0]);
1750                 path->nodes[0] = right;
1751                 path->slots[1] += 1;
1752         } else {
1753                 free_extent_buffer(right);
1754         }
1755         return 0;
1756 }
1757 /*
1758  * push some data in the path leaf to the left, trying to free up at
1759  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
      *
      * path->nodes[0] is the leaf being drained ("right"); the destination
      * ("left") is the child at path->slots[1] - 1 of path->nodes[1].
      *
      * empty: when zero, the last item of the right leaf is never considered
      * for pushing; when nonzero, every item may be pushed.
      *
      * Returns 1 when nothing was pushed: the leaf is the first child of its
      * parent, there is no parent, the right leaf has no items, the left leaf
      * lacks room, btrfs_cow_block() failed, or not a single item fits.
      * Otherwise returns 0, or the nonzero result of fixup_low_keys().
      *
      * On success the path is updated to follow the slot if it moved into the
      * left leaf.
1760  */
1761 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1762                           *root, struct btrfs_path *path, int data_size,
1763                           int empty)
1764 {
1765         struct btrfs_disk_key disk_key;
1766         struct extent_buffer *right = path->nodes[0];
1767         struct extent_buffer *left;
1768         int slot;
1769         int i;
1770         int free_space;
1771         int push_space = 0;
1772         int push_items = 0;
1773         struct btrfs_item *item;
1774         u32 old_left_nritems;
1775         u32 right_nritems;
1776         u32 nr;
1777         int ret = 0;
1778         int wret;
1779         u32 this_item_size;
1780         u32 old_left_item_size;
1781
1782         slot = path->slots[1];
             /* first child of the parent: no left sibling */
1783         if (slot == 0)
1784                 return 1;
1785         if (!path->nodes[1])
1786                 return 1;
1787
1788         right_nritems = btrfs_header_nritems(right);
1789         if (right_nritems == 0) {
1790                 return 1;
1791         }
1792
             /*
              * NOTE(review): the result of read_tree_block() is used without a
              * check; whether it can fail (e.g. return NULL) is not visible
              * here -- confirm.
              */
1793         left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1],
1794                                slot - 1), root->leafsize);
1795         free_space = btrfs_leaf_free_space(root, left);
             /*
              * NOTE(review): int compared against int + size_t, so the test is
              * done in an unsigned type; a negative free_space would pass it
              * -- confirm.
              */
1796         if (free_space < data_size + sizeof(struct btrfs_item)) {
1797                 free_extent_buffer(left);
1798                 return 1;
1799         }
1800
1801         /* cow and double check */
             /* 'left' is passed by address and may be replaced by the call */
1802         ret = btrfs_cow_block(trans, root, left,
1803                               path->nodes[1], slot - 1, &left);
1804         if (ret) {
1805                 /* we hit -ENOSPC, but it isn't fatal here */
1806                 free_extent_buffer(left);
1807                 return 1;
1808         }
1809
1810         free_space = btrfs_leaf_free_space(root, left);
1811         if (free_space < data_size + sizeof(struct btrfs_item)) {
1812                 free_extent_buffer(left);
1813                 return 1;
1814         }
1815
             /* nr is the number of leading items that may be pushed */
1816         if (empty)
1817                 nr = right_nritems;
1818         else
1819                 nr = right_nritems - 1;
1820
             /*
              * Walk the right leaf from item 0 upwards and count how many
              * leading items fit into free_space.
              */
1821         for (i = 0; i < nr; i++) {
1822                 item = btrfs_item_nr(right, i);
1823                 if (!right->map_token) {
1824                         map_extent_buffer(right, (unsigned long)item,
1825                                         sizeof(struct btrfs_item),
1826                                         &right->map_token, &right->kaddr,
1827                                         &right->map_start, &right->map_len,
1828                                         KM_USER1);
1829                 }
1830
                     /*
                      * When passing the path's slot, also account for data_size
                      * plus one item header (presumably the room the caller
                      * wants at that position -- confirm against callers).
                      */
1831                 if (path->slots[0] == i)
1832                         push_space += data_size + sizeof(*item);
1833
1834                 this_item_size = btrfs_item_size(right, item);
                     /* stop at the first item that no longer fits */
1835                 if (this_item_size + sizeof(*item) + push_space > free_space)
1836                         break;
1837
1838                 push_items++;
1839                 push_space += this_item_size + sizeof(*item);
1840         }
1841
1842         if (right->map_token) {
1843                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1844                 right->map_token = NULL;
1845         }
1846
1847         if (push_items == 0) {
1848                 free_extent_buffer(left);
1849                 return 1;
1850         }
             /* pushing every item although the caller did not ask to empty the leaf */
1851         if (!empty && push_items == btrfs_header_nritems(right))
1852                 WARN_ON(1);
1853
1854         /* push data from right to left */
             /* item headers first: appended after the left leaf's current items */
1855         copy_extent_buffer(left, right,
1856                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
1857                            btrfs_item_nr_offset(0),
1858                            push_items * sizeof(struct btrfs_item));
1859
             /*
              * From here on push_space is the number of item data bytes to
              * move: from the data offset of the last pushed item up to the
              * top of the data area.
              */
1860         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1861                      btrfs_item_offset_nr(right, push_items -1);
1862
             /* the data goes directly below the left leaf's current data end */
1863         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
1864                      leaf_data_end(root, left) - push_space,
1865                      btrfs_leaf_data(right) +
1866                      btrfs_item_offset_nr(right, push_items - 1),
1867                      push_space);
1868         old_left_nritems = btrfs_header_nritems(left);
             /* NOTE(review): old_left_nritems is u32, so this check can never fire */
1869         BUG_ON(old_left_nritems < 0);
1870
             /*
              * Despite its name, old_left_item_size holds the data offset of
              * the left leaf's previous last item.
              * NOTE(review): if the left leaf had no items, the index
              * old_left_nritems - 1 wraps around -- confirm that an empty left
              * leaf cannot reach this point.
              */
1871         old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
             /*
              * The copied headers still carry offsets valid in the right leaf;
              * lower each by the amount of data the left leaf already held.
              */
1872         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1873                 u32 ioff;
1874
1875                 item = btrfs_item_nr(left, i);
1876                 if (!left->map_token) {
1877                         map_extent_buffer(left, (unsigned long)item,
1878                                         sizeof(struct btrfs_item),
1879                                         &left->map_token, &left->kaddr,
1880                                         &left->map_start, &left->map_len,
1881                                         KM_USER1);
1882                 }
1883
1884                 ioff = btrfs_item_offset(left, item);
1885                 btrfs_set_item_offset(left, item,
1886                       ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
1887         }
1888         btrfs_set_header_nritems(left, old_left_nritems + push_items);
1889         if (left->map_token) {
1890                 unmap_extent_buffer(left, left->map_token, KM_USER1);
1891                 left->map_token = NULL;
1892         }
1893
1894         /* fixup right node */
1895         if (push_items > right_nritems) {
1896                 printk("push items %d nr %u\n", push_items, right_nritems);
1897                 WARN_ON(1);
1898         }
1899
             /*
              * If items remain in the right leaf, slide their data to the top
              * of the data area and their headers down to slot 0.
              */
1900         if (push_items < right_nritems) {
1901                 push_space = btrfs_item_offset_nr(right, push_items - 1) -
1902                                                   leaf_data_end(root, right);
1903                 memmove_extent_buffer(right, btrfs_leaf_data(right) +
1904                                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
1905                                       btrfs_leaf_data(right) +
1906                                       leaf_data_end(root, right), push_space);
1907
1908                 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1909                               btrfs_item_nr_offset(push_items),
1910                              (btrfs_header_nritems(right) - push_items) *
1911                              sizeof(struct btrfs_item));
1912         }
1913         right_nritems -= push_items;
1914         btrfs_set_header_nritems(right, right_nritems);
             /*
              * Recompute the data offset of every remaining item from its size,
              * starting at BTRFS_LEAF_DATA_SIZE(root) and walking down.
              */
1915         push_space = BTRFS_LEAF_DATA_SIZE(root);
1916         for (i = 0; i < right_nritems; i++) {
1917                 item = btrfs_item_nr(right, i);
1918
1919                 if (!right->map_token) {
1920                         map_extent_buffer(right, (unsigned long)item,
1921                                         sizeof(struct btrfs_item),
1922                                         &right->map_token, &right->kaddr,
1923                                         &right->map_start, &right->map_len,
1924                                         KM_USER1);
1925                 }
1926
1927                 push_space = push_space - btrfs_item_size(right, item);
1928                 btrfs_set_item_offset(right, item, push_space);
1929         }
1930         if (right->map_token) {
1931                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1932                 right->map_token = NULL;
1933         }
1934
1935         btrfs_mark_buffer_dirty(left);
             /* the right leaf is only marked dirty while it still holds items */
1936         if (right_nritems)
1937                 btrfs_mark_buffer_dirty(right);
1938
             /*
              * The right leaf's first key changed, so the keys above it are
              * fixed up.
              * NOTE(review): the key of slot 0 is read even when right_nritems
              * is now 0 (everything was pushed) -- confirm this is intended.
              */
1939         btrfs_item_key(right, &disk_key, 0);
1940         wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1941         if (wret)
1942                 ret = wret;
1943
1944         /* then fixup the leaf pointer in the path */
             /*
              * If the path's slot was among the pushed items, the path switches
              * to 'left' and drops its reference on the old leaf; otherwise the
              * slot is renumbered within 'right' and the reference on 'left'
              * is dropped.
              */
1945         if (path->slots[0] < push_items) {
1946                 path->slots[0] += old_left_nritems;
1947                 free_extent_buffer(path->nodes[0]);
1948                 path->nodes[0] = left;
1949                 path->slots[1] -= 1;
1950         } else {
1951                 free_extent_buffer(left);
1952                 path->slots[0] -= push_items;
1953         }
1954         BUG_ON(path->slots[0] < 0);
1955         return ret;
1956 }
1957
1958 /*
1959  * split the path's leaf in two, making sure there is at least data_size
1960  * available for the resulting leaf level of the path.
1961  *
1962  * returns 0 if all went well and < 0 on failure.
      *
      * ins_key:   key about to be inserted; its type decides whether pushing
      *            to the neighbours is tried first, and it becomes the parent
      *            key when a brand new empty leaf is linked in
      * data_size: bytes of item data the caller needs
      * extend:    when nonzero the required space is data_size alone;
      *            otherwise one struct btrfs_item header is added to it
      *
      * On return path->nodes[0] / path->slots[0] refer to the leaf and slot
      * the caller should use.
      *
      * NOTE(review): a failure of the block allocation hits BUG_ON(1) before
      * the error is returned.
1963  */
1964 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1965                       *root, struct btrfs_key *ins_key,
1966                       struct btrfs_path *path, int data_size, int extend)
1967 {
1968         u64 root_gen;
1969         struct extent_buffer *l;
1970         u32 nritems;
1971         int mid;
1972         int slot;
1973         struct extent_buffer *right;
1974         int space_needed = data_size + sizeof(struct btrfs_item);
1975         int data_copy_size;
1976         int rt_data_off;
1977         int i;
1978         int ret = 0;
1979         int wret;
1980         int double_split;
1981         int num_doubles = 0;
1982         struct btrfs_disk_key disk_key;
1983
1984         if (extend)
1985                 space_needed = data_size;
1986
1987         if (root->ref_cows)
1988                 root_gen = trans->transid;
1989         else
1990                 root_gen = 0;
1991
1992         /* first try to make some room by pushing left and right */
             /*
              * Skipped for BTRFS_DIR_ITEM_KEY insertions.  The left push is
              * only attempted when the right push returned nonzero.
              */
1993         if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
1994                 wret = push_leaf_right(trans, root, path, data_size, 0);
1995                 if (wret < 0) {
1996                         return wret;
1997                 }
1998                 if (wret) {
1999                         wret = push_leaf_left(trans, root, path, data_size, 0);
2000                         if (wret < 0)
2001                                 return wret;
2002                 }
2003                 l = path->nodes[0];
2004
2005                 /* did the pushes work? */
2006                 if (btrfs_leaf_free_space(root, l) >= space_needed)
2007                         return 0;
2008         }
2009
             /* the leaf has no parent yet: add a new root at level 1 first */
2010         if (!path->nodes[1]) {
2011                 ret = insert_new_root(trans, root, path, 1);
2012                 if (ret)
2013                         return ret;
2014         }
             /*
              * One split pass.  It runs a second time (at most once, see the
              * BUG_ON at the bottom) when double_split gets set.
              */
2015 again:
2016         double_split = 0;
2017         l = path->nodes[0];
2018         slot = path->slots[0];
2019         nritems = btrfs_header_nritems(l);
             /* default split point: the middle of the leaf */
2020         mid = (nritems + 1)/ 2;
2021
2022         btrfs_item_key(l, &disk_key, 0);
2023
2024         right = __btrfs_alloc_free_block(trans, root, root->leafsize,
2025                                          root->root_key.objectid,
2026                                          root_gen, disk_key.objectid, 0,
2027                                          l->start, 0);
2028         if (IS_ERR(right)) {
2029                 BUG_ON(1);
2030                 return PTR_ERR(right);
2031         }
2032
             /* initialise the header of the new leaf */
2033         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2034         btrfs_set_header_bytenr(right, right->start);
2035         btrfs_set_header_generation(right, trans->transid);
2036         btrfs_set_header_owner(right, root->root_key.objectid);
2037         btrfs_set_header_level(right, 0);
2038         write_extent_buffer(right, root->fs_info->fsid,
2039                             (unsigned long)btrfs_header_fsid(right),
2040                             BTRFS_FSID_SIZE);
             /*
              * Choose the split point.  mid <= slot means the path's slot ends
              * up in the new (right) leaf; otherwise it stays in the old one.
              */
2041         if (mid <= slot) {
                     /* would the upper half plus the needed space overflow a leaf? */
2042                 if (nritems == 1 ||
2043                     leaf_space_used(l, mid, nritems - mid) + space_needed >
2044                         BTRFS_LEAF_DATA_SIZE(root)) {
                             /*
                              * The slot is past the last item: link the new
                              * leaf in empty, right after the old one, keyed
                              * by ins_key, and point the path at its slot 0.
                              */
2045                         if (slot >= nritems) {
2046                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2047                                 btrfs_set_header_nritems(right, 0);
2048                                 wret = insert_ptr(trans, root, path,
2049                                                   &disk_key, right->start,
2050                                                   path->slots[1] + 1, 1);
2051                                 if (wret)
2052                                         ret = wret;
2053                                 free_extent_buffer(path->nodes[0]);
2054                                 path->nodes[0] = right;
2055                                 path->slots[0] = 0;
2056                                 path->slots[1] += 1;
2057                                 btrfs_mark_buffer_dirty(right);
2058                                 return ret;
2059                         }
                             /*
                              * Otherwise split exactly at the slot; if that
                              * still does not leave enough room, ask for a
                              * second pass.
                              */
2060                         mid = slot;
2061                         if (mid != nritems &&
2062                             leaf_space_used(l, mid, nritems - mid) +
2063                             space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
2064                                 double_split = 1;
2065                         }
2066                 }
2067         } else {
                     /* would the lower half plus the needed space overflow a leaf? */
2068                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
2069                         BTRFS_LEAF_DATA_SIZE(root)) {
                             /*
                              * New item at slot 0: link the new leaf in empty
                              * in front of the old one (at the old leaf's
                              * parent slot), keyed by ins_key, and point the
                              * path at its slot 0.  If that parent slot is 0
                              * the keys further up are fixed as well.
                              */
2070                         if (!extend && slot == 0) {
2071                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2072                                 btrfs_set_header_nritems(right, 0);
2073                                 wret = insert_ptr(trans, root, path,
2074                                                   &disk_key,
2075                                                   right->start,
2076                                                   path->slots[1], 1);
2077                                 if (wret)
2078                                         ret = wret;
2079                                 free_extent_buffer(path->nodes[0]);
2080                                 path->nodes[0] = right;
2081                                 path->slots[0] = 0;
2082                                 if (path->slots[1] == 0) {
2083                                         wret = fixup_low_keys(trans, root,
2084                                                    path, &disk_key, 1);
2085                                         if (wret)
2086                                                 ret = wret;
2087                                 }
2088                                 btrfs_mark_buffer_dirty(right);
2089                                 return ret;
2090                         } else if (extend && slot == 0) {
                                     /* extending item 0: keep only that item in the old leaf */
2091                                 mid = 1;
2092                         } else {
                                     /* split at the slot, possibly needing a second pass */
2093                                 mid = slot;
2094                                 if (mid != nritems &&
2095                                     leaf_space_used(l, mid, nritems - mid) +
2096                                     space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
2097                                         double_split = 1;
2098                                 }
2099                         }
2100                 }
2101         }
             /* from here on nritems is the number of items moving to 'right' */
2102         nritems = nritems - mid;
2103         btrfs_set_header_nritems(right, nritems);
2104         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
2105
             /* item headers mid.. go to the start of the new leaf's item array */
2106         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
2107                            btrfs_item_nr_offset(mid),
2108                            nritems * sizeof(struct btrfs_item));
2109
             /* their data goes to the very top of the new leaf's data area */
2110         copy_extent_buffer(right, l,
2111                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
2112                      data_copy_size, btrfs_leaf_data(l) +
2113                      leaf_data_end(root, l), data_copy_size);
2114
             /* distance every moved item's data offset has to be raised by */
2115         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2116                       btrfs_item_end_nr(l, mid);
2117
2118         for (i = 0; i < nritems; i++) {
2119                 struct btrfs_item *item = btrfs_item_nr(right, i);
2120                 u32 ioff;
2121
2122                 if (!right->map_token) {
2123                         map_extent_buffer(right, (unsigned long)item,
2124                                         sizeof(struct btrfs_item),
2125                                         &right->map_token, &right->kaddr,
2126                                         &right->map_start, &right->map_len,
2127                                         KM_USER1);
2128                 }
2129
2130                 ioff = btrfs_item_offset(right, item);
2131                 btrfs_set_item_offset(right, item, ioff + rt_data_off);
2132         }
2133
2134         if (right->map_token) {
2135                 unmap_extent_buffer(right, right->map_token, KM_USER1);
2136                 right->map_token = NULL;
2137         }
2138
             /* the old leaf keeps items 0..mid-1 */
2139         btrfs_set_header_nritems(l, mid);
             /*
              * NOTE(review): on the second pass (double split) this assignment
              * discards an insert_ptr() error recorded by the first pass --
              * confirm whether that is intended.
              */
2140         ret = 0;
             /* link the new leaf into the parent right after the old one */
2141         btrfs_item_key(right, &disk_key, 0);
2142         wret = insert_ptr(trans, root, path, &disk_key, right->start,
2143                           path->slots[1] + 1, 1);
2144         if (wret)
2145                 ret = wret;
2146
2147         btrfs_mark_buffer_dirty(right);
2148         btrfs_mark_buffer_dirty(l);
2149         BUG_ON(path->slots[0] != slot);
2150
             /*
              * Make the path follow the slot: switch to the new leaf if the
              * slot moved there, otherwise drop the reference on the new leaf.
              */
2151         if (mid <= slot) {
2152                 free_extent_buffer(path->nodes[0]);
2153                 path->nodes[0] = right;
2154                 path->slots[0] -= mid;
2155                 path->slots[1] += 1;
2156         } else
2157                 free_extent_buffer(right);
2158
2159         BUG_ON(path->slots[0] < 0);
2160
             /* at most one extra pass is allowed */
2161         if (double_split) {
2162                 BUG_ON(num_doubles != 0);
2163                 num_doubles++;
2164                 goto again;
2165         }
2166         return ret;
2167 }
2168
2169 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
2170                         struct btrfs_root *root,
2171                         struct btrfs_path *path,
2172                         u32 new_size, int from_end)
2173 {
             /*
              * Change the size of the item at path->slots[0] of the leaf
              * path->nodes[0] to new_size and close the resulting gap in the
              * leaf's data area.
              *
              * from_end != 0: the start of the item data is kept and bytes are
              *                dropped from its end.
              * from_end == 0: bytes are dropped from the start of the item
              *                data and the item key's offset is raised by the
              *                number of bytes removed.
              *
              * Always returns 0 (ret is never set to anything else); an item
              * whose size already equals new_size is left untouched.  BUG()s
              * if the leaf ends up with negative free space.
              *
              * NOTE(review): size_diff is unsigned and computed as
              * old_size - new_size, so the code presumably expects
              * new_size <= old_size -- confirm against callers.
              */
2174         int ret = 0;
2175         int slot;
             /* NOTE(review): slot_orig is assigned below but never read here */
2176         int slot_orig;
2177         struct extent_buffer *leaf;
2178         struct btrfs_item *item;
2179         u32 nritems;
2180         unsigned int data_end;
2181         unsigned int old_data_start;
2182         unsigned int old_size;
2183         unsigned int size_diff;
2184         int i;
2185
2186         slot_orig = path->slots[0];
2187         leaf = path->nodes[0];
2188         slot = path->slots[0];
2189
2190         old_size = btrfs_item_size_nr(leaf, slot);
2191         if (old_size == new_size)
2192                 return 0;
2193
2194         nritems = btrfs_header_nritems(leaf);
2195         data_end = leaf_data_end(root, leaf);
2196
2197         old_data_start = btrfs_item_offset_nr(leaf, slot);
2198
2199         size_diff = old_size - new_size;
2200
             /* NOTE(review): slot has already been used for item reads above */
2201         BUG_ON(slot < 0);
2202         BUG_ON(slot >= nritems);
2203
2204         /*
2205          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2206          */
2207         /* first correct the data pointers */
             /* the data offset of item 'slot' and of every later item grows by size_diff */
2208         for (i = slot; i < nritems; i++) {
2209                 u32 ioff;
2210                 item = btrfs_item_nr(leaf, i);
2211
2212                 if (!leaf->map_token) {
2213                         map_extent_buffer(leaf, (unsigned long)item,
2214                                         sizeof(struct btrfs_item),
2215                                         &leaf->map_token, &leaf->kaddr,
2216                                         &leaf->map_start, &leaf->map_len,
2217                                         KM_USER1);
2218                 }
2219
2220                 ioff = btrfs_item_offset(leaf, item);
2221                 btrfs_set_item_offset(leaf, item, ioff + size_diff);
2222         }
2223
2224         if (leaf->map_token) {
2225                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2226                 leaf->map_token = NULL;
2227         }
2228
2229         /* shift the data */
2230         if (from_end) {
                     /*
                      * Move everything from the leaf's data end up to the first
                      * new_size bytes of this item upwards by size_diff; the
                      * item's old tail is overwritten.
                      */
2231                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2232                               data_end + size_diff, btrfs_leaf_data(leaf) +
2233                               data_end, old_data_start + new_size - data_end);
2234         } else {
2235                 struct btrfs_disk_key disk_key;
2236                 u64 offset;
2237
2238                 btrfs_item_key(leaf, &disk_key, slot);
2239
2240                 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
2241                         unsigned long ptr;
2242                         struct btrfs_file_extent_item *fi;
2243
                             /*
                              * The item offset was already raised by size_diff
                              * in the loop above, so stepping back by size_diff
                              * gives the old start of the item data.
                              */
2244                         fi = btrfs_item_ptr(leaf, slot,
2245                                             struct btrfs_file_extent_item);
2246                         fi = (struct btrfs_file_extent_item *)(
2247                              (unsigned long)fi - size_diff);
2248
                             /*
                              * For an inline extent, the leading fields of the
                              * extent item (everything before disk_bytenr) are
                              * copied from the old start to the new start.
                              */
2249                         if (btrfs_file_extent_type(leaf, fi) ==
2250                             BTRFS_FILE_EXTENT_INLINE) {
2251                                 ptr = btrfs_item_ptr_offset(leaf, slot);
2252                                 memmove_extent_buffer(leaf, ptr,
2253                                         (unsigned long)fi,
2254                                         offsetof(struct btrfs_file_extent_item,
2255                                                  disk_bytenr));
2256                         }
2257                 }
2258
                     /*
                      * Move the data of all later items (from the leaf's data
                      * end up to the old start of this item) upwards by
                      * size_diff.
                      */
2259                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2260                               data_end + size_diff, btrfs_leaf_data(leaf) +
2261                               data_end, old_data_start - data_end);
2262
                     /* the key offset advances by the number of bytes dropped */
2263                 offset = btrfs_disk_key_offset(&disk_key);
2264                 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
2265                 btrfs_set_item_key(leaf, &disk_key, slot);
                     /*
                      * A changed key in slot 0 also has to be fixed in the
                      * nodes above.
                      * NOTE(review): the result of fixup_low_keys() is ignored.
                      */
2266                 if (slot == 0)
2267                         fixup_low_keys(trans, root, path, &disk_key, 1);
2268         }
2269
2270         item = btrfs_item_nr(leaf, slot);
2271         btrfs_set_item_size(leaf, item, new_size);
2272         btrfs_mark_buffer_dirty(leaf);
2273
2274         ret = 0;
2275         if (btrfs_leaf_free_space(root, leaf) < 0) {
2276                 btrfs_print_leaf(root, leaf);
2277                 BUG();
2278         }
2279         return ret;
2280 }
2281
2282 int btrfs_extend_item(struct btrfs_trans_handle *trans,
2283                       struct btrfs_root *root, struct btrfs_path *path,
2284                       u32 data_size)
2285 {
2286         int ret = 0;
2287         int slot;
2288         int slot_orig;
2289         struct extent_buffer *leaf;
2290         struct btrfs_item *item;
2291         u32 nritems;
2292         unsigned int data_end;
2293         unsigned int old_data;
2294         unsigned int old_size;
2295         int i;
2296
2297         slot_orig = path->slots[0];
2298         leaf = path->nodes[0];
2299
2300         nritems = btrfs_header_nritems(leaf);
2301         data_end = leaf_data_end(root, leaf);
2302
2303         if (btrfs_leaf_free_space(root, leaf) < data_size) {
2304                 btrfs_print_leaf(root, leaf);
2305                 BUG();
2306         }
2307         slot = path->slots[0];
2308         old_data = btrfs_item_end_nr(leaf, slot);
2309
2310         BUG_ON(slot < 0);
2311         if (slot >= nritems) {
2312                 btrfs_print_leaf(root, leaf);
2313                 printk("slot %d too large, nritems %d\n", slot, nritems);
2314                 BUG_ON(1);
2315         }
2316
2317         /*
2318          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2319          */
2320         /* first correct the data pointers */
2321         for (i = slot; i < nritems; i++) {
2322                 u32 ioff;
2323                 item = btrfs_item_nr(leaf, i);
2324
2325                 if (!leaf->map_token) {
2326                         map_extent_buffer(leaf, (unsigned long)item,
2327                                         sizeof(struct btrfs_item),
2328                                         &leaf->map_token, &leaf->kaddr,
2329                                         &leaf->map_start, &leaf->map_len,
2330                                         KM_USER1);
2331                 }
2332                 ioff = btrfs_item_offset(leaf, item);
2333                 btrfs_set_item_offset(leaf, item, ioff - data_size);
2334         }
2335
2336         if (leaf->map_token) {
2337                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2338                 leaf->map_token = NULL;
2339         }
2340
2341         /* shift the data */
2342         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2343                       data_end - data_size, btrfs_leaf_data(leaf) +
2344                       data_end, old_data - data_end);
2345
2346         data_end = old_data;
2347         old_size = btrfs_item_size_nr(leaf, slot);
2348         item = btrfs_item_nr(leaf, slot);
2349         btrfs_set_item_size(leaf, item, old_size + data_size);
2350         btrfs_mark_buffer_dirty(leaf);
2351
2352         ret = 0;
2353         if (btrfs_leaf_free_space(root, leaf) < 0) {
2354                 btrfs_print_leaf(root, leaf);
2355                 BUG();
2356         }
2357         return ret;
2358 }
2359
2360 /*
2361  * Given a key and some data, insert an item into the tree.
2362  * This does all the path init required, making room in the tree if needed.
2363  */
2364 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
2365                             struct btrfs_root *root,
2366                             struct btrfs_path *path,
2367                             struct btrfs_key *cpu_key, u32 *data_size,
2368                             int nr)
2369 {
2370         struct extent_buffer *leaf;
2371         struct btrfs_item *item;
2372         int ret = 0;
2373         int slot;
2374         int slot_orig;
2375         int i;
2376         u32 nritems;
2377         u32 total_size = 0;
2378         u32 total_data = 0;
2379         unsigned int data_end;
2380         struct btrfs_disk_key disk_key;
2381
2382         for (i = 0; i < nr; i++) {
2383                 total_data += data_size[i];
2384         }
2385
2386         /* create a root if there isn't one */
2387         if (!root->node)
2388                 BUG();
2389
2390         total_size = total_data + (nr - 1) * sizeof(struct btrfs_item);
2391         ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
2392         if (ret == 0) {
2393                 return -EEXIST;
2394         }
2395         if (ret < 0)
2396                 goto out;
2397
2398         slot_orig = path->slots[0];
2399         leaf = path->nodes[0];
2400
2401         nritems = btrfs_header_nritems(leaf);
2402         data_end = leaf_data_end(root, leaf);
2403
2404         if (btrfs_leaf_free_space(root, leaf) <
2405             sizeof(struct btrfs_item) + total_size) {
2406                 btrfs_print_leaf(root, leaf);
2407                 printk("not enough freespace need %u have %d\n",
2408                        total_size, btrfs_leaf_free_space(root, leaf));
2409                 BUG();
2410         }
2411
2412         slot = path->slots[0];
2413         BUG_ON(slot < 0);
2414
2415         if (slot != nritems) {
2416                 int i;
2417                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2418
2419                 if (old_data < data_end) {
2420                         btrfs_print_leaf(root, leaf);
2421                         printk("slot %d old_data %d data_end %d\n",
2422                                slot, old_data, data_end);
2423                         BUG_ON(1);
2424                 }
2425                 /*
2426                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
2427                  */
2428                 /* first correct the data pointers */
2429                 WARN_ON(leaf->map_token);
2430                 for (i = slot; i < nritems; i++) {
2431                         u32 ioff;
2432
2433                         item = btrfs_item_nr(leaf, i);
2434                         if (!leaf->map_token) {
2435                                 map_extent_buffer(leaf, (unsigned long)item,
2436                                         sizeof(struct btrfs_item),
2437                                         &leaf->map_token, &leaf->kaddr,
2438                                         &leaf->map_start, &leaf->map_len,
2439                                         KM_USER1);
2440                         }
2441
2442                         ioff = btrfs_item_offset(leaf, item);
2443                         btrfs_set_item_offset(leaf, item, ioff - total_data);
2444                 }
2445                 if (leaf->map_token) {
2446                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2447                         leaf->map_token = NULL;
2448                 }
2449
2450                 /* shift the items */
2451                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
2452                               btrfs_item_nr_offset(slot),
2453                               (nritems - slot) * sizeof(struct btrfs_item));
2454
2455                 /* shift the data */
2456                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2457                               data_end - total_data, btrfs_leaf_data(leaf) +
2458                               data_end, old_data - data_end);
2459                 data_end = old_data;
2460         }
2461
2462         /* setup the item for the new data */
2463         for (i = 0; i < nr; i++) {
2464                 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
2465                 btrfs_set_item_key(leaf, &disk_key, slot + i);
2466                 item = btrfs_item_nr(leaf, slot + i);
2467                 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
2468                 data_end -= data_size[i];
2469                 btrfs_set_item_size(leaf, item, data_size[i]);
2470         }
2471         btrfs_set_header_nritems(leaf, nritems + nr);
2472         btrfs_mark_buffer_dirty(leaf);
2473
2474         ret = 0;
2475         if (slot == 0) {
2476                 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2477                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
2478         }
2479
2480         if (btrfs_leaf_free_space(root, leaf) < 0) {
2481                 btrfs_print_leaf(root, leaf);
2482                 BUG();
2483         }
2484
2485 out:
2486         return ret;
2487 }
2488
2489 /*
2490  * Given a key and some data, insert an item into the tree.
2491  * This does all the path init required, making room in the tree if needed.
2492  */
2493 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2494                       *root, struct btrfs_key *cpu_key, void *data, u32
2495                       data_size)
2496 {
2497         int ret = 0;
2498         struct btrfs_path *path;
2499         struct extent_buffer *leaf;
2500         unsigned long ptr;
2501
2502         path = btrfs_alloc_path();
2503         BUG_ON(!path);
2504         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2505         if (!ret) {
2506                 leaf = path->nodes[0];
2507                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2508                 write_extent_buffer(leaf, data, ptr, data_size);
2509                 btrfs_mark_buffer_dirty(leaf);
2510         }
2511         btrfs_free_path(path);
2512         return ret;
2513 }
2514
2515 /*
2516  * delete the pointer from a given node.
2517  *
2518  * If the delete empties a node, the node is removed from the tree,
2519  * continuing all the way the root if required.  The root is converted into
2520  * a leaf if all the nodes are emptied.
2521  */
2522 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2523                    struct btrfs_path *path, int level, int slot)
2524 {
2525         struct extent_buffer *parent = path->nodes[level];
2526         u32 nritems;
2527         int ret = 0;
2528         int wret;
2529
2530         nritems = btrfs_header_nritems(parent);
2531         if (slot != nritems -1) {
2532                 memmove_extent_buffer(parent,
2533                               btrfs_node_key_ptr_offset(slot),
2534                               btrfs_node_key_ptr_offset(slot + 1),
2535                               sizeof(struct btrfs_key_ptr) *
2536                               (nritems - slot - 1));
2537         }
2538         nritems--;
2539         btrfs_set_header_nritems(parent, nritems);
2540         if (nritems == 0 && parent == root->node) {
2541                 BUG_ON(btrfs_header_level(root->node) != 1);
2542                 /* just turn the root into a leaf and break */
2543                 btrfs_set_header_level(root->node, 0);
2544         } else if (slot == 0) {
2545                 struct btrfs_disk_key disk_key;
2546
2547                 btrfs_node_key(parent, &disk_key, 0);
2548                 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
2549                 if (wret)
2550                         ret = wret;
2551         }
2552         btrfs_mark_buffer_dirty(parent);
2553         return ret;
2554 }
2555
2556 /*
2557  * delete the item at the leaf level in path.  If that empties
2558  * the leaf, remove it from the tree
2559  */
2560 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2561                     struct btrfs_path *path, int slot, int nr)
2562 {
2563         struct extent_buffer *leaf;
2564         struct btrfs_item *item;
2565         int last_off;
2566         int dsize = 0;
2567         int ret = 0;
2568         int wret;
2569         int i;
2570         u32 nritems;
2571
2572         leaf = path->nodes[0];
2573         last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
2574
2575         for (i = 0; i < nr; i++)
2576                 dsize += btrfs_item_size_nr(leaf, slot + i);
2577
2578         nritems = btrfs_header_nritems(leaf);
2579
2580         if (slot + nr != nritems) {
2581                 int i;
2582                 int data_end = leaf_data_end(root, leaf);
2583
2584                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2585                               data_end + dsize,
2586                               btrfs_leaf_data(leaf) + data_end,
2587                               last_off - data_end);
2588
2589                 for (i = slot + nr; i < nritems; i++) {
2590                         u32 ioff;
2591
2592                         item = btrfs_item_nr(leaf, i);
2593                         if (!leaf->map_token) {
2594                                 map_extent_buffer(leaf, (unsigned long)item,
2595                                         sizeof(struct btrfs_item),
2596                                         &leaf->map_token, &leaf->kaddr,
2597                                         &leaf->map_start, &leaf->map_len,
2598                                         KM_USER1);
2599                         }
2600                         ioff = btrfs_item_offset(leaf, item);
2601                         btrfs_set_item_offset(leaf, item, ioff + dsize);
2602                 }
2603
2604                 if (leaf->map_token) {
2605                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2606                         leaf->map_token = NULL;
2607                 }
2608
2609                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2610                               btrfs_item_nr_offset(slot + nr),
2611                               sizeof(struct btrfs_item) *
2612                               (nritems - slot - nr));
2613         }
2614         btrfs_set_header_nritems(leaf, nritems - nr);
2615         nritems -= nr;
2616
2617         /* delete the leaf if we've emptied it */
2618         if (nritems == 0) {
2619                 if (leaf == root->node) {
2620                         btrfs_set_header_level(leaf, 0);
2621                 } else {
2622                         u64 root_gen = btrfs_header_generation(path->nodes[1]);
2623                         clean_tree_block(trans, root, leaf);
2624                         wait_on_tree_block_writeback(root, leaf);
2625                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
2626                         if (wret)
2627                                 ret = wret;
2628                         wret = btrfs_free_extent(trans, root,
2629                                          leaf->start, leaf->len,
2630                                          btrfs_header_owner(path->nodes[1]),
2631                                          root_gen, 0, 0, 1);
2632                         if (wret)
2633                                 ret = wret;
2634                 }
2635         } else {
2636                 int used = leaf_space_used(leaf, 0, nritems);
2637                 if (slot == 0) {
2638                         struct btrfs_disk_key disk_key;
2639
2640                         btrfs_item_key(leaf, &disk_key, 0);
2641                         wret = fixup_low_keys(trans, root, path,
2642                                               &disk_key, 1);
2643                         if (wret)
2644                                 ret = wret;
2645                 }
2646
2647                 /* delete the leaf if it is mostly empty */
2648                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
2649                         /* push_leaf_left fixes the path.
2650                          * make sure the path still points to our leaf
2651                          * for possible call to del_ptr below
2652                          */
2653                         slot = path->slots[1];
2654                         extent_buffer_get(leaf);
2655
2656                         wret = push_leaf_left(trans, root, path, 1, 1);
2657                         if (wret < 0 && wret != -ENOSPC)
2658                                 ret = wret;
2659
2660                         if (path->nodes[0] == leaf &&
2661                             btrfs_header_nritems(leaf)) {
2662                                 wret = push_leaf_right(trans, root, path, 1, 1);
2663                                 if (wret < 0 && wret != -ENOSPC)
2664                                         ret = wret;
2665                         }
2666
2667                         if (btrfs_header_nritems(leaf) == 0) {
2668                                 u64 root_gen;
2669                                 u64 bytenr = leaf->start;
2670                                 u32 blocksize = leaf->len;
2671
2672                                 root_gen = btrfs_header_generation(
2673                                                            path->nodes[1]);
2674
2675                                 clean_tree_block(trans, root, leaf);
2676                                 wait_on_tree_block_writeback(root, leaf);
2677
2678                                 wret = del_ptr(trans, root, path, 1, slot);
2679                                 if (wret)
2680                                         ret = wret;
2681
2682                                 free_extent_buffer(leaf);
2683                                 wret = btrfs_free_extent(trans, root, bytenr,
2684                                              blocksize,
2685                                              btrfs_header_owner(path->nodes[1]),
2686                                              root_gen, 0, 0, 1);
2687                                 if (wret)
2688                                         ret = wret;
2689                         } else {
2690                                 btrfs_mark_buffer_dirty(leaf);
2691                                 free_extent_buffer(leaf);
2692                         }
2693                 } else {
2694                         btrfs_mark_buffer_dirty(leaf);
2695                 }
2696         }
2697         return ret;
2698 }
2699
2700 /*
2701  * walk up the tree as far as required to find the previous leaf.
2702  * returns 0 if it found something or 1 if there are no lesser leaves.
2703  * returns < 0 on io errors.
2704  */
2705 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
2706 {
2707         u64 bytenr;
2708         int slot;
2709         int level = 1;
2710         struct extent_buffer *c;
2711         struct extent_buffer *next = NULL;
2712
2713         while(level < BTRFS_MAX_LEVEL) {
2714                 if (!path->nodes[level])
2715                         return 1;
2716
2717                 slot = path->slots[level];
2718                 c = path->nodes[level];
2719                 if (slot == 0) {
2720                         level++;
2721                         if (level == BTRFS_MAX_LEVEL)
2722                                 return 1;
2723                         continue;
2724                 }
2725                 slot--;
2726
2727                 bytenr = btrfs_node_blockptr(c, slot);
2728                 if (next)
2729                         free_extent_buffer(next);
2730
2731                 next = read_tree_block(root, bytenr,
2732                                        btrfs_level_size(root, level - 1));
2733                 break;
2734         }
2735         path->slots[level] = slot;
2736         while(1) {
2737                 level--;
2738                 c = path->nodes[level];
2739                 free_extent_buffer(c);
2740                 slot = btrfs_header_nritems(next);
2741                 if (slot != 0)
2742                         slot--;
2743                 path->nodes[level] = next;
2744                 path->slots[level] = slot;
2745                 if (!level)
2746                         break;
2747                 next = read_tree_block(root, btrfs_node_blockptr(next, slot),
2748                                        btrfs_level_size(root, level - 1));
2749         }
2750         return 0;
2751 }
2752
2753 /*
2754  * walk up the tree as far as required to find the next leaf.
2755  * returns 0 if it found something or 1 if there are no greater leaves.
2756  * returns < 0 on io errors.
2757  */
2758 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2759 {
2760         int slot;
2761         int level = 1;
2762         u64 bytenr;
2763         struct extent_buffer *c;
2764         struct extent_buffer *next = NULL;
2765
2766         while(level < BTRFS_MAX_LEVEL) {
2767                 if (!path->nodes[level])
2768                         return 1;
2769
2770                 slot = path->slots[level] + 1;
2771                 c = path->nodes[level];
2772                 if (slot >= btrfs_header_nritems(c)) {
2773                         level++;
2774                         if (level == BTRFS_MAX_LEVEL)
2775                                 return 1;
2776                         continue;
2777                 }
2778
2779                 bytenr = btrfs_node_blockptr(c, slot);
2780                 if (next)
2781                         free_extent_buffer(next);
2782
2783                 if (path->reada)
2784                         reada_for_search(root, path, level, slot, 0);
2785
2786                 next = read_tree_block(root, bytenr,
2787                                        btrfs_level_size(root, level -1));
2788                 break;
2789         }
2790         path->slots[level] = slot;
2791         while(1) {
2792                 level--;
2793                 c = path->nodes[level];
2794                 free_extent_buffer(c);
2795                 path->nodes[level] = next;
2796                 path->slots[level] = 0;
2797                 if (!level)
2798                         break;
2799                 if (path->reada)
2800                         reada_for_search(root, path, level, 0, 0);
2801                 next = read_tree_block(root, btrfs_node_blockptr(next, 0),
2802                                        btrfs_level_size(root, level - 1));
2803         }
2804         return 0;
2805 }
2806
2807 int btrfs_previous_item(struct btrfs_root *root,
2808                         struct btrfs_path *path, u64 min_objectid,
2809                         int type)
2810 {
2811         struct btrfs_key found_key;
2812         struct extent_buffer *leaf;
2813         int ret;
2814
2815         while(1) {
2816                 if (path->slots[0] == 0) {
2817                         ret = btrfs_prev_leaf(root, path);
2818                         if (ret != 0)
2819                                 return ret;
2820                 } else {
2821                         path->slots[0]--;
2822                 }
2823                 leaf = path->nodes[0];
2824                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2825                 if (found_key.type == type)
2826                         return 0;
2827         }
2828         return 1;
2829 }
2830