Call btrfs_cow_block while lowering tree level.
[linux-2.6-block.git] / fs / btrfs / ctree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "transaction.h"
23 #include "print-tree.h"
24
25 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
26                       *root, struct btrfs_path *path, int level);
27 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
28                       *root, struct btrfs_key *ins_key,
29                       struct btrfs_path *path, int data_size, int extend);
30 static int push_node_left(struct btrfs_trans_handle *trans,
31                           struct btrfs_root *root, struct extent_buffer *dst,
32                           struct extent_buffer *src);
33 static int balance_node_right(struct btrfs_trans_handle *trans,
34                               struct btrfs_root *root,
35                               struct extent_buffer *dst_buf,
36                               struct extent_buffer *src_buf);
37 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
38                    struct btrfs_path *path, int level, int slot);
39
40 inline void btrfs_init_path(struct btrfs_path *p)
41 {
42         memset(p, 0, sizeof(*p));
43 }
44
45 struct btrfs_path *btrfs_alloc_path(void)
46 {
47         struct btrfs_path *path;
48         path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
49         if (path) {
50                 btrfs_init_path(path);
51                 path->reada = 1;
52         }
53         return path;
54 }
55
56 void btrfs_free_path(struct btrfs_path *p)
57 {
58         btrfs_release_path(NULL, p);
59         kmem_cache_free(btrfs_path_cachep, p);
60 }
61
62 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
63 {
64         int i;
65         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
66                 if (!p->nodes[i])
67                         break;
68                 free_extent_buffer(p->nodes[i]);
69         }
70         memset(p, 0, sizeof(*p));
71 }
72
73 int btrfs_copy_root(struct btrfs_trans_handle *trans,
74                       struct btrfs_root *root,
75                       struct extent_buffer *buf,
76                       struct extent_buffer **cow_ret, u64 new_root_objectid)
77 {
78         struct extent_buffer *cow;
79         u32 nritems;
80         int ret = 0;
81         int level;
82         struct btrfs_key first_key;
83         struct btrfs_root *new_root;
84
85         new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
86         if (!new_root)
87                 return -ENOMEM;
88
89         memcpy(new_root, root, sizeof(*new_root));
90         new_root->root_key.objectid = new_root_objectid;
91
92         WARN_ON(root->ref_cows && trans->transid !=
93                 root->fs_info->running_transaction->transid);
94         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
95
96         level = btrfs_header_level(buf);
97         nritems = btrfs_header_nritems(buf);
98         if (nritems) {
99                 if (level == 0)
100                         btrfs_item_key_to_cpu(buf, &first_key, 0);
101                 else
102                         btrfs_node_key_to_cpu(buf, &first_key, 0);
103         } else {
104                 first_key.objectid = 0;
105         }
106         cow = __btrfs_alloc_free_block(trans, new_root, buf->len,
107                                        new_root_objectid,
108                                        trans->transid, first_key.objectid,
109                                        level, buf->start, 0);
110         if (IS_ERR(cow)) {
111                 kfree(new_root);
112                 return PTR_ERR(cow);
113         }
114
115         copy_extent_buffer(cow, buf, 0, 0, cow->len);
116         btrfs_set_header_bytenr(cow, cow->start);
117         btrfs_set_header_generation(cow, trans->transid);
118         btrfs_set_header_owner(cow, new_root_objectid);
119
120         WARN_ON(btrfs_header_generation(buf) > trans->transid);
121         ret = btrfs_inc_ref(trans, new_root, buf);
122         kfree(new_root);
123
124         if (ret)
125                 return ret;
126
127         btrfs_mark_buffer_dirty(cow);
128         *cow_ret = cow;
129         return 0;
130 }
131
132 int __btrfs_cow_block(struct btrfs_trans_handle *trans,
133                              struct btrfs_root *root,
134                              struct extent_buffer *buf,
135                              struct extent_buffer *parent, int parent_slot,
136                              struct extent_buffer **cow_ret,
137                              u64 search_start, u64 empty_size)
138 {
139         u64 root_gen;
140         struct extent_buffer *cow;
141         u32 nritems;
142         int ret = 0;
143         int different_trans = 0;
144         int level;
145         struct btrfs_key first_key;
146
147         if (root->ref_cows) {
148                 root_gen = trans->transid;
149         } else {
150                 root_gen = 0;
151         }
152
153         WARN_ON(root->ref_cows && trans->transid !=
154                 root->fs_info->running_transaction->transid);
155         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
156
157         level = btrfs_header_level(buf);
158         nritems = btrfs_header_nritems(buf);
159         if (nritems) {
160                 if (level == 0)
161                         btrfs_item_key_to_cpu(buf, &first_key, 0);
162                 else
163                         btrfs_node_key_to_cpu(buf, &first_key, 0);
164         } else {
165                 first_key.objectid = 0;
166         }
167         cow = __btrfs_alloc_free_block(trans, root, buf->len,
168                                      root->root_key.objectid,
169                                      root_gen, first_key.objectid, level,
170                                      search_start, empty_size);
171         if (IS_ERR(cow))
172                 return PTR_ERR(cow);
173
174         copy_extent_buffer(cow, buf, 0, 0, cow->len);
175         btrfs_set_header_bytenr(cow, cow->start);
176         btrfs_set_header_generation(cow, trans->transid);
177         btrfs_set_header_owner(cow, root->root_key.objectid);
178
179         WARN_ON(btrfs_header_generation(buf) > trans->transid);
180         if (btrfs_header_generation(buf) != trans->transid) {
181                 different_trans = 1;
182                 ret = btrfs_inc_ref(trans, root, buf);
183                 if (ret)
184                         return ret;
185         } else {
186                 clean_tree_block(trans, root, buf);
187         }
188
189         if (buf == root->node) {
190                 root_gen = btrfs_header_generation(buf);
191                 root->node = cow;
192                 extent_buffer_get(cow);
193                 if (buf != root->commit_root) {
194                         btrfs_free_extent(trans, root, buf->start,
195                                           buf->len, root->root_key.objectid,
196                                           root_gen, 0, 0, 1);
197                 }
198                 free_extent_buffer(buf);
199         } else {
200                 root_gen = btrfs_header_generation(parent);
201                 btrfs_set_node_blockptr(parent, parent_slot,
202                                         cow->start);
203                 WARN_ON(trans->transid == 0);
204                 btrfs_set_node_ptr_generation(parent, parent_slot,
205                                               trans->transid);
206                 btrfs_mark_buffer_dirty(parent);
207                 WARN_ON(btrfs_header_generation(parent) != trans->transid);
208                 btrfs_free_extent(trans, root, buf->start, buf->len,
209                                   btrfs_header_owner(parent), root_gen,
210                                   0, 0, 1);
211         }
212         free_extent_buffer(buf);
213         btrfs_mark_buffer_dirty(cow);
214         *cow_ret = cow;
215         return 0;
216 }
217
218 int btrfs_cow_block(struct btrfs_trans_handle *trans,
219                     struct btrfs_root *root, struct extent_buffer *buf,
220                     struct extent_buffer *parent, int parent_slot,
221                     struct extent_buffer **cow_ret)
222 {
223         u64 search_start;
224         u64 header_trans;
225         int ret;
226
227         if (trans->transaction != root->fs_info->running_transaction) {
228                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
229                        root->fs_info->running_transaction->transid);
230                 WARN_ON(1);
231         }
232         if (trans->transid != root->fs_info->generation) {
233                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
234                        root->fs_info->generation);
235                 WARN_ON(1);
236         }
237
238         header_trans = btrfs_header_generation(buf);
239         if (header_trans == trans->transid) {
240                 *cow_ret = buf;
241                 return 0;
242         }
243
244         search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
245         ret = __btrfs_cow_block(trans, root, buf, parent,
246                                  parent_slot, cow_ret, search_start, 0);
247         return ret;
248 }
249
250 static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
251 {
252         if (blocknr < other && other - (blocknr + blocksize) < 32768)
253                 return 1;
254         if (blocknr > other && blocknr - (other + blocksize) < 32768)
255                 return 1;
256         return 0;
257 }
258
259 /*
260  * compare two keys in a memcmp fashion
261  */
262 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
263 {
264         struct btrfs_key k1;
265
266         btrfs_disk_key_to_cpu(&k1, disk);
267
268         if (k1.objectid > k2->objectid)
269                 return 1;
270         if (k1.objectid < k2->objectid)
271                 return -1;
272         if (k1.type > k2->type)
273                 return 1;
274         if (k1.type < k2->type)
275                 return -1;
276         if (k1.offset > k2->offset)
277                 return 1;
278         if (k1.offset < k2->offset)
279                 return -1;
280         return 0;
281 }
282
283
284 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
285                        struct btrfs_root *root, struct extent_buffer *parent,
286                        int start_slot, int cache_only, u64 *last_ret,
287                        struct btrfs_key *progress)
288 {
289         struct extent_buffer *cur;
290         struct extent_buffer *tmp;
291         u64 blocknr;
292         u64 search_start = *last_ret;
293         u64 last_block = 0;
294         u64 other;
295         u32 parent_nritems;
296         int end_slot;
297         int i;
298         int err = 0;
299         int parent_level;
300         int uptodate;
301         u32 blocksize;
302         int progress_passed = 0;
303         struct btrfs_disk_key disk_key;
304
305         parent_level = btrfs_header_level(parent);
306         if (cache_only && parent_level != 1)
307                 return 0;
308
309         if (trans->transaction != root->fs_info->running_transaction) {
310                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
311                        root->fs_info->running_transaction->transid);
312                 WARN_ON(1);
313         }
314         if (trans->transid != root->fs_info->generation) {
315                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
316                        root->fs_info->generation);
317                 WARN_ON(1);
318         }
319
320         parent_nritems = btrfs_header_nritems(parent);
321         blocksize = btrfs_level_size(root, parent_level - 1);
322         end_slot = parent_nritems;
323
324         if (parent_nritems == 1)
325                 return 0;
326
327         for (i = start_slot; i < end_slot; i++) {
328                 int close = 1;
329
330                 if (!parent->map_token) {
331                         map_extent_buffer(parent,
332                                         btrfs_node_key_ptr_offset(i),
333                                         sizeof(struct btrfs_key_ptr),
334                                         &parent->map_token, &parent->kaddr,
335                                         &parent->map_start, &parent->map_len,
336                                         KM_USER1);
337                 }
338                 btrfs_node_key(parent, &disk_key, i);
339                 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
340                         continue;
341
342                 progress_passed = 1;
343                 blocknr = btrfs_node_blockptr(parent, i);
344                 if (last_block == 0)
345                         last_block = blocknr;
346
347                 if (i > 0) {
348                         other = btrfs_node_blockptr(parent, i - 1);
349                         close = close_blocks(blocknr, other, blocksize);
350                 }
351                 if (close && i < end_slot - 2) {
352                         other = btrfs_node_blockptr(parent, i + 1);
353                         close = close_blocks(blocknr, other, blocksize);
354                 }
355                 if (close) {
356                         last_block = blocknr;
357                         continue;
358                 }
359                 if (parent->map_token) {
360                         unmap_extent_buffer(parent, parent->map_token,
361                                             KM_USER1);
362                         parent->map_token = NULL;
363                 }
364
365                 cur = btrfs_find_tree_block(root, blocknr, blocksize);
366                 if (cur)
367                         uptodate = btrfs_buffer_uptodate(cur);
368                 else
369                         uptodate = 0;
370                 if (!cur || !uptodate) {
371                         if (cache_only) {
372                                 free_extent_buffer(cur);
373                                 continue;
374                         }
375                         if (!cur) {
376                                 cur = read_tree_block(root, blocknr,
377                                                          blocksize);
378                         } else if (!uptodate) {
379                                 btrfs_read_buffer(cur);
380                         }
381                 }
382                 if (search_start == 0)
383                         search_start = last_block;
384
385                 err = __btrfs_cow_block(trans, root, cur, parent, i,
386                                         &tmp, search_start,
387                                         min(16 * blocksize,
388                                             (end_slot - i) * blocksize));
389                 if (err) {
390                         free_extent_buffer(cur);
391                         break;
392                 }
393                 search_start = tmp->start;
394                 last_block = tmp->start;
395                 *last_ret = search_start;
396                 if (parent_level == 1)
397                         btrfs_clear_buffer_defrag(tmp);
398                 free_extent_buffer(tmp);
399         }
400         if (parent->map_token) {
401                 unmap_extent_buffer(parent, parent->map_token,
402                                     KM_USER1);
403                 parent->map_token = NULL;
404         }
405         return err;
406 }
407
408 /*
409  * The leaf data grows from end-to-front in the node.
410  * this returns the address of the start of the last item,
411  * which is the stop of the leaf data stack
412  */
413 static inline unsigned int leaf_data_end(struct btrfs_root *root,
414                                          struct extent_buffer *leaf)
415 {
416         u32 nr = btrfs_header_nritems(leaf);
417         if (nr == 0)
418                 return BTRFS_LEAF_DATA_SIZE(root);
419         return btrfs_item_offset_nr(leaf, nr - 1);
420 }
421
422 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
423                       int level)
424 {
425         struct extent_buffer *parent = NULL;
426         struct extent_buffer *node = path->nodes[level];
427         struct btrfs_disk_key parent_key;
428         struct btrfs_disk_key node_key;
429         int parent_slot;
430         int slot;
431         struct btrfs_key cpukey;
432         u32 nritems = btrfs_header_nritems(node);
433
434         if (path->nodes[level + 1])
435                 parent = path->nodes[level + 1];
436
437         slot = path->slots[level];
438         BUG_ON(nritems == 0);
439         if (parent) {
440                 parent_slot = path->slots[level + 1];
441                 btrfs_node_key(parent, &parent_key, parent_slot);
442                 btrfs_node_key(node, &node_key, 0);
443                 BUG_ON(memcmp(&parent_key, &node_key,
444                               sizeof(struct btrfs_disk_key)));
445                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
446                        btrfs_header_bytenr(node));
447         }
448         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
449         if (slot != 0) {
450                 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
451                 btrfs_node_key(node, &node_key, slot);
452                 BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
453         }
454         if (slot < nritems - 1) {
455                 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
456                 btrfs_node_key(node, &node_key, slot);
457                 BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
458         }
459         return 0;
460 }
461
462 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
463                       int level)
464 {
465         struct extent_buffer *leaf = path->nodes[level];
466         struct extent_buffer *parent = NULL;
467         int parent_slot;
468         struct btrfs_key cpukey;
469         struct btrfs_disk_key parent_key;
470         struct btrfs_disk_key leaf_key;
471         int slot = path->slots[0];
472
473         u32 nritems = btrfs_header_nritems(leaf);
474
475         if (path->nodes[level + 1])
476                 parent = path->nodes[level + 1];
477
478         if (nritems == 0)
479                 return 0;
480
481         if (parent) {
482                 parent_slot = path->slots[level + 1];
483                 btrfs_node_key(parent, &parent_key, parent_slot);
484                 btrfs_item_key(leaf, &leaf_key, 0);
485
486                 BUG_ON(memcmp(&parent_key, &leaf_key,
487                        sizeof(struct btrfs_disk_key)));
488                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
489                        btrfs_header_bytenr(leaf));
490         }
491 #if 0
492         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
493                 btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
494                 btrfs_item_key(leaf, &leaf_key, i);
495                 if (comp_keys(&leaf_key, &cpukey) >= 0) {
496                         btrfs_print_leaf(root, leaf);
497                         printk("slot %d offset bad key\n", i);
498                         BUG_ON(1);
499                 }
500                 if (btrfs_item_offset_nr(leaf, i) !=
501                         btrfs_item_end_nr(leaf, i + 1)) {
502                         btrfs_print_leaf(root, leaf);
503                         printk("slot %d offset bad\n", i);
504                         BUG_ON(1);
505                 }
506                 if (i == 0) {
507                         if (btrfs_item_offset_nr(leaf, i) +
508                                btrfs_item_size_nr(leaf, i) !=
509                                BTRFS_LEAF_DATA_SIZE(root)) {
510                                 btrfs_print_leaf(root, leaf);
511                                 printk("slot %d first offset bad\n", i);
512                                 BUG_ON(1);
513                         }
514                 }
515         }
516         if (nritems > 0) {
517                 if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
518                                 btrfs_print_leaf(root, leaf);
519                                 printk("slot %d bad size \n", nritems - 1);
520                                 BUG_ON(1);
521                 }
522         }
523 #endif
524         if (slot != 0 && slot < nritems - 1) {
525                 btrfs_item_key(leaf, &leaf_key, slot);
526                 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
527                 if (comp_keys(&leaf_key, &cpukey) <= 0) {
528                         btrfs_print_leaf(root, leaf);
529                         printk("slot %d offset bad key\n", slot);
530                         BUG_ON(1);
531                 }
532                 if (btrfs_item_offset_nr(leaf, slot - 1) !=
533                        btrfs_item_end_nr(leaf, slot)) {
534                         btrfs_print_leaf(root, leaf);
535                         printk("slot %d offset bad\n", slot);
536                         BUG_ON(1);
537                 }
538         }
539         if (slot < nritems - 1) {
540                 btrfs_item_key(leaf, &leaf_key, slot);
541                 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
542                 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
543                 if (btrfs_item_offset_nr(leaf, slot) !=
544                         btrfs_item_end_nr(leaf, slot + 1)) {
545                         btrfs_print_leaf(root, leaf);
546                         printk("slot %d offset bad\n", slot);
547                         BUG_ON(1);
548                 }
549         }
550         BUG_ON(btrfs_item_offset_nr(leaf, 0) +
551                btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
552         return 0;
553 }
554
555 static int noinline check_block(struct btrfs_root *root,
556                                 struct btrfs_path *path, int level)
557 {
558         return 0;
559 #if 0
560         struct extent_buffer *buf = path->nodes[level];
561
562         if (memcmp_extent_buffer(buf, root->fs_info->fsid,
563                                  (unsigned long)btrfs_header_fsid(buf),
564                                  BTRFS_FSID_SIZE)) {
565                 printk("warning bad block %Lu\n", buf->start);
566                 return 1;
567         }
568 #endif
569         if (level == 0)
570                 return check_leaf(root, path, level);
571         return check_node(root, path, level);
572 }
573
574 /*
575  * search for key in the extent_buffer.  The items start at offset p,
576  * and they are item_size apart.  There are 'max' items in p.
577  *
578  * the slot in the array is returned via slot, and it points to
579  * the place where you would insert key if it is not found in
580  * the array.
581  *
582  * slot may point to max if the key is bigger than all of the keys
583  */
584 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
585                               int item_size, struct btrfs_key *key,
586                               int max, int *slot)
587 {
588         int low = 0;
589         int high = max;
590         int mid;
591         int ret;
592         struct btrfs_disk_key *tmp = NULL;
593         struct btrfs_disk_key unaligned;
594         unsigned long offset;
595         char *map_token = NULL;
596         char *kaddr = NULL;
597         unsigned long map_start = 0;
598         unsigned long map_len = 0;
599         int err;
600
601         while(low < high) {
602                 mid = (low + high) / 2;
603                 offset = p + mid * item_size;
604
605                 if (!map_token || offset < map_start ||
606                     (offset + sizeof(struct btrfs_disk_key)) >
607                     map_start + map_len) {
608                         if (map_token) {
609                                 unmap_extent_buffer(eb, map_token, KM_USER0);
610                                 map_token = NULL;
611                         }
612                         err = map_extent_buffer(eb, offset,
613                                                 sizeof(struct btrfs_disk_key),
614                                                 &map_token, &kaddr,
615                                                 &map_start, &map_len, KM_USER0);
616
617                         if (!err) {
618                                 tmp = (struct btrfs_disk_key *)(kaddr + offset -
619                                                         map_start);
620                         } else {
621                                 read_extent_buffer(eb, &unaligned,
622                                                    offset, sizeof(unaligned));
623                                 tmp = &unaligned;
624                         }
625
626                 } else {
627                         tmp = (struct btrfs_disk_key *)(kaddr + offset -
628                                                         map_start);
629                 }
630                 ret = comp_keys(tmp, key);
631
632                 if (ret < 0)
633                         low = mid + 1;
634                 else if (ret > 0)
635                         high = mid;
636                 else {
637                         *slot = mid;
638                         if (map_token)
639                                 unmap_extent_buffer(eb, map_token, KM_USER0);
640                         return 0;
641                 }
642         }
643         *slot = low;
644         if (map_token)
645                 unmap_extent_buffer(eb, map_token, KM_USER0);
646         return 1;
647 }
648
649 /*
650  * simple bin_search frontend that does the right thing for
651  * leaves vs nodes
652  */
653 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
654                       int level, int *slot)
655 {
656         if (level == 0) {
657                 return generic_bin_search(eb,
658                                           offsetof(struct btrfs_leaf, items),
659                                           sizeof(struct btrfs_item),
660                                           key, btrfs_header_nritems(eb),
661                                           slot);
662         } else {
663                 return generic_bin_search(eb,
664                                           offsetof(struct btrfs_node, ptrs),
665                                           sizeof(struct btrfs_key_ptr),
666                                           key, btrfs_header_nritems(eb),
667                                           slot);
668         }
669         return -1;
670 }
671
672 static struct extent_buffer *read_node_slot(struct btrfs_root *root,
673                                    struct extent_buffer *parent, int slot)
674 {
675         if (slot < 0)
676                 return NULL;
677         if (slot >= btrfs_header_nritems(parent))
678                 return NULL;
679         return read_tree_block(root, btrfs_node_blockptr(parent, slot),
680                        btrfs_level_size(root, btrfs_header_level(parent) - 1));
681 }
682
683 static int balance_level(struct btrfs_trans_handle *trans,
684                          struct btrfs_root *root,
685                          struct btrfs_path *path, int level)
686 {
687         struct extent_buffer *right = NULL;
688         struct extent_buffer *mid;
689         struct extent_buffer *left = NULL;
690         struct extent_buffer *parent = NULL;
691         int ret = 0;
692         int wret;
693         int pslot;
694         int orig_slot = path->slots[level];
695         int err_on_enospc = 0;
696         u64 orig_ptr;
697
698         if (level == 0)
699                 return 0;
700
701         mid = path->nodes[level];
702         WARN_ON(btrfs_header_generation(mid) != trans->transid);
703
704         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
705
706         if (level < BTRFS_MAX_LEVEL - 1)
707                 parent = path->nodes[level + 1];
708         pslot = path->slots[level + 1];
709
710         /*
711          * deal with the case where there is only one pointer in the root
712          * by promoting the node below to a root
713          */
714         if (!parent) {
715                 struct extent_buffer *child;
716
717                 if (btrfs_header_nritems(mid) != 1)
718                         return 0;
719
720                 /* promote the child to a root */
721                 child = read_node_slot(root, mid, 0);
722                 BUG_ON(!child);
723                 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
724                 BUG_ON(ret);
725
726                 root->node = child;
727                 path->nodes[level] = NULL;
728                 clean_tree_block(trans, root, mid);
729                 wait_on_tree_block_writeback(root, mid);
730                 /* once for the path */
731                 free_extent_buffer(mid);
732                 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
733                                         root->root_key.objectid,
734                                         btrfs_header_generation(mid), 0, 0, 1);
735                 /* once for the root ptr */
736                 free_extent_buffer(mid);
737                 return ret;
738         }
739         if (btrfs_header_nritems(mid) >
740             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
741                 return 0;
742
743         if (btrfs_header_nritems(mid) < 2)
744                 err_on_enospc = 1;
745
746         left = read_node_slot(root, parent, pslot - 1);
747         if (left) {
748                 wret = btrfs_cow_block(trans, root, left,
749                                        parent, pslot - 1, &left);
750                 if (wret) {
751                         ret = wret;
752                         goto enospc;
753                 }
754         }
755         right = read_node_slot(root, parent, pslot + 1);
756         if (right) {
757                 wret = btrfs_cow_block(trans, root, right,
758                                        parent, pslot + 1, &right);
759                 if (wret) {
760                         ret = wret;
761                         goto enospc;
762                 }
763         }
764
765         /* first, try to make some room in the middle buffer */
766         if (left) {
767                 orig_slot += btrfs_header_nritems(left);
768                 wret = push_node_left(trans, root, left, mid);
769                 if (wret < 0)
770                         ret = wret;
771                 if (btrfs_header_nritems(mid) < 2)
772                         err_on_enospc = 1;
773         }
774
775         /*
776          * then try to empty the right most buffer into the middle
777          */
778         if (right) {
779                 wret = push_node_left(trans, root, mid, right);
780                 if (wret < 0 && wret != -ENOSPC)
781                         ret = wret;
782                 if (btrfs_header_nritems(right) == 0) {
783                         u64 bytenr = right->start;
784                         u64 generation = btrfs_header_generation(parent);
785                         u32 blocksize = right->len;
786
787                         clean_tree_block(trans, root, right);
788                         wait_on_tree_block_writeback(root, right);
789                         free_extent_buffer(right);
790                         right = NULL;
791                         wret = del_ptr(trans, root, path, level + 1, pslot +
792                                        1);
793                         if (wret)
794                                 ret = wret;
795                         wret = btrfs_free_extent(trans, root, bytenr,
796                                                  blocksize,
797                                                  btrfs_header_owner(parent),
798                                                  generation, 0, 0, 1);
799                         if (wret)
800                                 ret = wret;
801                 } else {
802                         struct btrfs_disk_key right_key;
803                         btrfs_node_key(right, &right_key, 0);
804                         btrfs_set_node_key(parent, &right_key, pslot + 1);
805                         btrfs_mark_buffer_dirty(parent);
806                 }
807         }
808         if (btrfs_header_nritems(mid) == 1) {
809                 /*
810                  * we're not allowed to leave a node with one item in the
811                  * tree during a delete.  A deletion from lower in the tree
812                  * could try to delete the only pointer in this node.
813                  * So, pull some keys from the left.
814                  * There has to be a left pointer at this point because
815                  * otherwise we would have pulled some pointers from the
816                  * right
817                  */
818                 BUG_ON(!left);
819                 wret = balance_node_right(trans, root, mid, left);
820                 if (wret < 0) {
821                         ret = wret;
822                         goto enospc;
823                 }
824                 BUG_ON(wret == 1);
825         }
826         if (btrfs_header_nritems(mid) == 0) {
827                 /* we've managed to empty the middle node, drop it */
828                 u64 root_gen = btrfs_header_generation(parent);
829                 u64 bytenr = mid->start;
830                 u32 blocksize = mid->len;
831                 clean_tree_block(trans, root, mid);
832                 wait_on_tree_block_writeback(root, mid);
833                 free_extent_buffer(mid);
834                 mid = NULL;
835                 wret = del_ptr(trans, root, path, level + 1, pslot);
836                 if (wret)
837                         ret = wret;
838                 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
839                                          btrfs_header_owner(parent),
840                                          root_gen, 0, 0, 1);
841                 if (wret)
842                         ret = wret;
843         } else {
844                 /* update the parent key to reflect our changes */
845                 struct btrfs_disk_key mid_key;
846                 btrfs_node_key(mid, &mid_key, 0);
847                 btrfs_set_node_key(parent, &mid_key, pslot);
848                 btrfs_mark_buffer_dirty(parent);
849         }
850
851         /* update the path */
852         if (left) {
853                 if (btrfs_header_nritems(left) > orig_slot) {
854                         extent_buffer_get(left);
855                         path->nodes[level] = left;
856                         path->slots[level + 1] -= 1;
857                         path->slots[level] = orig_slot;
858                         if (mid)
859                                 free_extent_buffer(mid);
860                 } else {
861                         orig_slot -= btrfs_header_nritems(left);
862                         path->slots[level] = orig_slot;
863                 }
864         }
865         /* double check we haven't messed things up */
866         check_block(root, path, level);
867         if (orig_ptr !=
868             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
869                 BUG();
870 enospc:
871         if (right)
872                 free_extent_buffer(right);
873         if (left)
874                 free_extent_buffer(left);
875         return ret;
876 }
877
878 /* returns zero if the push worked, non-zero otherwise */
879 static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
880                                           struct btrfs_root *root,
881                                           struct btrfs_path *path, int level)
882 {
883         struct extent_buffer *right = NULL;
884         struct extent_buffer *mid;
885         struct extent_buffer *left = NULL;
886         struct extent_buffer *parent = NULL;
887         int ret = 0;
888         int wret;
889         int pslot;
890         int orig_slot = path->slots[level];
891         u64 orig_ptr;
892
893         if (level == 0)
894                 return 1;
895
896         mid = path->nodes[level];
897         WARN_ON(btrfs_header_generation(mid) != trans->transid);
898         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
899
900         if (level < BTRFS_MAX_LEVEL - 1)
901                 parent = path->nodes[level + 1];
902         pslot = path->slots[level + 1];
903
904         if (!parent)
905                 return 1;
906
907         left = read_node_slot(root, parent, pslot - 1);
908
909         /* first, try to make some room in the middle buffer */
910         if (left) {
911                 u32 left_nr;
912                 left_nr = btrfs_header_nritems(left);
913                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
914                         wret = 1;
915                 } else {
916                         ret = btrfs_cow_block(trans, root, left, parent,
917                                               pslot - 1, &left);
918                         if (ret)
919                                 wret = 1;
920                         else {
921                                 wret = push_node_left(trans, root,
922                                                       left, mid);
923                         }
924                 }
925                 if (wret < 0)
926                         ret = wret;
927                 if (wret == 0) {
928                         struct btrfs_disk_key disk_key;
929                         orig_slot += left_nr;
930                         btrfs_node_key(mid, &disk_key, 0);
931                         btrfs_set_node_key(parent, &disk_key, pslot);
932                         btrfs_mark_buffer_dirty(parent);
933                         if (btrfs_header_nritems(left) > orig_slot) {
934                                 path->nodes[level] = left;
935                                 path->slots[level + 1] -= 1;
936                                 path->slots[level] = orig_slot;
937                                 free_extent_buffer(mid);
938                         } else {
939                                 orig_slot -=
940                                         btrfs_header_nritems(left);
941                                 path->slots[level] = orig_slot;
942                                 free_extent_buffer(left);
943                         }
944                         return 0;
945                 }
946                 free_extent_buffer(left);
947         }
948         right= read_node_slot(root, parent, pslot + 1);
949
950         /*
951          * then try to empty the right most buffer into the middle
952          */
953         if (right) {
954                 u32 right_nr;
955                 right_nr = btrfs_header_nritems(right);
956                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
957                         wret = 1;
958                 } else {
959                         ret = btrfs_cow_block(trans, root, right,
960                                               parent, pslot + 1,
961                                               &right);
962                         if (ret)
963                                 wret = 1;
964                         else {
965                                 wret = balance_node_right(trans, root,
966                                                           right, mid);
967                         }
968                 }
969                 if (wret < 0)
970                         ret = wret;
971                 if (wret == 0) {
972                         struct btrfs_disk_key disk_key;
973
974                         btrfs_node_key(right, &disk_key, 0);
975                         btrfs_set_node_key(parent, &disk_key, pslot + 1);
976                         btrfs_mark_buffer_dirty(parent);
977
978                         if (btrfs_header_nritems(mid) <= orig_slot) {
979                                 path->nodes[level] = right;
980                                 path->slots[level + 1] += 1;
981                                 path->slots[level] = orig_slot -
982                                         btrfs_header_nritems(mid);
983                                 free_extent_buffer(mid);
984                         } else {
985                                 free_extent_buffer(right);
986                         }
987                         return 0;
988                 }
989                 free_extent_buffer(right);
990         }
991         return 1;
992 }
993
994 /*
995  * readahead one full node of leaves
996  */
997 static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
998                              int level, int slot, u64 objectid)
999 {
1000         struct extent_buffer *node;
1001         struct btrfs_disk_key disk_key;
1002         u32 nritems;
1003         u64 search;
1004         u64 lowest_read;
1005         u64 highest_read;
1006         u64 nread = 0;
1007         int direction = path->reada;
1008         struct extent_buffer *eb;
1009         u32 nr;
1010         u32 blocksize;
1011         u32 nscan = 0;
1012
1013         if (level != 1)
1014                 return;
1015
1016         if (!path->nodes[level])
1017                 return;
1018
1019         node = path->nodes[level];
1020         search = btrfs_node_blockptr(node, slot);
1021         blocksize = btrfs_level_size(root, level - 1);
1022         eb = btrfs_find_tree_block(root, search, blocksize);
1023         if (eb) {
1024                 free_extent_buffer(eb);
1025                 return;
1026         }
1027
1028         highest_read = search;
1029         lowest_read = search;
1030
1031         nritems = btrfs_header_nritems(node);
1032         nr = slot;
1033         while(1) {
1034                 if (direction < 0) {
1035                         if (nr == 0)
1036                                 break;
1037                         nr--;
1038                 } else if (direction > 0) {
1039                         nr++;
1040                         if (nr >= nritems)
1041                                 break;
1042                 }
1043                 if (path->reada < 0 && objectid) {
1044                         btrfs_node_key(node, &disk_key, nr);
1045                         if (btrfs_disk_key_objectid(&disk_key) != objectid)
1046                                 break;
1047                 }
1048                 search = btrfs_node_blockptr(node, nr);
1049                 if ((search >= lowest_read && search <= highest_read) ||
1050                     (search < lowest_read && lowest_read - search <= 32768) ||
1051                     (search > highest_read && search - highest_read <= 32768)) {
1052                         readahead_tree_block(root, search, blocksize);
1053                         nread += blocksize;
1054                 }
1055                 nscan++;
1056                 if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
1057                         break;
1058                 if(nread > (1024 * 1024) || nscan > 128)
1059                         break;
1060
1061                 if (search < lowest_read)
1062                         lowest_read = search;
1063                 if (search > highest_read)
1064                         highest_read = search;
1065         }
1066 }
1067 /*
1068  * look for key in the tree.  path is filled in with nodes along the way
1069  * if key is found, we return zero and you can find the item in the leaf
1070  * level of the path (level 0)
1071  *
1072  * If the key isn't found, the path points to the slot where it should
1073  * be inserted, and 1 is returned.  If there are other errors during the
1074  * search a negative error number is returned.
1075  *
1076  * if ins_len > 0, nodes and leaves will be split as we walk down the
1077  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
1078  * possible)
1079  */
1080 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1081                       *root, struct btrfs_key *key, struct btrfs_path *p, int
1082                       ins_len, int cow)
1083 {
1084         struct extent_buffer *b;
1085         u64 bytenr;
1086         u64 ptr_gen;
1087         int slot;
1088         int ret;
1089         int level;
1090         int should_reada = p->reada;
1091         u8 lowest_level = 0;
1092
1093         lowest_level = p->lowest_level;
1094         WARN_ON(lowest_level && ins_len);
1095         WARN_ON(p->nodes[0] != NULL);
1096         WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
1097 again:
1098         b = root->node;
1099         extent_buffer_get(b);
1100         while (b) {
1101                 level = btrfs_header_level(b);
1102                 if (cow) {
1103                         int wret;
1104                         wret = btrfs_cow_block(trans, root, b,
1105                                                p->nodes[level + 1],
1106                                                p->slots[level + 1],
1107                                                &b);
1108                         if (wret) {
1109                                 free_extent_buffer(b);
1110                                 return wret;
1111                         }
1112                 }
1113                 BUG_ON(!cow && ins_len);
1114                 if (level != btrfs_header_level(b))
1115                         WARN_ON(1);
1116                 level = btrfs_header_level(b);
1117                 p->nodes[level] = b;
1118                 ret = check_block(root, p, level);
1119                 if (ret)
1120                         return -1;
1121                 ret = bin_search(b, key, level, &slot);
1122                 if (level != 0) {
1123                         if (ret && slot > 0)
1124                                 slot -= 1;
1125                         p->slots[level] = slot;
1126                         if (ins_len > 0 && btrfs_header_nritems(b) >=
1127                             BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1128                                 int sret = split_node(trans, root, p, level);
1129                                 BUG_ON(sret > 0);
1130                                 if (sret)
1131                                         return sret;
1132                                 b = p->nodes[level];
1133                                 slot = p->slots[level];
1134                         } else if (ins_len < 0) {
1135                                 int sret = balance_level(trans, root, p,
1136                                                          level);
1137                                 if (sret)
1138                                         return sret;
1139                                 b = p->nodes[level];
1140                                 if (!b) {
1141                                         btrfs_release_path(NULL, p);
1142                                         goto again;
1143                                 }
1144                                 slot = p->slots[level];
1145                                 BUG_ON(btrfs_header_nritems(b) == 1);
1146                         }
1147                         /* this is only true while dropping a snapshot */
1148                         if (level == lowest_level)
1149                                 break;
1150                         bytenr = btrfs_node_blockptr(b, slot);
1151                         ptr_gen = btrfs_node_ptr_generation(b, slot);
1152                         if (should_reada)
1153                                 reada_for_search(root, p, level, slot,
1154                                                  key->objectid);
1155                         b = read_tree_block(root, bytenr,
1156                                             btrfs_level_size(root, level - 1));
1157                         if (ptr_gen != btrfs_header_generation(b)) {
1158                                 printk("block %llu bad gen wanted %llu "
1159                                        "found %llu\n",
1160                                 (unsigned long long)b->start,
1161                                 (unsigned long long)ptr_gen,
1162                                 (unsigned long long)btrfs_header_generation(b));
1163                         }
1164                 } else {
1165                         p->slots[level] = slot;
1166                         if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
1167                             sizeof(struct btrfs_item) + ins_len) {
1168                                 int sret = split_leaf(trans, root, key,
1169                                                       p, ins_len, ret == 0);
1170                                 BUG_ON(sret > 0);
1171                                 if (sret)
1172                                         return sret;
1173                         }
1174                         return ret;
1175                 }
1176         }
1177         return 1;
1178 }
1179
1180 /*
1181  * adjust the pointers going up the tree, starting at level
1182  * making sure the right key of each node is points to 'key'.
1183  * This is used after shifting pointers to the left, so it stops
1184  * fixing up pointers when a given leaf/node is not in slot 0 of the
1185  * higher levels
1186  *
1187  * If this fails to write a tree block, it returns -1, but continues
1188  * fixing up the blocks in ram so the tree is consistent.
1189  */
1190 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1191                           struct btrfs_root *root, struct btrfs_path *path,
1192                           struct btrfs_disk_key *key, int level)
1193 {
1194         int i;
1195         int ret = 0;
1196         struct extent_buffer *t;
1197
1198         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1199                 int tslot = path->slots[i];
1200                 if (!path->nodes[i])
1201                         break;
1202                 t = path->nodes[i];
1203                 btrfs_set_node_key(t, key, tslot);
1204                 btrfs_mark_buffer_dirty(path->nodes[i]);
1205                 if (tslot != 0)
1206                         break;
1207         }
1208         return ret;
1209 }
1210
1211 /*
1212  * try to push data from one node into the next node left in the
1213  * tree.
1214  *
1215  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1216  * error, and > 0 if there was no room in the left hand block.
1217  */
1218 static int push_node_left(struct btrfs_trans_handle *trans,
1219                           struct btrfs_root *root, struct extent_buffer *dst,
1220                           struct extent_buffer *src)
1221 {
1222         int push_items = 0;
1223         int src_nritems;
1224         int dst_nritems;
1225         int ret = 0;
1226
1227         src_nritems = btrfs_header_nritems(src);
1228         dst_nritems = btrfs_header_nritems(dst);
1229         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1230         WARN_ON(btrfs_header_generation(src) != trans->transid);
1231         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1232
1233         if (push_items <= 0) {
1234                 return 1;
1235         }
1236
1237         if (src_nritems < push_items)
1238                 push_items = src_nritems;
1239
1240         copy_extent_buffer(dst, src,
1241                            btrfs_node_key_ptr_offset(dst_nritems),
1242                            btrfs_node_key_ptr_offset(0),
1243                            push_items * sizeof(struct btrfs_key_ptr));
1244
1245         if (push_items < src_nritems) {
1246                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1247                                       btrfs_node_key_ptr_offset(push_items),
1248                                       (src_nritems - push_items) *
1249                                       sizeof(struct btrfs_key_ptr));
1250         }
1251         btrfs_set_header_nritems(src, src_nritems - push_items);
1252         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1253         btrfs_mark_buffer_dirty(src);
1254         btrfs_mark_buffer_dirty(dst);
1255         return ret;
1256 }
1257
1258 /*
1259  * try to push data from one node into the next node right in the
1260  * tree.
1261  *
1262  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1263  * error, and > 0 if there was no room in the right hand block.
1264  *
1265  * this will  only push up to 1/2 the contents of the left node over
1266  */
1267 static int balance_node_right(struct btrfs_trans_handle *trans,
1268                               struct btrfs_root *root,
1269                               struct extent_buffer *dst,
1270                               struct extent_buffer *src)
1271 {
1272         int push_items = 0;
1273         int max_push;
1274         int src_nritems;
1275         int dst_nritems;
1276         int ret = 0;
1277
1278         WARN_ON(btrfs_header_generation(src) != trans->transid);
1279         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1280
1281         src_nritems = btrfs_header_nritems(src);
1282         dst_nritems = btrfs_header_nritems(dst);
1283         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1284         if (push_items <= 0)
1285                 return 1;
1286
1287         max_push = src_nritems / 2 + 1;
1288         /* don't try to empty the node */
1289         if (max_push >= src_nritems)
1290                 return 1;
1291
1292         if (max_push < push_items)
1293                 push_items = max_push;
1294
1295         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1296                                       btrfs_node_key_ptr_offset(0),
1297                                       (dst_nritems) *
1298                                       sizeof(struct btrfs_key_ptr));
1299
1300         copy_extent_buffer(dst, src,
1301                            btrfs_node_key_ptr_offset(0),
1302                            btrfs_node_key_ptr_offset(src_nritems - push_items),
1303                            push_items * sizeof(struct btrfs_key_ptr));
1304
1305         btrfs_set_header_nritems(src, src_nritems - push_items);
1306         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1307
1308         btrfs_mark_buffer_dirty(src);
1309         btrfs_mark_buffer_dirty(dst);
1310         return ret;
1311 }
1312
1313 /*
1314  * helper function to insert a new root level in the tree.
1315  * A new node is allocated, and a single item is inserted to
1316  * point to the existing root
1317  *
1318  * returns zero on success or < 0 on failure.
1319  */
1320 static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1321                            struct btrfs_root *root,
1322                            struct btrfs_path *path, int level)
1323 {
1324         u64 root_gen;
1325         u64 lower_gen;
1326         struct extent_buffer *lower;
1327         struct extent_buffer *c;
1328         struct btrfs_disk_key lower_key;
1329
1330         BUG_ON(path->nodes[level]);
1331         BUG_ON(path->nodes[level-1] != root->node);
1332
1333         if (root->ref_cows)
1334                 root_gen = trans->transid;
1335         else
1336                 root_gen = 0;
1337
1338         lower = path->nodes[level-1];
1339         if (level == 1)
1340                 btrfs_item_key(lower, &lower_key, 0);
1341         else
1342                 btrfs_node_key(lower, &lower_key, 0);
1343
1344         c = __btrfs_alloc_free_block(trans, root, root->nodesize,
1345                                    root->root_key.objectid,
1346                                    root_gen, lower_key.objectid, level,
1347                                    root->node->start, 0);
1348         if (IS_ERR(c))
1349                 return PTR_ERR(c);
1350         memset_extent_buffer(c, 0, 0, root->nodesize);
1351         btrfs_set_header_nritems(c, 1);
1352         btrfs_set_header_level(c, level);
1353         btrfs_set_header_bytenr(c, c->start);
1354         btrfs_set_header_generation(c, trans->transid);
1355         btrfs_set_header_owner(c, root->root_key.objectid);
1356
1357         write_extent_buffer(c, root->fs_info->fsid,
1358                             (unsigned long)btrfs_header_fsid(c),
1359                             BTRFS_FSID_SIZE);
1360         btrfs_set_node_key(c, &lower_key, 0);
1361         btrfs_set_node_blockptr(c, 0, lower->start);
1362         lower_gen = btrfs_header_generation(lower);
1363         WARN_ON(lower_gen == 0);
1364
1365         btrfs_set_node_ptr_generation(c, 0, lower_gen);
1366
1367         btrfs_mark_buffer_dirty(c);
1368
1369         /* the super has an extra ref to root->node */
1370         free_extent_buffer(root->node);
1371         root->node = c;
1372         extent_buffer_get(c);
1373         path->nodes[level] = c;
1374         path->slots[level] = 0;
1375
1376         if (root->ref_cows && lower_gen != trans->transid) {
1377                 struct btrfs_path *back_path = btrfs_alloc_path();
1378                 int ret;
1379                 ret = btrfs_insert_extent_backref(trans,
1380                                                   root->fs_info->extent_root,
1381                                                   path, lower->start,
1382                                                   root->root_key.objectid,
1383                                                   trans->transid, 0, 0);
1384                 BUG_ON(ret);
1385                 btrfs_free_path(back_path);
1386         }
1387         return 0;
1388 }
1389
1390 /*
1391  * worker function to insert a single pointer in a node.
1392  * the node should have enough room for the pointer already
1393  *
1394  * slot and level indicate where you want the key to go, and
1395  * blocknr is the block the key points to.
1396  *
1397  * returns zero on success and < 0 on any error
1398  */
1399 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1400                       *root, struct btrfs_path *path, struct btrfs_disk_key
1401                       *key, u64 bytenr, int slot, int level)
1402 {
1403         struct extent_buffer *lower;
1404         int nritems;
1405
1406         BUG_ON(!path->nodes[level]);
1407         lower = path->nodes[level];
1408         nritems = btrfs_header_nritems(lower);
1409         if (slot > nritems)
1410                 BUG();
1411         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
1412                 BUG();
1413         if (slot != nritems) {
1414                 memmove_extent_buffer(lower,
1415                               btrfs_node_key_ptr_offset(slot + 1),
1416                               btrfs_node_key_ptr_offset(slot),
1417                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
1418         }
1419         btrfs_set_node_key(lower, key, slot);
1420         btrfs_set_node_blockptr(lower, slot, bytenr);
1421         WARN_ON(trans->transid == 0);
1422         btrfs_set_node_ptr_generation(lower, slot, trans->transid);
1423         btrfs_set_header_nritems(lower, nritems + 1);
1424         btrfs_mark_buffer_dirty(lower);
1425         return 0;
1426 }
1427
1428 /*
1429  * split the node at the specified level in path in two.
1430  * The path is corrected to point to the appropriate node after the split
1431  *
1432  * Before splitting this tries to make some room in the node by pushing
1433  * left and right, if either one works, it returns right away.
1434  *
1435  * returns 0 on success and < 0 on failure
1436  */
1437 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1438                       *root, struct btrfs_path *path, int level)
1439 {
1440         u64 root_gen;
1441         struct extent_buffer *c;
1442         struct extent_buffer *split;
1443         struct btrfs_disk_key disk_key;
1444         int mid;
1445         int ret;
1446         int wret;
1447         u32 c_nritems;
1448
1449         c = path->nodes[level];
1450         WARN_ON(btrfs_header_generation(c) != trans->transid);
1451         if (c == root->node) {
1452                 /* trying to split the root, lets make a new one */
1453                 ret = insert_new_root(trans, root, path, level + 1);
1454                 if (ret)
1455                         return ret;
1456         } else {
1457                 ret = push_nodes_for_insert(trans, root, path, level);
1458                 c = path->nodes[level];
1459                 if (!ret && btrfs_header_nritems(c) <
1460                     BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
1461                         return 0;
1462                 if (ret < 0)
1463                         return ret;
1464         }
1465
1466         c_nritems = btrfs_header_nritems(c);
1467         if (root->ref_cows)
1468                 root_gen = trans->transid;
1469         else
1470                 root_gen = 0;
1471
1472         btrfs_node_key(c, &disk_key, 0);
1473         split = __btrfs_alloc_free_block(trans, root, root->nodesize,
1474                                          root->root_key.objectid,
1475                                          root_gen,
1476                                          btrfs_disk_key_objectid(&disk_key),
1477                                          level, c->start, 0);
1478         if (IS_ERR(split))
1479                 return PTR_ERR(split);
1480
1481         btrfs_set_header_flags(split, btrfs_header_flags(c));
1482         btrfs_set_header_level(split, btrfs_header_level(c));
1483         btrfs_set_header_bytenr(split, split->start);
1484         btrfs_set_header_generation(split, trans->transid);
1485         btrfs_set_header_owner(split, root->root_key.objectid);
1486         write_extent_buffer(split, root->fs_info->fsid,
1487                             (unsigned long)btrfs_header_fsid(split),
1488                             BTRFS_FSID_SIZE);
1489
1490         mid = (c_nritems + 1) / 2;
1491
1492         copy_extent_buffer(split, c,
1493                            btrfs_node_key_ptr_offset(0),
1494                            btrfs_node_key_ptr_offset(mid),
1495                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1496         btrfs_set_header_nritems(split, c_nritems - mid);
1497         btrfs_set_header_nritems(c, mid);
1498         ret = 0;
1499
1500         btrfs_mark_buffer_dirty(c);
1501         btrfs_mark_buffer_dirty(split);
1502
1503         btrfs_node_key(split, &disk_key, 0);
1504         wret = insert_ptr(trans, root, path, &disk_key, split->start,
1505                           path->slots[level + 1] + 1,
1506                           level + 1);
1507         if (wret)
1508                 ret = wret;
1509
1510         if (path->slots[level] >= mid) {
1511                 path->slots[level] -= mid;
1512                 free_extent_buffer(c);
1513                 path->nodes[level] = split;
1514                 path->slots[level + 1] += 1;
1515         } else {
1516                 free_extent_buffer(split);
1517         }
1518         return ret;
1519 }
1520
1521 /*
1522  * how many bytes are required to store the items in a leaf.  start
1523  * and nr indicate which items in the leaf to check.  This totals up the
1524  * space used both by the item structs and the item data
1525  */
1526 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1527 {
1528         int data_len;
1529         int nritems = btrfs_header_nritems(l);
1530         int end = min(nritems, start + nr) - 1;
1531
1532         if (!nr)
1533                 return 0;
1534         data_len = btrfs_item_end_nr(l, start);
1535         data_len = data_len - btrfs_item_offset_nr(l, end);
1536         data_len += sizeof(struct btrfs_item) * nr;
1537         WARN_ON(data_len < 0);
1538         return data_len;
1539 }
1540
1541 /*
1542  * The space between the end of the leaf items and
1543  * the start of the leaf data.  IOW, how much room
1544  * the leaf has left for both items and data
1545  */
1546 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
1547 {
1548         int nritems = btrfs_header_nritems(leaf);
1549         int ret;
1550         ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
1551         if (ret < 0) {
1552                 printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
1553                        ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
1554                        leaf_space_used(leaf, 0, nritems), nritems);
1555         }
1556         return ret;
1557 }
1558
1559 /*
1560  * push some data in the path leaf to the right, trying to free up at
1561  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1562  *
1563  * returns 1 if the push failed because the other node didn't have enough
1564  * room, 0 if everything worked out and < 0 if there were major errors.
1565  */
1566 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1567                            *root, struct btrfs_path *path, int data_size,
1568                            int empty)
1569 {
1570         struct extent_buffer *left = path->nodes[0];
1571         struct extent_buffer *right;
1572         struct extent_buffer *upper;
1573         struct btrfs_disk_key disk_key;
1574         int slot;
1575         u32 i;
1576         int free_space;
1577         int push_space = 0;
1578         int push_items = 0;
1579         struct btrfs_item *item;
1580         u32 left_nritems;
1581         u32 nr;
1582         u32 right_nritems;
1583         u32 data_end;
1584         u32 this_item_size;
1585         int ret;
1586
1587         slot = path->slots[1];
1588         if (!path->nodes[1]) {
1589                 return 1;
1590         }
1591         upper = path->nodes[1];
1592         if (slot >= btrfs_header_nritems(upper) - 1)
1593                 return 1;
1594
1595         right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1),
1596                                 root->leafsize);
1597         free_space = btrfs_leaf_free_space(root, right);
1598         if (free_space < data_size + sizeof(struct btrfs_item)) {
1599                 free_extent_buffer(right);
1600                 return 1;
1601         }
1602
1603         /* cow and double check */
1604         ret = btrfs_cow_block(trans, root, right, upper,
1605                               slot + 1, &right);
1606         if (ret) {
1607                 free_extent_buffer(right);
1608                 return 1;
1609         }
1610         free_space = btrfs_leaf_free_space(root, right);
1611         if (free_space < data_size + sizeof(struct btrfs_item)) {
1612                 free_extent_buffer(right);
1613                 return 1;
1614         }
1615
1616         left_nritems = btrfs_header_nritems(left);
1617         if (left_nritems == 0) {
1618                 free_extent_buffer(right);
1619                 return 1;
1620         }
1621
1622         if (empty)
1623                 nr = 0;
1624         else
1625                 nr = 1;
1626
1627         i = left_nritems - 1;
1628         while (i >= nr) {
1629                 item = btrfs_item_nr(left, i);
1630
1631                 if (path->slots[0] == i)
1632                         push_space += data_size + sizeof(*item);
1633
1634                 if (!left->map_token) {
1635                         map_extent_buffer(left, (unsigned long)item,
1636                                         sizeof(struct btrfs_item),
1637                                         &left->map_token, &left->kaddr,
1638                                         &left->map_start, &left->map_len,
1639                                         KM_USER1);
1640                 }
1641
1642                 this_item_size = btrfs_item_size(left, item);
1643                 if (this_item_size + sizeof(*item) + push_space > free_space)
1644                         break;
1645                 push_items++;
1646                 push_space += this_item_size + sizeof(*item);
1647                 if (i == 0)
1648                         break;
1649                 i--;
1650         }
1651         if (left->map_token) {
1652                 unmap_extent_buffer(left, left->map_token, KM_USER1);
1653                 left->map_token = NULL;
1654         }
1655
1656         if (push_items == 0) {
1657                 free_extent_buffer(right);
1658                 return 1;
1659         }
1660
1661         if (!empty && push_items == left_nritems)
1662                 WARN_ON(1);
1663
1664         /* push left to right */
1665         right_nritems = btrfs_header_nritems(right);
1666
1667         push_space = btrfs_item_end_nr(left, left_nritems - push_items);
1668         push_space -= leaf_data_end(root, left);
1669
1670         /* make room in the right data area */
1671         data_end = leaf_data_end(root, right);
1672         memmove_extent_buffer(right,
1673                               btrfs_leaf_data(right) + data_end - push_space,
1674                               btrfs_leaf_data(right) + data_end,
1675                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
1676
1677         /* copy from the left data area */
1678         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
1679                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
1680                      btrfs_leaf_data(left) + leaf_data_end(root, left),
1681                      push_space);
1682
1683         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1684                               btrfs_item_nr_offset(0),
1685                               right_nritems * sizeof(struct btrfs_item));
1686
1687         /* copy the items from left to right */
1688         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1689                    btrfs_item_nr_offset(left_nritems - push_items),
1690                    push_items * sizeof(struct btrfs_item));
1691
1692         /* update the item pointers */
1693         right_nritems += push_items;
1694         btrfs_set_header_nritems(right, right_nritems);
1695         push_space = BTRFS_LEAF_DATA_SIZE(root);
1696         for (i = 0; i < right_nritems; i++) {
1697                 item = btrfs_item_nr(right, i);
1698                 if (!right->map_token) {
1699                         map_extent_buffer(right, (unsigned long)item,
1700                                         sizeof(struct btrfs_item),
1701                                         &right->map_token, &right->kaddr,
1702                                         &right->map_start, &right->map_len,
1703                                         KM_USER1);
1704                 }
1705                 push_space -= btrfs_item_size(right, item);
1706                 btrfs_set_item_offset(right, item, push_space);
1707         }
1708
1709         if (right->map_token) {
1710                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1711                 right->map_token = NULL;
1712         }
1713         left_nritems -= push_items;
1714         btrfs_set_header_nritems(left, left_nritems);
1715
1716         if (left_nritems)
1717                 btrfs_mark_buffer_dirty(left);
1718         btrfs_mark_buffer_dirty(right);
1719
1720         btrfs_item_key(right, &disk_key, 0);
1721         btrfs_set_node_key(upper, &disk_key, slot + 1);
1722         btrfs_mark_buffer_dirty(upper);
1723
1724         /* then fixup the leaf pointer in the path */
1725         if (path->slots[0] >= left_nritems) {
1726                 path->slots[0] -= left_nritems;
1727                 free_extent_buffer(path->nodes[0]);
1728                 path->nodes[0] = right;
1729                 path->slots[1] += 1;
1730         } else {
1731                 free_extent_buffer(right);
1732         }
1733         return 0;
1734 }
1735 /*
1736  * push some data in the path leaf to the left, trying to free up at
1737  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1738  */
1739 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1740                           *root, struct btrfs_path *path, int data_size,
1741                           int empty)
1742 {
1743         struct btrfs_disk_key disk_key;
1744         struct extent_buffer *right = path->nodes[0];
1745         struct extent_buffer *left;
1746         int slot;
1747         int i;
1748         int free_space;
1749         int push_space = 0;
1750         int push_items = 0;
1751         struct btrfs_item *item;
1752         u32 old_left_nritems;
1753         u32 right_nritems;
1754         u32 nr;
1755         int ret = 0;
1756         int wret;
1757         u32 this_item_size;
1758         u32 old_left_item_size;
1759
1760         slot = path->slots[1];
1761         if (slot == 0)
1762                 return 1;
1763         if (!path->nodes[1])
1764                 return 1;
1765
1766         right_nritems = btrfs_header_nritems(right);
1767         if (right_nritems == 0) {
1768                 return 1;
1769         }
1770
1771         left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1],
1772                                slot - 1), root->leafsize);
1773         free_space = btrfs_leaf_free_space(root, left);
1774         if (free_space < data_size + sizeof(struct btrfs_item)) {
1775                 free_extent_buffer(left);
1776                 return 1;
1777         }
1778
1779         /* cow and double check */
1780         ret = btrfs_cow_block(trans, root, left,
1781                               path->nodes[1], slot - 1, &left);
1782         if (ret) {
1783                 /* we hit -ENOSPC, but it isn't fatal here */
1784                 free_extent_buffer(left);
1785                 return 1;
1786         }
1787
1788         free_space = btrfs_leaf_free_space(root, left);
1789         if (free_space < data_size + sizeof(struct btrfs_item)) {
1790                 free_extent_buffer(left);
1791                 return 1;
1792         }
1793
1794         if (empty)
1795                 nr = right_nritems;
1796         else
1797                 nr = right_nritems - 1;
1798
1799         for (i = 0; i < nr; i++) {
1800                 item = btrfs_item_nr(right, i);
1801                 if (!right->map_token) {
1802                         map_extent_buffer(right, (unsigned long)item,
1803                                         sizeof(struct btrfs_item),
1804                                         &right->map_token, &right->kaddr,
1805                                         &right->map_start, &right->map_len,
1806                                         KM_USER1);
1807                 }
1808
1809                 if (path->slots[0] == i)
1810                         push_space += data_size + sizeof(*item);
1811
1812                 this_item_size = btrfs_item_size(right, item);
1813                 if (this_item_size + sizeof(*item) + push_space > free_space)
1814                         break;
1815
1816                 push_items++;
1817                 push_space += this_item_size + sizeof(*item);
1818         }
1819
1820         if (right->map_token) {
1821                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1822                 right->map_token = NULL;
1823         }
1824
1825         if (push_items == 0) {
1826                 free_extent_buffer(left);
1827                 return 1;
1828         }
1829         if (!empty && push_items == btrfs_header_nritems(right))
1830                 WARN_ON(1);
1831
1832         /* push data from right to left */
1833         copy_extent_buffer(left, right,
1834                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
1835                            btrfs_item_nr_offset(0),
1836                            push_items * sizeof(struct btrfs_item));
1837
1838         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1839                      btrfs_item_offset_nr(right, push_items -1);
1840
1841         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
1842                      leaf_data_end(root, left) - push_space,
1843                      btrfs_leaf_data(right) +
1844                      btrfs_item_offset_nr(right, push_items - 1),
1845                      push_space);
1846         old_left_nritems = btrfs_header_nritems(left);
1847         BUG_ON(old_left_nritems < 0);
1848
1849         old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
1850         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1851                 u32 ioff;
1852
1853                 item = btrfs_item_nr(left, i);
1854                 if (!left->map_token) {
1855                         map_extent_buffer(left, (unsigned long)item,
1856                                         sizeof(struct btrfs_item),
1857                                         &left->map_token, &left->kaddr,
1858                                         &left->map_start, &left->map_len,
1859                                         KM_USER1);
1860                 }
1861
1862                 ioff = btrfs_item_offset(left, item);
1863                 btrfs_set_item_offset(left, item,
1864                       ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
1865         }
1866         btrfs_set_header_nritems(left, old_left_nritems + push_items);
1867         if (left->map_token) {
1868                 unmap_extent_buffer(left, left->map_token, KM_USER1);
1869                 left->map_token = NULL;
1870         }
1871
1872         /* fixup right node */
1873         if (push_items > right_nritems) {
1874                 printk("push items %d nr %u\n", push_items, right_nritems);
1875                 WARN_ON(1);
1876         }
1877
1878         if (push_items < right_nritems) {
1879                 push_space = btrfs_item_offset_nr(right, push_items - 1) -
1880                                                   leaf_data_end(root, right);
1881                 memmove_extent_buffer(right, btrfs_leaf_data(right) +
1882                                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
1883                                       btrfs_leaf_data(right) +
1884                                       leaf_data_end(root, right), push_space);
1885
1886                 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1887                               btrfs_item_nr_offset(push_items),
1888                              (btrfs_header_nritems(right) - push_items) *
1889                              sizeof(struct btrfs_item));
1890         }
1891         right_nritems -= push_items;
1892         btrfs_set_header_nritems(right, right_nritems);
1893         push_space = BTRFS_LEAF_DATA_SIZE(root);
1894         for (i = 0; i < right_nritems; i++) {
1895                 item = btrfs_item_nr(right, i);
1896
1897                 if (!right->map_token) {
1898                         map_extent_buffer(right, (unsigned long)item,
1899                                         sizeof(struct btrfs_item),
1900                                         &right->map_token, &right->kaddr,
1901                                         &right->map_start, &right->map_len,
1902                                         KM_USER1);
1903                 }
1904
1905                 push_space = push_space - btrfs_item_size(right, item);
1906                 btrfs_set_item_offset(right, item, push_space);
1907         }
1908         if (right->map_token) {
1909                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1910                 right->map_token = NULL;
1911         }
1912
1913         btrfs_mark_buffer_dirty(left);
1914         if (right_nritems)
1915                 btrfs_mark_buffer_dirty(right);
1916
1917         btrfs_item_key(right, &disk_key, 0);
1918         wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1919         if (wret)
1920                 ret = wret;
1921
1922         /* then fixup the leaf pointer in the path */
1923         if (path->slots[0] < push_items) {
1924                 path->slots[0] += old_left_nritems;
1925                 free_extent_buffer(path->nodes[0]);
1926                 path->nodes[0] = left;
1927                 path->slots[1] -= 1;
1928         } else {
1929                 free_extent_buffer(left);
1930                 path->slots[0] -= push_items;
1931         }
1932         BUG_ON(path->slots[0] < 0);
1933         return ret;
1934 }
1935
1936 /*
1937  * split the path's leaf in two, making sure there is at least data_size
1938  * available for the resulting leaf level of the path.
1939  *
1940  * returns 0 if all went well and < 0 on failure.
1941  */
1942 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1943                       *root, struct btrfs_key *ins_key,
1944                       struct btrfs_path *path, int data_size, int extend)
1945 {
1946         u64 root_gen;
1947         struct extent_buffer *l;
1948         u32 nritems;
1949         int mid;
1950         int slot;
1951         struct extent_buffer *right;
1952         int space_needed = data_size + sizeof(struct btrfs_item);
1953         int data_copy_size;
1954         int rt_data_off;
1955         int i;
1956         int ret = 0;
1957         int wret;
1958         int double_split;
1959         int num_doubles = 0;
1960         struct btrfs_disk_key disk_key;
1961
1962         if (extend)
1963                 space_needed = data_size;
1964
1965         if (root->ref_cows)
1966                 root_gen = trans->transid;
1967         else
1968                 root_gen = 0;
1969
1970         /* first try to make some room by pushing left and right */
1971         if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
1972                 wret = push_leaf_right(trans, root, path, data_size, 0);
1973                 if (wret < 0) {
1974                         return wret;
1975                 }
1976                 if (wret) {
1977                         wret = push_leaf_left(trans, root, path, data_size, 0);
1978                         if (wret < 0)
1979                                 return wret;
1980                 }
1981                 l = path->nodes[0];
1982
1983                 /* did the pushes work? */
1984                 if (btrfs_leaf_free_space(root, l) >= space_needed)
1985                         return 0;
1986         }
1987
1988         if (!path->nodes[1]) {
1989                 ret = insert_new_root(trans, root, path, 1);
1990                 if (ret)
1991                         return ret;
1992         }
1993 again:
1994         double_split = 0;
1995         l = path->nodes[0];
1996         slot = path->slots[0];
1997         nritems = btrfs_header_nritems(l);
1998         mid = (nritems + 1)/ 2;
1999
2000         btrfs_item_key(l, &disk_key, 0);
2001
2002         right = __btrfs_alloc_free_block(trans, root, root->leafsize,
2003                                          root->root_key.objectid,
2004                                          root_gen, disk_key.objectid, 0,
2005                                          l->start, 0);
2006         if (IS_ERR(right))
2007                 return PTR_ERR(right);
2008
2009         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2010         btrfs_set_header_bytenr(right, right->start);
2011         btrfs_set_header_generation(right, trans->transid);
2012         btrfs_set_header_owner(right, root->root_key.objectid);
2013         btrfs_set_header_level(right, 0);
2014         write_extent_buffer(right, root->fs_info->fsid,
2015                             (unsigned long)btrfs_header_fsid(right),
2016                             BTRFS_FSID_SIZE);
2017         if (mid <= slot) {
2018                 if (nritems == 1 ||
2019                     leaf_space_used(l, mid, nritems - mid) + space_needed >
2020                         BTRFS_LEAF_DATA_SIZE(root)) {
2021                         if (slot >= nritems) {
2022                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2023                                 btrfs_set_header_nritems(right, 0);
2024                                 wret = insert_ptr(trans, root, path,
2025                                                   &disk_key, right->start,
2026                                                   path->slots[1] + 1, 1);
2027                                 if (wret)
2028                                         ret = wret;
2029                                 free_extent_buffer(path->nodes[0]);
2030                                 path->nodes[0] = right;
2031                                 path->slots[0] = 0;
2032                                 path->slots[1] += 1;
2033                                 return ret;
2034                         }
2035                         mid = slot;
2036                         if (mid != nritems &&
2037                             leaf_space_used(l, mid, nritems - mid) +
2038                             space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
2039                                 double_split = 1;
2040                         }
2041                 }
2042         } else {
2043                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
2044                         BTRFS_LEAF_DATA_SIZE(root)) {
2045                         if (!extend && slot == 0) {
2046                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2047                                 btrfs_set_header_nritems(right, 0);
2048                                 wret = insert_ptr(trans, root, path,
2049                                                   &disk_key,
2050                                                   right->start,
2051                                                   path->slots[1], 1);
2052                                 if (wret)
2053                                         ret = wret;
2054                                 free_extent_buffer(path->nodes[0]);
2055                                 path->nodes[0] = right;
2056                                 path->slots[0] = 0;
2057                                 if (path->slots[1] == 0) {
2058                                         wret = fixup_low_keys(trans, root,
2059                                                    path, &disk_key, 1);
2060                                         if (wret)
2061                                                 ret = wret;
2062                                 }
2063                                 return ret;
2064                         } else if (extend && slot == 0) {
2065                                 mid = 1;
2066                         } else {
2067                                 mid = slot;
2068                                 if (mid != nritems &&
2069                                     leaf_space_used(l, mid, nritems - mid) +
2070                                     space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
2071                                         double_split = 1;
2072                                 }
2073                         }
2074                 }
2075         }
2076         nritems = nritems - mid;
2077         btrfs_set_header_nritems(right, nritems);
2078         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
2079
2080         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
2081                            btrfs_item_nr_offset(mid),
2082                            nritems * sizeof(struct btrfs_item));
2083
2084         copy_extent_buffer(right, l,
2085                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
2086                      data_copy_size, btrfs_leaf_data(l) +
2087                      leaf_data_end(root, l), data_copy_size);
2088
2089         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2090                       btrfs_item_end_nr(l, mid);
2091
2092         for (i = 0; i < nritems; i++) {
2093                 struct btrfs_item *item = btrfs_item_nr(right, i);
2094                 u32 ioff;
2095
2096                 if (!right->map_token) {
2097                         map_extent_buffer(right, (unsigned long)item,
2098                                         sizeof(struct btrfs_item),
2099                                         &right->map_token, &right->kaddr,
2100                                         &right->map_start, &right->map_len,
2101                                         KM_USER1);
2102                 }
2103
2104                 ioff = btrfs_item_offset(right, item);
2105                 btrfs_set_item_offset(right, item, ioff + rt_data_off);
2106         }
2107
2108         if (right->map_token) {
2109                 unmap_extent_buffer(right, right->map_token, KM_USER1);
2110                 right->map_token = NULL;
2111         }
2112
2113         btrfs_set_header_nritems(l, mid);
2114         ret = 0;
2115         btrfs_item_key(right, &disk_key, 0);
2116         wret = insert_ptr(trans, root, path, &disk_key, right->start,
2117                           path->slots[1] + 1, 1);
2118         if (wret)
2119                 ret = wret;
2120
2121         btrfs_mark_buffer_dirty(right);
2122         btrfs_mark_buffer_dirty(l);
2123         BUG_ON(path->slots[0] != slot);
2124
2125         if (mid <= slot) {
2126                 free_extent_buffer(path->nodes[0]);
2127                 path->nodes[0] = right;
2128                 path->slots[0] -= mid;
2129                 path->slots[1] += 1;
2130         } else
2131                 free_extent_buffer(right);
2132
2133         BUG_ON(path->slots[0] < 0);
2134
2135         if (double_split) {
2136                 BUG_ON(num_doubles != 0);
2137                 num_doubles++;
2138                 goto again;
2139         }
2140         return ret;
2141 }
2142
2143 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
2144                         struct btrfs_root *root,
2145                         struct btrfs_path *path,
2146                         u32 new_size, int from_end)
2147 {
2148         int ret = 0;
2149         int slot;
2150         int slot_orig;
2151         struct extent_buffer *leaf;
2152         struct btrfs_item *item;
2153         u32 nritems;
2154         unsigned int data_end;
2155         unsigned int old_data_start;
2156         unsigned int old_size;
2157         unsigned int size_diff;
2158         int i;
2159
2160         slot_orig = path->slots[0];
2161         leaf = path->nodes[0];
2162         slot = path->slots[0];
2163
2164         old_size = btrfs_item_size_nr(leaf, slot);
2165         if (old_size == new_size)
2166                 return 0;
2167
2168         nritems = btrfs_header_nritems(leaf);
2169         data_end = leaf_data_end(root, leaf);
2170
2171         old_data_start = btrfs_item_offset_nr(leaf, slot);
2172
2173         size_diff = old_size - new_size;
2174
2175         BUG_ON(slot < 0);
2176         BUG_ON(slot >= nritems);
2177
2178         /*
2179          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2180          */
2181         /* first correct the data pointers */
2182         for (i = slot; i < nritems; i++) {
2183                 u32 ioff;
2184                 item = btrfs_item_nr(leaf, i);
2185
2186                 if (!leaf->map_token) {
2187                         map_extent_buffer(leaf, (unsigned long)item,
2188                                         sizeof(struct btrfs_item),
2189                                         &leaf->map_token, &leaf->kaddr,
2190                                         &leaf->map_start, &leaf->map_len,
2191                                         KM_USER1);
2192                 }
2193
2194                 ioff = btrfs_item_offset(leaf, item);
2195                 btrfs_set_item_offset(leaf, item, ioff + size_diff);
2196         }
2197
2198         if (leaf->map_token) {
2199                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2200                 leaf->map_token = NULL;
2201         }
2202
2203         /* shift the data */
2204         if (from_end) {
2205                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2206                               data_end + size_diff, btrfs_leaf_data(leaf) +
2207                               data_end, old_data_start + new_size - data_end);
2208         } else {
2209                 struct btrfs_disk_key disk_key;
2210                 u64 offset;
2211
2212                 btrfs_item_key(leaf, &disk_key, slot);
2213
2214                 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
2215                         unsigned long ptr;
2216                         struct btrfs_file_extent_item *fi;
2217
2218                         fi = btrfs_item_ptr(leaf, slot,
2219                                             struct btrfs_file_extent_item);
2220                         fi = (struct btrfs_file_extent_item *)(
2221                              (unsigned long)fi - size_diff);
2222
2223                         if (btrfs_file_extent_type(leaf, fi) ==
2224                             BTRFS_FILE_EXTENT_INLINE) {
2225                                 ptr = btrfs_item_ptr_offset(leaf, slot);
2226                                 memmove_extent_buffer(leaf, ptr,
2227                                         (unsigned long)fi,
2228                                         offsetof(struct btrfs_file_extent_item,
2229                                                  disk_bytenr));
2230                         }
2231                 }
2232
2233                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2234                               data_end + size_diff, btrfs_leaf_data(leaf) +
2235                               data_end, old_data_start - data_end);
2236
2237                 offset = btrfs_disk_key_offset(&disk_key);
2238                 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
2239                 btrfs_set_item_key(leaf, &disk_key, slot);
2240                 if (slot == 0)
2241                         fixup_low_keys(trans, root, path, &disk_key, 1);
2242         }
2243
2244         item = btrfs_item_nr(leaf, slot);
2245         btrfs_set_item_size(leaf, item, new_size);
2246         btrfs_mark_buffer_dirty(leaf);
2247
2248         ret = 0;
2249         if (btrfs_leaf_free_space(root, leaf) < 0) {
2250                 btrfs_print_leaf(root, leaf);
2251                 BUG();
2252         }
2253         return ret;
2254 }
2255
2256 int btrfs_extend_item(struct btrfs_trans_handle *trans,
2257                       struct btrfs_root *root, struct btrfs_path *path,
2258                       u32 data_size)
2259 {
2260         int ret = 0;
2261         int slot;
2262         int slot_orig;
2263         struct extent_buffer *leaf;
2264         struct btrfs_item *item;
2265         u32 nritems;
2266         unsigned int data_end;
2267         unsigned int old_data;
2268         unsigned int old_size;
2269         int i;
2270
2271         slot_orig = path->slots[0];
2272         leaf = path->nodes[0];
2273
2274         nritems = btrfs_header_nritems(leaf);
2275         data_end = leaf_data_end(root, leaf);
2276
2277         if (btrfs_leaf_free_space(root, leaf) < data_size) {
2278                 btrfs_print_leaf(root, leaf);
2279                 BUG();
2280         }
2281         slot = path->slots[0];
2282         old_data = btrfs_item_end_nr(leaf, slot);
2283
2284         BUG_ON(slot < 0);
2285         if (slot >= nritems) {
2286                 btrfs_print_leaf(root, leaf);
2287                 printk("slot %d too large, nritems %d\n", slot, nritems);
2288                 BUG_ON(1);
2289         }
2290
2291         /*
2292          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2293          */
2294         /* first correct the data pointers */
2295         for (i = slot; i < nritems; i++) {
2296                 u32 ioff;
2297                 item = btrfs_item_nr(leaf, i);
2298
2299                 if (!leaf->map_token) {
2300                         map_extent_buffer(leaf, (unsigned long)item,
2301                                         sizeof(struct btrfs_item),
2302                                         &leaf->map_token, &leaf->kaddr,
2303                                         &leaf->map_start, &leaf->map_len,
2304                                         KM_USER1);
2305                 }
2306                 ioff = btrfs_item_offset(leaf, item);
2307                 btrfs_set_item_offset(leaf, item, ioff - data_size);
2308         }
2309
2310         if (leaf->map_token) {
2311                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2312                 leaf->map_token = NULL;
2313         }
2314
2315         /* shift the data */
2316         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2317                       data_end - data_size, btrfs_leaf_data(leaf) +
2318                       data_end, old_data - data_end);
2319
2320         data_end = old_data;
2321         old_size = btrfs_item_size_nr(leaf, slot);
2322         item = btrfs_item_nr(leaf, slot);
2323         btrfs_set_item_size(leaf, item, old_size + data_size);
2324         btrfs_mark_buffer_dirty(leaf);
2325
2326         ret = 0;
2327         if (btrfs_leaf_free_space(root, leaf) < 0) {
2328                 btrfs_print_leaf(root, leaf);
2329                 BUG();
2330         }
2331         return ret;
2332 }
2333
2334 /*
2335  * Given a key and some data, insert an item into the tree.
2336  * This does all the path init required, making room in the tree if needed.
2337  */
2338 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
2339                             struct btrfs_root *root,
2340                             struct btrfs_path *path,
2341                             struct btrfs_key *cpu_key, u32 *data_size,
2342                             int nr)
2343 {
2344         struct extent_buffer *leaf;
2345         struct btrfs_item *item;
2346         int ret = 0;
2347         int slot;
2348         int slot_orig;
2349         int i;
2350         u32 nritems;
2351         u32 total_size = 0;
2352         u32 total_data = 0;
2353         unsigned int data_end;
2354         struct btrfs_disk_key disk_key;
2355
2356         for (i = 0; i < nr; i++) {
2357                 total_data += data_size[i];
2358         }
2359
2360         /* create a root if there isn't one */
2361         if (!root->node)
2362                 BUG();
2363
2364         total_size = total_data + (nr - 1) * sizeof(struct btrfs_item);
2365         ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
2366         if (ret == 0) {
2367                 return -EEXIST;
2368         }
2369         if (ret < 0)
2370                 goto out;
2371
2372         slot_orig = path->slots[0];
2373         leaf = path->nodes[0];
2374
2375         nritems = btrfs_header_nritems(leaf);
2376         data_end = leaf_data_end(root, leaf);
2377
2378         if (btrfs_leaf_free_space(root, leaf) <
2379             sizeof(struct btrfs_item) + total_size) {
2380                 btrfs_print_leaf(root, leaf);
2381                 printk("not enough freespace need %u have %d\n",
2382                        total_size, btrfs_leaf_free_space(root, leaf));
2383                 BUG();
2384         }
2385
2386         slot = path->slots[0];
2387         BUG_ON(slot < 0);
2388
2389         if (slot != nritems) {
2390                 int i;
2391                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2392
2393                 if (old_data < data_end) {
2394                         btrfs_print_leaf(root, leaf);
2395                         printk("slot %d old_data %d data_end %d\n",
2396                                slot, old_data, data_end);
2397                         BUG_ON(1);
2398                 }
2399                 /*
2400                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
2401                  */
2402                 /* first correct the data pointers */
2403                 WARN_ON(leaf->map_token);
2404                 for (i = slot; i < nritems; i++) {
2405                         u32 ioff;
2406
2407                         item = btrfs_item_nr(leaf, i);
2408                         if (!leaf->map_token) {
2409                                 map_extent_buffer(leaf, (unsigned long)item,
2410                                         sizeof(struct btrfs_item),
2411                                         &leaf->map_token, &leaf->kaddr,
2412                                         &leaf->map_start, &leaf->map_len,
2413                                         KM_USER1);
2414                         }
2415
2416                         ioff = btrfs_item_offset(leaf, item);
2417                         btrfs_set_item_offset(leaf, item, ioff - total_data);
2418                 }
2419                 if (leaf->map_token) {
2420                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2421                         leaf->map_token = NULL;
2422                 }
2423
2424                 /* shift the items */
2425                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
2426                               btrfs_item_nr_offset(slot),
2427                               (nritems - slot) * sizeof(struct btrfs_item));
2428
2429                 /* shift the data */
2430                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2431                               data_end - total_data, btrfs_leaf_data(leaf) +
2432                               data_end, old_data - data_end);
2433                 data_end = old_data;
2434         }
2435
2436         /* setup the item for the new data */
2437         for (i = 0; i < nr; i++) {
2438                 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
2439                 btrfs_set_item_key(leaf, &disk_key, slot + i);
2440                 item = btrfs_item_nr(leaf, slot + i);
2441                 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
2442                 data_end -= data_size[i];
2443                 btrfs_set_item_size(leaf, item, data_size[i]);
2444         }
2445         btrfs_set_header_nritems(leaf, nritems + nr);
2446         btrfs_mark_buffer_dirty(leaf);
2447
2448         ret = 0;
2449         if (slot == 0) {
2450                 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2451                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
2452         }
2453
2454         if (btrfs_leaf_free_space(root, leaf) < 0) {
2455                 btrfs_print_leaf(root, leaf);
2456                 BUG();
2457         }
2458
2459 out:
2460         return ret;
2461 }
2462
2463 /*
2464  * Given a key and some data, insert an item into the tree.
2465  * This does all the path init required, making room in the tree if needed.
2466  */
2467 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2468                       *root, struct btrfs_key *cpu_key, void *data, u32
2469                       data_size)
2470 {
2471         int ret = 0;
2472         struct btrfs_path *path;
2473         struct extent_buffer *leaf;
2474         unsigned long ptr;
2475
2476         path = btrfs_alloc_path();
2477         BUG_ON(!path);
2478         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2479         if (!ret) {
2480                 leaf = path->nodes[0];
2481                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2482                 write_extent_buffer(leaf, data, ptr, data_size);
2483                 btrfs_mark_buffer_dirty(leaf);
2484         }
2485         btrfs_free_path(path);
2486         return ret;
2487 }
2488
2489 /*
2490  * delete the pointer from a given node.
2491  *
2492  * If the delete empties a node, the node is removed from the tree,
2493  * continuing all the way the root if required.  The root is converted into
2494  * a leaf if all the nodes are emptied.
2495  */
2496 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2497                    struct btrfs_path *path, int level, int slot)
2498 {
2499         struct extent_buffer *parent = path->nodes[level];
2500         u32 nritems;
2501         int ret = 0;
2502         int wret;
2503
2504         nritems = btrfs_header_nritems(parent);
2505         if (slot != nritems -1) {
2506                 memmove_extent_buffer(parent,
2507                               btrfs_node_key_ptr_offset(slot),
2508                               btrfs_node_key_ptr_offset(slot + 1),
2509                               sizeof(struct btrfs_key_ptr) *
2510                               (nritems - slot - 1));
2511         }
2512         nritems--;
2513         btrfs_set_header_nritems(parent, nritems);
2514         if (nritems == 0 && parent == root->node) {
2515                 BUG_ON(btrfs_header_level(root->node) != 1);
2516                 /* just turn the root into a leaf and break */
2517                 btrfs_set_header_level(root->node, 0);
2518         } else if (slot == 0) {
2519                 struct btrfs_disk_key disk_key;
2520
2521                 btrfs_node_key(parent, &disk_key, 0);
2522                 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
2523                 if (wret)
2524                         ret = wret;
2525         }
2526         btrfs_mark_buffer_dirty(parent);
2527         return ret;
2528 }
2529
2530 /*
2531  * delete the item at the leaf level in path.  If that empties
2532  * the leaf, remove it from the tree
2533  */
2534 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2535                     struct btrfs_path *path, int slot, int nr)
2536 {
2537         struct extent_buffer *leaf;
2538         struct btrfs_item *item;
2539         int last_off;
2540         int dsize = 0;
2541         int ret = 0;
2542         int wret;
2543         int i;
2544         u32 nritems;
2545
2546         leaf = path->nodes[0];
2547         last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
2548
2549         for (i = 0; i < nr; i++)
2550                 dsize += btrfs_item_size_nr(leaf, slot + i);
2551
2552         nritems = btrfs_header_nritems(leaf);
2553
2554         if (slot + nr != nritems) {
2555                 int i;
2556                 int data_end = leaf_data_end(root, leaf);
2557
2558                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2559                               data_end + dsize,
2560                               btrfs_leaf_data(leaf) + data_end,
2561                               last_off - data_end);
2562
2563                 for (i = slot + nr; i < nritems; i++) {
2564                         u32 ioff;
2565
2566                         item = btrfs_item_nr(leaf, i);
2567                         if (!leaf->map_token) {
2568                                 map_extent_buffer(leaf, (unsigned long)item,
2569                                         sizeof(struct btrfs_item),
2570                                         &leaf->map_token, &leaf->kaddr,
2571                                         &leaf->map_start, &leaf->map_len,
2572                                         KM_USER1);
2573                         }
2574                         ioff = btrfs_item_offset(leaf, item);
2575                         btrfs_set_item_offset(leaf, item, ioff + dsize);
2576                 }
2577
2578                 if (leaf->map_token) {
2579                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2580                         leaf->map_token = NULL;
2581                 }
2582
2583                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2584                               btrfs_item_nr_offset(slot + nr),
2585                               sizeof(struct btrfs_item) *
2586                               (nritems - slot - nr));
2587         }
2588         btrfs_set_header_nritems(leaf, nritems - nr);
2589         nritems -= nr;
2590
2591         /* delete the leaf if we've emptied it */
2592         if (nritems == 0) {
2593                 if (leaf == root->node) {
2594                         btrfs_set_header_level(leaf, 0);
2595                 } else {
2596                         u64 root_gen = btrfs_header_generation(path->nodes[1]);
2597                         clean_tree_block(trans, root, leaf);
2598                         wait_on_tree_block_writeback(root, leaf);
2599                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
2600                         if (wret)
2601                                 ret = wret;
2602                         wret = btrfs_free_extent(trans, root,
2603                                          leaf->start, leaf->len,
2604                                          btrfs_header_owner(path->nodes[1]),
2605                                          root_gen, 0, 0, 1);
2606                         if (wret)
2607                                 ret = wret;
2608                 }
2609         } else {
2610                 int used = leaf_space_used(leaf, 0, nritems);
2611                 if (slot == 0) {
2612                         struct btrfs_disk_key disk_key;
2613
2614                         btrfs_item_key(leaf, &disk_key, 0);
2615                         wret = fixup_low_keys(trans, root, path,
2616                                               &disk_key, 1);
2617                         if (wret)
2618                                 ret = wret;
2619                 }
2620
2621                 /* delete the leaf if it is mostly empty */
2622                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
2623                         /* push_leaf_left fixes the path.
2624                          * make sure the path still points to our leaf
2625                          * for possible call to del_ptr below
2626                          */
2627                         slot = path->slots[1];
2628                         extent_buffer_get(leaf);
2629
2630                         wret = push_leaf_left(trans, root, path, 1, 1);
2631                         if (wret < 0 && wret != -ENOSPC)
2632                                 ret = wret;
2633
2634                         if (path->nodes[0] == leaf &&
2635                             btrfs_header_nritems(leaf)) {
2636                                 wret = push_leaf_right(trans, root, path, 1, 1);
2637                                 if (wret < 0 && wret != -ENOSPC)
2638                                         ret = wret;
2639                         }
2640
2641                         if (btrfs_header_nritems(leaf) == 0) {
2642                                 u64 root_gen;
2643                                 u64 bytenr = leaf->start;
2644                                 u32 blocksize = leaf->len;
2645
2646                                 root_gen = btrfs_header_generation(
2647                                                            path->nodes[1]);
2648
2649                                 clean_tree_block(trans, root, leaf);
2650                                 wait_on_tree_block_writeback(root, leaf);
2651
2652                                 wret = del_ptr(trans, root, path, 1, slot);
2653                                 if (wret)
2654                                         ret = wret;
2655
2656                                 free_extent_buffer(leaf);
2657                                 wret = btrfs_free_extent(trans, root, bytenr,
2658                                              blocksize,
2659                                              btrfs_header_owner(path->nodes[1]),
2660                                              root_gen, 0, 0, 1);
2661                                 if (wret)
2662                                         ret = wret;
2663                         } else {
2664                                 btrfs_mark_buffer_dirty(leaf);
2665                                 free_extent_buffer(leaf);
2666                         }
2667                 } else {
2668                         btrfs_mark_buffer_dirty(leaf);
2669                 }
2670         }
2671         return ret;
2672 }
2673
2674 /*
2675  * walk up the tree as far as required to find the previous leaf.
2676  * returns 0 if it found something or 1 if there are no lesser leaves.
2677  * returns < 0 on io errors.
2678  */
2679 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
2680 {
2681         u64 bytenr;
2682         int slot;
2683         int level = 1;
2684         struct extent_buffer *c;
2685         struct extent_buffer *next = NULL;
2686
2687         while(level < BTRFS_MAX_LEVEL) {
2688                 if (!path->nodes[level])
2689                         return 1;
2690
2691                 slot = path->slots[level];
2692                 c = path->nodes[level];
2693                 if (slot == 0) {
2694                         level++;
2695                         if (level == BTRFS_MAX_LEVEL)
2696                                 return 1;
2697                         continue;
2698                 }
2699                 slot--;
2700
2701                 bytenr = btrfs_node_blockptr(c, slot);
2702                 if (next)
2703                         free_extent_buffer(next);
2704
2705                 next = read_tree_block(root, bytenr,
2706                                        btrfs_level_size(root, level - 1));
2707                 break;
2708         }
2709         path->slots[level] = slot;
2710         while(1) {
2711                 level--;
2712                 c = path->nodes[level];
2713                 free_extent_buffer(c);
2714                 slot = btrfs_header_nritems(next);
2715                 if (slot != 0)
2716                         slot--;
2717                 path->nodes[level] = next;
2718                 path->slots[level] = slot;
2719                 if (!level)
2720                         break;
2721                 next = read_tree_block(root, btrfs_node_blockptr(next, slot),
2722                                        btrfs_level_size(root, level - 1));
2723         }
2724         return 0;
2725 }
2726
2727 /*
2728  * walk up the tree as far as required to find the next leaf.
2729  * returns 0 if it found something or 1 if there are no greater leaves.
2730  * returns < 0 on io errors.
2731  */
2732 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2733 {
2734         int slot;
2735         int level = 1;
2736         u64 bytenr;
2737         struct extent_buffer *c;
2738         struct extent_buffer *next = NULL;
2739
2740         while(level < BTRFS_MAX_LEVEL) {
2741                 if (!path->nodes[level])
2742                         return 1;
2743
2744                 slot = path->slots[level] + 1;
2745                 c = path->nodes[level];
2746                 if (slot >= btrfs_header_nritems(c)) {
2747                         level++;
2748                         if (level == BTRFS_MAX_LEVEL)
2749                                 return 1;
2750                         continue;
2751                 }
2752
2753                 bytenr = btrfs_node_blockptr(c, slot);
2754                 if (next)
2755                         free_extent_buffer(next);
2756
2757                 if (path->reada)
2758                         reada_for_search(root, path, level, slot, 0);
2759
2760                 next = read_tree_block(root, bytenr,
2761                                        btrfs_level_size(root, level -1));
2762                 break;
2763         }
2764         path->slots[level] = slot;
2765         while(1) {
2766                 level--;
2767                 c = path->nodes[level];
2768                 free_extent_buffer(c);
2769                 path->nodes[level] = next;
2770                 path->slots[level] = 0;
2771                 if (!level)
2772                         break;
2773                 if (path->reada)
2774                         reada_for_search(root, path, level, 0, 0);
2775                 next = read_tree_block(root, btrfs_node_blockptr(next, 0),
2776                                        btrfs_level_size(root, level - 1));
2777         }
2778         return 0;
2779 }