f60920e8a0e061f7d4cd4eaebe51a483cceb4645
[linux-2.6-block.git] / fs / btrfs / ctree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/highmem.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "transaction.h"
23 #include "print-tree.h"
24
25 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
26                       *root, struct btrfs_path *path, int level);
27 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
28                       *root, struct btrfs_key *ins_key,
29                       struct btrfs_path *path, int data_size);
30 static int push_node_left(struct btrfs_trans_handle *trans,
31                           struct btrfs_root *root, struct extent_buffer *dst,
32                           struct extent_buffer *src);
33 static int balance_node_right(struct btrfs_trans_handle *trans,
34                               struct btrfs_root *root,
35                               struct extent_buffer *dst_buf,
36                               struct extent_buffer *src_buf);
37 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
38                    struct btrfs_path *path, int level, int slot);
39
40 inline void btrfs_init_path(struct btrfs_path *p)
41 {
42         memset(p, 0, sizeof(*p));
43 }
44
45 struct btrfs_path *btrfs_alloc_path(void)
46 {
47         struct btrfs_path *path;
48         path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
49         if (path) {
50                 btrfs_init_path(path);
51                 path->reada = 1;
52         }
53         return path;
54 }
55
56 void btrfs_free_path(struct btrfs_path *p)
57 {
58         btrfs_release_path(NULL, p);
59         kmem_cache_free(btrfs_path_cachep, p);
60 }
61
62 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
63 {
64         int i;
65         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
66                 if (!p->nodes[i])
67                         break;
68                 free_extent_buffer(p->nodes[i]);
69         }
70         memset(p, 0, sizeof(*p));
71 }
72
73 static int __btrfs_cow_block(struct btrfs_trans_handle *trans,
74                              struct btrfs_root *root,
75                              struct extent_buffer *buf,
76                              struct extent_buffer *parent, int parent_slot,
77                              struct extent_buffer **cow_ret,
78                              u64 search_start, u64 empty_size)
79 {
80         struct extent_buffer *cow;
81         int ret = 0;
82         int different_trans = 0;
83
84         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
85
86         cow = btrfs_alloc_free_block(trans, root, search_start, empty_size);
87         if (IS_ERR(cow))
88                 return PTR_ERR(cow);
89
90         if (buf->len != root->sectorsize || cow->len != root->sectorsize)
91                 WARN_ON(1);
92
93         copy_extent_buffer(cow, buf, 0, 0, cow->len);
94         btrfs_set_header_blocknr(cow, extent_buffer_blocknr(cow));
95         btrfs_set_header_generation(cow, trans->transid);
96         btrfs_set_header_owner(cow, root->root_key.objectid);
97
98         WARN_ON(btrfs_header_generation(buf) > trans->transid);
99         if (btrfs_header_generation(buf) != trans->transid) {
100                 different_trans = 1;
101                 ret = btrfs_inc_ref(trans, root, buf);
102                 if (ret)
103                         return ret;
104         } else {
105                 clean_tree_block(trans, root, buf);
106         }
107
108         if (buf == root->node) {
109                 root->node = cow;
110                 extent_buffer_get(cow);
111                 if (buf != root->commit_root) {
112                         btrfs_free_extent(trans, root,
113                                           extent_buffer_blocknr(buf), 1, 1);
114                 }
115                 free_extent_buffer(buf);
116         } else {
117                 btrfs_set_node_blockptr(parent, parent_slot,
118                                         extent_buffer_blocknr(cow));
119                 btrfs_mark_buffer_dirty(parent);
120                 WARN_ON(btrfs_header_generation(parent) != trans->transid);
121                 btrfs_free_extent(trans, root, extent_buffer_blocknr(buf),1,1);
122         }
123         free_extent_buffer(buf);
124         btrfs_mark_buffer_dirty(cow);
125         *cow_ret = cow;
126         return 0;
127 }
128
129 int btrfs_cow_block(struct btrfs_trans_handle *trans,
130                     struct btrfs_root *root, struct extent_buffer *buf,
131                     struct extent_buffer *parent, int parent_slot,
132                     struct extent_buffer **cow_ret)
133 {
134         u64 search_start;
135         if (trans->transaction != root->fs_info->running_transaction) {
136                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
137                        root->fs_info->running_transaction->transid);
138                 WARN_ON(1);
139         }
140         if (trans->transid != root->fs_info->generation) {
141                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
142                        root->fs_info->generation);
143                 WARN_ON(1);
144         }
145         if (btrfs_header_generation(buf) == trans->transid) {
146                 *cow_ret = buf;
147                 return 0;
148         }
149
150         search_start = extent_buffer_blocknr(buf) & ~((u64)65535);
151         return __btrfs_cow_block(trans, root, buf, parent,
152                                  parent_slot, cow_ret, search_start, 0);
153 }
154
155 static int close_blocks(u64 blocknr, u64 other)
156 {
157         if (blocknr < other && other - blocknr < 8)
158                 return 1;
159         if (blocknr > other && blocknr - other < 8)
160                 return 1;
161         return 0;
162 }
163
164 #if 0
165 static int should_defrag_leaf(struct extent_buffer *eb)
166 {
167         return 0;
168         struct btrfs_leaf *leaf = btrfs_buffer_leaf(eb);
169         struct btrfs_disk_key *key;
170         u32 nritems;
171
172         if (buffer_defrag(bh))
173                 return 1;
174
175         nritems = btrfs_header_nritems(&leaf->header);
176         if (nritems == 0)
177                 return 0;
178
179         key = &leaf->items[0].key;
180         if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY)
181                 return 1;
182
183         key = &leaf->items[nritems-1].key;
184         if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY)
185                 return 1;
186         if (nritems > 4) {
187                 key = &leaf->items[nritems/2].key;
188                 if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY)
189                         return 1;
190         }
191         return 0;
192 }
193 #endif
194
195 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
196                        struct btrfs_root *root, struct extent_buffer *parent,
197                        int cache_only, u64 *last_ret)
198 {
199         return 0;
200 #if 0
201         struct btrfs_node *parent_node;
202         struct extent_buffer *cur_eb;
203         struct extent_buffer *tmp_eb;
204         u64 blocknr;
205         u64 search_start = *last_ret;
206         u64 last_block = 0;
207         u64 other;
208         u32 parent_nritems;
209         int start_slot;
210         int end_slot;
211         int i;
212         int err = 0;
213         int parent_level;
214
215         if (trans->transaction != root->fs_info->running_transaction) {
216                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
217                        root->fs_info->running_transaction->transid);
218                 WARN_ON(1);
219         }
220         if (trans->transid != root->fs_info->generation) {
221                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
222                        root->fs_info->generation);
223                 WARN_ON(1);
224         }
225         if (buffer_defrag_done(parent))
226                 return 0;
227
228         parent_node = btrfs_buffer_node(parent);
229         parent_nritems = btrfs_header_nritems(&parent_node->header);
230         parent_level = btrfs_header_level(&parent_node->header);
231
232         start_slot = 0;
233         end_slot = parent_nritems;
234
235         if (parent_nritems == 1)
236                 return 0;
237
238         for (i = start_slot; i < end_slot; i++) {
239                 int close = 1;
240                 blocknr = btrfs_node_blockptr(parent_node, i);
241                 if (last_block == 0)
242                         last_block = blocknr;
243                 if (i > 0) {
244                         other = btrfs_node_blockptr(parent_node, i - 1);
245                         close = close_blocks(blocknr, other);
246                 }
247                 if (close && i < end_slot - 1) {
248                         other = btrfs_node_blockptr(parent_node, i + 1);
249                         close = close_blocks(blocknr, other);
250                 }
251                 if (close) {
252                         last_block = blocknr;
253                         continue;
254                 }
255
256                 cur_bh = btrfs_find_tree_block(root, blocknr);
257                 if (!cur_bh || !buffer_uptodate(cur_bh) ||
258                     buffer_locked(cur_bh) ||
259                     (parent_level != 1 && !buffer_defrag(cur_bh)) ||
260                     (parent_level == 1 && !should_defrag_leaf(cur_bh))) {
261                         if (cache_only) {
262                                 brelse(cur_bh);
263                                 continue;
264                         }
265                         if (!cur_bh || !buffer_uptodate(cur_bh) ||
266                             buffer_locked(cur_bh)) {
267                                 brelse(cur_bh);
268                                 cur_bh = read_tree_block(root, blocknr);
269                         }
270                 }
271                 if (search_start == 0)
272                         search_start = last_block & ~((u64)65535);
273
274                 err = __btrfs_cow_block(trans, root, cur_bh, parent, i,
275                                         &tmp_bh, search_start,
276                                         min(8, end_slot - i));
277                 if (err) {
278                         brelse(cur_bh);
279                         break;
280                 }
281                 search_start = bh_blocknr(tmp_bh);
282                 *last_ret = search_start;
283                 if (parent_level == 1)
284                         clear_buffer_defrag(tmp_bh);
285                 set_buffer_defrag_done(tmp_bh);
286                 brelse(tmp_bh);
287         }
288         return err;
289 #endif
290 }
291
292 /*
293  * The leaf data grows from end-to-front in the node.
294  * this returns the address of the start of the last item,
295  * which is the stop of the leaf data stack
296  */
297 static inline unsigned int leaf_data_end(struct btrfs_root *root,
298                                          struct extent_buffer *leaf)
299 {
300         u32 nr = btrfs_header_nritems(leaf);
301         if (nr == 0)
302                 return BTRFS_LEAF_DATA_SIZE(root);
303         return btrfs_item_offset_nr(leaf, nr - 1);
304 }
305
306 /*
307  * compare two keys in a memcmp fashion
308  */
309 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
310 {
311         struct btrfs_key k1;
312
313         btrfs_disk_key_to_cpu(&k1, disk);
314
315         if (k1.objectid > k2->objectid)
316                 return 1;
317         if (k1.objectid < k2->objectid)
318                 return -1;
319         if (k1.type > k2->type)
320                 return 1;
321         if (k1.type < k2->type)
322                 return -1;
323         if (k1.offset > k2->offset)
324                 return 1;
325         if (k1.offset < k2->offset)
326                 return -1;
327         return 0;
328 }
329
330 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
331                       int level)
332 {
333         struct extent_buffer *parent = NULL;
334         struct extent_buffer *node = path->nodes[level];
335         struct btrfs_disk_key parent_key;
336         struct btrfs_disk_key node_key;
337         int parent_slot;
338         int slot;
339         struct btrfs_key cpukey;
340         u32 nritems = btrfs_header_nritems(node);
341
342         if (path->nodes[level + 1])
343                 parent = path->nodes[level + 1];
344
345         slot = path->slots[level];
346         BUG_ON(nritems == 0);
347         if (parent) {
348                 parent_slot = path->slots[level + 1];
349                 btrfs_node_key(parent, &parent_key, parent_slot);
350                 btrfs_node_key(node, &node_key, 0);
351                 BUG_ON(memcmp(&parent_key, &node_key,
352                               sizeof(struct btrfs_disk_key)));
353                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
354                        btrfs_header_blocknr(node));
355         }
356         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
357         if (slot != 0) {
358                 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
359                 btrfs_node_key(node, &node_key, slot);
360                 BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
361         }
362         if (slot < nritems - 1) {
363                 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
364                 btrfs_node_key(node, &node_key, slot);
365                 BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
366         }
367         return 0;
368 }
369
370 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
371                       int level)
372 {
373         struct extent_buffer *leaf = path->nodes[level];
374         struct extent_buffer *parent = NULL;
375         int parent_slot;
376         struct btrfs_key cpukey;
377         struct btrfs_disk_key parent_key;
378         struct btrfs_disk_key leaf_key;
379         int slot = path->slots[0];
380
381         u32 nritems = btrfs_header_nritems(leaf);
382
383         if (path->nodes[level + 1])
384                 parent = path->nodes[level + 1];
385
386         if (nritems == 0)
387                 return 0;
388
389         if (parent) {
390                 parent_slot = path->slots[level + 1];
391                 btrfs_node_key(parent, &parent_key, parent_slot);
392                 btrfs_item_key(leaf, &leaf_key, 0);
393
394                 BUG_ON(memcmp(&parent_key, &leaf_key,
395                        sizeof(struct btrfs_disk_key)));
396                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
397                        btrfs_header_blocknr(leaf));
398         }
399 #if 0
400         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
401                 btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
402                 btrfs_item_key(leaf, &leaf_key, i);
403                 if (comp_keys(&leaf_key, &cpukey) >= 0) {
404                         btrfs_print_leaf(root, leaf);
405                         printk("slot %d offset bad key\n", i);
406                         BUG_ON(1);
407                 }
408                 if (btrfs_item_offset_nr(leaf, i) !=
409                         btrfs_item_end_nr(leaf, i + 1)) {
410                         btrfs_print_leaf(root, leaf);
411                         printk("slot %d offset bad\n", i);
412                         BUG_ON(1);
413                 }
414                 if (i == 0) {
415                         if (btrfs_item_offset_nr(leaf, i) +
416                                btrfs_item_size_nr(leaf, i) !=
417                                BTRFS_LEAF_DATA_SIZE(root)) {
418                                 btrfs_print_leaf(root, leaf);
419                                 printk("slot %d first offset bad\n", i);
420                                 BUG_ON(1);
421                         }
422                 }
423         }
424         if (nritems > 0) {
425                 if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
426                                 btrfs_print_leaf(root, leaf);
427                                 printk("slot %d bad size \n", nritems - 1);
428                                 BUG_ON(1);
429                 }
430         }
431 #endif
432         if (slot != 0 && slot < nritems - 1) {
433                 btrfs_item_key(leaf, &leaf_key, slot);
434                 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
435                 if (comp_keys(&leaf_key, &cpukey) <= 0) {
436                         btrfs_print_leaf(root, leaf);
437                         printk("slot %d offset bad key\n", slot);
438                         BUG_ON(1);
439                 }
440                 if (btrfs_item_offset_nr(leaf, slot - 1) !=
441                        btrfs_item_end_nr(leaf, slot)) {
442                         btrfs_print_leaf(root, leaf);
443                         printk("slot %d offset bad\n", slot);
444                         BUG_ON(1);
445                 }
446         }
447         if (slot < nritems - 1) {
448                 btrfs_item_key(leaf, &leaf_key, slot);
449                 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
450                 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
451                 if (btrfs_item_offset_nr(leaf, slot) !=
452                         btrfs_item_end_nr(leaf, slot + 1)) {
453                         btrfs_print_leaf(root, leaf);
454                         printk("slot %d offset bad\n", slot);
455                         BUG_ON(1);
456                 }
457         }
458         BUG_ON(btrfs_item_offset_nr(leaf, 0) +
459                btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
460         return 0;
461 }
462
463 static int check_block(struct btrfs_root *root, struct btrfs_path *path,
464                         int level)
465 {
466         struct extent_buffer *buf = path->nodes[level];
467         char fsid[BTRFS_FSID_SIZE];
468
469         read_extent_buffer(buf, fsid, (unsigned long)btrfs_header_fsid(buf),
470                            BTRFS_FSID_SIZE);
471
472         if (memcmp(fsid, root->fs_info->fsid, BTRFS_FSID_SIZE)) {
473                 int i = 0;
474                 printk("warning bad block %Lu\n", buf->start);
475                 if (!btrfs_buffer_uptodate(buf)) {
476                         WARN_ON(1);
477                 }
478                 for (i = 0; i < BTRFS_FSID_SIZE; i++) {
479                         printk("%x:%x ", root->fs_info->fsid[i], fsid[i]);
480                 }
481                 printk("\n");
482                 // BUG();
483         }
484         if (level == 0)
485                 return check_leaf(root, path, level);
486         return check_node(root, path, level);
487 }
488
489 /*
490  * search for key in the extent_buffer.  The items start at offset p,
491  * and they are item_size apart.  There are 'max' items in p.
492  *
493  * the slot in the array is returned via slot, and it points to
494  * the place where you would insert key if it is not found in
495  * the array.
496  *
497  * slot may point to max if the key is bigger than all of the keys
498  */
499 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
500                               int item_size, struct btrfs_key *key,
501                               int max, int *slot)
502 {
503         int low = 0;
504         int high = max;
505         int mid;
506         int ret;
507         struct btrfs_disk_key *tmp;
508         struct btrfs_disk_key unaligned;
509         unsigned long offset;
510         char *map_token = NULL;
511         char *kaddr = NULL;
512         unsigned long map_start = 0;
513         unsigned long map_len = 0;
514
515         while(low < high) {
516                 mid = (low + high) / 2;
517                 offset = p + mid * item_size;
518
519                 if (!map_token || offset < map_start ||
520                     (offset + sizeof(struct btrfs_disk_key)) >
521                     map_start + map_len) {
522                         if (map_token)
523                                 unmap_extent_buffer(eb, map_token, KM_USER0);
524                         map_extent_buffer(eb, offset, &map_token, &kaddr,
525                                           &map_start, &map_len, KM_USER0);
526
527                 }
528                 if (offset + sizeof(struct btrfs_disk_key) >
529                     map_start + map_len) {
530                         unmap_extent_buffer(eb, map_token, KM_USER0);
531                         read_extent_buffer(eb, &unaligned,
532                                            offset, sizeof(unaligned));
533                         map_token = NULL;
534                         tmp = &unaligned;
535                 } else {
536                         tmp = (struct btrfs_disk_key *)(kaddr + offset -
537                                                         map_start);
538                 }
539                 ret = comp_keys(tmp, key);
540
541                 if (ret < 0)
542                         low = mid + 1;
543                 else if (ret > 0)
544                         high = mid;
545                 else {
546                         *slot = mid;
547                         unmap_extent_buffer(eb, map_token, KM_USER0);
548                         return 0;
549                 }
550         }
551         *slot = low;
552         if (map_token)
553                 unmap_extent_buffer(eb, map_token, KM_USER0);
554         return 1;
555 }
556
557 /*
558  * simple bin_search frontend that does the right thing for
559  * leaves vs nodes
560  */
561 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
562                       int level, int *slot)
563 {
564         if (level == 0) {
565                 return generic_bin_search(eb,
566                                           offsetof(struct btrfs_leaf, items),
567                                           sizeof(struct btrfs_item),
568                                           key, btrfs_header_nritems(eb),
569                                           slot);
570         } else {
571                 return generic_bin_search(eb,
572                                           offsetof(struct btrfs_node, ptrs),
573                                           sizeof(struct btrfs_key_ptr),
574                                           key, btrfs_header_nritems(eb),
575                                           slot);
576         }
577         return -1;
578 }
579
580 static struct extent_buffer *read_node_slot(struct btrfs_root *root,
581                                    struct extent_buffer *parent, int slot)
582 {
583         if (slot < 0)
584                 return NULL;
585         if (slot >= btrfs_header_nritems(parent))
586                 return NULL;
587         return read_tree_block(root, btrfs_node_blockptr(parent, slot));
588 }
589
590 static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
591                          *root, struct btrfs_path *path, int level)
592 {
593         struct extent_buffer *right = NULL;
594         struct extent_buffer *mid;
595         struct extent_buffer *left = NULL;
596         struct extent_buffer *parent = NULL;
597         int ret = 0;
598         int wret;
599         int pslot;
600         int orig_slot = path->slots[level];
601         int err_on_enospc = 0;
602         u64 orig_ptr;
603
604         if (level == 0)
605                 return 0;
606
607         mid = path->nodes[level];
608         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
609
610         if (level < BTRFS_MAX_LEVEL - 1)
611                 parent = path->nodes[level + 1];
612         pslot = path->slots[level + 1];
613
614         /*
615          * deal with the case where there is only one pointer in the root
616          * by promoting the node below to a root
617          */
618         if (!parent) {
619                 struct extent_buffer *child;
620                 u64 blocknr = extent_buffer_blocknr(mid);
621
622                 if (btrfs_header_nritems(mid) != 1)
623                         return 0;
624
625                 /* promote the child to a root */
626                 child = read_node_slot(root, mid, 0);
627                 BUG_ON(!child);
628                 root->node = child;
629                 path->nodes[level] = NULL;
630                 clean_tree_block(trans, root, mid);
631                 wait_on_tree_block_writeback(root, mid);
632                 /* once for the path */
633                 free_extent_buffer(mid);
634                 /* once for the root ptr */
635                 free_extent_buffer(mid);
636                 return btrfs_free_extent(trans, root, blocknr, 1, 1);
637         }
638         if (btrfs_header_nritems(mid) >
639             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
640                 return 0;
641
642         if (btrfs_header_nritems(mid) < 2)
643                 err_on_enospc = 1;
644
645         left = read_node_slot(root, parent, pslot - 1);
646         if (left) {
647                 wret = btrfs_cow_block(trans, root, left,
648                                        parent, pslot - 1, &left);
649                 if (wret) {
650                         ret = wret;
651                         goto enospc;
652                 }
653         }
654         right = read_node_slot(root, parent, pslot + 1);
655         if (right) {
656                 wret = btrfs_cow_block(trans, root, right,
657                                        parent, pslot + 1, &right);
658                 if (wret) {
659                         ret = wret;
660                         goto enospc;
661                 }
662         }
663
664         /* first, try to make some room in the middle buffer */
665         if (left) {
666                 orig_slot += btrfs_header_nritems(left);
667                 wret = push_node_left(trans, root, left, mid);
668                 if (wret < 0)
669                         ret = wret;
670                 if (btrfs_header_nritems(mid) < 2)
671                         err_on_enospc = 1;
672         }
673
674         /*
675          * then try to empty the right most buffer into the middle
676          */
677         if (right) {
678                 wret = push_node_left(trans, root, mid, right);
679                 if (wret < 0 && wret != -ENOSPC)
680                         ret = wret;
681                 if (btrfs_header_nritems(right) == 0) {
682                         u64 blocknr = extent_buffer_blocknr(right);
683                         clean_tree_block(trans, root, right);
684                         wait_on_tree_block_writeback(root, right);
685                         free_extent_buffer(right);
686                         right = NULL;
687                         wret = del_ptr(trans, root, path, level + 1, pslot +
688                                        1);
689                         if (wret)
690                                 ret = wret;
691                         wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
692                         if (wret)
693                                 ret = wret;
694                 } else {
695                         struct btrfs_disk_key right_key;
696                         btrfs_node_key(right, &right_key, 0);
697                         btrfs_set_node_key(parent, &right_key, pslot + 1);
698                         btrfs_mark_buffer_dirty(parent);
699                 }
700         }
701         if (btrfs_header_nritems(mid) == 1) {
702                 /*
703                  * we're not allowed to leave a node with one item in the
704                  * tree during a delete.  A deletion from lower in the tree
705                  * could try to delete the only pointer in this node.
706                  * So, pull some keys from the left.
707                  * There has to be a left pointer at this point because
708                  * otherwise we would have pulled some pointers from the
709                  * right
710                  */
711                 BUG_ON(!left);
712                 wret = balance_node_right(trans, root, mid, left);
713                 if (wret < 0) {
714                         ret = wret;
715                         goto enospc;
716                 }
717                 BUG_ON(wret == 1);
718         }
719         if (btrfs_header_nritems(mid) == 0) {
720                 /* we've managed to empty the middle node, drop it */
721                 u64 blocknr = extent_buffer_blocknr(mid);
722                 clean_tree_block(trans, root, mid);
723                 wait_on_tree_block_writeback(root, mid);
724                 free_extent_buffer(mid);
725                 mid = NULL;
726                 wret = del_ptr(trans, root, path, level + 1, pslot);
727                 if (wret)
728                         ret = wret;
729                 wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
730                 if (wret)
731                         ret = wret;
732         } else {
733                 /* update the parent key to reflect our changes */
734                 struct btrfs_disk_key mid_key;
735                 btrfs_node_key(mid, &mid_key, 0);
736                 btrfs_set_node_key(parent, &mid_key, pslot);
737                 btrfs_mark_buffer_dirty(parent);
738         }
739
740         /* update the path */
741         if (left) {
742                 if (btrfs_header_nritems(left) > orig_slot) {
743                         extent_buffer_get(left);
744                         path->nodes[level] = left;
745                         path->slots[level + 1] -= 1;
746                         path->slots[level] = orig_slot;
747                         if (mid)
748                                 free_extent_buffer(mid);
749                 } else {
750                         orig_slot -= btrfs_header_nritems(left);
751                         path->slots[level] = orig_slot;
752                 }
753         }
754         /* double check we haven't messed things up */
755         check_block(root, path, level);
756         if (orig_ptr !=
757             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
758                 BUG();
759 enospc:
760         if (right)
761                 free_extent_buffer(right);
762         if (left)
763                 free_extent_buffer(left);
764         return ret;
765 }
766
767 /* returns zero if the push worked, non-zero otherwise */
768 static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
769                                 struct btrfs_root *root,
770                                 struct btrfs_path *path, int level)
771 {
772         struct extent_buffer *right = NULL;
773         struct extent_buffer *mid;
774         struct extent_buffer *left = NULL;
775         struct extent_buffer *parent = NULL;
776         int ret = 0;
777         int wret;
778         int pslot;
779         int orig_slot = path->slots[level];
780         u64 orig_ptr;
781
782         if (level == 0)
783                 return 1;
784
785         mid = path->nodes[level];
786         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
787
788         if (level < BTRFS_MAX_LEVEL - 1)
789                 parent = path->nodes[level + 1];
790         pslot = path->slots[level + 1];
791
792         if (!parent)
793                 return 1;
794
795         left = read_node_slot(root, parent, pslot - 1);
796
797         /* first, try to make some room in the middle buffer */
798         if (left) {
799                 u32 left_nr;
800                 left_nr = btrfs_header_nritems(left);
801                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
802                         wret = 1;
803                 } else {
804                         ret = btrfs_cow_block(trans, root, left, parent,
805                                               pslot - 1, &left);
806                         if (ret)
807                                 wret = 1;
808                         else {
809                                 wret = push_node_left(trans, root,
810                                                       left, mid);
811                         }
812                 }
813                 if (wret < 0)
814                         ret = wret;
815                 if (wret == 0) {
816                         struct btrfs_disk_key disk_key;
817                         orig_slot += left_nr;
818                         btrfs_node_key(mid, &disk_key, 0);
819                         btrfs_set_node_key(parent, &disk_key, pslot);
820                         btrfs_mark_buffer_dirty(parent);
821                         if (btrfs_header_nritems(left) > orig_slot) {
822                                 path->nodes[level] = left;
823                                 path->slots[level + 1] -= 1;
824                                 path->slots[level] = orig_slot;
825                                 free_extent_buffer(mid);
826                         } else {
827                                 orig_slot -=
828                                         btrfs_header_nritems(left);
829                                 path->slots[level] = orig_slot;
830                                 free_extent_buffer(left);
831                         }
832                         check_node(root, path, level);
833                         return 0;
834                 }
835                 free_extent_buffer(left);
836         }
837         right= read_node_slot(root, parent, pslot + 1);
838
839         /*
840          * then try to empty the right most buffer into the middle
841          */
842         if (right) {
843                 u32 right_nr;
844                 right_nr = btrfs_header_nritems(right);
845                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
846                         wret = 1;
847                 } else {
848                         ret = btrfs_cow_block(trans, root, right,
849                                               parent, pslot + 1,
850                                               &right);
851                         if (ret)
852                                 wret = 1;
853                         else {
854                                 wret = balance_node_right(trans, root,
855                                                           right, mid);
856                         }
857                 }
858                 if (wret < 0)
859                         ret = wret;
860                 if (wret == 0) {
861                         struct btrfs_disk_key disk_key;
862
863                         btrfs_node_key(right, &disk_key, 0);
864                         btrfs_set_node_key(parent, &disk_key, pslot + 1);
865                         btrfs_mark_buffer_dirty(parent);
866
867                         if (btrfs_header_nritems(mid) <= orig_slot) {
868                                 path->nodes[level] = right;
869                                 path->slots[level + 1] += 1;
870                                 path->slots[level] = orig_slot -
871                                         btrfs_header_nritems(mid);
872                                 free_extent_buffer(mid);
873                         } else {
874                                 free_extent_buffer(right);
875                         }
876                         check_node(root, path, level);
877                         return 0;
878                 }
879                 free_extent_buffer(right);
880         }
881         check_node(root, path, level);
882         return 1;
883 }
884
885 /*
886  * readahead one full node of leaves
887  */
888 static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
889                              int level, int slot)
890 {
891         struct extent_buffer *node;
892         int i;
893         u32 nritems;
894         u64 blocknr;
895         u64 search;
896         u64 cluster_start;
897         int ret;
898         int nread = 0;
899         int direction = path->reada;
900         struct radix_tree_root found;
901         unsigned long gang[8];
902         struct extent_buffer *eb;
903
904         if (level == 0)
905                 return;
906
907         if (!path->nodes[level])
908                 return;
909
910         node = path->nodes[level];
911         search = btrfs_node_blockptr(node, slot);
912         eb = btrfs_find_tree_block(root, search);
913         if (eb) {
914                 free_extent_buffer(eb);
915                 return;
916         }
917
918         init_bit_radix(&found);
919         nritems = btrfs_header_nritems(node);
920         for (i = slot; i < nritems; i++) {
921                 blocknr = btrfs_node_blockptr(node, i);
922                 set_radix_bit(&found, blocknr);
923         }
924         if (direction > 0) {
925                 cluster_start = search - 4;
926                 if (cluster_start > search)
927                         cluster_start = 0;
928         } else
929                 cluster_start = search + 4;
930         while(1) {
931                 ret = find_first_radix_bit(&found, gang, 0, ARRAY_SIZE(gang));
932                 if (!ret)
933                         break;
934                 for (i = 0; i < ret; i++) {
935                         blocknr = gang[i];
936                         clear_radix_bit(&found, blocknr);
937                         if (path->reada == 1 && nread > 16)
938                                 continue;
939                         if (close_blocks(cluster_start, blocknr)) {
940                                 readahead_tree_block(root, blocknr);
941                                 nread++;
942                                 cluster_start = blocknr;
943                         }
944                 }
945         }
946 }
947 /*
948  * look for key in the tree.  path is filled in with nodes along the way
949  * if key is found, we return zero and you can find the item in the leaf
950  * level of the path (level 0)
951  *
952  * If the key isn't found, the path points to the slot where it should
953  * be inserted, and 1 is returned.  If there are other errors during the
954  * search a negative error number is returned.
955  *
956  * if ins_len > 0, nodes and leaves will be split as we walk down the
957  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
958  * possible)
959  */
960 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
961                       *root, struct btrfs_key *key, struct btrfs_path *p, int
962                       ins_len, int cow)
963 {
964         struct extent_buffer *b;
965         u64 blocknr;
966         int slot;
967         int ret;
968         int level;
969         int should_reada = p->reada;
970         u8 lowest_level = 0;
971
972         lowest_level = p->lowest_level;
973         WARN_ON(lowest_level && ins_len);
974         WARN_ON(p->nodes[0] != NULL);
975         WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
976 again:
977         b = root->node;
978         extent_buffer_get(b);
979         while (b) {
980                 level = btrfs_header_level(b);
981                 if (cow) {
982                         int wret;
983                         wret = btrfs_cow_block(trans, root, b,
984                                                p->nodes[level + 1],
985                                                p->slots[level + 1],
986                                                &b);
987                         if (wret) {
988                                 free_extent_buffer(b);
989                                 return wret;
990                         }
991                 }
992                 BUG_ON(!cow && ins_len);
993                 if (level != btrfs_header_level(b))
994                         WARN_ON(1);
995                 level = btrfs_header_level(b);
996                 p->nodes[level] = b;
997                 ret = check_block(root, p, level);
998                 if (ret)
999                         return -1;
1000                 ret = bin_search(b, key, level, &slot);
1001                 if (level != 0) {
1002                         if (ret && slot > 0)
1003                                 slot -= 1;
1004                         p->slots[level] = slot;
1005                         if (ins_len > 0 && btrfs_header_nritems(b) >=
1006                             BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1007                                 int sret = split_node(trans, root, p, level);
1008                                 BUG_ON(sret > 0);
1009                                 if (sret)
1010                                         return sret;
1011                                 b = p->nodes[level];
1012                                 slot = p->slots[level];
1013                         } else if (ins_len < 0) {
1014                                 int sret = balance_level(trans, root, p,
1015                                                          level);
1016                                 if (sret)
1017                                         return sret;
1018                                 b = p->nodes[level];
1019                                 if (!b)
1020                                         goto again;
1021                                 slot = p->slots[level];
1022                                 BUG_ON(btrfs_header_nritems(b) == 1);
1023                         }
1024                         /* this is only true while dropping a snapshot */
1025                         if (level == lowest_level)
1026                                 break;
1027                         blocknr = btrfs_node_blockptr(b, slot);
1028                         if (should_reada)
1029                                 reada_for_search(root, p, level, slot);
1030                         b = read_tree_block(root, btrfs_node_blockptr(b, slot));
1031                 } else {
1032                         p->slots[level] = slot;
1033                         if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
1034                             sizeof(struct btrfs_item) + ins_len) {
1035                                 int sret = split_leaf(trans, root, key,
1036                                                       p, ins_len);
1037                                 BUG_ON(sret > 0);
1038                                 if (sret)
1039                                         return sret;
1040                         }
1041                         return ret;
1042                 }
1043         }
1044         return 1;
1045 }
1046
1047 /*
1048  * adjust the pointers going up the tree, starting at level
1049  * making sure the right key of each node is points to 'key'.
1050  * This is used after shifting pointers to the left, so it stops
1051  * fixing up pointers when a given leaf/node is not in slot 0 of the
1052  * higher levels
1053  *
1054  * If this fails to write a tree block, it returns -1, but continues
1055  * fixing up the blocks in ram so the tree is consistent.
1056  */
1057 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1058                           struct btrfs_root *root, struct btrfs_path *path,
1059                           struct btrfs_disk_key *key, int level)
1060 {
1061         int i;
1062         int ret = 0;
1063         struct extent_buffer *t;
1064
1065         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1066                 int tslot = path->slots[i];
1067                 if (!path->nodes[i])
1068                         break;
1069                 t = path->nodes[i];
1070                 btrfs_set_node_key(t, key, tslot);
1071                 btrfs_mark_buffer_dirty(path->nodes[i]);
1072                 if (tslot != 0)
1073                         break;
1074         }
1075         return ret;
1076 }
1077
1078 /*
1079  * try to push data from one node into the next node left in the
1080  * tree.
1081  *
1082  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1083  * error, and > 0 if there was no room in the left hand block.
1084  */
1085 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
1086                           *root, struct extent_buffer *dst,
1087                           struct extent_buffer *src)
1088 {
1089         int push_items = 0;
1090         int src_nritems;
1091         int dst_nritems;
1092         int ret = 0;
1093
1094         src_nritems = btrfs_header_nritems(src);
1095         dst_nritems = btrfs_header_nritems(dst);
1096         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1097
1098         if (push_items <= 0) {
1099                 return 1;
1100         }
1101
1102         if (src_nritems < push_items)
1103                 push_items = src_nritems;
1104
1105         copy_extent_buffer(dst, src,
1106                            btrfs_node_key_ptr_offset(dst_nritems),
1107                            btrfs_node_key_ptr_offset(0),
1108                            push_items * sizeof(struct btrfs_key_ptr));
1109
1110         if (push_items < src_nritems) {
1111                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1112                                       btrfs_node_key_ptr_offset(push_items),
1113                                       (src_nritems - push_items) *
1114                                       sizeof(struct btrfs_key_ptr));
1115         }
1116         btrfs_set_header_nritems(src, src_nritems - push_items);
1117         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1118         btrfs_mark_buffer_dirty(src);
1119         btrfs_mark_buffer_dirty(dst);
1120         return ret;
1121 }
1122
1123 /*
1124  * try to push data from one node into the next node right in the
1125  * tree.
1126  *
1127  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1128  * error, and > 0 if there was no room in the right hand block.
1129  *
1130  * this will  only push up to 1/2 the contents of the left node over
1131  */
1132 static int balance_node_right(struct btrfs_trans_handle *trans,
1133                               struct btrfs_root *root,
1134                               struct extent_buffer *dst,
1135                               struct extent_buffer *src)
1136 {
1137         int push_items = 0;
1138         int max_push;
1139         int src_nritems;
1140         int dst_nritems;
1141         int ret = 0;
1142
1143         src_nritems = btrfs_header_nritems(src);
1144         dst_nritems = btrfs_header_nritems(dst);
1145         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1146         if (push_items <= 0)
1147                 return 1;
1148
1149         max_push = src_nritems / 2 + 1;
1150         /* don't try to empty the node */
1151         if (max_push >= src_nritems)
1152                 return 1;
1153
1154         if (max_push < push_items)
1155                 push_items = max_push;
1156
1157         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1158                                       btrfs_node_key_ptr_offset(0),
1159                                       (dst_nritems) *
1160                                       sizeof(struct btrfs_key_ptr));
1161
1162         copy_extent_buffer(dst, src,
1163                            btrfs_node_key_ptr_offset(0),
1164                            btrfs_node_key_ptr_offset(src_nritems - push_items),
1165                            push_items * sizeof(struct btrfs_key_ptr));
1166
1167         btrfs_set_header_nritems(src, src_nritems - push_items);
1168         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1169
1170         btrfs_mark_buffer_dirty(src);
1171         btrfs_mark_buffer_dirty(dst);
1172         return ret;
1173 }
1174
1175 /*
1176  * helper function to insert a new root level in the tree.
1177  * A new node is allocated, and a single item is inserted to
1178  * point to the existing root
1179  *
1180  * returns zero on success or < 0 on failure.
1181  */
1182 static int insert_new_root(struct btrfs_trans_handle *trans,
1183                            struct btrfs_root *root,
1184                            struct btrfs_path *path, int level)
1185 {
1186         struct extent_buffer *lower;
1187         struct extent_buffer *c;
1188         struct btrfs_disk_key lower_key;
1189
1190         BUG_ON(path->nodes[level]);
1191         BUG_ON(path->nodes[level-1] != root->node);
1192
1193         c = btrfs_alloc_free_block(trans, root,
1194                                    extent_buffer_blocknr(root->node), 0);
1195         if (IS_ERR(c))
1196                 return PTR_ERR(c);
1197         memset_extent_buffer(c, 0, 0, root->nodesize);
1198         btrfs_set_header_nritems(c, 1);
1199         btrfs_set_header_level(c, level);
1200         btrfs_set_header_blocknr(c, extent_buffer_blocknr(c));
1201         btrfs_set_header_generation(c, trans->transid);
1202         btrfs_set_header_owner(c, root->root_key.objectid);
1203         lower = path->nodes[level-1];
1204
1205         write_extent_buffer(c, root->fs_info->fsid,
1206                             (unsigned long)btrfs_header_fsid(c),
1207                             BTRFS_FSID_SIZE);
1208         if (level == 1)
1209                 btrfs_item_key(lower, &lower_key, 0);
1210         else
1211                 btrfs_node_key(lower, &lower_key, 0);
1212         btrfs_set_node_key(c, &lower_key, 0);
1213         btrfs_set_node_blockptr(c, 0, extent_buffer_blocknr(lower));
1214
1215         btrfs_mark_buffer_dirty(c);
1216
1217         /* the super has an extra ref to root->node */
1218         free_extent_buffer(root->node);
1219         root->node = c;
1220         extent_buffer_get(c);
1221         path->nodes[level] = c;
1222         path->slots[level] = 0;
1223         return 0;
1224 }
1225
1226 /*
1227  * worker function to insert a single pointer in a node.
1228  * the node should have enough room for the pointer already
1229  *
1230  * slot and level indicate where you want the key to go, and
1231  * blocknr is the block the key points to.
1232  *
1233  * returns zero on success and < 0 on any error
1234  */
1235 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1236                       *root, struct btrfs_path *path, struct btrfs_disk_key
1237                       *key, u64 blocknr, int slot, int level)
1238 {
1239         struct extent_buffer *lower;
1240         int nritems;
1241
1242         BUG_ON(!path->nodes[level]);
1243         lower = path->nodes[level];
1244         nritems = btrfs_header_nritems(lower);
1245         if (slot > nritems)
1246                 BUG();
1247         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
1248                 BUG();
1249         if (slot != nritems) {
1250                 memmove_extent_buffer(lower,
1251                               btrfs_node_key_ptr_offset(slot + 1),
1252                               btrfs_node_key_ptr_offset(slot),
1253                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
1254         }
1255         btrfs_set_node_key(lower, key, slot);
1256         btrfs_set_node_blockptr(lower, slot, blocknr);
1257         btrfs_set_header_nritems(lower, nritems + 1);
1258         btrfs_mark_buffer_dirty(lower);
1259         check_node(root, path, level);
1260         return 0;
1261 }
1262
1263 /*
1264  * split the node at the specified level in path in two.
1265  * The path is corrected to point to the appropriate node after the split
1266  *
1267  * Before splitting this tries to make some room in the node by pushing
1268  * left and right, if either one works, it returns right away.
1269  *
1270  * returns 0 on success and < 0 on failure
1271  */
1272 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1273                       *root, struct btrfs_path *path, int level)
1274 {
1275         struct extent_buffer *c;
1276         struct extent_buffer *split;
1277         struct btrfs_disk_key disk_key;
1278         int mid;
1279         int ret;
1280         int wret;
1281         u32 c_nritems;
1282
1283         c = path->nodes[level];
1284         if (c == root->node) {
1285                 /* trying to split the root, lets make a new one */
1286                 ret = insert_new_root(trans, root, path, level + 1);
1287                 if (ret)
1288                         return ret;
1289         } else {
1290                 ret = push_nodes_for_insert(trans, root, path, level);
1291                 c = path->nodes[level];
1292                 if (!ret && btrfs_header_nritems(c) <
1293                     BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
1294                         return 0;
1295                 if (ret < 0)
1296                         return ret;
1297         }
1298
1299         c_nritems = btrfs_header_nritems(c);
1300         split = btrfs_alloc_free_block(trans, root,
1301                                        extent_buffer_blocknr(c), 0);
1302         if (IS_ERR(split))
1303                 return PTR_ERR(split);
1304
1305         btrfs_set_header_flags(split, btrfs_header_flags(c));
1306         btrfs_set_header_level(split, btrfs_header_level(c));
1307         btrfs_set_header_blocknr(split, extent_buffer_blocknr(split));
1308         btrfs_set_header_generation(split, trans->transid);
1309         btrfs_set_header_owner(split, root->root_key.objectid);
1310         write_extent_buffer(split, root->fs_info->fsid,
1311                             (unsigned long)btrfs_header_fsid(split),
1312                             BTRFS_FSID_SIZE);
1313
1314         mid = (c_nritems + 1) / 2;
1315
1316         copy_extent_buffer(split, c,
1317                            btrfs_node_key_ptr_offset(0),
1318                            btrfs_node_key_ptr_offset(mid),
1319                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1320         btrfs_set_header_nritems(split, c_nritems - mid);
1321         btrfs_set_header_nritems(c, mid);
1322         ret = 0;
1323
1324         btrfs_mark_buffer_dirty(c);
1325         btrfs_mark_buffer_dirty(split);
1326
1327         btrfs_node_key(split, &disk_key, 0);
1328         wret = insert_ptr(trans, root, path, &disk_key,
1329                           extent_buffer_blocknr(split),
1330                           path->slots[level + 1] + 1,
1331                           level + 1);
1332         if (wret)
1333                 ret = wret;
1334
1335         if (path->slots[level] >= mid) {
1336                 path->slots[level] -= mid;
1337                 free_extent_buffer(c);
1338                 path->nodes[level] = split;
1339                 path->slots[level + 1] += 1;
1340         } else {
1341                 free_extent_buffer(split);
1342         }
1343         return ret;
1344 }
1345
1346 /*
1347  * how many bytes are required to store the items in a leaf.  start
1348  * and nr indicate which items in the leaf to check.  This totals up the
1349  * space used both by the item structs and the item data
1350  */
1351 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1352 {
1353         int data_len;
1354         int nritems = btrfs_header_nritems(l);
1355         int end = min(nritems, start + nr) - 1;
1356
1357         if (!nr)
1358                 return 0;
1359         data_len = btrfs_item_end_nr(l, start);
1360         data_len = data_len - btrfs_item_offset_nr(l, end);
1361         data_len += sizeof(struct btrfs_item) * nr;
1362         WARN_ON(data_len < 0);
1363         return data_len;
1364 }
1365
1366 /*
1367  * The space between the end of the leaf items and
1368  * the start of the leaf data.  IOW, how much room
1369  * the leaf has left for both items and data
1370  */
1371 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
1372 {
1373         int nritems = btrfs_header_nritems(leaf);
1374         int ret;
1375         ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
1376         if (ret < 0) {
1377                 printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
1378                        ret, BTRFS_LEAF_DATA_SIZE(root),
1379                        leaf_space_used(leaf, 0, nritems), nritems);
1380         }
1381         return ret;
1382 }
1383
1384 /*
1385  * push some data in the path leaf to the right, trying to free up at
1386  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1387  *
1388  * returns 1 if the push failed because the other node didn't have enough
1389  * room, 0 if everything worked out and < 0 if there were major errors.
1390  */
1391 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1392                            *root, struct btrfs_path *path, int data_size)
1393 {
1394         struct extent_buffer *left = path->nodes[0];
1395         struct extent_buffer *right;
1396         struct extent_buffer *upper;
1397         struct btrfs_disk_key disk_key;
1398         int slot;
1399         int i;
1400         int free_space;
1401         int push_space = 0;
1402         int push_items = 0;
1403         struct btrfs_item *item;
1404         u32 left_nritems;
1405         u32 right_nritems;
1406         u32 data_end;
1407         int ret;
1408
1409         slot = path->slots[1];
1410         if (!path->nodes[1]) {
1411                 return 1;
1412         }
1413         upper = path->nodes[1];
1414         if (slot >= btrfs_header_nritems(upper) - 1)
1415                 return 1;
1416
1417         right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1));
1418         free_space = btrfs_leaf_free_space(root, right);
1419         if (free_space < data_size + sizeof(struct btrfs_item)) {
1420                 free_extent_buffer(right);
1421                 return 1;
1422         }
1423
1424         /* cow and double check */
1425         ret = btrfs_cow_block(trans, root, right, upper,
1426                               slot + 1, &right);
1427         if (ret) {
1428                 free_extent_buffer(right);
1429                 return 1;
1430         }
1431         free_space = btrfs_leaf_free_space(root, right);
1432         if (free_space < data_size + sizeof(struct btrfs_item)) {
1433                 free_extent_buffer(right);
1434                 return 1;
1435         }
1436
1437         left_nritems = btrfs_header_nritems(left);
1438         if (left_nritems == 0) {
1439                 free_extent_buffer(right);
1440                 return 1;
1441         }
1442
1443         for (i = left_nritems - 1; i >= 1; i--) {
1444                 item = btrfs_item_nr(left, i);
1445                 if (path->slots[0] == i)
1446                         push_space += data_size + sizeof(*item);
1447                 if (btrfs_item_size(left, item) + sizeof(*item) + push_space >
1448                     free_space)
1449                         break;
1450                 push_items++;
1451                 push_space += btrfs_item_size(left, item) + sizeof(*item);
1452         }
1453
1454         if (push_items == 0) {
1455                 free_extent_buffer(right);
1456                 return 1;
1457         }
1458
1459         if (push_items == left_nritems)
1460                 WARN_ON(1);
1461
1462         /* push left to right */
1463         right_nritems = btrfs_header_nritems(right);
1464         push_space = btrfs_item_end_nr(left, left_nritems - push_items);
1465         push_space -= leaf_data_end(root, left);
1466
1467         /* make room in the right data area */
1468         data_end = leaf_data_end(root, right);
1469         memmove_extent_buffer(right,
1470                               btrfs_leaf_data(right) + data_end - push_space,
1471                               btrfs_leaf_data(right) + data_end,
1472                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
1473
1474         /* copy from the left data area */
1475         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
1476                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
1477                      btrfs_leaf_data(left) + leaf_data_end(root, left),
1478                      push_space);
1479
1480         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1481                               btrfs_item_nr_offset(0),
1482                               right_nritems * sizeof(struct btrfs_item));
1483
1484         /* copy the items from left to right */
1485         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1486                    btrfs_item_nr_offset(left_nritems - push_items),
1487                    push_items * sizeof(struct btrfs_item));
1488
1489         /* update the item pointers */
1490         right_nritems += push_items;
1491         btrfs_set_header_nritems(right, right_nritems);
1492         push_space = BTRFS_LEAF_DATA_SIZE(root);
1493         for (i = 0; i < right_nritems; i++) {
1494                 item = btrfs_item_nr(right, i);
1495                 btrfs_set_item_offset(right, item, push_space -
1496                                       btrfs_item_size(right, item));
1497                 push_space = btrfs_item_offset(right, item);
1498         }
1499         left_nritems -= push_items;
1500         btrfs_set_header_nritems(left, left_nritems);
1501
1502         btrfs_mark_buffer_dirty(left);
1503         btrfs_mark_buffer_dirty(right);
1504
1505         btrfs_item_key(right, &disk_key, 0);
1506         btrfs_set_node_key(upper, &disk_key, slot + 1);
1507         btrfs_mark_buffer_dirty(upper);
1508
1509         /* then fixup the leaf pointer in the path */
1510         if (path->slots[0] >= left_nritems) {
1511                 path->slots[0] -= left_nritems;
1512                 free_extent_buffer(path->nodes[0]);
1513                 path->nodes[0] = right;
1514                 path->slots[1] += 1;
1515         } else {
1516                 free_extent_buffer(right);
1517         }
1518         if (path->nodes[1])
1519                 check_node(root, path, 1);
1520         return 0;
1521 }
1522 /*
1523  * push some data in the path leaf to the left, trying to free up at
1524  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1525  */
1526 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1527                           *root, struct btrfs_path *path, int data_size)
1528 {
1529         struct btrfs_disk_key disk_key;
1530         struct extent_buffer *right = path->nodes[0];
1531         struct extent_buffer *left;
1532         int slot;
1533         int i;
1534         int free_space;
1535         int push_space = 0;
1536         int push_items = 0;
1537         struct btrfs_item *item;
1538         u32 old_left_nritems;
1539         u32 right_nritems;
1540         int ret = 0;
1541         int wret;
1542
1543         slot = path->slots[1];
1544         if (slot == 0)
1545                 return 1;
1546         if (!path->nodes[1])
1547                 return 1;
1548
1549         left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1],
1550                                                          slot - 1));
1551         free_space = btrfs_leaf_free_space(root, left);
1552         if (free_space < data_size + sizeof(struct btrfs_item)) {
1553                 free_extent_buffer(left);
1554                 return 1;
1555         }
1556
1557         /* cow and double check */
1558         ret = btrfs_cow_block(trans, root, left,
1559                               path->nodes[1], slot - 1, &left);
1560         if (ret) {
1561                 /* we hit -ENOSPC, but it isn't fatal here */
1562                 free_extent_buffer(left);
1563                 return 1;
1564         }
1565         free_space = btrfs_leaf_free_space(root, left);
1566         if (free_space < data_size + sizeof(struct btrfs_item)) {
1567                 free_extent_buffer(left);
1568                 return 1;
1569         }
1570
1571         right_nritems = btrfs_header_nritems(right);
1572         if (right_nritems == 0) {
1573                 free_extent_buffer(left);
1574                 return 1;
1575         }
1576
1577         for (i = 0; i < right_nritems - 1; i++) {
1578                 item = btrfs_item_nr(right, i);
1579                 if (path->slots[0] == i)
1580                         push_space += data_size + sizeof(*item);
1581                 if (btrfs_item_size(right, item) + sizeof(*item) + push_space >
1582                     free_space)
1583                         break;
1584                 push_items++;
1585                 push_space += btrfs_item_size(right, item) + sizeof(*item);
1586         }
1587         if (push_items == 0) {
1588                 free_extent_buffer(left);
1589                 return 1;
1590         }
1591         if (push_items == btrfs_header_nritems(right))
1592                 WARN_ON(1);
1593
1594         /* push data from right to left */
1595         copy_extent_buffer(left, right,
1596                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
1597                            btrfs_item_nr_offset(0),
1598                            push_items * sizeof(struct btrfs_item));
1599
1600         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1601                      btrfs_item_offset_nr(right, push_items -1);
1602
1603         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
1604                      leaf_data_end(root, left) - push_space,
1605                      btrfs_leaf_data(right) +
1606                      btrfs_item_offset_nr(right, push_items - 1),
1607                      push_space);
1608         old_left_nritems = btrfs_header_nritems(left);
1609         BUG_ON(old_left_nritems < 0);
1610
1611         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1612                 u32 ioff;
1613                 item = btrfs_item_nr(left, i);
1614                 ioff = btrfs_item_offset(left, item);
1615                 btrfs_set_item_offset(left, item,
1616                       ioff - (BTRFS_LEAF_DATA_SIZE(root) -
1617                       btrfs_item_offset_nr(left, old_left_nritems - 1)));
1618         }
1619         btrfs_set_header_nritems(left, old_left_nritems + push_items);
1620
1621         /* fixup right node */
1622         push_space = btrfs_item_offset_nr(right, push_items - 1) -
1623                                           leaf_data_end(root, right);
1624         memmove_extent_buffer(right, btrfs_leaf_data(right) +
1625                               BTRFS_LEAF_DATA_SIZE(root) - push_space,
1626                               btrfs_leaf_data(right) +
1627                               leaf_data_end(root, right), push_space);
1628
1629         memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1630                               btrfs_item_nr_offset(push_items),
1631                              (btrfs_header_nritems(right) - push_items) *
1632                              sizeof(struct btrfs_item));
1633
1634         right_nritems = btrfs_header_nritems(right) - push_items;
1635         btrfs_set_header_nritems(right, right_nritems);
1636         push_space = BTRFS_LEAF_DATA_SIZE(root);
1637
1638         for (i = 0; i < right_nritems; i++) {
1639                 item = btrfs_item_nr(right, i);
1640                 btrfs_set_item_offset(right, item, push_space -
1641                                       btrfs_item_size(right, item));
1642                 push_space = btrfs_item_offset(right, item);
1643         }
1644
1645         btrfs_mark_buffer_dirty(left);
1646         btrfs_mark_buffer_dirty(right);
1647
1648         btrfs_item_key(right, &disk_key, 0);
1649         wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1650         if (wret)
1651                 ret = wret;
1652
1653         /* then fixup the leaf pointer in the path */
1654         if (path->slots[0] < push_items) {
1655                 path->slots[0] += old_left_nritems;
1656                 free_extent_buffer(path->nodes[0]);
1657                 path->nodes[0] = left;
1658                 path->slots[1] -= 1;
1659         } else {
1660                 free_extent_buffer(left);
1661                 path->slots[0] -= push_items;
1662         }
1663         BUG_ON(path->slots[0] < 0);
1664         if (path->nodes[1])
1665                 check_node(root, path, 1);
1666         return ret;
1667 }
1668
1669 /*
1670  * split the path's leaf in two, making sure there is at least data_size
1671  * available for the resulting leaf level of the path.
1672  *
1673  * returns 0 if all went well and < 0 on failure.
1674  */
1675 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1676                       *root, struct btrfs_key *ins_key,
1677                       struct btrfs_path *path, int data_size)
1678 {
1679         struct extent_buffer *l;
1680         u32 nritems;
1681         int mid;
1682         int slot;
1683         struct extent_buffer *right;
1684         int space_needed = data_size + sizeof(struct btrfs_item);
1685         int data_copy_size;
1686         int rt_data_off;
1687         int i;
1688         int ret = 0;
1689         int wret;
1690         int double_split = 0;
1691         struct btrfs_disk_key disk_key;
1692
1693         /* first try to make some room by pushing left and right */
1694         wret = push_leaf_left(trans, root, path, data_size);
1695         if (wret < 0)
1696                 return wret;
1697         if (wret) {
1698                 wret = push_leaf_right(trans, root, path, data_size);
1699                 if (wret < 0)
1700                         return wret;
1701         }
1702         l = path->nodes[0];
1703
1704         /* did the pushes work? */
1705         if (btrfs_leaf_free_space(root, l) >=
1706             sizeof(struct btrfs_item) + data_size)
1707                 return 0;
1708
1709         if (!path->nodes[1]) {
1710                 ret = insert_new_root(trans, root, path, 1);
1711                 if (ret)
1712                         return ret;
1713         }
1714         slot = path->slots[0];
1715         nritems = btrfs_header_nritems(l);
1716         mid = (nritems + 1)/ 2;
1717
1718         right = btrfs_alloc_free_block(trans, root,
1719                                               extent_buffer_blocknr(l), 0);
1720         if (IS_ERR(right))
1721                 return PTR_ERR(right);
1722
1723         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
1724         btrfs_set_header_blocknr(right, extent_buffer_blocknr(right));
1725         btrfs_set_header_generation(right, trans->transid);
1726         btrfs_set_header_owner(right, root->root_key.objectid);
1727         btrfs_set_header_level(right, 0);
1728         write_extent_buffer(right, root->fs_info->fsid,
1729                             (unsigned long)btrfs_header_fsid(right),
1730                             BTRFS_FSID_SIZE);
1731
1732         if (mid <= slot) {
1733                 if (nritems == 1 ||
1734                     leaf_space_used(l, mid, nritems - mid) + space_needed >
1735                         BTRFS_LEAF_DATA_SIZE(root)) {
1736                         if (slot >= nritems) {
1737                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1738                                 btrfs_set_header_nritems(right, 0);
1739                                 wret = insert_ptr(trans, root, path,
1740                                                   &disk_key,
1741                                                   extent_buffer_blocknr(right),
1742                                                   path->slots[1] + 1, 1);
1743                                 if (wret)
1744                                         ret = wret;
1745                                 free_extent_buffer(path->nodes[0]);
1746                                 path->nodes[0] = right;
1747                                 path->slots[0] = 0;
1748                                 path->slots[1] += 1;
1749                                 return ret;
1750                         }
1751                         mid = slot;
1752                         double_split = 1;
1753                 }
1754         } else {
1755                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
1756                         BTRFS_LEAF_DATA_SIZE(root)) {
1757                         if (slot == 0) {
1758                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1759                                 btrfs_set_header_nritems(right, 0);
1760                                 wret = insert_ptr(trans, root, path,
1761                                                   &disk_key,
1762                                                   extent_buffer_blocknr(right),
1763                                                   path->slots[1], 1);
1764                                 if (wret)
1765                                         ret = wret;
1766                                 free_extent_buffer(path->nodes[0]);
1767                                 path->nodes[0] = right;
1768                                 path->slots[0] = 0;
1769                                 if (path->slots[1] == 0) {
1770                                         wret = fixup_low_keys(trans, root,
1771                                                    path, &disk_key, 1);
1772                                         if (wret)
1773                                                 ret = wret;
1774                                 }
1775                                 return ret;
1776                         }
1777                         mid = slot;
1778                         double_split = 1;
1779                 }
1780         }
1781         nritems = nritems - mid;
1782         btrfs_set_header_nritems(right, nritems);
1783         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
1784
1785         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
1786                            btrfs_item_nr_offset(mid),
1787                            nritems * sizeof(struct btrfs_item));
1788
1789         copy_extent_buffer(right, l,
1790                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1791                      data_copy_size, btrfs_leaf_data(l) +
1792                      leaf_data_end(root, l), data_copy_size);
1793
1794         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1795                       btrfs_item_end_nr(l, mid);
1796
1797         for (i = 0; i < nritems; i++) {
1798                 struct btrfs_item *item = btrfs_item_nr(right, i);
1799                 u32 ioff = btrfs_item_offset(right, item);
1800                 btrfs_set_item_offset(right, item, ioff + rt_data_off);
1801         }
1802
1803         btrfs_set_header_nritems(l, mid);
1804         ret = 0;
1805         btrfs_item_key(right, &disk_key, 0);
1806         wret = insert_ptr(trans, root, path, &disk_key,
1807                           extent_buffer_blocknr(right), path->slots[1] + 1, 1);
1808         if (wret)
1809                 ret = wret;
1810
1811         btrfs_mark_buffer_dirty(right);
1812         btrfs_mark_buffer_dirty(l);
1813         BUG_ON(path->slots[0] != slot);
1814
1815         if (mid <= slot) {
1816                 free_extent_buffer(path->nodes[0]);
1817                 path->nodes[0] = right;
1818                 path->slots[0] -= mid;
1819                 path->slots[1] += 1;
1820         } else
1821                 free_extent_buffer(right);
1822
1823         BUG_ON(path->slots[0] < 0);
1824         check_node(root, path, 1);
1825         check_leaf(root, path, 0);
1826
1827         if (!double_split)
1828                 return ret;
1829
1830         right = btrfs_alloc_free_block(trans, root,
1831                                        extent_buffer_blocknr(l), 0);
1832         if (IS_ERR(right))
1833                 return PTR_ERR(right);
1834
1835         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
1836         btrfs_set_header_blocknr(right, extent_buffer_blocknr(right));
1837         btrfs_set_header_generation(right, trans->transid);
1838         btrfs_set_header_owner(right, root->root_key.objectid);
1839         btrfs_set_header_level(right, 0);
1840         write_extent_buffer(right, root->fs_info->fsid,
1841                             (unsigned long)btrfs_header_fsid(right),
1842                             BTRFS_FSID_SIZE);
1843
1844         btrfs_cpu_key_to_disk(&disk_key, ins_key);
1845         btrfs_set_header_nritems(right, 0);
1846         wret = insert_ptr(trans, root, path,
1847                           &disk_key,
1848                           extent_buffer_blocknr(right),
1849                           path->slots[1], 1);
1850         if (wret)
1851                 ret = wret;
1852         if (path->slots[1] == 0) {
1853                 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1854                 if (wret)
1855                         ret = wret;
1856         }
1857         free_extent_buffer(path->nodes[0]);
1858         path->nodes[0] = right;
1859         path->slots[0] = 0;
1860         check_node(root, path, 1);
1861         check_leaf(root, path, 0);
1862         return ret;
1863 }
1864
1865 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
1866                         struct btrfs_root *root,
1867                         struct btrfs_path *path,
1868                         u32 new_size)
1869 {
1870         int ret = 0;
1871         int slot;
1872         int slot_orig;
1873         struct extent_buffer *leaf;
1874         struct btrfs_item *item;
1875         u32 nritems;
1876         unsigned int data_end;
1877         unsigned int old_data_start;
1878         unsigned int old_size;
1879         unsigned int size_diff;
1880         int i;
1881
1882         slot_orig = path->slots[0];
1883         leaf = path->nodes[0];
1884
1885         nritems = btrfs_header_nritems(leaf);
1886         data_end = leaf_data_end(root, leaf);
1887
1888         slot = path->slots[0];
1889         old_data_start = btrfs_item_offset_nr(leaf, slot);
1890         old_size = btrfs_item_size_nr(leaf, slot);
1891         BUG_ON(old_size <= new_size);
1892         size_diff = old_size - new_size;
1893
1894         BUG_ON(slot < 0);
1895         BUG_ON(slot >= nritems);
1896
1897         /*
1898          * item0..itemN ... dataN.offset..dataN.size .. data0.size
1899          */
1900         /* first correct the data pointers */
1901         for (i = slot; i < nritems; i++) {
1902                 u32 ioff;
1903                 item = btrfs_item_nr(leaf, i);
1904                 ioff = btrfs_item_offset(leaf, item);
1905                 btrfs_set_item_offset(leaf, item, ioff + size_diff);
1906         }
1907         /* shift the data */
1908         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
1909                       data_end + size_diff, btrfs_leaf_data(leaf) +
1910                       data_end, old_data_start + new_size - data_end);
1911
1912         item = btrfs_item_nr(leaf, slot);
1913         btrfs_set_item_size(leaf, item, new_size);
1914         btrfs_mark_buffer_dirty(leaf);
1915
1916         ret = 0;
1917         if (btrfs_leaf_free_space(root, leaf) < 0) {
1918                 btrfs_print_leaf(root, leaf);
1919                 BUG();
1920         }
1921         check_leaf(root, path, 0);
1922         return ret;
1923 }
1924
1925 int btrfs_extend_item(struct btrfs_trans_handle *trans,
1926                       struct btrfs_root *root, struct btrfs_path *path,
1927                       u32 data_size)
1928 {
1929         int ret = 0;
1930         int slot;
1931         int slot_orig;
1932         struct extent_buffer *leaf;
1933         struct btrfs_item *item;
1934         u32 nritems;
1935         unsigned int data_end;
1936         unsigned int old_data;
1937         unsigned int old_size;
1938         int i;
1939
1940         slot_orig = path->slots[0];
1941         leaf = path->nodes[0];
1942
1943         nritems = btrfs_header_nritems(leaf);
1944         data_end = leaf_data_end(root, leaf);
1945
1946         if (btrfs_leaf_free_space(root, leaf) < data_size) {
1947                 btrfs_print_leaf(root, leaf);
1948                 BUG();
1949         }
1950         slot = path->slots[0];
1951         old_data = btrfs_item_end_nr(leaf, slot);
1952
1953         BUG_ON(slot < 0);
1954         BUG_ON(slot >= nritems);
1955
1956         /*
1957          * item0..itemN ... dataN.offset..dataN.size .. data0.size
1958          */
1959         /* first correct the data pointers */
1960         for (i = slot; i < nritems; i++) {
1961                 u32 ioff;
1962                 item = btrfs_item_nr(leaf, i);
1963                 ioff = btrfs_item_offset(leaf, item);
1964                 btrfs_set_item_offset(leaf, item, ioff - data_size);
1965         }
1966
1967         /* shift the data */
1968         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
1969                       data_end - data_size, btrfs_leaf_data(leaf) +
1970                       data_end, old_data - data_end);
1971
1972         data_end = old_data;
1973         old_size = btrfs_item_size_nr(leaf, slot);
1974         item = btrfs_item_nr(leaf, slot);
1975         btrfs_set_item_size(leaf, item, old_size + data_size);
1976         btrfs_mark_buffer_dirty(leaf);
1977
1978         ret = 0;
1979         if (btrfs_leaf_free_space(root, leaf) < 0) {
1980                 btrfs_print_leaf(root, leaf);
1981                 BUG();
1982         }
1983         check_leaf(root, path, 0);
1984         return ret;
1985 }
1986
1987 /*
1988  * Given a key and some data, insert an item into the tree.
1989  * This does all the path init required, making room in the tree if needed.
1990  */
1991 int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
1992                             struct btrfs_root *root,
1993                             struct btrfs_path *path,
1994                             struct btrfs_key *cpu_key, u32 data_size)
1995 {
1996         struct extent_buffer *leaf;
1997         struct btrfs_item *item;
1998         int ret = 0;
1999         int slot;
2000         int slot_orig;
2001         u32 nritems;
2002         unsigned int data_end;
2003         struct btrfs_disk_key disk_key;
2004
2005         btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2006
2007         /* create a root if there isn't one */
2008         if (!root->node)
2009                 BUG();
2010
2011         ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
2012         if (ret == 0) {
2013                 return -EEXIST;
2014         }
2015         if (ret < 0)
2016                 goto out;
2017
2018         slot_orig = path->slots[0];
2019         leaf = path->nodes[0];
2020
2021         nritems = btrfs_header_nritems(leaf);
2022         data_end = leaf_data_end(root, leaf);
2023
2024         if (btrfs_leaf_free_space(root, leaf) <
2025             sizeof(struct btrfs_item) + data_size) {
2026                 BUG();
2027         }
2028
2029         slot = path->slots[0];
2030         BUG_ON(slot < 0);
2031
2032         if (slot != nritems) {
2033                 int i;
2034                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2035
2036                 if (old_data < data_end) {
2037                         btrfs_print_leaf(root, leaf);
2038                         printk("slot %d old_data %d data_end %d\n",
2039                                slot, old_data, data_end);
2040                         BUG_ON(1);
2041                 }
2042                 /*
2043                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
2044                  */
2045                 /* first correct the data pointers */
2046                 for (i = slot; i < nritems; i++) {
2047                         u32 ioff;
2048                         item = btrfs_item_nr(leaf, i);
2049                         ioff = btrfs_item_offset(leaf, item);
2050                         btrfs_set_item_offset(leaf, item, ioff - data_size);
2051                 }
2052
2053                 /* shift the items */
2054                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
2055                               btrfs_item_nr_offset(slot),
2056                               (nritems - slot) * sizeof(struct btrfs_item));
2057
2058                 /* shift the data */
2059                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2060                               data_end - data_size, btrfs_leaf_data(leaf) +
2061                               data_end, old_data - data_end);
2062                 data_end = old_data;
2063         }
2064
2065         /* setup the item for the new data */
2066         btrfs_set_item_key(leaf, &disk_key, slot);
2067         item = btrfs_item_nr(leaf, slot);
2068         btrfs_set_item_offset(leaf, item, data_end - data_size);
2069         btrfs_set_item_size(leaf, item, data_size);
2070         btrfs_set_header_nritems(leaf, nritems + 1);
2071         btrfs_mark_buffer_dirty(leaf);
2072
2073         ret = 0;
2074         if (slot == 0)
2075                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
2076
2077         if (btrfs_leaf_free_space(root, leaf) < 0) {
2078                 btrfs_print_leaf(root, leaf);
2079                 BUG();
2080         }
2081         check_leaf(root, path, 0);
2082 out:
2083         return ret;
2084 }
2085
2086 /*
2087  * Given a key and some data, insert an item into the tree.
2088  * This does all the path init required, making room in the tree if needed.
2089  */
2090 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2091                       *root, struct btrfs_key *cpu_key, void *data, u32
2092                       data_size)
2093 {
2094         int ret = 0;
2095         struct btrfs_path *path;
2096         struct extent_buffer *leaf;
2097         unsigned long ptr;
2098
2099         path = btrfs_alloc_path();
2100         BUG_ON(!path);
2101         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2102         if (!ret) {
2103                 leaf = path->nodes[0];
2104                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2105                 write_extent_buffer(leaf, data, ptr, data_size);
2106                 btrfs_mark_buffer_dirty(leaf);
2107         }
2108         btrfs_free_path(path);
2109         return ret;
2110 }
2111
2112 /*
2113  * delete the pointer from a given node.
2114  *
2115  * If the delete empties a node, the node is removed from the tree,
2116  * continuing all the way the root if required.  The root is converted into
2117  * a leaf if all the nodes are emptied.
2118  */
2119 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2120                    struct btrfs_path *path, int level, int slot)
2121 {
2122         struct extent_buffer *parent = path->nodes[level];
2123         u32 nritems;
2124         int ret = 0;
2125         int wret;
2126
2127         nritems = btrfs_header_nritems(parent);
2128         if (slot != nritems -1) {
2129                 memmove_extent_buffer(parent,
2130                               btrfs_node_key_ptr_offset(slot),
2131                               btrfs_node_key_ptr_offset(slot + 1),
2132                               sizeof(struct btrfs_key_ptr) *
2133                               (nritems - slot - 1));
2134         }
2135         nritems--;
2136         btrfs_set_header_nritems(parent, nritems);
2137         if (nritems == 0 && parent == root->node) {
2138                 BUG_ON(btrfs_header_level(root->node) != 1);
2139                 /* just turn the root into a leaf and break */
2140                 btrfs_set_header_level(root->node, 0);
2141         } else if (slot == 0) {
2142                 struct btrfs_disk_key disk_key;
2143
2144                 btrfs_node_key(parent, &disk_key, 0);
2145                 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
2146                 if (wret)
2147                         ret = wret;
2148         }
2149         btrfs_mark_buffer_dirty(parent);
2150         return ret;
2151 }
2152
2153 /*
2154  * delete the item at the leaf level in path.  If that empties
2155  * the leaf, remove it from the tree
2156  */
2157 int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2158                    struct btrfs_path *path)
2159 {
2160         int slot;
2161         struct extent_buffer *leaf;
2162         struct btrfs_item *item;
2163         int doff;
2164         int dsize;
2165         int ret = 0;
2166         int wret;
2167         u32 nritems;
2168
2169         leaf = path->nodes[0];
2170         slot = path->slots[0];
2171         doff = btrfs_item_offset_nr(leaf, slot);
2172         dsize = btrfs_item_size_nr(leaf, slot);
2173         nritems = btrfs_header_nritems(leaf);
2174
2175         if (slot != nritems - 1) {
2176                 int i;
2177                 int data_end = leaf_data_end(root, leaf);
2178
2179                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2180                               data_end + dsize,
2181                               btrfs_leaf_data(leaf) + data_end,
2182                               doff - data_end);
2183
2184                 for (i = slot + 1; i < nritems; i++) {
2185                         u32 ioff;
2186                         item = btrfs_item_nr(leaf, i);
2187                         ioff = btrfs_item_offset(leaf, item);
2188                         btrfs_set_item_offset(leaf, item, ioff + dsize);
2189                 }
2190                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2191                               btrfs_item_nr_offset(slot + 1),
2192                               sizeof(struct btrfs_item) *
2193                               (nritems - slot - 1));
2194         }
2195         btrfs_set_header_nritems(leaf, nritems - 1);
2196         nritems--;
2197
2198         /* delete the leaf if we've emptied it */
2199         if (nritems == 0) {
2200                 if (leaf == root->node) {
2201                         btrfs_set_header_level(leaf, 0);
2202                 } else {
2203                         clean_tree_block(trans, root, leaf);
2204                         wait_on_tree_block_writeback(root, leaf);
2205                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
2206                         if (wret)
2207                                 ret = wret;
2208                         wret = btrfs_free_extent(trans, root,
2209                                                  extent_buffer_blocknr(leaf),
2210                                                  1, 1);
2211                         if (wret)
2212                                 ret = wret;
2213                 }
2214         } else {
2215                 int used = leaf_space_used(leaf, 0, nritems);
2216                 if (slot == 0) {
2217                         struct btrfs_disk_key disk_key;
2218
2219                         btrfs_item_key(leaf, &disk_key, 0);
2220                         wret = fixup_low_keys(trans, root, path,
2221                                               &disk_key, 1);
2222                         if (wret)
2223                                 ret = wret;
2224                 }
2225
2226                 /* delete the leaf if it is mostly empty */
2227                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
2228                         /* push_leaf_left fixes the path.
2229                          * make sure the path still points to our leaf
2230                          * for possible call to del_ptr below
2231                          */
2232                         slot = path->slots[1];
2233                         extent_buffer_get(leaf);
2234
2235                         wret = push_leaf_left(trans, root, path, 1);
2236                         if (wret < 0 && wret != -ENOSPC)
2237                                 ret = wret;
2238
2239                         if (path->nodes[0] == leaf &&
2240                             btrfs_header_nritems(leaf)) {
2241                                 wret = push_leaf_right(trans, root, path, 1);
2242                                 if (wret < 0 && wret != -ENOSPC)
2243                                         ret = wret;
2244                         }
2245
2246                         if (btrfs_header_nritems(leaf) == 0) {
2247                                 u64 blocknr = extent_buffer_blocknr(leaf);
2248
2249                                 clean_tree_block(trans, root, leaf);
2250                                 wait_on_tree_block_writeback(root, leaf);
2251
2252                                 wret = del_ptr(trans, root, path, 1, slot);
2253                                 if (wret)
2254                                         ret = wret;
2255
2256                                 free_extent_buffer(leaf);
2257                                 wret = btrfs_free_extent(trans, root, blocknr,
2258                                                          1, 1);
2259                                 if (wret)
2260                                         ret = wret;
2261                         } else {
2262                                 btrfs_mark_buffer_dirty(leaf);
2263                                 free_extent_buffer(leaf);
2264                         }
2265                 } else {
2266                         btrfs_mark_buffer_dirty(leaf);
2267                 }
2268         }
2269         return ret;
2270 }
2271
2272 /*
2273  * walk up the tree as far as required to find the next leaf.
2274  * returns 0 if it found something or 1 if there are no greater leaves.
2275  * returns < 0 on io errors.
2276  */
2277 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2278 {
2279         int slot;
2280         int level = 1;
2281         u64 blocknr;
2282         struct extent_buffer *c;
2283         struct extent_buffer *next = NULL;
2284
2285         while(level < BTRFS_MAX_LEVEL) {
2286                 if (!path->nodes[level])
2287                         return 1;
2288
2289                 slot = path->slots[level] + 1;
2290                 c = path->nodes[level];
2291                 if (slot >= btrfs_header_nritems(c)) {
2292                         level++;
2293                         continue;
2294                 }
2295
2296                 blocknr = btrfs_node_blockptr(c, slot);
2297                 if (next)
2298                         free_extent_buffer(next);
2299
2300                 if (path->reada)
2301                         reada_for_search(root, path, level, slot);
2302
2303                 next = read_tree_block(root, blocknr);
2304                 break;
2305         }
2306         path->slots[level] = slot;
2307         while(1) {
2308                 level--;
2309                 c = path->nodes[level];
2310                 free_extent_buffer(c);
2311                 path->nodes[level] = next;
2312                 path->slots[level] = 0;
2313                 if (!level)
2314                         break;
2315                 if (path->reada)
2316                         reada_for_search(root, path, level, 0);
2317                 next = read_tree_block(root, btrfs_node_blockptr(next, 0));
2318         }
2319         return 0;
2320 }