Btrfs: Break up ctree.c a little
[linux-2.6-block.git] / fs / btrfs / ctree.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include "kerncompat.h"
4 #include "radix-tree.h"
5 #include "ctree.h"
6 #include "disk-io.h"
7 #include "print-tree.h"
8
9 int split_node(struct ctree_root *root, struct ctree_path *path, int level);
10 int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size);
11 int push_node_left(struct ctree_root *root, struct ctree_path *path, int level);
12 int push_node_right(struct ctree_root *root,
13                     struct ctree_path *path, int level);
14 int del_ptr(struct ctree_root *root, struct ctree_path *path, int level);
15
16 inline void init_path(struct ctree_path *p)
17 {
18         memset(p, 0, sizeof(*p));
19 }
20
21 void release_path(struct ctree_root *root, struct ctree_path *p)
22 {
23         int i;
24         for (i = 0; i < MAX_LEVEL; i++) {
25                 if (!p->nodes[i])
26                         break;
27                 tree_block_release(root, p->nodes[i]);
28         }
29 }
30
31 /*
32  * The leaf data grows from end-to-front in the node.
33  * this returns the address of the start of the last item,
34  * which is the stop of the leaf data stack
35  */
36 static inline unsigned int leaf_data_end(struct leaf *leaf)
37 {
38         unsigned int nr = leaf->header.nritems;
39         if (nr == 0)
40                 return sizeof(leaf->data);
41         return leaf->items[nr-1].offset;
42 }
43
44 /*
45  * The space between the end of the leaf items and
46  * the start of the leaf data.  IOW, how much room
47  * the leaf has left for both items and data
48  */
49 int leaf_free_space(struct leaf *leaf)
50 {
51         int data_end = leaf_data_end(leaf);
52         int nritems = leaf->header.nritems;
53         char *items_end = (char *)(leaf->items + nritems + 1);
54         return (char *)(leaf->data + data_end) - (char *)items_end;
55 }
56
57 /*
58  * compare two keys in a memcmp fashion
59  */
60 int comp_keys(struct key *k1, struct key *k2)
61 {
62         if (k1->objectid > k2->objectid)
63                 return 1;
64         if (k1->objectid < k2->objectid)
65                 return -1;
66         if (k1->flags > k2->flags)
67                 return 1;
68         if (k1->flags < k2->flags)
69                 return -1;
70         if (k1->offset > k2->offset)
71                 return 1;
72         if (k1->offset < k2->offset)
73                 return -1;
74         return 0;
75 }
76
/*
 * Binary search for 'key' in the sorted array starting at p.  Array
 * elements are item_size bytes apart, each element begins with a
 * struct key, and there are 'max' elements.
 *
 * Returns 0 with *slot set to the matching index when the key is found.
 * Otherwise returns 1 with *slot set to the index where the key would
 * have to be inserted; that index is 'max' when the key is bigger than
 * every key in the array.
 */
int generic_bin_search(char *p, int item_size, struct key *key,
                       int max, int *slot)
{
        int low = 0;
        int high = max;

        while (low < high) {
                /* computed this way so low + high cannot overflow */
                int mid = low + (high - low) / 2;
                struct key *probe = (struct key *)(p + mid * item_size);
                int cmp = comp_keys(probe, key);

                if (cmp == 0) {
                        *slot = mid;
                        return 0;
                }
                if (cmp < 0)
                        low = mid + 1;
                else
                        high = mid;
        }
        *slot = low;
        return 1;
}
112
113 int bin_search(struct node *c, struct key *key, int *slot)
114 {
115         if (is_leaf(c->header.flags)) {
116                 struct leaf *l = (struct leaf *)c;
117                 return generic_bin_search((void *)l->items, sizeof(struct item),
118                                           key, c->header.nritems, slot);
119         } else {
120                 return generic_bin_search((void *)c->keys, sizeof(struct key),
121                                           key, c->header.nritems, slot);
122         }
123         return -1;
124 }
125
126 /*
127  * look for key in the tree.  path is filled in with nodes along the way
128  * if key is found, we return zero and you can find the item in the leaf
129  * level of the path (level 0)
130  *
131  * If the key isn't found, the path points to the slot where it should
132  * be inserted.
133  */
134 int search_slot(struct ctree_root *root, struct key *key,
135                 struct ctree_path *p, int ins_len)
136 {
137         struct tree_buffer *b = root->node;
138         struct node *c;
139         int slot;
140         int ret;
141         int level;
142
143         b->count++;
144         while (b) {
145                 c = &b->node;
146                 level = node_level(c->header.flags);
147                 p->nodes[level] = b;
148                 ret = bin_search(c, key, &slot);
149                 if (!is_leaf(c->header.flags)) {
150                         if (ret && slot > 0)
151                                 slot -= 1;
152                         p->slots[level] = slot;
153                         if (ins_len > 0 &&
154                             c->header.nritems == NODEPTRS_PER_BLOCK) {
155                                 int sret = split_node(root, p, level);
156                                 BUG_ON(sret > 0);
157                                 if (sret)
158                                         return sret;
159                                 b = p->nodes[level];
160                                 c = &b->node;
161                                 slot = p->slots[level];
162                         } else if (ins_len < 0 &&
163                                    c->header.nritems <= NODEPTRS_PER_BLOCK/4) {
164                                 u64 blocknr = b->blocknr;
165                                 slot = p->slots[level +1];
166                                 b->count++;
167                                 if (push_node_left(root, p, level))
168                                         push_node_right(root, p, level);
169                                 if (c->header.nritems == 0 &&
170                                     level < MAX_LEVEL - 1 &&
171                                     p->nodes[level + 1]) {
172                                         int tslot = p->slots[level + 1];
173
174                                         p->slots[level + 1] = slot;
175                                         del_ptr(root, p, level + 1);
176                                         p->slots[level + 1] = tslot;
177                                         tree_block_release(root, b);
178                                         free_extent(root, blocknr, 1);
179                                 } else {
180                                         tree_block_release(root, b);
181                                 }
182                                 b = p->nodes[level];
183                                 c = &b->node;
184                                 slot = p->slots[level];
185                         }
186                         b = read_tree_block(root, c->blockptrs[slot]);
187                         continue;
188                 } else {
189                         struct leaf *l = (struct leaf *)c;
190                         p->slots[level] = slot;
191                         if (ins_len > 0 && leaf_free_space(l) <
192                             sizeof(struct item) + ins_len) {
193                                 int sret = split_leaf(root, p, ins_len);
194                                 BUG_ON(sret > 0);
195                                 if (sret)
196                                         return sret;
197                         }
198                         return ret;
199                 }
200         }
201         return -1;
202 }
203
204 /*
205  * adjust the pointers going up the tree, starting at level
206  * making sure the right key of each node is points to 'key'.
207  * This is used after shifting pointers to the left, so it stops
208  * fixing up pointers when a given leaf/node is not in slot 0 of the
209  * higher levels
210  */
211 static void fixup_low_keys(struct ctree_root *root,
212                            struct ctree_path *path, struct key *key,
213                            int level)
214 {
215         int i;
216         for (i = level; i < MAX_LEVEL; i++) {
217                 struct node *t;
218                 int tslot = path->slots[i];
219                 if (!path->nodes[i])
220                         break;
221                 t = &path->nodes[i]->node;
222                 memcpy(t->keys + tslot, key, sizeof(*key));
223                 write_tree_block(root, path->nodes[i]);
224                 if (tslot != 0)
225                         break;
226         }
227 }
228
229 /*
230  * try to push data from one node into the next node left in the
231  * tree.  The src node is found at specified level in the path.
232  * If some bytes were pushed, return 0, otherwise return 1.
233  *
234  * Lower nodes/leaves in the path are not touched, higher nodes may
235  * be modified to reflect the push.
236  *
237  * The path is altered to reflect the push.
238  */
239 int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
240 {
241         int slot;
242         struct node *left;
243         struct node *right;
244         int push_items = 0;
245         int left_nritems;
246         int right_nritems;
247         struct tree_buffer *t;
248         struct tree_buffer *right_buf;
249
250         if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
251                 return 1;
252         slot = path->slots[level + 1];
253         if (slot == 0)
254                 return 1;
255
256         t = read_tree_block(root,
257                             path->nodes[level + 1]->node.blockptrs[slot - 1]);
258         left = &t->node;
259         right_buf = path->nodes[level];
260         right = &right_buf->node;
261         left_nritems = left->header.nritems;
262         right_nritems = right->header.nritems;
263         push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
264         if (push_items <= 0) {
265                 tree_block_release(root, t);
266                 return 1;
267         }
268
269         if (right_nritems < push_items)
270                 push_items = right_nritems;
271         memcpy(left->keys + left_nritems, right->keys,
272                 push_items * sizeof(struct key));
273         memcpy(left->blockptrs + left_nritems, right->blockptrs,
274                 push_items * sizeof(u64));
275         memmove(right->keys, right->keys + push_items,
276                 (right_nritems - push_items) * sizeof(struct key));
277         memmove(right->blockptrs, right->blockptrs + push_items,
278                 (right_nritems - push_items) * sizeof(u64));
279         right->header.nritems -= push_items;
280         left->header.nritems += push_items;
281
282         /* adjust the pointers going up the tree */
283         fixup_low_keys(root, path, right->keys, level + 1);
284
285         write_tree_block(root, t);
286         write_tree_block(root, right_buf);
287
288         /* then fixup the leaf pointer in the path */
289         if (path->slots[level] < push_items) {
290                 path->slots[level] += left_nritems;
291                 tree_block_release(root, path->nodes[level]);
292                 path->nodes[level] = t;
293                 path->slots[level + 1] -= 1;
294         } else {
295                 path->slots[level] -= push_items;
296                 tree_block_release(root, t);
297         }
298         return 0;
299 }
300
301 /*
302  * try to push data from one node into the next node right in the
303  * tree.  The src node is found at specified level in the path.
304  * If some bytes were pushed, return 0, otherwise return 1.
305  *
306  * Lower nodes/leaves in the path are not touched, higher nodes may
307  * be modified to reflect the push.
308  *
309  * The path is altered to reflect the push.
310  */
311 int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
312 {
313         int slot;
314         struct tree_buffer *t;
315         struct tree_buffer *src_buffer;
316         struct node *dst;
317         struct node *src;
318         int push_items = 0;
319         int dst_nritems;
320         int src_nritems;
321
322         /* can't push from the root */
323         if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
324                 return 1;
325
326         /* only try to push inside the node higher up */
327         slot = path->slots[level + 1];
328         if (slot == NODEPTRS_PER_BLOCK - 1)
329                 return 1;
330
331         if (slot >= path->nodes[level + 1]->node.header.nritems -1)
332                 return 1;
333
334         t = read_tree_block(root,
335                             path->nodes[level + 1]->node.blockptrs[slot + 1]);
336         dst = &t->node;
337         src_buffer = path->nodes[level];
338         src = &src_buffer->node;
339         dst_nritems = dst->header.nritems;
340         src_nritems = src->header.nritems;
341         push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
342         if (push_items <= 0) {
343                 tree_block_release(root, t);
344                 return 1;
345         }
346
347         if (src_nritems < push_items)
348                 push_items = src_nritems;
349         memmove(dst->keys + push_items, dst->keys,
350                 dst_nritems * sizeof(struct key));
351         memcpy(dst->keys, src->keys + src_nritems - push_items,
352                 push_items * sizeof(struct key));
353
354         memmove(dst->blockptrs + push_items, dst->blockptrs,
355                 dst_nritems * sizeof(u64));
356         memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
357                 push_items * sizeof(u64));
358
359         src->header.nritems -= push_items;
360         dst->header.nritems += push_items;
361
362         /* adjust the pointers going up the tree */
363         memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
364                 dst->keys, sizeof(struct key));
365
366         write_tree_block(root, path->nodes[level + 1]);
367         write_tree_block(root, t);
368         write_tree_block(root, src_buffer);
369
370         /* then fixup the pointers in the path */
371         if (path->slots[level] >= src->header.nritems) {
372                 path->slots[level] -= src->header.nritems;
373                 tree_block_release(root, path->nodes[level]);
374                 path->nodes[level] = t;
375                 path->slots[level + 1] += 1;
376         } else {
377                 tree_block_release(root, t);
378         }
379         return 0;
380 }
381
382 static int insert_new_root(struct ctree_root *root,
383                            struct ctree_path *path, int level)
384 {
385         struct tree_buffer *t;
386         struct node *lower;
387         struct node *c;
388         struct key *lower_key;
389
390         BUG_ON(path->nodes[level]);
391         BUG_ON(path->nodes[level-1] != root->node);
392
393         t = alloc_free_block(root);
394         c = &t->node;
395         memset(c, 0, sizeof(c));
396         c->header.nritems = 1;
397         c->header.flags = node_level(level);
398         c->header.blocknr = t->blocknr;
399         c->header.parentid = root->node->node.header.parentid;
400         lower = &path->nodes[level-1]->node;
401         if (is_leaf(lower->header.flags))
402                 lower_key = &((struct leaf *)lower)->items[0].key;
403         else
404                 lower_key = lower->keys;
405         memcpy(c->keys, lower_key, sizeof(struct key));
406         c->blockptrs[0] = path->nodes[level-1]->blocknr;
407         /* the super has an extra ref to root->node */
408         tree_block_release(root, root->node);
409         root->node = t;
410         t->count++;
411         write_tree_block(root, t);
412         path->nodes[level] = t;
413         path->slots[level] = 0;
414         return 0;
415 }
416
417 /*
418  * worker function to insert a single pointer in a node.
419  * the node should have enough room for the pointer already
420  * slot and level indicate where you want the key to go, and
421  * blocknr is the block the key points to.
422  */
423 int insert_ptr(struct ctree_root *root,
424                 struct ctree_path *path, struct key *key,
425                 u64 blocknr, int slot, int level)
426 {
427         struct node *lower;
428         int nritems;
429
430         BUG_ON(!path->nodes[level]);
431         lower = &path->nodes[level]->node;
432         nritems = lower->header.nritems;
433         if (slot > nritems)
434                 BUG();
435         if (nritems == NODEPTRS_PER_BLOCK)
436                 BUG();
437         if (slot != nritems) {
438                 memmove(lower->keys + slot + 1, lower->keys + slot,
439                         (nritems - slot) * sizeof(struct key));
440                 memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
441                         (nritems - slot) * sizeof(u64));
442         }
443         memcpy(lower->keys + slot, key, sizeof(struct key));
444         lower->blockptrs[slot] = blocknr;
445         lower->header.nritems++;
446         if (lower->keys[1].objectid == 0)
447                         BUG();
448         write_tree_block(root, path->nodes[level]);
449         return 0;
450 }
451
452 int split_node(struct ctree_root *root, struct ctree_path *path, int level)
453 {
454         struct tree_buffer *t;
455         struct node *c;
456         struct tree_buffer *split_buffer;
457         struct node *split;
458         int mid;
459         int ret;
460
461         ret = push_node_left(root, path, level);
462         if (!ret)
463                 return 0;
464         ret = push_node_right(root, path, level);
465         if (!ret)
466                 return 0;
467         t = path->nodes[level];
468         c = &t->node;
469         if (t == root->node) {
470                 /* trying to split the root, lets make a new one */
471                 ret = insert_new_root(root, path, level + 1);
472                 if (ret)
473                         return ret;
474         }
475         split_buffer = alloc_free_block(root);
476         split = &split_buffer->node;
477         split->header.flags = c->header.flags;
478         split->header.blocknr = split_buffer->blocknr;
479         split->header.parentid = root->node->node.header.parentid;
480         mid = (c->header.nritems + 1) / 2;
481         memcpy(split->keys, c->keys + mid,
482                 (c->header.nritems - mid) * sizeof(struct key));
483         memcpy(split->blockptrs, c->blockptrs + mid,
484                 (c->header.nritems - mid) * sizeof(u64));
485         split->header.nritems = c->header.nritems - mid;
486         c->header.nritems = mid;
487         write_tree_block(root, t);
488         write_tree_block(root, split_buffer);
489         insert_ptr(root, path, split->keys, split_buffer->blocknr,
490                      path->slots[level + 1] + 1, level + 1);
491         if (path->slots[level] >= mid) {
492                 path->slots[level] -= mid;
493                 tree_block_release(root, t);
494                 path->nodes[level] = split_buffer;
495                 path->slots[level + 1] += 1;
496         } else {
497                 tree_block_release(root, split_buffer);
498         }
499         return 0;
500 }
501
502 /*
503  * how many bytes are required to store the items in a leaf.  start
504  * and nr indicate which items in the leaf to check.  This totals up the
505  * space used both by the item structs and the item data
506  */
507 int leaf_space_used(struct leaf *l, int start, int nr)
508 {
509         int data_len;
510         int end = start + nr - 1;
511
512         if (!nr)
513                 return 0;
514         data_len = l->items[start].offset + l->items[start].size;
515         data_len = data_len - l->items[end].offset;
516         data_len += sizeof(struct item) * nr;
517         return data_len;
518 }
519
520 /*
521  * push some data in the path leaf to the left, trying to free up at
522  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
523  */
524 int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
525                    int data_size)
526 {
527         struct tree_buffer *right_buf = path->nodes[0];
528         struct leaf *right = &right_buf->leaf;
529         struct tree_buffer *t;
530         struct leaf *left;
531         int slot;
532         int i;
533         int free_space;
534         int push_space = 0;
535         int push_items = 0;
536         struct item *item;
537         int old_left_nritems;
538
539         slot = path->slots[1];
540         if (slot == 0) {
541                 return 1;
542         }
543         if (!path->nodes[1]) {
544                 return 1;
545         }
546         t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
547         left = &t->leaf;
548         free_space = leaf_free_space(left);
549         if (free_space < data_size + sizeof(struct item)) {
550                 tree_block_release(root, t);
551                 return 1;
552         }
553         for (i = 0; i < right->header.nritems; i++) {
554                 item = right->items + i;
555                 if (path->slots[0] == i)
556                         push_space += data_size + sizeof(*item);
557                 if (item->size + sizeof(*item) + push_space > free_space)
558                         break;
559                 push_items++;
560                 push_space += item->size + sizeof(*item);
561         }
562         if (push_items == 0) {
563                 tree_block_release(root, t);
564                 return 1;
565         }
566         /* push data from right to left */
567         memcpy(left->items + left->header.nritems,
568                 right->items, push_items * sizeof(struct item));
569         push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
570         memcpy(left->data + leaf_data_end(left) - push_space,
571                 right->data + right->items[push_items - 1].offset,
572                 push_space);
573         old_left_nritems = left->header.nritems;
574         BUG_ON(old_left_nritems < 0);
575
576         for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
577                 left->items[i].offset -= LEAF_DATA_SIZE -
578                         left->items[old_left_nritems -1].offset;
579         }
580         left->header.nritems += push_items;
581
582         /* fixup right node */
583         push_space = right->items[push_items-1].offset - leaf_data_end(right);
584         memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
585                 leaf_data_end(right), push_space);
586         memmove(right->items, right->items + push_items,
587                 (right->header.nritems - push_items) * sizeof(struct item));
588         right->header.nritems -= push_items;
589         push_space = LEAF_DATA_SIZE;
590
591         for (i = 0; i < right->header.nritems; i++) {
592                 right->items[i].offset = push_space - right->items[i].size;
593                 push_space = right->items[i].offset;
594         }
595
596         write_tree_block(root, t);
597         write_tree_block(root, right_buf);
598
599         fixup_low_keys(root, path, &right->items[0].key, 1);
600
601         /* then fixup the leaf pointer in the path */
602         if (path->slots[0] < push_items) {
603                 path->slots[0] += old_left_nritems;
604                 tree_block_release(root, path->nodes[0]);
605                 path->nodes[0] = t;
606                 path->slots[1] -= 1;
607         } else {
608                 tree_block_release(root, t);
609                 path->slots[0] -= push_items;
610         }
611         BUG_ON(path->slots[0] < 0);
612         return 0;
613 }
614
615 /*
616  * split the path's leaf in two, making sure there is at least data_size
617  * available for the resulting leaf level of the path.
618  */
619 int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
620 {
621         struct tree_buffer *l_buf = path->nodes[0];
622         struct leaf *l = &l_buf->leaf;
623         int nritems;
624         int mid;
625         int slot;
626         struct leaf *right;
627         struct tree_buffer *right_buffer;
628         int space_needed = data_size + sizeof(struct item);
629         int data_copy_size;
630         int rt_data_off;
631         int i;
632         int ret;
633
634         if (push_leaf_left(root, path, data_size) == 0) {
635                 l_buf = path->nodes[0];
636                 l = &l_buf->leaf;
637                 if (leaf_free_space(l) >= sizeof(struct item) + data_size)
638                         return 0;
639         }
640         if (!path->nodes[1]) {
641                 ret = insert_new_root(root, path, 1);
642                 if (ret)
643                         return ret;
644         }
645         slot = path->slots[0];
646         nritems = l->header.nritems;
647         mid = (nritems + 1)/ 2;
648
649         right_buffer = alloc_free_block(root);
650         BUG_ON(!right_buffer);
651         BUG_ON(mid == nritems);
652         right = &right_buffer->leaf;
653         memset(right, 0, sizeof(*right));
654         if (mid <= slot) {
655                 if (leaf_space_used(l, mid, nritems - mid) + space_needed >
656                         LEAF_DATA_SIZE)
657                         BUG();
658         } else {
659                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
660                         LEAF_DATA_SIZE)
661                         BUG();
662         }
663         right->header.nritems = nritems - mid;
664         right->header.blocknr = right_buffer->blocknr;
665         right->header.flags = node_level(0);
666         right->header.parentid = root->node->node.header.parentid;
667         data_copy_size = l->items[mid].offset + l->items[mid].size -
668                          leaf_data_end(l);
669         memcpy(right->items, l->items + mid,
670                (nritems - mid) * sizeof(struct item));
671         memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
672                l->data + leaf_data_end(l), data_copy_size);
673         rt_data_off = LEAF_DATA_SIZE -
674                      (l->items[mid].offset + l->items[mid].size);
675
676         for (i = 0; i < right->header.nritems; i++)
677                 right->items[i].offset += rt_data_off;
678
679         l->header.nritems = mid;
680         ret = insert_ptr(root, path, &right->items[0].key,
681                           right_buffer->blocknr, path->slots[1] + 1, 1);
682         write_tree_block(root, right_buffer);
683         write_tree_block(root, l_buf);
684
685         BUG_ON(path->slots[0] != slot);
686         if (mid <= slot) {
687                 tree_block_release(root, path->nodes[0]);
688                 path->nodes[0] = right_buffer;
689                 path->slots[0] -= mid;
690                 path->slots[1] += 1;
691         } else
692                 tree_block_release(root, right_buffer);
693         BUG_ON(path->slots[0] < 0);
694         return ret;
695 }
696
697 /*
698  * Given a key and some data, insert an item into the tree.
699  * This does all the path init required, making room in the tree if needed.
700  */
701 int insert_item(struct ctree_root *root, struct key *key,
702                           void *data, int data_size)
703 {
704         int ret;
705         int slot;
706         int slot_orig;
707         struct leaf *leaf;
708         struct tree_buffer *leaf_buf;
709         unsigned int nritems;
710         unsigned int data_end;
711         struct ctree_path path;
712
713         /* create a root if there isn't one */
714         if (!root->node)
715                 BUG();
716         init_path(&path);
717         ret = search_slot(root, key, &path, data_size);
718         if (ret == 0) {
719                 release_path(root, &path);
720                 return -EEXIST;
721         }
722
723         slot_orig = path.slots[0];
724         leaf_buf = path.nodes[0];
725         leaf = &leaf_buf->leaf;
726
727         nritems = leaf->header.nritems;
728         data_end = leaf_data_end(leaf);
729
730         if (leaf_free_space(leaf) <  sizeof(struct item) + data_size)
731                 BUG();
732
733         slot = path.slots[0];
734         BUG_ON(slot < 0);
735         if (slot == 0)
736                 fixup_low_keys(root, &path, key, 1);
737         if (slot != nritems) {
738                 int i;
739                 unsigned int old_data = leaf->items[slot].offset +
740                                         leaf->items[slot].size;
741
742                 /*
743                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
744                  */
745                 /* first correct the data pointers */
746                 for (i = slot; i < nritems; i++)
747                         leaf->items[i].offset -= data_size;
748
749                 /* shift the items */
750                 memmove(leaf->items + slot + 1, leaf->items + slot,
751                         (nritems - slot) * sizeof(struct item));
752
753                 /* shift the data */
754                 memmove(leaf->data + data_end - data_size, leaf->data +
755                         data_end, old_data - data_end);
756                 data_end = old_data;
757         }
758         /* copy the new data in */
759         memcpy(&leaf->items[slot].key, key, sizeof(struct key));
760         leaf->items[slot].offset = data_end - data_size;
761         leaf->items[slot].size = data_size;
762         memcpy(leaf->data + data_end - data_size, data, data_size);
763         leaf->header.nritems += 1;
764         write_tree_block(root, leaf_buf);
765         if (leaf_free_space(leaf) < 0)
766                 BUG();
767         release_path(root, &path);
768         return 0;
769 }
770
771 /*
772  * delete the pointer from a given node.
773  *
774  * If the delete empties a node, the node is removed from the tree,
775  * continuing all the way the root if required.  The root is converted into
776  * a leaf if all the nodes are emptied.
777  */
778 int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
779 {
780         int slot;
781         struct tree_buffer *t;
782         struct node *node;
783         int nritems;
784         u64 blocknr;
785
786         while(1) {
787                 t = path->nodes[level];
788                 if (!t)
789                         break;
790                 node = &t->node;
791                 slot = path->slots[level];
792                 nritems = node->header.nritems;
793
794                 if (slot != nritems -1) {
795                         memmove(node->keys + slot, node->keys + slot + 1,
796                                 sizeof(struct key) * (nritems - slot - 1));
797                         memmove(node->blockptrs + slot,
798                                 node->blockptrs + slot + 1,
799                                 sizeof(u64) * (nritems - slot - 1));
800                 }
801                 node->header.nritems--;
802                 write_tree_block(root, t);
803                 blocknr = t->blocknr;
804                 if (node->header.nritems != 0) {
805                         if (slot == 0)
806                                 fixup_low_keys(root, path, node->keys,
807                                                level + 1);
808                         break;
809                 }
810                 if (t == root->node) {
811                         /* just turn the root into a leaf and break */
812                         root->node->node.header.flags = node_level(0);
813                         write_tree_block(root, t);
814                         break;
815                 }
816                 level++;
817                 free_extent(root, blocknr, 1);
818                 if (!path->nodes[level])
819                         BUG();
820         }
821         return 0;
822 }
823
824 /*
825  * delete the item at the leaf level in path.  If that empties
826  * the leaf, remove it from the tree
827  */
828 int del_item(struct ctree_root *root, struct ctree_path *path)
829 {
830         int slot;
831         struct leaf *leaf;
832         struct tree_buffer *leaf_buf;
833         int doff;
834         int dsize;
835
836         leaf_buf = path->nodes[0];
837         leaf = &leaf_buf->leaf;
838         slot = path->slots[0];
839         doff = leaf->items[slot].offset;
840         dsize = leaf->items[slot].size;
841
842         if (slot != leaf->header.nritems - 1) {
843                 int i;
844                 int data_end = leaf_data_end(leaf);
845                 memmove(leaf->data + data_end + dsize,
846                         leaf->data + data_end,
847                         doff - data_end);
848                 for (i = slot + 1; i < leaf->header.nritems; i++)
849                         leaf->items[i].offset += dsize;
850                 memmove(leaf->items + slot, leaf->items + slot + 1,
851                         sizeof(struct item) *
852                         (leaf->header.nritems - slot - 1));
853         }
854         leaf->header.nritems -= 1;
855         /* delete the leaf if we've emptied it */
856         if (leaf->header.nritems == 0) {
857                 if (leaf_buf == root->node) {
858                         leaf->header.flags = node_level(0);
859                         write_tree_block(root, leaf_buf);
860                 } else {
861                         del_ptr(root, path, 1);
862                         free_extent(root, leaf_buf->blocknr, 1);
863                 }
864         } else {
865                 int used = leaf_space_used(leaf, 0, leaf->header.nritems);
866                 if (slot == 0)
867                         fixup_low_keys(root, path, &leaf->items[0].key, 1);
868                 write_tree_block(root, leaf_buf);
869                 /* delete the leaf if it is mostly empty */
870                 if (used < LEAF_DATA_SIZE / 3) {
871                         /* push_leaf_left fixes the path.
872                          * make sure the path still points to our leaf
873                          * for possible call to del_ptr below
874                          */
875                         slot = path->slots[1];
876                         leaf_buf->count++;
877                         push_leaf_left(root, path, 1);
878                         if (leaf->header.nritems == 0) {
879                                 u64 blocknr = leaf_buf->blocknr;
880                                 path->slots[1] = slot;
881                                 del_ptr(root, path, 1);
882                                 tree_block_release(root, leaf_buf);
883                                 free_extent(root, blocknr, 1);
884                         } else {
885                                 tree_block_release(root, leaf_buf);
886                         }
887                 }
888         }
889         return 0;
890 }
891
892 int next_leaf(struct ctree_root *root, struct ctree_path *path)
893 {
894         int slot;
895         int level = 1;
896         u64 blocknr;
897         struct tree_buffer *c;
898         struct tree_buffer *next = NULL;
899
900         while(level < MAX_LEVEL) {
901                 if (!path->nodes[level])
902                         return -1;
903                 slot = path->slots[level] + 1;
904                 c = path->nodes[level];
905                 if (slot >= c->node.header.nritems) {
906                         level++;
907                         continue;
908                 }
909                 blocknr = c->node.blockptrs[slot];
910                 if (next)
911                         tree_block_release(root, next);
912                 next = read_tree_block(root, blocknr);
913                 break;
914         }
915         path->slots[level] = slot;
916         while(1) {
917                 level--;
918                 c = path->nodes[level];
919                 tree_block_release(root, c);
920                 path->nodes[level] = next;
921                 path->slots[level] = 0;
922                 if (!level)
923                         break;
924                 next = read_tree_block(root, next->node.blockptrs[0]);
925         }
926         return 0;
927 }
928
/*
 * for testing only: pick the key to use for iteration i of a test run.
 *
 * Keys come from rand() reduced modulo max_key, so a run can be replayed
 * by calling srand() with the same seed first.  The iteration number is
 * currently unused (returning i instead would give sequential keys).
 */
int next_key(int i, int max_key)
{
        int key;

        (void)i;
        key = rand() % max_key;
        return key;
}
934
935 int main() {
936         struct ctree_root *root;
937         struct key ins;
938         struct key last = { (u64)-1, 0, 0};
939         char *buf;
940         int i;
941         int num;
942         int ret;
943         int run_size = 20000000;
944         int max_key =  100000000;
945         int tree_size = 0;
946         struct ctree_path path;
947         struct ctree_super_block super;
948
949         radix_tree_init();
950
951
952         root = open_ctree("dbfile", &super);
953
954         srand(55);
955         for (i = 0; i < run_size; i++) {
956                 buf = malloc(64);
957                 num = next_key(i, max_key);
958                 // num = i;
959                 sprintf(buf, "string-%d", num);
960                 if (i % 10000 == 0)
961                         printf("insert %d:%d\n", num, i);
962                 ins.objectid = num;
963                 ins.offset = 0;
964                 ins.flags = 0;
965                 ret = insert_item(root, &ins, buf, strlen(buf));
966                 if (!ret)
967                         tree_size++;
968                 free(buf);
969         }
970         write_ctree_super(root, &super);
971         close_ctree(root);
972
973         root = open_ctree("dbfile", &super);
974         printf("starting search\n");
975         srand(55);
976         for (i = 0; i < run_size; i++) {
977                 num = next_key(i, max_key);
978                 ins.objectid = num;
979                 init_path(&path);
980                 if (i % 10000 == 0)
981                         printf("search %d:%d\n", num, i);
982                 ret = search_slot(root, &ins, &path, 0);
983                 if (ret) {
984                         print_tree(root, root->node);
985                         printf("unable to find %d\n", num);
986                         exit(1);
987                 }
988                 release_path(root, &path);
989         }
990         write_ctree_super(root, &super);
991         close_ctree(root);
992         root = open_ctree("dbfile", &super);
993         printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
994                 node_level(root->node->node.header.flags),
995                 root->node->node.header.nritems,
996                 NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
997         printf("all searches good, deleting some items\n");
998         i = 0;
999         srand(55);
1000         for (i = 0 ; i < run_size/4; i++) {
1001                 num = next_key(i, max_key);
1002                 ins.objectid = num;
1003                 init_path(&path);
1004                 ret = search_slot(root, &ins, &path, -1);
1005                 if (!ret) {
1006                         if (i % 10000 == 0)
1007                                 printf("del %d:%d\n", num, i);
1008                         ret = del_item(root, &path);
1009                         if (ret != 0)
1010                                 BUG();
1011                         tree_size--;
1012                 }
1013                 release_path(root, &path);
1014         }
1015         write_ctree_super(root, &super);
1016         close_ctree(root);
1017         root = open_ctree("dbfile", &super);
1018         srand(128);
1019         for (i = 0; i < run_size; i++) {
1020                 buf = malloc(64);
1021                 num = next_key(i, max_key);
1022                 sprintf(buf, "string-%d", num);
1023                 ins.objectid = num;
1024                 if (i % 10000 == 0)
1025                         printf("insert %d:%d\n", num, i);
1026                 ret = insert_item(root, &ins, buf, strlen(buf));
1027                 if (!ret)
1028                         tree_size++;
1029                 free(buf);
1030         }
1031         write_ctree_super(root, &super);
1032         close_ctree(root);
1033         root = open_ctree("dbfile", &super);
1034         srand(128);
1035         printf("starting search2\n");
1036         for (i = 0; i < run_size; i++) {
1037                 num = next_key(i, max_key);
1038                 ins.objectid = num;
1039                 init_path(&path);
1040                 if (i % 10000 == 0)
1041                         printf("search %d:%d\n", num, i);
1042                 ret = search_slot(root, &ins, &path, 0);
1043                 if (ret) {
1044                         print_tree(root, root->node);
1045                         printf("unable to find %d\n", num);
1046                         exit(1);
1047                 }
1048                 release_path(root, &path);
1049         }
1050         printf("starting big long delete run\n");
1051         while(root->node && root->node->node.header.nritems > 0) {
1052                 struct leaf *leaf;
1053                 int slot;
1054                 ins.objectid = (u64)-1;
1055                 init_path(&path);
1056                 ret = search_slot(root, &ins, &path, -1);
1057                 if (ret == 0)
1058                         BUG();
1059
1060                 leaf = &path.nodes[0]->leaf;
1061                 slot = path.slots[0];
1062                 if (slot != leaf->header.nritems)
1063                         BUG();
1064                 while(path.slots[0] > 0) {
1065                         path.slots[0] -= 1;
1066                         slot = path.slots[0];
1067                         leaf = &path.nodes[0]->leaf;
1068
1069                         if (comp_keys(&last, &leaf->items[slot].key) <= 0)
1070                                 BUG();
1071                         memcpy(&last, &leaf->items[slot].key, sizeof(last));
1072                         if (tree_size % 10000 == 0)
1073                                 printf("big del %d:%d\n", tree_size, i);
1074                         ret = del_item(root, &path);
1075                         if (ret != 0) {
1076                                 printf("del_item returned %d\n", ret);
1077                                 BUG();
1078                         }
1079                         tree_size--;
1080                 }
1081                 release_path(root, &path);
1082         }
1083         printf("tree size is now %d\n", tree_size);
1084         printf("map tree\n");
1085         write_ctree_super(root, &super);
1086         close_ctree(root);
1087         return 0;
1088 }