3 #include "kerncompat.h"
11 } __attribute__ ((__packed__));
14 u64 fsid[2]; /* FS specific uuid */
21 } __attribute__ ((__packed__));
23 #define NODEPTRS_PER_BLOCK ((BLOCKSIZE - sizeof(struct header)) / \
24 (sizeof(struct key) + sizeof(u64)))
27 #define MAX_LEVEL (1 << LEVEL_BITS)
28 #define node_level(f) ((f) & (MAX_LEVEL-1))
29 #define is_leaf(f) (node_level(f) == 0)
39 } __attribute__ ((__packed__));
41 #define LEAF_DATA_SIZE (BLOCKSIZE - sizeof(struct header))
45 struct item items[LEAF_DATA_SIZE/sizeof(struct item)];
46 u8 data[BLOCKSIZE-sizeof(struct header)];
48 } __attribute__ ((__packed__));
52 struct key keys[NODEPTRS_PER_BLOCK];
53 u64 blockptrs[NODEPTRS_PER_BLOCK];
54 } __attribute__ ((__packed__));
57 struct node *nodes[MAX_LEVEL];
61 static inline void init_path(struct ctree_path *p)
63 memset(p, 0, sizeof(*p));
66 static inline unsigned int leaf_data_end(struct leaf *leaf)
68 unsigned int nr = leaf->header.nritems;
70 return ARRAY_SIZE(leaf->data);
71 return leaf->items[nr-1].offset;
74 static inline int leaf_free_space(struct leaf *leaf)
76 int data_end = leaf_data_end(leaf);
77 int nritems = leaf->header.nritems;
78 char *items_end = (char *)(leaf->items + nritems + 1);
79 return (char *)(leaf->data + data_end) - (char *)items_end;
82 int comp_keys(struct key *k1, struct key *k2)
84 if (k1->objectid > k2->objectid)
86 if (k1->objectid < k2->objectid)
88 if (k1->flags > k2->flags)
90 if (k1->flags < k2->flags)
92 if (k1->offset > k2->offset)
94 if (k1->offset < k2->offset)
98 int generic_bin_search(char *p, int item_size, struct key *key,
108 mid = (low + high) / 2;
109 tmp = (struct key *)(p + mid * item_size);
110 ret = comp_keys(tmp, key);
125 int bin_search(struct node *c, struct key *key, int *slot)
127 if (is_leaf(c->header.flags)) {
128 struct leaf *l = (struct leaf *)c;
129 return generic_bin_search((void *)l->items, sizeof(struct item),
130 key, c->header.nritems, slot);
132 return generic_bin_search((void *)c->keys, sizeof(struct key),
133 key, c->header.nritems, slot);
138 void *read_block(u64 blocknum)
140 return (void *)blocknum;
143 int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p)
145 struct node *c = root->node;
150 level = node_level(c->header.flags);
152 ret = bin_search(c, key, &slot);
153 if (!is_leaf(c->header.flags)) {
156 p->slots[level] = slot;
157 c = read_block(c->blockptrs[slot]);
160 p->slots[level] = slot;
167 static void fixup_low_keys(struct ctree_path *path, struct key *key,
171 /* adjust the pointers going up the tree */
172 for (i = level; i < MAX_LEVEL; i++) {
173 struct node *t = path->nodes[i];
174 int tslot = path->slots[i];
177 memcpy(t->keys + tslot, key, sizeof(*key));
183 int __insert_ptr(struct ctree_root *root,
184 struct ctree_path *path, struct key *key,
185 u64 blocknr, int slot, int level)
189 struct key *lower_key;
191 /* need a new root */
192 if (!path->nodes[level]) {
193 c = malloc(sizeof(struct node));
194 memset(c, 0, sizeof(c));
195 c->header.nritems = 2;
196 c->header.flags = node_level(level);
197 lower = path->nodes[level-1];
198 if (is_leaf(lower->header.flags))
199 lower_key = &((struct leaf *)lower)->items[0].key;
201 lower_key = lower->keys;
202 memcpy(c->keys, lower_key, sizeof(struct key));
203 memcpy(c->keys + 1, key, sizeof(struct key));
204 c->blockptrs[0] = (u64)lower;
205 c->blockptrs[1] = blocknr;
207 path->nodes[level] = c;
208 path->slots[level] = 0;
209 if (c->keys[1].objectid == 0)
213 lower = path->nodes[level];
214 nritems = lower->header.nritems;
217 if (nritems == NODEPTRS_PER_BLOCK)
219 if (slot != nritems) {
220 memmove(lower->keys + slot + 1, lower->keys + slot,
221 (nritems - slot) * sizeof(struct key));
222 memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
223 (nritems - slot) * sizeof(u64));
225 memcpy(lower->keys + slot, key, sizeof(struct key));
226 lower->blockptrs[slot] = blocknr;
227 lower->header.nritems++;
228 if (lower->keys[1].objectid == 0)
233 int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
242 if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
244 slot = path->slots[level + 1];
248 left = read_block(path->nodes[level + 1]->blockptrs[slot - 1]);
249 right = path->nodes[level];
250 left_nritems = left->header.nritems;
251 right_nritems = right->header.nritems;
252 push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
256 if (right_nritems < push_items)
257 push_items = right_nritems;
258 memcpy(left->keys + left_nritems, right->keys,
259 push_items * sizeof(struct key));
260 memcpy(left->blockptrs + left_nritems, right->blockptrs,
261 push_items * sizeof(u64));
262 memmove(right->keys, right->keys + push_items,
263 (right_nritems - push_items) * sizeof(struct key));
264 memmove(right->blockptrs, right->blockptrs + push_items,
265 (right_nritems - push_items) * sizeof(u64));
266 right->header.nritems -= push_items;
267 left->header.nritems += push_items;
269 /* adjust the pointers going up the tree */
270 fixup_low_keys(path, right->keys, level + 1);
272 /* then fixup the leaf pointer in the path */
273 if (path->slots[level] < push_items) {
274 path->slots[level] += left_nritems;
275 path->nodes[level] = (struct node*)left;
276 path->slots[level + 1] -= 1;
278 path->slots[level] -= push_items;
283 int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
292 if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
294 slot = path->slots[level + 1];
295 if (slot == NODEPTRS_PER_BLOCK - 1)
298 if (slot >= path->nodes[level + 1]->header.nritems -1)
301 dst = read_block(path->nodes[level + 1]->blockptrs[slot + 1]);
302 src = path->nodes[level];
303 dst_nritems = dst->header.nritems;
304 src_nritems = src->header.nritems;
305 push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
309 if (src_nritems < push_items)
310 push_items = src_nritems;
311 memmove(dst->keys + push_items, dst->keys,
312 dst_nritems * sizeof(struct key));
313 memcpy(dst->keys, src->keys + src_nritems - push_items,
314 push_items * sizeof(struct key));
316 memmove(dst->blockptrs + push_items, dst->blockptrs,
317 dst_nritems * sizeof(u64));
318 memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
319 push_items * sizeof(u64));
321 src->header.nritems -= push_items;
322 dst->header.nritems += push_items;
324 /* adjust the pointers going up the tree */
325 memcpy(path->nodes[level + 1]->keys + path->slots[level + 1] + 1,
326 dst->keys, sizeof(struct key));
327 /* then fixup the leaf pointer in the path */
328 if (path->slots[level] >= src->header.nritems) {
329 path->slots[level] -= src->header.nritems;
330 path->nodes[level] = (struct node*)dst;
331 path->slots[level + 1] += 1;
336 int insert_ptr(struct ctree_root *root,
337 struct ctree_path *path, struct key *key,
338 u64 blocknr, int level)
340 struct node *c = path->nodes[level];
342 struct node *bal[MAX_LEVEL];
343 int bal_level = level;
347 memset(bal, 0, ARRAY_SIZE(bal));
348 while(c && c->header.nritems == NODEPTRS_PER_BLOCK) {
349 if (push_node_left(root, path,
350 node_level(c->header.flags)) == 0)
352 if (push_node_right(root, path,
353 node_level(c->header.flags)) == 0)
355 bal_start = bal_level;
356 if (bal_level == MAX_LEVEL - 1)
358 b = malloc(sizeof(struct node));
359 b->header.flags = c->header.flags;
360 mid = (c->header.nritems + 1) / 2;
361 memcpy(b->keys, c->keys + mid,
362 (c->header.nritems - mid) * sizeof(struct key));
363 memcpy(b->blockptrs, c->blockptrs + mid,
364 (c->header.nritems - mid) * sizeof(u64));
365 b->header.nritems = c->header.nritems - mid;
366 c->header.nritems = mid;
368 if (bal_level == MAX_LEVEL - 1)
371 c = path->nodes[bal_level];
373 while(bal_start > 0) {
375 c = path->nodes[bal_start];
376 __insert_ptr(root, path, b->keys, (u64)b,
377 path->slots[bal_start + 1] + 1, bal_start + 1);
378 if (path->slots[bal_start] >= c->header.nritems) {
379 path->slots[bal_start] -= c->header.nritems;
380 path->nodes[bal_start] = b;
381 path->slots[bal_start + 1] += 1;
387 return __insert_ptr(root, path, key, blocknr, path->slots[level] + 1,
391 int leaf_space_used(struct leaf *l, int start, int nr)
394 int end = start + nr - 1;
398 data_len = l->items[start].offset + l->items[start].size;
399 data_len = data_len - l->items[end].offset;
400 data_len += sizeof(struct item) * nr;
404 int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
407 struct leaf *right = (struct leaf *)path->nodes[0];
415 int old_left_nritems;
417 slot = path->slots[1];
421 if (!path->nodes[1]) {
424 left = read_block(path->nodes[1]->blockptrs[slot - 1]);
425 free_space = leaf_free_space(left);
426 if (free_space < data_size + sizeof(struct item)) {
429 for (i = 0; i < right->header.nritems; i++) {
430 item = right->items + i;
431 if (path->slots[0] == i)
432 push_space += data_size + sizeof(*item);
433 if (item->size + sizeof(*item) + push_space > free_space)
436 push_space += item->size + sizeof(*item);
438 if (push_items == 0) {
441 /* push data from right to left */
442 memcpy(left->items + left->header.nritems,
443 right->items, push_items * sizeof(struct item));
444 push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
445 memcpy(left->data + leaf_data_end(left) - push_space,
446 right->data + right->items[push_items - 1].offset,
448 old_left_nritems = left->header.nritems;
449 for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
450 left->items[i].offset -= LEAF_DATA_SIZE -
451 left->items[old_left_nritems -1].offset;
453 left->header.nritems += push_items;
455 /* fixup right node */
456 push_space = right->items[push_items-1].offset - leaf_data_end(right);
457 memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
458 leaf_data_end(right), push_space);
459 memmove(right->items, right->items + push_items,
460 (right->header.nritems - push_items) * sizeof(struct item));
461 right->header.nritems -= push_items;
462 push_space = LEAF_DATA_SIZE;
463 for (i = 0; i < right->header.nritems; i++) {
464 right->items[i].offset = push_space - right->items[i].size;
465 push_space = right->items[i].offset;
467 fixup_low_keys(path, &right->items[0].key, 1);
469 /* then fixup the leaf pointer in the path */
470 if (path->slots[0] < push_items) {
471 path->slots[0] += old_left_nritems;
472 path->nodes[0] = (struct node*)left;
475 path->slots[0] -= push_items;
480 int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
482 struct leaf *l = (struct leaf *)path->nodes[0];
483 int nritems = l->header.nritems;
484 int mid = (nritems + 1)/ 2;
485 int slot = path->slots[0];
487 int space_needed = data_size + sizeof(struct item);
493 if (push_leaf_left(root, path, data_size) == 0) {
496 right = malloc(sizeof(struct leaf));
497 memset(right, 0, sizeof(*right));
499 if (leaf_space_used(l, mid, nritems - mid) + space_needed >
503 if (leaf_space_used(l, 0, mid + 1) + space_needed >
507 right->header.nritems = nritems - mid;
508 data_copy_size = l->items[mid].offset + l->items[mid].size -
510 memcpy(right->items, l->items + mid,
511 (nritems - mid) * sizeof(struct item));
512 memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
513 l->data + leaf_data_end(l), data_copy_size);
514 rt_data_off = LEAF_DATA_SIZE -
515 (l->items[mid].offset + l->items[mid].size);
516 for (i = 0; i < right->header.nritems; i++) {
517 right->items[i].offset += rt_data_off;
519 l->header.nritems = mid;
520 ret = insert_ptr(root, path, &right->items[0].key,
523 path->nodes[0] = (struct node *)right;
524 path->slots[0] -= mid;
530 int insert_item(struct ctree_root *root, struct key *key,
531 void *data, int data_size)
536 unsigned int nritems;
537 unsigned int data_end;
538 struct ctree_path path;
541 ret = search_slot(root, key, &path);
545 leaf = (struct leaf *)path.nodes[0];
546 if (leaf_free_space(leaf) < sizeof(struct item) + data_size)
547 split_leaf(root, &path, data_size);
548 leaf = (struct leaf *)path.nodes[0];
549 nritems = leaf->header.nritems;
550 data_end = leaf_data_end(leaf);
551 if (leaf_free_space(leaf) < sizeof(struct item) + data_size)
554 slot = path.slots[0];
556 fixup_low_keys(&path, key, 1);
557 if (slot != nritems) {
559 unsigned int old_data = leaf->items[slot].offset +
560 leaf->items[slot].size;
563 * item0..itemN ... dataN.offset..dataN.size .. data0.size
565 /* first correct the data pointers */
566 for (i = slot; i < nritems; i++)
567 leaf->items[i].offset -= data_size;
569 /* shift the items */
570 memmove(leaf->items + slot + 1, leaf->items + slot,
571 (nritems - slot) * sizeof(struct item));
574 memmove(leaf->data + data_end - data_size, leaf->data +
575 data_end, old_data - data_end);
578 memcpy(&leaf->items[slot].key, key, sizeof(struct key));
579 leaf->items[slot].offset = data_end - data_size;
580 leaf->items[slot].size = data_size;
581 memcpy(leaf->data + data_end - data_size, data, data_size);
582 leaf->header.nritems += 1;
583 if (leaf_free_space(leaf) < 0)
588 int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
595 node = path->nodes[level];
598 slot = path->slots[level];
599 nritems = node->header.nritems;
601 if (slot != nritems -1) {
602 memmove(node->keys + slot, node->keys + slot + 1,
603 sizeof(struct key) * (nritems - slot - 1));
604 memmove(node->blockptrs + slot,
605 node->blockptrs + slot + 1,
606 sizeof(u64) * (nritems - slot - 1));
608 node->header.nritems--;
609 if (node->header.nritems != 0) {
612 fixup_low_keys(path, node->keys, level + 1);
613 tslot = path->slots[level+1];
614 push_node_left(root, path, level);
615 if (node->header.nritems) {
616 push_node_right(root, path, level);
618 path->slots[level+1] = tslot;
619 if (node->header.nritems)
622 if (node == root->node) {
623 printf("root is now null!\n");
628 if (!path->nodes[level])
635 int del_item(struct ctree_root *root, struct key *key)
640 struct ctree_path path;
645 ret = search_slot(root, key, &path);
649 leaf = (struct leaf *)path.nodes[0];
650 slot = path.slots[0];
651 doff = leaf->items[slot].offset;
652 dsize = leaf->items[slot].size;
654 if (slot != leaf->header.nritems - 1) {
656 int data_end = leaf_data_end(leaf);
657 memmove(leaf->data + data_end + dsize,
658 leaf->data + data_end,
660 for (i = slot + 1; i < leaf->header.nritems; i++)
661 leaf->items[i].offset += dsize;
662 memmove(leaf->items + slot, leaf->items + slot + 1,
663 sizeof(struct item) *
664 (leaf->header.nritems - slot - 1));
666 leaf->header.nritems -= 1;
667 if (leaf->header.nritems == 0) {
669 del_ptr(root, &path, 1);
672 fixup_low_keys(&path, &leaf->items[0].key, 1);
673 if (leaf_space_used(leaf, 0, leaf->header.nritems) <
674 LEAF_DATA_SIZE / 4) {
675 /* push_leaf_left fixes the path.
676 * make sure the path still points to our leaf
677 * for possible call to del_ptr below
679 slot = path.slots[1];
680 push_leaf_left(root, &path, 1);
681 path.slots[1] = slot;
682 if (leaf->header.nritems == 0) {
684 del_ptr(root, &path, 1);
691 void print_leaf(struct leaf *l)
694 int nr = l->header.nritems;
696 printf("leaf %p total ptrs %d free space %d\n", l, nr,
699 for (i = 0 ; i < nr ; i++) {
701 printf("\titem %d key (%lu %u %lu) itemoff %d itemsize %d\n",
703 item->key.objectid, item->key.flags, item->key.offset,
704 item->offset, item->size);
706 printf("\t\titem data %.*s\n", item->size, l->data+item->offset);
710 void print_tree(struct node *c)
717 nr = c->header.nritems;
718 if (is_leaf(c->header.flags)) {
719 print_leaf((struct leaf *)c);
722 printf("node %p level %d total ptrs %d free spc %lu\n", c,
723 node_level(c->header.flags), c->header.nritems,
724 NODEPTRS_PER_BLOCK - c->header.nritems);
726 for (i = 0; i < nr; i++) {
727 printf("\tkey %d (%lu %u %lu) block %lx\n",
729 c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset,
733 for (i = 0; i < nr; i++) {
734 struct node *next = read_block(c->blockptrs[i]);
735 if (is_leaf(next->header.flags) &&
736 node_level(c->header.flags) != 1)
738 if (node_level(next->header.flags) !=
739 node_level(c->header.flags) - 1)
746 /* for testing only */
747 int next_key(int i, int max_key) {
748 return rand() % max_key;
753 struct leaf *first_node = malloc(sizeof(struct leaf));
754 struct ctree_root root;
760 int run_size = 10000000;
761 int max_key = 100000000;
763 struct ctree_path path;
767 root.node = (struct node *)first_node;
768 memset(first_node, 0, sizeof(*first_node));
769 for (i = 0; i < run_size; i++) {
771 num = next_key(i, max_key);
773 sprintf(buf, "string-%d", num);
774 // printf("insert %d\n", num);
778 ret = insert_item(&root, &ins, buf, strlen(buf));
783 for (i = 0; i < run_size; i++) {
784 num = next_key(i, max_key);
789 ret = search_slot(&root, &ins, &path);
791 print_tree(root.node);
792 printf("unable to find %d\n", num);
796 printf("node %p level %d total ptrs %d free spc %lu\n", root.node,
797 node_level(root.node->header.flags), root.node->header.nritems,
798 NODEPTRS_PER_BLOCK - root.node->header.nritems);
799 // print_tree(root.node);
800 printf("all searches good\n");
803 for (i = 0; i < run_size; i++) {
804 num = next_key(i, max_key);
806 del_item(&root, &ins);
808 print_tree(root.node);