Btrfs: Commenting/cleanup
[linux-block.git] / fs / btrfs / ctree.c
CommitLineData
be0e5c09
CM
1#include <stdio.h>
2#include <stdlib.h>
3#include "kerncompat.h"
eb60ceac
CM
4#include "radix-tree.h"
5#include "ctree.h"
6#include "disk-io.h"
be0e5c09
CM
7
8static inline void init_path(struct ctree_path *p)
9{
10 memset(p, 0, sizeof(*p));
11}
12
eb60ceac
CM
13static void release_path(struct ctree_root *root, struct ctree_path *p)
14{
15 int i;
16 for (i = 0; i < MAX_LEVEL; i++) {
17 if (!p->nodes[i])
18 break;
19 tree_block_release(root, p->nodes[i]);
20 }
21}
22
74123bd7
CM
23/*
24 * The leaf data grows from end-to-front in the node.
25 * this returns the address of the start of the last item,
26 * which is the stop of the leaf data stack
27 */
be0e5c09
CM
28static inline unsigned int leaf_data_end(struct leaf *leaf)
29{
30 unsigned int nr = leaf->header.nritems;
31 if (nr == 0)
32 return ARRAY_SIZE(leaf->data);
33 return leaf->items[nr-1].offset;
34}
35
74123bd7
CM
36/*
37 * The space between the end of the leaf items and
38 * the start of the leaf data. IOW, how much room
39 * the leaf has left for both items and data
40 */
be0e5c09
CM
41static inline int leaf_free_space(struct leaf *leaf)
42{
43 int data_end = leaf_data_end(leaf);
44 int nritems = leaf->header.nritems;
45 char *items_end = (char *)(leaf->items + nritems + 1);
46 return (char *)(leaf->data + data_end) - (char *)items_end;
47}
48
74123bd7
CM
49/*
50 * compare two keys in a memcmp fashion
51 */
be0e5c09
CM
52int comp_keys(struct key *k1, struct key *k2)
53{
54 if (k1->objectid > k2->objectid)
55 return 1;
56 if (k1->objectid < k2->objectid)
57 return -1;
58 if (k1->flags > k2->flags)
59 return 1;
60 if (k1->flags < k2->flags)
61 return -1;
62 if (k1->offset > k2->offset)
63 return 1;
64 if (k1->offset < k2->offset)
65 return -1;
66 return 0;
67}
74123bd7
CM
68
/*
 * binary search over an array of 'max' records starting at p, where
 * each record is item_size bytes long and begins with a struct key.
 *
 * Returns 0 and sets *slot to the matching index when key is present.
 * Otherwise returns 1 and sets *slot to the index where key would be
 * inserted; that index equals max when key sorts after every record.
 */
int generic_bin_search(char *p, int item_size, struct key *key,
		       int max, int *slot)
{
	int lo = 0;
	int hi = max;

	while (lo < hi) {
		int probe = (lo + hi) / 2;
		struct key *cand = (struct key *)(p + probe * item_size);
		int cmp = comp_keys(cand, key);

		if (cmp == 0) {
			*slot = probe;
			return 0;
		}
		if (cmp < 0)
			lo = probe + 1;
		else
			hi = probe;
	}
	*slot = lo;
	return 1;
}
104
105int bin_search(struct node *c, struct key *key, int *slot)
106{
107 if (is_leaf(c->header.flags)) {
108 struct leaf *l = (struct leaf *)c;
109 return generic_bin_search((void *)l->items, sizeof(struct item),
110 key, c->header.nritems, slot);
111 } else {
112 return generic_bin_search((void *)c->keys, sizeof(struct key),
113 key, c->header.nritems, slot);
114 }
115 return -1;
116}
117
74123bd7
CM
118/*
119 * look for key in the tree. path is filled in with nodes along the way
120 * if key is found, we return zero and you can find the item in the leaf
121 * level of the path (level 0)
122 *
123 * If the key isn't found, the path points to the slot where it should
124 * be inserted.
125 */
be0e5c09
CM
126int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p)
127{
eb60ceac
CM
128 struct tree_buffer *b = root->node;
129 struct node *c;
130
be0e5c09
CM
131 int slot;
132 int ret;
133 int level;
eb60ceac
CM
134 b->count++;
135 while (b) {
136 c = &b->node;
be0e5c09 137 level = node_level(c->header.flags);
eb60ceac 138 p->nodes[level] = b;
be0e5c09
CM
139 ret = bin_search(c, key, &slot);
140 if (!is_leaf(c->header.flags)) {
141 if (ret && slot > 0)
142 slot -= 1;
143 p->slots[level] = slot;
eb60ceac 144 b = read_tree_block(root, c->blockptrs[slot]);
be0e5c09
CM
145 continue;
146 } else {
147 p->slots[level] = slot;
148 return ret;
149 }
150 }
151 return -1;
152}
153
74123bd7
CM
154/*
155 * adjust the pointers going up the tree, starting at level
156 * making sure the right key of each node is points to 'key'.
157 * This is used after shifting pointers to the left, so it stops
158 * fixing up pointers when a given leaf/node is not in slot 0 of the
159 * higher levels
160 */
eb60ceac
CM
161static void fixup_low_keys(struct ctree_root *root,
162 struct ctree_path *path, struct key *key,
163 int level)
be0e5c09
CM
164{
165 int i;
be0e5c09 166 for (i = level; i < MAX_LEVEL; i++) {
eb60ceac 167 struct node *t;
be0e5c09 168 int tslot = path->slots[i];
eb60ceac 169 if (!path->nodes[i])
be0e5c09 170 break;
eb60ceac 171 t = &path->nodes[i]->node;
be0e5c09 172 memcpy(t->keys + tslot, key, sizeof(*key));
eb60ceac 173 write_tree_block(root, path->nodes[i]);
be0e5c09
CM
174 if (tslot != 0)
175 break;
176 }
177}
178
74123bd7
CM
179/*
180 * try to push data from one node into the next node left in the
181 * tree. The src node is found at specified level in the path.
182 * If some bytes were pushed, return 0, otherwise return 1.
183 *
184 * Lower nodes/leaves in the path are not touched, higher nodes may
185 * be modified to reflect the push.
186 *
187 * The path is altered to reflect the push.
188 */
be0e5c09
CM
189int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
190{
191 int slot;
192 struct node *left;
193 struct node *right;
194 int push_items = 0;
195 int left_nritems;
196 int right_nritems;
eb60ceac
CM
197 struct tree_buffer *t;
198 struct tree_buffer *right_buf;
be0e5c09
CM
199
200 if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
201 return 1;
202 slot = path->slots[level + 1];
203 if (slot == 0)
204 return 1;
205
eb60ceac
CM
206 t = read_tree_block(root,
207 path->nodes[level + 1]->node.blockptrs[slot - 1]);
208 left = &t->node;
209 right_buf = path->nodes[level];
210 right = &right_buf->node;
be0e5c09
CM
211 left_nritems = left->header.nritems;
212 right_nritems = right->header.nritems;
213 push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
eb60ceac
CM
214 if (push_items <= 0) {
215 tree_block_release(root, t);
be0e5c09 216 return 1;
eb60ceac 217 }
be0e5c09
CM
218
219 if (right_nritems < push_items)
220 push_items = right_nritems;
221 memcpy(left->keys + left_nritems, right->keys,
222 push_items * sizeof(struct key));
223 memcpy(left->blockptrs + left_nritems, right->blockptrs,
224 push_items * sizeof(u64));
225 memmove(right->keys, right->keys + push_items,
226 (right_nritems - push_items) * sizeof(struct key));
227 memmove(right->blockptrs, right->blockptrs + push_items,
228 (right_nritems - push_items) * sizeof(u64));
229 right->header.nritems -= push_items;
230 left->header.nritems += push_items;
231
232 /* adjust the pointers going up the tree */
eb60ceac
CM
233 fixup_low_keys(root, path, right->keys, level + 1);
234
235 write_tree_block(root, t);
236 write_tree_block(root, right_buf);
be0e5c09
CM
237
238 /* then fixup the leaf pointer in the path */
239 if (path->slots[level] < push_items) {
240 path->slots[level] += left_nritems;
eb60ceac
CM
241 tree_block_release(root, path->nodes[level]);
242 path->nodes[level] = t;
be0e5c09
CM
243 path->slots[level + 1] -= 1;
244 } else {
245 path->slots[level] -= push_items;
eb60ceac 246 tree_block_release(root, t);
be0e5c09
CM
247 }
248 return 0;
249}
250
74123bd7
CM
251/*
252 * try to push data from one node into the next node right in the
253 * tree. The src node is found at specified level in the path.
254 * If some bytes were pushed, return 0, otherwise return 1.
255 *
256 * Lower nodes/leaves in the path are not touched, higher nodes may
257 * be modified to reflect the push.
258 *
259 * The path is altered to reflect the push.
260 */
be0e5c09
CM
261int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
262{
263 int slot;
eb60ceac
CM
264 struct tree_buffer *t;
265 struct tree_buffer *src_buffer;
be0e5c09
CM
266 struct node *dst;
267 struct node *src;
268 int push_items = 0;
269 int dst_nritems;
270 int src_nritems;
271
74123bd7 272 /* can't push from the root */
be0e5c09
CM
273 if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
274 return 1;
74123bd7
CM
275
276 /* only try to push inside the node higher up */
be0e5c09
CM
277 slot = path->slots[level + 1];
278 if (slot == NODEPTRS_PER_BLOCK - 1)
279 return 1;
280
eb60ceac 281 if (slot >= path->nodes[level + 1]->node.header.nritems -1)
be0e5c09
CM
282 return 1;
283
eb60ceac
CM
284 t = read_tree_block(root,
285 path->nodes[level + 1]->node.blockptrs[slot + 1]);
286 dst = &t->node;
287 src_buffer = path->nodes[level];
288 src = &src_buffer->node;
be0e5c09
CM
289 dst_nritems = dst->header.nritems;
290 src_nritems = src->header.nritems;
291 push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
eb60ceac
CM
292 if (push_items <= 0) {
293 tree_block_release(root, t);
be0e5c09 294 return 1;
eb60ceac 295 }
be0e5c09
CM
296
297 if (src_nritems < push_items)
298 push_items = src_nritems;
299 memmove(dst->keys + push_items, dst->keys,
300 dst_nritems * sizeof(struct key));
301 memcpy(dst->keys, src->keys + src_nritems - push_items,
302 push_items * sizeof(struct key));
303
304 memmove(dst->blockptrs + push_items, dst->blockptrs,
305 dst_nritems * sizeof(u64));
306 memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
307 push_items * sizeof(u64));
308
309 src->header.nritems -= push_items;
310 dst->header.nritems += push_items;
311
312 /* adjust the pointers going up the tree */
eb60ceac 313 memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
be0e5c09 314 dst->keys, sizeof(struct key));
eb60ceac
CM
315
316 write_tree_block(root, path->nodes[level + 1]);
317 write_tree_block(root, t);
318 write_tree_block(root, src_buffer);
319
74123bd7 320 /* then fixup the pointers in the path */
be0e5c09
CM
321 if (path->slots[level] >= src->header.nritems) {
322 path->slots[level] -= src->header.nritems;
eb60ceac
CM
323 tree_block_release(root, path->nodes[level]);
324 path->nodes[level] = t;
be0e5c09 325 path->slots[level + 1] += 1;
eb60ceac
CM
326 } else {
327 tree_block_release(root, t);
be0e5c09
CM
328 }
329 return 0;
330}
331
74123bd7
CM
332/*
333 * worker function to insert a single pointer in a node.
334 * the node should have enough room for the pointer already
335 * slot and level indicate where you want the key to go, and
336 * blocknr is the block the key points to.
337 */
338int __insert_ptr(struct ctree_root *root,
339 struct ctree_path *path, struct key *key,
340 u64 blocknr, int slot, int level)
341{
342 struct node *c;
343 struct node *lower;
344 struct key *lower_key;
345 int nritems;
346 /* need a new root */
347 if (!path->nodes[level]) {
348 struct tree_buffer *t;
349 t = alloc_free_block(root);
350 c = &t->node;
351 memset(c, 0, sizeof(c));
352 c->header.nritems = 2;
353 c->header.flags = node_level(level);
354 c->header.blocknr = t->blocknr;
355 lower = &path->nodes[level-1]->node;
356 if (is_leaf(lower->header.flags))
357 lower_key = &((struct leaf *)lower)->items[0].key;
358 else
359 lower_key = lower->keys;
360 memcpy(c->keys, lower_key, sizeof(struct key));
361 memcpy(c->keys + 1, key, sizeof(struct key));
362 c->blockptrs[0] = path->nodes[level-1]->blocknr;
363 c->blockptrs[1] = blocknr;
364 /* the path has an extra ref to root->node */
365 tree_block_release(root, root->node);
366 root->node = t;
367 t->count++;
368 write_tree_block(root, t);
369 path->nodes[level] = t;
370 path->slots[level] = 0;
371 if (c->keys[1].objectid == 0)
372 BUG();
373 return 0;
374 }
375 lower = &path->nodes[level]->node;
376 nritems = lower->header.nritems;
377 if (slot > nritems)
378 BUG();
379 if (nritems == NODEPTRS_PER_BLOCK)
380 BUG();
381 if (slot != nritems) {
382 memmove(lower->keys + slot + 1, lower->keys + slot,
383 (nritems - slot) * sizeof(struct key));
384 memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
385 (nritems - slot) * sizeof(u64));
386 }
387 memcpy(lower->keys + slot, key, sizeof(struct key));
388 lower->blockptrs[slot] = blocknr;
389 lower->header.nritems++;
390 if (lower->keys[1].objectid == 0)
391 BUG();
392 write_tree_block(root, path->nodes[level]);
393 return 0;
394}
395
396
397/*
398 * insert a key,blocknr pair into the tree at a given level
399 * If the node at that level in the path doesn't have room,
400 * it is split or shifted as appropriate.
401 */
be0e5c09
CM
402int insert_ptr(struct ctree_root *root,
403 struct ctree_path *path, struct key *key,
404 u64 blocknr, int level)
405{
eb60ceac
CM
406 struct tree_buffer *t = path->nodes[level];
407 struct node *c = &path->nodes[level]->node;
be0e5c09 408 struct node *b;
eb60ceac
CM
409 struct tree_buffer *b_buffer;
410 struct tree_buffer *bal[MAX_LEVEL];
be0e5c09
CM
411 int bal_level = level;
412 int mid;
413 int bal_start = -1;
414
74123bd7
CM
415 /*
416 * check to see if we need to make room in the node for this
417 * pointer. If we do, keep walking the tree, making sure there
418 * is enough room in each level for the required insertions.
419 *
420 * The bal array is filled in with any nodes to be inserted
421 * due to splitting. Once we've done all the splitting required
422 * do the inserts based on the data in the bal array.
423 */
be0e5c09 424 memset(bal, 0, ARRAY_SIZE(bal));
eb60ceac
CM
425 while(t && t->node.header.nritems == NODEPTRS_PER_BLOCK) {
426 c = &t->node;
be0e5c09
CM
427 if (push_node_left(root, path,
428 node_level(c->header.flags)) == 0)
429 break;
430 if (push_node_right(root, path,
431 node_level(c->header.flags)) == 0)
432 break;
433 bal_start = bal_level;
434 if (bal_level == MAX_LEVEL - 1)
435 BUG();
eb60ceac
CM
436 b_buffer = alloc_free_block(root);
437 b = &b_buffer->node;
be0e5c09 438 b->header.flags = c->header.flags;
eb60ceac 439 b->header.blocknr = b_buffer->blocknr;
be0e5c09
CM
440 mid = (c->header.nritems + 1) / 2;
441 memcpy(b->keys, c->keys + mid,
442 (c->header.nritems - mid) * sizeof(struct key));
443 memcpy(b->blockptrs, c->blockptrs + mid,
444 (c->header.nritems - mid) * sizeof(u64));
445 b->header.nritems = c->header.nritems - mid;
446 c->header.nritems = mid;
eb60ceac
CM
447
448 write_tree_block(root, t);
449 write_tree_block(root, b_buffer);
450
451 bal[bal_level] = b_buffer;
be0e5c09
CM
452 if (bal_level == MAX_LEVEL - 1)
453 break;
454 bal_level += 1;
eb60ceac 455 t = path->nodes[bal_level];
be0e5c09 456 }
74123bd7
CM
457 /*
458 * bal_start tells us the first level in the tree that needed to
459 * be split. Go through the bal array inserting the new nodes
460 * as needed. The path is fixed as we go.
461 */
be0e5c09 462 while(bal_start > 0) {
eb60ceac
CM
463 b_buffer = bal[bal_start];
464 c = &path->nodes[bal_start]->node;
465 __insert_ptr(root, path, b_buffer->node.keys, b_buffer->blocknr,
be0e5c09
CM
466 path->slots[bal_start + 1] + 1, bal_start + 1);
467 if (path->slots[bal_start] >= c->header.nritems) {
468 path->slots[bal_start] -= c->header.nritems;
eb60ceac
CM
469 tree_block_release(root, path->nodes[bal_start]);
470 path->nodes[bal_start] = b_buffer;
be0e5c09 471 path->slots[bal_start + 1] += 1;
eb60ceac
CM
472 } else {
473 tree_block_release(root, b_buffer);
be0e5c09
CM
474 }
475 bal_start--;
476 if (!bal[bal_start])
477 break;
478 }
74123bd7 479 /* Now that the tree has room, insert the requested pointer */
be0e5c09
CM
480 return __insert_ptr(root, path, key, blocknr, path->slots[level] + 1,
481 level);
482}
483
74123bd7
CM
484/*
485 * how many bytes are required to store the items in a leaf. start
486 * and nr indicate which items in the leaf to check. This totals up the
487 * space used both by the item structs and the item data
488 */
be0e5c09
CM
489int leaf_space_used(struct leaf *l, int start, int nr)
490{
491 int data_len;
492 int end = start + nr - 1;
493
494 if (!nr)
495 return 0;
496 data_len = l->items[start].offset + l->items[start].size;
497 data_len = data_len - l->items[end].offset;
498 data_len += sizeof(struct item) * nr;
499 return data_len;
500}
501
74123bd7
CM
502/*
503 * push some data in the path leaf to the left, trying to free up at
504 * least data_size bytes. returns zero if the push worked, nonzero otherwise
505 */
be0e5c09
CM
506int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
507 int data_size)
508{
eb60ceac
CM
509 struct tree_buffer *right_buf = path->nodes[0];
510 struct leaf *right = &right_buf->leaf;
511 struct tree_buffer *t;
be0e5c09
CM
512 struct leaf *left;
513 int slot;
514 int i;
515 int free_space;
516 int push_space = 0;
517 int push_items = 0;
518 struct item *item;
519 int old_left_nritems;
520
521 slot = path->slots[1];
522 if (slot == 0) {
523 return 1;
524 }
525 if (!path->nodes[1]) {
526 return 1;
527 }
eb60ceac
CM
528 t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
529 left = &t->leaf;
be0e5c09
CM
530 free_space = leaf_free_space(left);
531 if (free_space < data_size + sizeof(struct item)) {
eb60ceac 532 tree_block_release(root, t);
be0e5c09
CM
533 return 1;
534 }
535 for (i = 0; i < right->header.nritems; i++) {
536 item = right->items + i;
537 if (path->slots[0] == i)
538 push_space += data_size + sizeof(*item);
539 if (item->size + sizeof(*item) + push_space > free_space)
540 break;
541 push_items++;
542 push_space += item->size + sizeof(*item);
543 }
544 if (push_items == 0) {
eb60ceac 545 tree_block_release(root, t);
be0e5c09
CM
546 return 1;
547 }
548 /* push data from right to left */
549 memcpy(left->items + left->header.nritems,
550 right->items, push_items * sizeof(struct item));
551 push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
552 memcpy(left->data + leaf_data_end(left) - push_space,
553 right->data + right->items[push_items - 1].offset,
554 push_space);
555 old_left_nritems = left->header.nritems;
eb60ceac
CM
556 BUG_ON(old_left_nritems < 0);
557
be0e5c09
CM
558 for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
559 left->items[i].offset -= LEAF_DATA_SIZE -
560 left->items[old_left_nritems -1].offset;
561 }
562 left->header.nritems += push_items;
563
564 /* fixup right node */
565 push_space = right->items[push_items-1].offset - leaf_data_end(right);
566 memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
567 leaf_data_end(right), push_space);
568 memmove(right->items, right->items + push_items,
569 (right->header.nritems - push_items) * sizeof(struct item));
570 right->header.nritems -= push_items;
571 push_space = LEAF_DATA_SIZE;
eb60ceac 572
be0e5c09
CM
573 for (i = 0; i < right->header.nritems; i++) {
574 right->items[i].offset = push_space - right->items[i].size;
575 push_space = right->items[i].offset;
576 }
eb60ceac
CM
577
578 write_tree_block(root, t);
579 write_tree_block(root, right_buf);
580
581 fixup_low_keys(root, path, &right->items[0].key, 1);
be0e5c09
CM
582
583 /* then fixup the leaf pointer in the path */
584 if (path->slots[0] < push_items) {
585 path->slots[0] += old_left_nritems;
eb60ceac
CM
586 tree_block_release(root, path->nodes[0]);
587 path->nodes[0] = t;
be0e5c09
CM
588 path->slots[1] -= 1;
589 } else {
eb60ceac 590 tree_block_release(root, t);
be0e5c09
CM
591 path->slots[0] -= push_items;
592 }
eb60ceac 593 BUG_ON(path->slots[0] < 0);
be0e5c09
CM
594 return 0;
595}
596
74123bd7
CM
597/*
598 * split the path's leaf in two, making sure there is at least data_size
599 * available for the resulting leaf level of the path.
600 */
be0e5c09
CM
601int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
602{
eb60ceac
CM
603 struct tree_buffer *l_buf = path->nodes[0];
604 struct leaf *l = &l_buf->leaf;
605 int nritems;
606 int mid;
607 int slot;
be0e5c09 608 struct leaf *right;
eb60ceac 609 struct tree_buffer *right_buffer;
be0e5c09
CM
610 int space_needed = data_size + sizeof(struct item);
611 int data_copy_size;
612 int rt_data_off;
613 int i;
614 int ret;
615
616 if (push_leaf_left(root, path, data_size) == 0) {
eb60ceac
CM
617 l_buf = path->nodes[0];
618 l = &l_buf->leaf;
619 if (leaf_free_space(l) >= sizeof(struct item) + data_size)
620 return 0;
be0e5c09 621 }
eb60ceac
CM
622 slot = path->slots[0];
623 nritems = l->header.nritems;
624 mid = (nritems + 1)/ 2;
625
626 right_buffer = alloc_free_block(root);
627 BUG_ON(!right_buffer);
628 BUG_ON(mid == nritems);
629 right = &right_buffer->leaf;
be0e5c09
CM
630 memset(right, 0, sizeof(*right));
631 if (mid <= slot) {
632 if (leaf_space_used(l, mid, nritems - mid) + space_needed >
633 LEAF_DATA_SIZE)
634 BUG();
635 } else {
636 if (leaf_space_used(l, 0, mid + 1) + space_needed >
637 LEAF_DATA_SIZE)
638 BUG();
639 }
640 right->header.nritems = nritems - mid;
eb60ceac
CM
641 right->header.blocknr = right_buffer->blocknr;
642 right->header.flags = node_level(0);
be0e5c09
CM
643 data_copy_size = l->items[mid].offset + l->items[mid].size -
644 leaf_data_end(l);
645 memcpy(right->items, l->items + mid,
646 (nritems - mid) * sizeof(struct item));
647 memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
648 l->data + leaf_data_end(l), data_copy_size);
649 rt_data_off = LEAF_DATA_SIZE -
650 (l->items[mid].offset + l->items[mid].size);
74123bd7
CM
651
652 for (i = 0; i < right->header.nritems; i++)
be0e5c09 653 right->items[i].offset += rt_data_off;
74123bd7 654
be0e5c09
CM
655 l->header.nritems = mid;
656 ret = insert_ptr(root, path, &right->items[0].key,
eb60ceac
CM
657 right_buffer->blocknr, 1);
658
659 write_tree_block(root, right_buffer);
660 write_tree_block(root, l_buf);
661
662 BUG_ON(path->slots[0] != slot);
be0e5c09 663 if (mid <= slot) {
eb60ceac
CM
664 tree_block_release(root, path->nodes[0]);
665 path->nodes[0] = right_buffer;
be0e5c09
CM
666 path->slots[0] -= mid;
667 path->slots[1] += 1;
eb60ceac
CM
668 } else
669 tree_block_release(root, right_buffer);
670 BUG_ON(path->slots[0] < 0);
be0e5c09
CM
671 return ret;
672}
673
74123bd7
CM
674/*
675 * Given a key and some data, insert an item into the tree.
676 * This does all the path init required, making room in the tree if needed.
677 */
be0e5c09
CM
678int insert_item(struct ctree_root *root, struct key *key,
679 void *data, int data_size)
680{
681 int ret;
682 int slot;
eb60ceac 683 int slot_orig;
be0e5c09 684 struct leaf *leaf;
eb60ceac 685 struct tree_buffer *leaf_buf;
be0e5c09
CM
686 unsigned int nritems;
687 unsigned int data_end;
688 struct ctree_path path;
689
74123bd7 690 /* create a root if there isn't one */
eb60ceac
CM
691 if (!root->node) {
692 struct tree_buffer *t;
693 t = alloc_free_block(root);
694 BUG_ON(!t);
695 t->node.header.nritems = 0;
696 t->node.header.flags = node_level(0);
697 t->node.header.blocknr = t->blocknr;
698 root->node = t;
699 write_tree_block(root, t);
700 }
be0e5c09
CM
701 init_path(&path);
702 ret = search_slot(root, key, &path);
eb60ceac
CM
703 if (ret == 0) {
704 release_path(root, &path);
be0e5c09 705 return -EEXIST;
eb60ceac 706 }
be0e5c09 707
eb60ceac
CM
708 slot_orig = path.slots[0];
709 leaf_buf = path.nodes[0];
710 leaf = &leaf_buf->leaf;
74123bd7
CM
711
712 /* make room if needed */
eb60ceac 713 if (leaf_free_space(leaf) < sizeof(struct item) + data_size) {
be0e5c09 714 split_leaf(root, &path, data_size);
eb60ceac
CM
715 leaf_buf = path.nodes[0];
716 leaf = &path.nodes[0]->leaf;
717 }
be0e5c09
CM
718 nritems = leaf->header.nritems;
719 data_end = leaf_data_end(leaf);
eb60ceac 720
be0e5c09
CM
721 if (leaf_free_space(leaf) < sizeof(struct item) + data_size)
722 BUG();
723
724 slot = path.slots[0];
eb60ceac 725 BUG_ON(slot < 0);
be0e5c09 726 if (slot == 0)
eb60ceac 727 fixup_low_keys(root, &path, key, 1);
be0e5c09
CM
728 if (slot != nritems) {
729 int i;
730 unsigned int old_data = leaf->items[slot].offset +
731 leaf->items[slot].size;
732
733 /*
734 * item0..itemN ... dataN.offset..dataN.size .. data0.size
735 */
736 /* first correct the data pointers */
737 for (i = slot; i < nritems; i++)
738 leaf->items[i].offset -= data_size;
739
740 /* shift the items */
741 memmove(leaf->items + slot + 1, leaf->items + slot,
742 (nritems - slot) * sizeof(struct item));
743
744 /* shift the data */
745 memmove(leaf->data + data_end - data_size, leaf->data +
746 data_end, old_data - data_end);
747 data_end = old_data;
748 }
74123bd7 749 /* copy the new data in */
be0e5c09
CM
750 memcpy(&leaf->items[slot].key, key, sizeof(struct key));
751 leaf->items[slot].offset = data_end - data_size;
752 leaf->items[slot].size = data_size;
753 memcpy(leaf->data + data_end - data_size, data, data_size);
754 leaf->header.nritems += 1;
eb60ceac 755 write_tree_block(root, leaf_buf);
be0e5c09
CM
756 if (leaf_free_space(leaf) < 0)
757 BUG();
eb60ceac 758 release_path(root, &path);
be0e5c09
CM
759 return 0;
760}
761
74123bd7
CM
762/*
763 * delete the pointer from a given level in the path. The path is not
764 * fixed up, so after calling this it is not valid at that level.
765 *
766 * If the delete empties a node, the node is removed from the tree,
767 * continuing all the way the root if required. The root is converted into
768 * a leaf if all the nodes are emptied.
769 */
be0e5c09
CM
770int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
771{
772 int slot;
eb60ceac 773 struct tree_buffer *t;
be0e5c09
CM
774 struct node *node;
775 int nritems;
776
777 while(1) {
eb60ceac
CM
778 t = path->nodes[level];
779 if (!t)
be0e5c09 780 break;
eb60ceac 781 node = &t->node;
be0e5c09
CM
782 slot = path->slots[level];
783 nritems = node->header.nritems;
784
785 if (slot != nritems -1) {
786 memmove(node->keys + slot, node->keys + slot + 1,
787 sizeof(struct key) * (nritems - slot - 1));
788 memmove(node->blockptrs + slot,
789 node->blockptrs + slot + 1,
790 sizeof(u64) * (nritems - slot - 1));
791 }
792 node->header.nritems--;
eb60ceac 793 write_tree_block(root, t);
be0e5c09
CM
794 if (node->header.nritems != 0) {
795 int tslot;
796 if (slot == 0)
eb60ceac
CM
797 fixup_low_keys(root, path, node->keys,
798 level + 1);
be0e5c09 799 tslot = path->slots[level+1];
eb60ceac 800 t->count++;
be0e5c09
CM
801 push_node_left(root, path, level);
802 if (node->header.nritems) {
803 push_node_right(root, path, level);
804 }
eb60ceac
CM
805 if (node->header.nritems) {
806 tree_block_release(root, t);
be0e5c09 807 break;
eb60ceac
CM
808 }
809 tree_block_release(root, t);
4920c9ac 810 path->slots[level+1] = tslot;
be0e5c09 811 }
eb60ceac
CM
812 if (t == root->node) {
813 /* just turn the root into a leaf and break */
814 root->node->node.header.flags = node_level(0);
815 write_tree_block(root, t);
be0e5c09
CM
816 break;
817 }
818 level++;
819 if (!path->nodes[level])
820 BUG();
be0e5c09
CM
821 }
822 return 0;
823}
824
74123bd7
CM
825/*
826 * delete the item at the leaf level in path. If that empties
827 * the leaf, remove it from the tree
828 */
4920c9ac 829int del_item(struct ctree_root *root, struct ctree_path *path)
be0e5c09 830{
be0e5c09
CM
831 int slot;
832 struct leaf *leaf;
eb60ceac 833 struct tree_buffer *leaf_buf;
be0e5c09
CM
834 int doff;
835 int dsize;
836
eb60ceac
CM
837 leaf_buf = path->nodes[0];
838 leaf = &leaf_buf->leaf;
4920c9ac 839 slot = path->slots[0];
be0e5c09
CM
840 doff = leaf->items[slot].offset;
841 dsize = leaf->items[slot].size;
842
843 if (slot != leaf->header.nritems - 1) {
844 int i;
845 int data_end = leaf_data_end(leaf);
846 memmove(leaf->data + data_end + dsize,
847 leaf->data + data_end,
848 doff - data_end);
849 for (i = slot + 1; i < leaf->header.nritems; i++)
850 leaf->items[i].offset += dsize;
851 memmove(leaf->items + slot, leaf->items + slot + 1,
852 sizeof(struct item) *
853 (leaf->header.nritems - slot - 1));
854 }
855 leaf->header.nritems -= 1;
74123bd7 856 /* delete the leaf if we've emptied it */
be0e5c09 857 if (leaf->header.nritems == 0) {
eb60ceac
CM
858 if (leaf_buf == root->node) {
859 leaf->header.flags = node_level(0);
860 write_tree_block(root, leaf_buf);
861 } else
4920c9ac 862 del_ptr(root, path, 1);
be0e5c09
CM
863 } else {
864 if (slot == 0)
eb60ceac
CM
865 fixup_low_keys(root, path, &leaf->items[0].key, 1);
866 write_tree_block(root, leaf_buf);
74123bd7 867 /* delete the leaf if it is mostly empty */
be0e5c09
CM
868 if (leaf_space_used(leaf, 0, leaf->header.nritems) <
869 LEAF_DATA_SIZE / 4) {
870 /* push_leaf_left fixes the path.
871 * make sure the path still points to our leaf
872 * for possible call to del_ptr below
873 */
4920c9ac 874 slot = path->slots[1];
eb60ceac 875 leaf_buf->count++;
4920c9ac 876 push_leaf_left(root, path, 1);
be0e5c09 877 if (leaf->header.nritems == 0) {
4920c9ac
CM
878 path->slots[1] = slot;
879 del_ptr(root, path, 1);
be0e5c09 880 }
eb60ceac 881 tree_block_release(root, leaf_buf);
be0e5c09
CM
882 }
883 }
884 return 0;
885}
886
887void print_leaf(struct leaf *l)
888{
889 int i;
890 int nr = l->header.nritems;
891 struct item *item;
eb60ceac 892 printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr,
be0e5c09
CM
893 leaf_free_space(l));
894 fflush(stdout);
895 for (i = 0 ; i < nr ; i++) {
896 item = l->items + i;
897 printf("\titem %d key (%lu %u %lu) itemoff %d itemsize %d\n",
898 i,
899 item->key.objectid, item->key.flags, item->key.offset,
900 item->offset, item->size);
901 fflush(stdout);
902 printf("\t\titem data %.*s\n", item->size, l->data+item->offset);
903 fflush(stdout);
904 }
905}
eb60ceac 906void print_tree(struct ctree_root *root, struct tree_buffer *t)
be0e5c09
CM
907{
908 int i;
909 int nr;
eb60ceac 910 struct node *c;
be0e5c09 911
eb60ceac 912 if (!t)
be0e5c09 913 return;
eb60ceac 914 c = &t->node;
be0e5c09 915 nr = c->header.nritems;
eb60ceac
CM
916 if (c->header.blocknr != t->blocknr)
917 BUG();
be0e5c09
CM
918 if (is_leaf(c->header.flags)) {
919 print_leaf((struct leaf *)c);
920 return;
921 }
eb60ceac 922 printf("node %lu level %d total ptrs %d free spc %lu\n", t->blocknr,
be0e5c09
CM
923 node_level(c->header.flags), c->header.nritems,
924 NODEPTRS_PER_BLOCK - c->header.nritems);
925 fflush(stdout);
926 for (i = 0; i < nr; i++) {
eb60ceac 927 printf("\tkey %d (%lu %u %lu) block %lu\n",
be0e5c09
CM
928 i,
929 c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset,
930 c->blockptrs[i]);
931 fflush(stdout);
932 }
933 for (i = 0; i < nr; i++) {
eb60ceac
CM
934 struct tree_buffer *next_buf = read_tree_block(root,
935 c->blockptrs[i]);
936 struct node *next = &next_buf->node;
be0e5c09
CM
937 if (is_leaf(next->header.flags) &&
938 node_level(c->header.flags) != 1)
939 BUG();
940 if (node_level(next->header.flags) !=
941 node_level(c->header.flags) - 1)
942 BUG();
eb60ceac
CM
943 print_tree(root, next_buf);
944 tree_block_release(root, next_buf);
be0e5c09
CM
945 }
946
947}
948
/*
 * for testing only: pick the next key to use, a pseudo-random value
 * in [0, max_key) taken from rand().  The iteration counter i is
 * accepted but not used.
 */
int next_key(int i, int max_key)
{
	(void)i;
	return rand() % max_key;
}
954
955int main() {
eb60ceac 956 struct ctree_root *root;
be0e5c09 957 struct key ins;
4920c9ac 958 struct key last = { (u64)-1, 0, 0};
be0e5c09
CM
959 char *buf;
960 int i;
961 int num;
962 int ret;
74123bd7 963 int run_size = 25000;
be0e5c09
CM
964 int max_key = 100000000;
965 int tree_size = 0;
966 struct ctree_path path;
967
eb60ceac
CM
968 radix_tree_init();
969
970
971 root = open_ctree("dbfile");
be0e5c09
CM
972
973 srand(55);
be0e5c09
CM
974 for (i = 0; i < run_size; i++) {
975 buf = malloc(64);
976 num = next_key(i, max_key);
977 // num = i;
978 sprintf(buf, "string-%d", num);
979 // printf("insert %d\n", num);
980 ins.objectid = num;
981 ins.offset = 0;
982 ins.flags = 0;
eb60ceac 983 ret = insert_item(root, &ins, buf, strlen(buf));
be0e5c09
CM
984 if (!ret)
985 tree_size++;
986 }
eb60ceac
CM
987 close_ctree(root);
988 root = open_ctree("dbfile");
989 printf("starting search\n");
be0e5c09
CM
990 srand(55);
991 for (i = 0; i < run_size; i++) {
992 num = next_key(i, max_key);
993 ins.objectid = num;
be0e5c09 994 init_path(&path);
eb60ceac 995 ret = search_slot(root, &ins, &path);
be0e5c09 996 if (ret) {
eb60ceac 997 print_tree(root, root->node);
be0e5c09
CM
998 printf("unable to find %d\n", num);
999 exit(1);
1000 }
eb60ceac
CM
1001 release_path(root, &path);
1002 }
1003 close_ctree(root);
1004 root = open_ctree("dbfile");
1005 printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
1006 node_level(root->node->node.header.flags),
1007 root->node->node.header.nritems,
1008 NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
1009 printf("all searches good, deleting some items\n");
be0e5c09
CM
1010 i = 0;
1011 srand(55);
4920c9ac
CM
1012 for (i = 0 ; i < run_size/4; i++) {
1013 num = next_key(i, max_key);
1014 ins.objectid = num;
1015 init_path(&path);
eb60ceac 1016 ret = search_slot(root, &ins, &path);
4920c9ac
CM
1017 if (ret)
1018 continue;
eb60ceac 1019 ret = del_item(root, &path);
4920c9ac
CM
1020 if (ret != 0)
1021 BUG();
eb60ceac 1022 release_path(root, &path);
4920c9ac
CM
1023 tree_size--;
1024 }
1025 srand(128);
be0e5c09 1026 for (i = 0; i < run_size; i++) {
4920c9ac 1027 buf = malloc(64);
be0e5c09 1028 num = next_key(i, max_key);
4920c9ac 1029 sprintf(buf, "string-%d", num);
be0e5c09 1030 ins.objectid = num;
eb60ceac 1031 ret = insert_item(root, &ins, buf, strlen(buf));
4920c9ac
CM
1032 if (!ret)
1033 tree_size++;
1034 }
eb60ceac
CM
1035 close_ctree(root);
1036 root = open_ctree("dbfile");
1037 printf("starting search2\n");
1038 srand(128);
1039 for (i = 0; i < run_size; i++) {
1040 num = next_key(i, max_key);
1041 ins.objectid = num;
1042 init_path(&path);
1043 ret = search_slot(root, &ins, &path);
1044 if (ret) {
1045 print_tree(root, root->node);
1046 printf("unable to find %d\n", num);
1047 exit(1);
1048 }
1049 release_path(root, &path);
1050 }
1051 printf("starting big long delete run\n");
1052 while(root->node && root->node->node.header.nritems > 0) {
4920c9ac
CM
1053 struct leaf *leaf;
1054 int slot;
1055 ins.objectid = (u64)-1;
1056 init_path(&path);
eb60ceac 1057 ret = search_slot(root, &ins, &path);
4920c9ac
CM
1058 if (ret == 0)
1059 BUG();
1060
eb60ceac 1061 leaf = &path.nodes[0]->leaf;
4920c9ac
CM
1062 slot = path.slots[0];
1063 if (slot != leaf->header.nritems)
1064 BUG();
1065 while(path.slots[0] > 0) {
1066 path.slots[0] -= 1;
1067 slot = path.slots[0];
eb60ceac 1068 leaf = &path.nodes[0]->leaf;
4920c9ac
CM
1069
1070 if (comp_keys(&last, &leaf->items[slot].key) <= 0)
1071 BUG();
1072 memcpy(&last, &leaf->items[slot].key, sizeof(last));
eb60ceac
CM
1073 ret = del_item(root, &path);
1074 if (ret != 0) {
1075 printf("del_item returned %d\n", ret);
4920c9ac 1076 BUG();
eb60ceac 1077 }
4920c9ac
CM
1078 tree_size--;
1079 }
eb60ceac 1080 release_path(root, &path);
be0e5c09 1081 }
eb60ceac 1082 close_ctree(root);
4920c9ac 1083 printf("tree size is now %d\n", tree_size);
be0e5c09
CM
1084 return 0;
1085}