Btrfs: early extent mapping support
[linux-2.6-block.git] / fs / btrfs / ctree.c
CommitLineData
be0e5c09
CM
1#include <stdio.h>
2#include <stdlib.h>
3#include "kerncompat.h"
eb60ceac
CM
4#include "radix-tree.h"
5#include "ctree.h"
6#include "disk-io.h"
be0e5c09 7
d97e63b6
CM
8static int refill_alloc_extent(struct ctree_root *root);
9
be0e5c09
CM
10static inline void init_path(struct ctree_path *p)
11{
12 memset(p, 0, sizeof(*p));
13}
14
eb60ceac
CM
15static void release_path(struct ctree_root *root, struct ctree_path *p)
16{
17 int i;
18 for (i = 0; i < MAX_LEVEL; i++) {
19 if (!p->nodes[i])
20 break;
21 tree_block_release(root, p->nodes[i]);
22 }
23}
24
74123bd7
CM
25/*
26 * The leaf data grows from end-to-front in the node.
27 * this returns the address of the start of the last item,
28 * which is the stop of the leaf data stack
29 */
be0e5c09
CM
30static inline unsigned int leaf_data_end(struct leaf *leaf)
31{
32 unsigned int nr = leaf->header.nritems;
33 if (nr == 0)
d97e63b6 34 return sizeof(leaf->data);
be0e5c09
CM
35 return leaf->items[nr-1].offset;
36}
37
74123bd7
CM
38/*
39 * The space between the end of the leaf items and
40 * the start of the leaf data. IOW, how much room
41 * the leaf has left for both items and data
42 */
be0e5c09
CM
43static inline int leaf_free_space(struct leaf *leaf)
44{
45 int data_end = leaf_data_end(leaf);
46 int nritems = leaf->header.nritems;
47 char *items_end = (char *)(leaf->items + nritems + 1);
48 return (char *)(leaf->data + data_end) - (char *)items_end;
49}
50
74123bd7
CM
51/*
52 * compare two keys in a memcmp fashion
53 */
be0e5c09
CM
54int comp_keys(struct key *k1, struct key *k2)
55{
56 if (k1->objectid > k2->objectid)
57 return 1;
58 if (k1->objectid < k2->objectid)
59 return -1;
60 if (k1->flags > k2->flags)
61 return 1;
62 if (k1->flags < k2->flags)
63 return -1;
64 if (k1->offset > k2->offset)
65 return 1;
66 if (k1->offset < k2->offset)
67 return -1;
68 return 0;
69}
74123bd7
CM
70
/*
 * Binary search for key in the array starting at p.  Elements are
 * item_size bytes apart, there are 'max' of them, and a struct key is read
 * from the start of each element.
 *
 * Returns 0 with *slot set to the matching index when the key is found.
 * Otherwise returns 1 with *slot set to the index where the key would be
 * inserted; that index equals max when key is larger than every element.
 */
int generic_bin_search(char *p, int item_size, struct key *key,
		       int max, int *slot)
{
	int lo = 0;
	int hi = max;

	while (lo < hi) {
		int probe = (lo + hi) / 2;
		int cmp = comp_keys((struct key *)(p + probe * item_size), key);

		if (cmp == 0) {
			*slot = probe;
			return 0;
		}
		if (cmp < 0)
			lo = probe + 1;
		else
			hi = probe;
	}
	*slot = lo;
	return 1;
}
106
107int bin_search(struct node *c, struct key *key, int *slot)
108{
109 if (is_leaf(c->header.flags)) {
110 struct leaf *l = (struct leaf *)c;
111 return generic_bin_search((void *)l->items, sizeof(struct item),
112 key, c->header.nritems, slot);
113 } else {
114 return generic_bin_search((void *)c->keys, sizeof(struct key),
115 key, c->header.nritems, slot);
116 }
117 return -1;
118}
119
74123bd7
CM
120/*
121 * look for key in the tree. path is filled in with nodes along the way
122 * if key is found, we return zero and you can find the item in the leaf
123 * level of the path (level 0)
124 *
125 * If the key isn't found, the path points to the slot where it should
126 * be inserted.
127 */
be0e5c09
CM
128int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p)
129{
eb60ceac
CM
130 struct tree_buffer *b = root->node;
131 struct node *c;
132
be0e5c09
CM
133 int slot;
134 int ret;
135 int level;
eb60ceac
CM
136 b->count++;
137 while (b) {
138 c = &b->node;
be0e5c09 139 level = node_level(c->header.flags);
eb60ceac 140 p->nodes[level] = b;
be0e5c09
CM
141 ret = bin_search(c, key, &slot);
142 if (!is_leaf(c->header.flags)) {
143 if (ret && slot > 0)
144 slot -= 1;
145 p->slots[level] = slot;
eb60ceac 146 b = read_tree_block(root, c->blockptrs[slot]);
be0e5c09
CM
147 continue;
148 } else {
149 p->slots[level] = slot;
150 return ret;
151 }
152 }
153 return -1;
154}
155
74123bd7
CM
156/*
157 * adjust the pointers going up the tree, starting at level
158 * making sure the right key of each node is points to 'key'.
159 * This is used after shifting pointers to the left, so it stops
160 * fixing up pointers when a given leaf/node is not in slot 0 of the
161 * higher levels
162 */
eb60ceac
CM
163static void fixup_low_keys(struct ctree_root *root,
164 struct ctree_path *path, struct key *key,
165 int level)
be0e5c09
CM
166{
167 int i;
be0e5c09 168 for (i = level; i < MAX_LEVEL; i++) {
eb60ceac 169 struct node *t;
be0e5c09 170 int tslot = path->slots[i];
eb60ceac 171 if (!path->nodes[i])
be0e5c09 172 break;
eb60ceac 173 t = &path->nodes[i]->node;
be0e5c09 174 memcpy(t->keys + tslot, key, sizeof(*key));
eb60ceac 175 write_tree_block(root, path->nodes[i]);
be0e5c09
CM
176 if (tslot != 0)
177 break;
178 }
179}
180
74123bd7
CM
181/*
182 * try to push data from one node into the next node left in the
183 * tree. The src node is found at specified level in the path.
184 * If some bytes were pushed, return 0, otherwise return 1.
185 *
186 * Lower nodes/leaves in the path are not touched, higher nodes may
187 * be modified to reflect the push.
188 *
189 * The path is altered to reflect the push.
190 */
be0e5c09
CM
191int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
192{
193 int slot;
194 struct node *left;
195 struct node *right;
196 int push_items = 0;
197 int left_nritems;
198 int right_nritems;
eb60ceac
CM
199 struct tree_buffer *t;
200 struct tree_buffer *right_buf;
be0e5c09
CM
201
202 if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
203 return 1;
204 slot = path->slots[level + 1];
205 if (slot == 0)
206 return 1;
207
eb60ceac
CM
208 t = read_tree_block(root,
209 path->nodes[level + 1]->node.blockptrs[slot - 1]);
210 left = &t->node;
211 right_buf = path->nodes[level];
212 right = &right_buf->node;
be0e5c09
CM
213 left_nritems = left->header.nritems;
214 right_nritems = right->header.nritems;
215 push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
eb60ceac
CM
216 if (push_items <= 0) {
217 tree_block_release(root, t);
be0e5c09 218 return 1;
eb60ceac 219 }
be0e5c09
CM
220
221 if (right_nritems < push_items)
222 push_items = right_nritems;
223 memcpy(left->keys + left_nritems, right->keys,
224 push_items * sizeof(struct key));
225 memcpy(left->blockptrs + left_nritems, right->blockptrs,
226 push_items * sizeof(u64));
227 memmove(right->keys, right->keys + push_items,
228 (right_nritems - push_items) * sizeof(struct key));
229 memmove(right->blockptrs, right->blockptrs + push_items,
230 (right_nritems - push_items) * sizeof(u64));
231 right->header.nritems -= push_items;
232 left->header.nritems += push_items;
233
234 /* adjust the pointers going up the tree */
eb60ceac
CM
235 fixup_low_keys(root, path, right->keys, level + 1);
236
237 write_tree_block(root, t);
238 write_tree_block(root, right_buf);
be0e5c09
CM
239
240 /* then fixup the leaf pointer in the path */
241 if (path->slots[level] < push_items) {
242 path->slots[level] += left_nritems;
eb60ceac
CM
243 tree_block_release(root, path->nodes[level]);
244 path->nodes[level] = t;
be0e5c09
CM
245 path->slots[level + 1] -= 1;
246 } else {
247 path->slots[level] -= push_items;
eb60ceac 248 tree_block_release(root, t);
be0e5c09
CM
249 }
250 return 0;
251}
252
74123bd7
CM
253/*
254 * try to push data from one node into the next node right in the
255 * tree. The src node is found at specified level in the path.
256 * If some bytes were pushed, return 0, otherwise return 1.
257 *
258 * Lower nodes/leaves in the path are not touched, higher nodes may
259 * be modified to reflect the push.
260 *
261 * The path is altered to reflect the push.
262 */
be0e5c09
CM
263int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
264{
265 int slot;
eb60ceac
CM
266 struct tree_buffer *t;
267 struct tree_buffer *src_buffer;
be0e5c09
CM
268 struct node *dst;
269 struct node *src;
270 int push_items = 0;
271 int dst_nritems;
272 int src_nritems;
273
74123bd7 274 /* can't push from the root */
be0e5c09
CM
275 if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
276 return 1;
74123bd7
CM
277
278 /* only try to push inside the node higher up */
be0e5c09
CM
279 slot = path->slots[level + 1];
280 if (slot == NODEPTRS_PER_BLOCK - 1)
281 return 1;
282
eb60ceac 283 if (slot >= path->nodes[level + 1]->node.header.nritems -1)
be0e5c09
CM
284 return 1;
285
eb60ceac
CM
286 t = read_tree_block(root,
287 path->nodes[level + 1]->node.blockptrs[slot + 1]);
288 dst = &t->node;
289 src_buffer = path->nodes[level];
290 src = &src_buffer->node;
be0e5c09
CM
291 dst_nritems = dst->header.nritems;
292 src_nritems = src->header.nritems;
293 push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
eb60ceac
CM
294 if (push_items <= 0) {
295 tree_block_release(root, t);
be0e5c09 296 return 1;
eb60ceac 297 }
be0e5c09
CM
298
299 if (src_nritems < push_items)
300 push_items = src_nritems;
301 memmove(dst->keys + push_items, dst->keys,
302 dst_nritems * sizeof(struct key));
303 memcpy(dst->keys, src->keys + src_nritems - push_items,
304 push_items * sizeof(struct key));
305
306 memmove(dst->blockptrs + push_items, dst->blockptrs,
307 dst_nritems * sizeof(u64));
308 memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
309 push_items * sizeof(u64));
310
311 src->header.nritems -= push_items;
312 dst->header.nritems += push_items;
313
314 /* adjust the pointers going up the tree */
eb60ceac 315 memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
be0e5c09 316 dst->keys, sizeof(struct key));
eb60ceac
CM
317
318 write_tree_block(root, path->nodes[level + 1]);
319 write_tree_block(root, t);
320 write_tree_block(root, src_buffer);
321
74123bd7 322 /* then fixup the pointers in the path */
be0e5c09
CM
323 if (path->slots[level] >= src->header.nritems) {
324 path->slots[level] -= src->header.nritems;
eb60ceac
CM
325 tree_block_release(root, path->nodes[level]);
326 path->nodes[level] = t;
be0e5c09 327 path->slots[level + 1] += 1;
eb60ceac
CM
328 } else {
329 tree_block_release(root, t);
be0e5c09
CM
330 }
331 return 0;
332}
333
74123bd7
CM
334/*
335 * worker function to insert a single pointer in a node.
336 * the node should have enough room for the pointer already
337 * slot and level indicate where you want the key to go, and
338 * blocknr is the block the key points to.
339 */
340int __insert_ptr(struct ctree_root *root,
341 struct ctree_path *path, struct key *key,
342 u64 blocknr, int slot, int level)
343{
344 struct node *c;
345 struct node *lower;
346 struct key *lower_key;
347 int nritems;
348 /* need a new root */
349 if (!path->nodes[level]) {
350 struct tree_buffer *t;
351 t = alloc_free_block(root);
352 c = &t->node;
353 memset(c, 0, sizeof(c));
354 c->header.nritems = 2;
355 c->header.flags = node_level(level);
356 c->header.blocknr = t->blocknr;
357 lower = &path->nodes[level-1]->node;
358 if (is_leaf(lower->header.flags))
359 lower_key = &((struct leaf *)lower)->items[0].key;
360 else
361 lower_key = lower->keys;
362 memcpy(c->keys, lower_key, sizeof(struct key));
363 memcpy(c->keys + 1, key, sizeof(struct key));
364 c->blockptrs[0] = path->nodes[level-1]->blocknr;
365 c->blockptrs[1] = blocknr;
366 /* the path has an extra ref to root->node */
367 tree_block_release(root, root->node);
368 root->node = t;
369 t->count++;
370 write_tree_block(root, t);
371 path->nodes[level] = t;
372 path->slots[level] = 0;
373 if (c->keys[1].objectid == 0)
374 BUG();
375 return 0;
376 }
377 lower = &path->nodes[level]->node;
378 nritems = lower->header.nritems;
379 if (slot > nritems)
380 BUG();
381 if (nritems == NODEPTRS_PER_BLOCK)
382 BUG();
383 if (slot != nritems) {
384 memmove(lower->keys + slot + 1, lower->keys + slot,
385 (nritems - slot) * sizeof(struct key));
386 memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
387 (nritems - slot) * sizeof(u64));
388 }
389 memcpy(lower->keys + slot, key, sizeof(struct key));
390 lower->blockptrs[slot] = blocknr;
391 lower->header.nritems++;
392 if (lower->keys[1].objectid == 0)
393 BUG();
394 write_tree_block(root, path->nodes[level]);
395 return 0;
396}
397
398
399/*
400 * insert a key,blocknr pair into the tree at a given level
401 * If the node at that level in the path doesn't have room,
402 * it is split or shifted as appropriate.
403 */
be0e5c09
CM
/*
 * insert_ptr: insert (key, blocknr) into the node at 'level' of the path,
 * in the slot right after path->slots[level].
 *
 * While the node being looked at is full (nritems == NODEPTRS_PER_BLOCK)
 * the first loop tries push_node_left(), then push_node_right().  If
 * neither moves anything, a new block is allocated, the upper half of the
 * keys/pointers is copied into it, the new buffer is remembered in bal[]
 * and the loop moves one level up.  The second loop then links the buffers
 * in bal[] into their parents with __insert_ptr() and switches the path to
 * the new buffer where the path's slot landed in the moved half.
 *
 * Returns the value of the final __insert_ptr() call.
 */
404int insert_ptr(struct ctree_root *root,
405 struct ctree_path *path, struct key *key,
406 u64 blocknr, int level)
407{
eb60ceac
CM
408 struct tree_buffer *t = path->nodes[level];
409 struct node *c = &path->nodes[level]->node;
be0e5c09 410 struct node *b;
eb60ceac
CM
411 struct tree_buffer *b_buffer;
412 struct tree_buffer *bal[MAX_LEVEL];
be0e5c09
CM
413 int bal_level = level;
414 int mid;
415 int bal_start = -1;
416
74123bd7
CM
417 /*
418 * check to see if we need to make room in the node for this
419 * pointer. If we do, keep walking the tree, making sure there
420 * is enough room in each level for the required insertions.
421 *
422 * The bal array is filled in with any nodes to be inserted
423 * due to splitting. Once we've done all the splitting required
424 * do the inserts based on the data in the bal array.
425 */
d97e63b6 426 memset(bal, 0, sizeof(bal));
eb60ceac
CM
427 while(t && t->node.header.nritems == NODEPTRS_PER_BLOCK) {
428 c = &t->node;
be0e5c09
CM
429 if (push_node_left(root, path,
430 node_level(c->header.flags)) == 0)
431 break;
432 if (push_node_right(root, path,
433 node_level(c->header.flags)) == 0)
434 break;
/* neither sibling took anything: this level has to be split */
435 bal_start = bal_level;
436 if (bal_level == MAX_LEVEL - 1)
437 BUG();
/* NOTE(review): the result of alloc_free_block() is not checked here */
eb60ceac
CM
438 b_buffer = alloc_free_block(root);
439 b = &b_buffer->node;
be0e5c09 440 b->header.flags = c->header.flags;
eb60ceac 441 b->header.blocknr = b_buffer->blocknr;
be0e5c09
CM
/* entries from 'mid' onwards move to the new node b */
442 mid = (c->header.nritems + 1) / 2;
443 memcpy(b->keys, c->keys + mid,
444 (c->header.nritems - mid) * sizeof(struct key));
445 memcpy(b->blockptrs, c->blockptrs + mid,
446 (c->header.nritems - mid) * sizeof(u64));
447 b->header.nritems = c->header.nritems - mid;
448 c->header.nritems = mid;
eb60ceac
CM
449
450 write_tree_block(root, t);
451 write_tree_block(root, b_buffer);
452
453 bal[bal_level] = b_buffer;
be0e5c09
CM
454 if (bal_level == MAX_LEVEL - 1)
455 break;
456 bal_level += 1;
eb60ceac 457 t = path->nodes[bal_level];
be0e5c09 458 }
74123bd7
CM
459 /*
460 * bal_start tells us the first level in the tree that needed to
461 * be split. Go through the bal array inserting the new nodes
462 * as needed. The path is fixed as we go.
463 */
be0e5c09 464 while(bal_start > 0) {
eb60ceac
CM
465 b_buffer = bal[bal_start];
466 c = &path->nodes[bal_start]->node;
/* link the new node into the parent, right after the node it was split from */
467 __insert_ptr(root, path, b_buffer->node.keys, b_buffer->blocknr,
be0e5c09
CM
468 path->slots[bal_start + 1] + 1, bal_start + 1);
469 if (path->slots[bal_start] >= c->header.nritems) {
/* the path's slot moved into the new node: follow it there */
470 path->slots[bal_start] -= c->header.nritems;
eb60ceac
CM
471 tree_block_release(root, path->nodes[bal_start]);
472 path->nodes[bal_start] = b_buffer;
be0e5c09 473 path->slots[bal_start + 1] += 1;
eb60ceac
CM
474 } else {
475 tree_block_release(root, b_buffer);
be0e5c09
CM
476 }
477 bal_start--;
478 if (!bal[bal_start])
479 break;
480 }
74123bd7 481 /* Now that the tree has room, insert the requested pointer */
be0e5c09
CM
482 return __insert_ptr(root, path, key, blocknr, path->slots[level] + 1,
483 level);
484}
485
74123bd7
CM
486/*
487 * how many bytes are required to store the items in a leaf. start
488 * and nr indicate which items in the leaf to check. This totals up the
489 * space used both by the item structs and the item data
490 */
be0e5c09
CM
491int leaf_space_used(struct leaf *l, int start, int nr)
492{
493 int data_len;
494 int end = start + nr - 1;
495
496 if (!nr)
497 return 0;
498 data_len = l->items[start].offset + l->items[start].size;
499 data_len = data_len - l->items[end].offset;
500 data_len += sizeof(struct item) * nr;
501 return data_len;
502}
503
74123bd7
CM
504/*
505 * push some data in the path leaf to the left, trying to free up at
506 * least data_size bytes. returns zero if the push worked, nonzero otherwise
507 */
be0e5c09
CM
/*
 * push_leaf_left: move items from the path's leaf (path->nodes[0]) into
 * its left sibling, the block referenced by slot path->slots[1] - 1 of the
 * parent.
 *
 * data_size is the payload size of the item the caller intends to insert;
 * room for it (plus one struct item) is reserved when the scan reaches
 * path->slots[0].
 *
 * Returns 1 and changes nothing when there is no left sibling, the sibling
 * lacks room for data_size + sizeof(struct item), or not even the first
 * item fits.  Returns 0 after moving items; the parent key is refreshed via
 * fixup_low_keys() and the path follows the slot if it moved to the left
 * leaf.
 */
508int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
509 int data_size)
510{
eb60ceac
CM
511 struct tree_buffer *right_buf = path->nodes[0];
512 struct leaf *right = &right_buf->leaf;
513 struct tree_buffer *t;
be0e5c09
CM
514 struct leaf *left;
515 int slot;
516 int i;
517 int free_space;
518 int push_space = 0;
519 int push_items = 0;
520 struct item *item;
521 int old_left_nritems;
522
523 slot = path->slots[1];
524 if (slot == 0) {
525 return 1;
526 }
527 if (!path->nodes[1]) {
528 return 1;
529 }
eb60ceac
CM
530 t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
531 left = &t->leaf;
be0e5c09
CM
532 free_space = leaf_free_space(left);
/*
 * NOTE(review): free_space is an int compared against an expression
 * involving sizeof; if sizeof's type is wider than int the comparison is
 * done unsigned, so a negative free_space would not take this branch.
 */
533 if (free_space < data_size + sizeof(struct item)) {
eb60ceac 534 tree_block_release(root, t);
be0e5c09
CM
535 return 1;
536 }
/* count how many leading items of right fit into left's free space */
537 for (i = 0; i < right->header.nritems; i++) {
538 item = right->items + i;
539 if (path->slots[0] == i)
540 push_space += data_size + sizeof(*item);
541 if (item->size + sizeof(*item) + push_space > free_space)
542 break;
543 push_items++;
544 push_space += item->size + sizeof(*item);
545 }
546 if (push_items == 0) {
eb60ceac 547 tree_block_release(root, t);
be0e5c09
CM
548 return 1;
549 }
550 /* push data from right to left */
551 memcpy(left->items + left->header.nritems,
552 right->items, push_items * sizeof(struct item));
/* push_space now becomes the number of data bytes owned by the moved items */
553 push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
554 memcpy(left->data + leaf_data_end(left) - push_space,
555 right->data + right->items[push_items - 1].offset,
556 push_space);
557 old_left_nritems = left->header.nritems;
eb60ceac
CM
558 BUG_ON(old_left_nritems < 0);
559
/*
 * Rebase the copied items' offsets so they sit below left's existing data.
 * NOTE(review): this reads left->items[old_left_nritems - 1], which is
 * index -1 if the left leaf was empty; confirm that cannot happen.
 */
be0e5c09
CM
560 for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
561 left->items[i].offset -= LEAF_DATA_SIZE -
562 left->items[old_left_nritems -1].offset;
563 }
564 left->header.nritems += push_items;
565
566 /* fixup right node */
567 push_space = right->items[push_items-1].offset - leaf_data_end(right);
568 memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
569 leaf_data_end(right), push_space);
570 memmove(right->items, right->items + push_items,
571 (right->header.nritems - push_items) * sizeof(struct item));
572 right->header.nritems -= push_items;
573 push_space = LEAF_DATA_SIZE;
eb60ceac 574
/* recompute every remaining offset in right, packing from the end */
be0e5c09
CM
575 for (i = 0; i < right->header.nritems; i++) {
576 right->items[i].offset = push_space - right->items[i].size;
577 push_space = right->items[i].offset;
578 }
eb60ceac
CM
579
580 write_tree_block(root, t);
581 write_tree_block(root, right_buf);
582
583 fixup_low_keys(root, path, &right->items[0].key, 1);
be0e5c09
CM
584
585 /* then fixup the leaf pointer in the path */
586 if (path->slots[0] < push_items) {
587 path->slots[0] += old_left_nritems;
eb60ceac
CM
588 tree_block_release(root, path->nodes[0]);
589 path->nodes[0] = t;
be0e5c09
CM
590 path->slots[1] -= 1;
591 } else {
eb60ceac 592 tree_block_release(root, t);
be0e5c09
CM
593 path->slots[0] -= push_items;
594 }
eb60ceac 595 BUG_ON(path->slots[0] < 0);
be0e5c09
CM
596 return 0;
597}
598
74123bd7
CM
599/*
600 * split the path's leaf in two, making sure there is at least data_size
601 * available for the resulting leaf level of the path.
602 */
be0e5c09
CM
/*
 * split_leaf: make room in the path's leaf for an item carrying data_size
 * bytes of data.
 *
 * push_leaf_left() is tried first; if the leaf then has room, 0 is
 * returned.  Otherwise a new block is allocated, the items from index
 * 'mid' onwards (with their data) are copied into it, and the new leaf is
 * linked into level 1 with insert_ptr().  The path ends up on whichever
 * leaf contains the original slot.
 *
 * Returns the result of insert_ptr() in the split case.  BUG()s if the
 * half that will receive the new item cannot hold it.
 */
603int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
604{
eb60ceac
CM
605 struct tree_buffer *l_buf = path->nodes[0];
606 struct leaf *l = &l_buf->leaf;
607 int nritems;
608 int mid;
609 int slot;
be0e5c09 610 struct leaf *right;
eb60ceac 611 struct tree_buffer *right_buffer;
be0e5c09
CM
612 int space_needed = data_size + sizeof(struct item);
613 int data_copy_size;
614 int rt_data_off;
615 int i;
616 int ret;
617
618 if (push_leaf_left(root, path, data_size) == 0) {
eb60ceac
CM
/* the push may have switched the path to the left sibling */
619 l_buf = path->nodes[0];
620 l = &l_buf->leaf;
621 if (leaf_free_space(l) >= sizeof(struct item) + data_size)
622 return 0;
be0e5c09 623 }
eb60ceac
CM
624 slot = path->slots[0];
625 nritems = l->header.nritems;
626 mid = (nritems + 1)/ 2;
627
628 right_buffer = alloc_free_block(root);
629 BUG_ON(!right_buffer);
630 BUG_ON(mid == nritems);
631 right = &right_buffer->leaf;
be0e5c09
CM
632 memset(right, 0, sizeof(*right));
/* sanity check: the half that will take the new item must be able to hold it */
633 if (mid <= slot) {
634 if (leaf_space_used(l, mid, nritems - mid) + space_needed >
635 LEAF_DATA_SIZE)
636 BUG();
637 } else {
638 if (leaf_space_used(l, 0, mid + 1) + space_needed >
639 LEAF_DATA_SIZE)
640 BUG();
641 }
642 right->header.nritems = nritems - mid;
eb60ceac
CM
643 right->header.blocknr = right_buffer->blocknr;
644 right->header.flags = node_level(0);
be0e5c09
CM
/* bytes of item data belonging to items mid..nritems-1 */
645 data_copy_size = l->items[mid].offset + l->items[mid].size -
646 leaf_data_end(l);
647 memcpy(right->items, l->items + mid,
648 (nritems - mid) * sizeof(struct item));
649 memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
650 l->data + leaf_data_end(l), data_copy_size);
/* the copied data now ends at LEAF_DATA_SIZE, so shift the offsets to match */
651 rt_data_off = LEAF_DATA_SIZE -
652 (l->items[mid].offset + l->items[mid].size);
74123bd7
CM
653
654 for (i = 0; i < right->header.nritems; i++)
be0e5c09 655 right->items[i].offset += rt_data_off;
74123bd7 656
be0e5c09
CM
657 l->header.nritems = mid;
658 ret = insert_ptr(root, path, &right->items[0].key,
eb60ceac
CM
659 right_buffer->blocknr, 1);
660
661 write_tree_block(root, right_buffer);
662 write_tree_block(root, l_buf);
663
664 BUG_ON(path->slots[0] != slot);
be0e5c09 665 if (mid <= slot) {
eb60ceac
CM
/* the target slot lives in the new right leaf: move the path over */
666 tree_block_release(root, path->nodes[0]);
667 path->nodes[0] = right_buffer;
be0e5c09
CM
668 path->slots[0] -= mid;
669 path->slots[1] += 1;
eb60ceac
CM
670 } else
671 tree_block_release(root, right_buffer);
672 BUG_ON(path->slots[0] < 0);
be0e5c09
CM
673 return ret;
674}
675
74123bd7
CM
676/*
677 * Given a key and some data, insert an item into the tree.
678 * This does all the path init required, making room in the tree if needed.
679 */
be0e5c09
CM
/*
 * insert_item: insert a new item with the given key and data_size bytes
 * copied from 'data'.
 *
 * A root block is allocated first if root->node is not set.  The key is
 * looked up with search_slot(); if it already exists the path is released
 * and -EEXIST is returned.  Otherwise split_leaf() is called when the leaf
 * lacks room, the item header and data are shifted to open the slot, the
 * new item is written, the path is released and refill_alloc_extent() is
 * called.
 *
 * Returns 0 on success, -EEXIST for a duplicate key.  BUG()s if the leaf
 * still has no room after split_leaf().
 */
680int insert_item(struct ctree_root *root, struct key *key,
681 void *data, int data_size)
682{
683 int ret;
684 int slot;
eb60ceac 685 int slot_orig;
be0e5c09 686 struct leaf *leaf;
eb60ceac 687 struct tree_buffer *leaf_buf;
be0e5c09
CM
688 unsigned int nritems;
689 unsigned int data_end;
690 struct ctree_path path;
691
74123bd7 692 /* create a root if there isn't one */
eb60ceac
CM
693 if (!root->node) {
694 struct tree_buffer *t;
695 t = alloc_free_block(root);
696 BUG_ON(!t);
697 t->node.header.nritems = 0;
698 t->node.header.flags = node_level(0);
699 t->node.header.blocknr = t->blocknr;
700 root->node = t;
701 write_tree_block(root, t);
702 }
be0e5c09
CM
703 init_path(&path);
704 ret = search_slot(root, key, &path);
eb60ceac
CM
705 if (ret == 0) {
706 release_path(root, &path);
be0e5c09 707 return -EEXIST;
eb60ceac 708 }
be0e5c09 709
/* slot_orig is assigned here but not read again in this function */
eb60ceac
CM
710 slot_orig = path.slots[0];
711 leaf_buf = path.nodes[0];
712 leaf = &leaf_buf->leaf;
74123bd7
CM
713
714 /* make room if needed */
eb60ceac 715 if (leaf_free_space(leaf) < sizeof(struct item) + data_size) {
/* NOTE(review): split_leaf()'s return value is ignored; the BUG() below catches lack of room */
be0e5c09 716 split_leaf(root, &path, data_size);
eb60ceac
CM
717 leaf_buf = path.nodes[0];
718 leaf = &path.nodes[0]->leaf;
719 }
be0e5c09
CM
720 nritems = leaf->header.nritems;
721 data_end = leaf_data_end(leaf);
eb60ceac 722
be0e5c09
CM
723 if (leaf_free_space(leaf) < sizeof(struct item) + data_size)
724 BUG();
725
726 slot = path.slots[0];
eb60ceac 727 BUG_ON(slot < 0);
/* a new first item changes the key the parents hold for this leaf */
be0e5c09 728 if (slot == 0)
eb60ceac 729 fixup_low_keys(root, &path, key, 1);
be0e5c09
CM
730 if (slot != nritems) {
731 int i;
732 unsigned int old_data = leaf->items[slot].offset +
733 leaf->items[slot].size;
734
735 /*
736 * item0..itemN ... dataN.offset..dataN.size .. data0.size
737 */
738 /* first correct the data pointers */
739 for (i = slot; i < nritems; i++)
740 leaf->items[i].offset -= data_size;
741
742 /* shift the items */
743 memmove(leaf->items + slot + 1, leaf->items + slot,
744 (nritems - slot) * sizeof(struct item));
745
746 /* shift the data */
747 memmove(leaf->data + data_end - data_size, leaf->data +
748 data_end, old_data - data_end);
/* the new item's data goes where the shifted data used to end */
749 data_end = old_data;
750 }
74123bd7 751 /* copy the new data in */
be0e5c09
CM
752 memcpy(&leaf->items[slot].key, key, sizeof(struct key));
753 leaf->items[slot].offset = data_end - data_size;
754 leaf->items[slot].size = data_size;
755 memcpy(leaf->data + data_end - data_size, data, data_size);
756 leaf->header.nritems += 1;
eb60ceac 757 write_tree_block(root, leaf_buf);
be0e5c09
CM
758 if (leaf_free_space(leaf) < 0)
759 BUG();
eb60ceac 760 release_path(root, &path);
/* NOTE(review): the return value of refill_alloc_extent() is discarded */
d97e63b6 761 refill_alloc_extent(root);
be0e5c09
CM
762 return 0;
763}
764
74123bd7
CM
765/*
766 * delete the pointer from a given level in the path. The path is not
767 * fixed up, so after calling this it is not valid at that level.
768 *
769 * If the delete empties a node, the node is removed from the tree,
770 * continuing all the way to the root if required. The root is converted into
771 * a leaf if all the nodes are emptied.
772 */
be0e5c09
CM
/*
 * del_ptr: remove the key/pointer at path->slots[level] from the node at
 * path->nodes[level].
 *
 * If the node still has entries afterwards, its parent keys are fixed up
 * when slot 0 was removed, and push_node_left()/push_node_right() are
 * tried to merge it into its siblings.  If the node ends up empty, the
 * loop moves one level up and removes the pointer to it as well; an empty
 * root is simply re-flagged as a leaf (level 0).
 *
 * Always returns 0.  BUG()s if a non-root node empties and the path has no
 * parent for it.
 */
773int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
774{
775 int slot;
eb60ceac 776 struct tree_buffer *t;
be0e5c09
CM
777 struct node *node;
778 int nritems;
779
780 while(1) {
eb60ceac
CM
781 t = path->nodes[level];
782 if (!t)
be0e5c09 783 break;
eb60ceac 784 node = &t->node;
be0e5c09
CM
785 slot = path->slots[level];
786 nritems = node->header.nritems;
787
788 if (slot != nritems -1) {
789 memmove(node->keys + slot, node->keys + slot + 1,
790 sizeof(struct key) * (nritems - slot - 1));
791 memmove(node->blockptrs + slot,
792 node->blockptrs + slot + 1,
793 sizeof(u64) * (nritems - slot - 1));
794 }
795 node->header.nritems--;
eb60ceac 796 write_tree_block(root, t);
be0e5c09
CM
797 if (node->header.nritems != 0) {
798 int tslot;
799 if (slot == 0)
eb60ceac
CM
800 fixup_low_keys(root, path, node->keys,
801 level + 1);
/*
 * Remember the parent slot: the push helpers may move the path to a
 * sibling, and it is restored below if this node has to be deleted.
 * NOTE(review): level + 1 is used as an index without a bound check
 * against MAX_LEVEL here.
 */
be0e5c09 802 tslot = path->slots[level+1];
/* extra count on t, dropped again by the tree_block_release() calls below */
eb60ceac 803 t->count++;
be0e5c09
CM
804 push_node_left(root, path, level);
805 if (node->header.nritems) {
806 push_node_right(root, path, level);
807 }
eb60ceac
CM
808 if (node->header.nritems) {
809 tree_block_release(root, t);
be0e5c09 810 break;
eb60ceac
CM
811 }
812 tree_block_release(root, t);
4920c9ac 813 path->slots[level+1] = tslot;
be0e5c09 814 }
eb60ceac
CM
815 if (t == root->node) {
816 /* just turn the root into a leaf and break */
817 root->node->node.header.flags = node_level(0);
818 write_tree_block(root, t);
be0e5c09
CM
819 break;
820 }
821 level++;
822 if (!path->nodes[level])
823 BUG();
be0e5c09
CM
824 }
825 return 0;
826}
827
74123bd7
CM
828/*
829 * delete the item at the leaf level in path. If that empties
830 * the leaf, remove it from the tree
831 */
/*
 * del_item: remove the item at path->slots[0] from the leaf at
 * path->nodes[0].
 *
 * Unless the item is the last one, the data of the following items is
 * moved up by the deleted item's size, their offsets are adjusted and the
 * item headers are shifted down.  If the leaf becomes empty its pointer is
 * removed from the parent with del_ptr() (a root leaf is just rewritten).
 * If it falls below a quarter of LEAF_DATA_SIZE, push_leaf_left() is tried
 * and the leaf is deleted if that emptied it.
 *
 * Always returns 0.
 */
4920c9ac 832int del_item(struct ctree_root *root, struct ctree_path *path)
be0e5c09 833{
be0e5c09
CM
834 int slot;
835 struct leaf *leaf;
eb60ceac 836 struct tree_buffer *leaf_buf;
be0e5c09
CM
837 int doff;
838 int dsize;
839
eb60ceac
CM
840 leaf_buf = path->nodes[0];
841 leaf = &leaf_buf->leaf;
4920c9ac 842 slot = path->slots[0];
be0e5c09
CM
843 doff = leaf->items[slot].offset;
844 dsize = leaf->items[slot].size;
845
846 if (slot != leaf->header.nritems - 1) {
847 int i;
848 int data_end = leaf_data_end(leaf);
/* slide the data of the later items up over the deleted item's data */
849 memmove(leaf->data + data_end + dsize,
850 leaf->data + data_end,
851 doff - data_end);
852 for (i = slot + 1; i < leaf->header.nritems; i++)
853 leaf->items[i].offset += dsize;
854 memmove(leaf->items + slot, leaf->items + slot + 1,
855 sizeof(struct item) *
856 (leaf->header.nritems - slot - 1));
857 }
858 leaf->header.nritems -= 1;
74123bd7 859 /* delete the leaf if we've emptied it */
be0e5c09 860 if (leaf->header.nritems == 0) {
eb60ceac
CM
861 if (leaf_buf == root->node) {
862 leaf->header.flags = node_level(0);
863 write_tree_block(root, leaf_buf);
864 } else
4920c9ac 865 del_ptr(root, path, 1);
be0e5c09
CM
866 } else {
867 if (slot == 0)
eb60ceac
CM
868 fixup_low_keys(root, path, &leaf->items[0].key, 1);
869 write_tree_block(root, leaf_buf);
74123bd7 870 /* delete the leaf if it is mostly empty */
be0e5c09
CM
871 if (leaf_space_used(leaf, 0, leaf->header.nritems) <
872 LEAF_DATA_SIZE / 4) {
873 /* push_leaf_left fixes the path.
874 * make sure the path still points to our leaf
875 * for possible call to del_ptr below
876 */
4920c9ac 877 slot = path->slots[1];
/* extra count on the leaf, dropped by the tree_block_release() below */
eb60ceac 878 leaf_buf->count++;
4920c9ac 879 push_leaf_left(root, path, 1);
be0e5c09 880 if (leaf->header.nritems == 0) {
4920c9ac
CM
881 path->slots[1] = slot;
882 del_ptr(root, path, 1);
be0e5c09 883 }
eb60ceac 884 tree_block_release(root, leaf_buf);
be0e5c09
CM
885 }
886 }
887 return 0;
888}
889
d97e63b6
CM
890int next_leaf(struct ctree_root *root, struct ctree_path *path)
891{
892 int slot;
893 int level = 1;
894 u64 blocknr;
895 struct tree_buffer *c;
896 struct tree_buffer *next;
897
898 while(level < MAX_LEVEL) {
899 if (!path->nodes[level])
900 return -1;
901 slot = path->slots[level] + 1;
902 c = path->nodes[level];
903 if (slot >= c->node.header.nritems) {
904 level++;
905 continue;
906 }
907 blocknr = c->node.blockptrs[slot];
908 next = read_tree_block(root, blocknr);
909 break;
910 }
911 path->slots[level] = slot;
912 while(1) {
913 level--;
914 c = path->nodes[level];
915 tree_block_release(root, c);
916 path->nodes[level] = next;
917 path->slots[level] = 0;
918 if (!level)
919 break;
920 next = read_tree_block(root, next->node.blockptrs[0]);
921 }
922 return 0;
923}
924
925int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start,
926 u64 search_end, u64 owner, struct key *ins)
927{
928 struct ctree_path path;
929 struct key *key;
930 int ret;
931 u64 hole_size = 0;
932 int slot = 0;
933 u64 last_block;
934 int start_found = 0;
935 struct leaf *l;
936 struct extent_item extent_item;
937
938 init_path(&path);
939 ins->objectid = search_start;
940 ins->offset = 0;
941 ins->flags = 0;
942
943 ret = search_slot(root, ins, &path);
944 while (1) {
945 l = &path.nodes[0]->leaf;
946 slot = path.slots[0];
947 if (!l) {
948 // FIXME allocate root
949 }
950 if (slot >= l->header.nritems) {
951 ret = next_leaf(root, &path);
952 if (ret == 0)
953 continue;
954 if (!start_found) {
955 ins->objectid = search_start;
956 ins->offset = num_blocks;
957 hole_size = search_end - search_start;
958 goto insert;
959 }
960 ins->objectid = last_block;
961 ins->offset = num_blocks;
962 hole_size = search_end - last_block;
963 goto insert;
964 }
965 key = &l->items[slot].key;
966 if (start_found) {
967 hole_size = key->objectid - last_block;
968 if (hole_size > num_blocks) {
969 ins->objectid = last_block;
970 ins->offset = num_blocks;
971 goto insert;
972 }
973 } else
974 start_found = 1;
975 last_block = key->objectid + key->offset;
976 path.slots[0]++;
977 printf("last block is not %lu\n", last_block);
978 }
979 // FIXME -ENOSPC
980insert:
981 extent_item.refs = 1;
982 extent_item.owner = owner;
983 ret = insert_item(root, ins, &extent_item, sizeof(extent_item));
984 return ret;
985}
986
987static int refill_alloc_extent(struct ctree_root *root)
988{
989 struct alloc_extent *ae = root->alloc_extent;
990 struct key key;
991 int ret;
992 int min_blocks = MAX_LEVEL * 2;
993
994 printf("refill alloc root %p, numused %lu total %lu\n", root, ae->num_used, ae->num_blocks);
995 if (ae->num_blocks > ae->num_used && ae->num_blocks - ae->num_used >
996 min_blocks)
997 return 0;
998 ae = root->reserve_extent;
999 if (ae->num_blocks > ae->num_used) {
1000 if (root->alloc_extent->num_blocks == 0) {
1001 /* we should swap reserve/alloc_extent when alloc
1002 * fills up
1003 */
1004 BUG();
1005 }
1006 if (ae->num_blocks - ae->num_used < min_blocks)
1007 BUG();
1008 return 0;
1009 }
1010 // FIXME, this recurses
1011 ret = alloc_extent(root->extent_root,
1012 min_blocks * 2, 0, (unsigned long)-1, 0, &key);
1013 ae->blocknr = key.objectid;
1014 ae->num_blocks = key.offset;
1015 ae->num_used = 0;
1016 return ret;
1017}
1018
be0e5c09
CM
1019void print_leaf(struct leaf *l)
1020{
1021 int i;
1022 int nr = l->header.nritems;
1023 struct item *item;
eb60ceac 1024 printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr,
be0e5c09
CM
1025 leaf_free_space(l));
1026 fflush(stdout);
1027 for (i = 0 ; i < nr ; i++) {
1028 item = l->items + i;
1029 printf("\titem %d key (%lu %u %lu) itemoff %d itemsize %d\n",
1030 i,
1031 item->key.objectid, item->key.flags, item->key.offset,
1032 item->offset, item->size);
1033 fflush(stdout);
1034 printf("\t\titem data %.*s\n", item->size, l->data+item->offset);
1035 fflush(stdout);
1036 }
1037}
eb60ceac 1038void print_tree(struct ctree_root *root, struct tree_buffer *t)
be0e5c09
CM
1039{
1040 int i;
1041 int nr;
eb60ceac 1042 struct node *c;
be0e5c09 1043
eb60ceac 1044 if (!t)
be0e5c09 1045 return;
eb60ceac 1046 c = &t->node;
be0e5c09 1047 nr = c->header.nritems;
eb60ceac
CM
1048 if (c->header.blocknr != t->blocknr)
1049 BUG();
be0e5c09
CM
1050 if (is_leaf(c->header.flags)) {
1051 print_leaf((struct leaf *)c);
1052 return;
1053 }
eb60ceac 1054 printf("node %lu level %d total ptrs %d free spc %lu\n", t->blocknr,
be0e5c09
CM
1055 node_level(c->header.flags), c->header.nritems,
1056 NODEPTRS_PER_BLOCK - c->header.nritems);
1057 fflush(stdout);
1058 for (i = 0; i < nr; i++) {
eb60ceac 1059 printf("\tkey %d (%lu %u %lu) block %lu\n",
be0e5c09
CM
1060 i,
1061 c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset,
1062 c->blockptrs[i]);
1063 fflush(stdout);
1064 }
1065 for (i = 0; i < nr; i++) {
eb60ceac
CM
1066 struct tree_buffer *next_buf = read_tree_block(root,
1067 c->blockptrs[i]);
1068 struct node *next = &next_buf->node;
be0e5c09
CM
1069 if (is_leaf(next->header.flags) &&
1070 node_level(c->header.flags) != 1)
1071 BUG();
1072 if (node_level(next->header.flags) !=
1073 node_level(c->header.flags) - 1)
1074 BUG();
eb60ceac
CM
1075 print_tree(root, next_buf);
1076 tree_block_release(root, next_buf);
be0e5c09
CM
1077 }
1078
1079}
1080
/*
 * for testing only
 *
 * Produces the key to use for iteration i.  Keys are currently handed
 * out sequentially, so the result is simply i; max_key is only consulted
 * by the disabled random variant below.
 */
int next_key(int i, int max_key)
{
	int key = i;

	// key = rand() % max_key;
	return key;
}
1086
1087int main() {
eb60ceac 1088 struct ctree_root *root;
be0e5c09 1089 struct key ins;
4920c9ac 1090 struct key last = { (u64)-1, 0, 0};
be0e5c09
CM
1091 char *buf;
1092 int i;
1093 int num;
1094 int ret;
d97e63b6 1095 int run_size = 256;
be0e5c09
CM
1096 int max_key = 100000000;
1097 int tree_size = 0;
1098 struct ctree_path path;
1099
eb60ceac
CM
1100 radix_tree_init();
1101
1102
1103 root = open_ctree("dbfile");
be0e5c09
CM
1104
1105 srand(55);
be0e5c09
CM
1106 for (i = 0; i < run_size; i++) {
1107 buf = malloc(64);
1108 num = next_key(i, max_key);
1109 // num = i;
1110 sprintf(buf, "string-%d", num);
1111 // printf("insert %d\n", num);
1112 ins.objectid = num;
1113 ins.offset = 0;
1114 ins.flags = 0;
d97e63b6 1115 printf("insert %d\n", i);
eb60ceac 1116 ret = insert_item(root, &ins, buf, strlen(buf));
be0e5c09
CM
1117 if (!ret)
1118 tree_size++;
d97e63b6 1119 printf("done insert %d\n", i);
be0e5c09 1120 }
d97e63b6
CM
1121 printf("root used: %lu\n", root->alloc_extent->num_used);
1122 printf("root tree\n");
1123 print_tree(root, root->node);
1124 printf("map tree\n");
1125 printf("map used: %lu\n", root->extent_root->alloc_extent->num_used);
1126 print_tree(root->extent_root, root->extent_root->node);
1127 exit(1);
1128
eb60ceac
CM
1129 close_ctree(root);
1130 root = open_ctree("dbfile");
1131 printf("starting search\n");
be0e5c09
CM
1132 srand(55);
1133 for (i = 0; i < run_size; i++) {
1134 num = next_key(i, max_key);
1135 ins.objectid = num;
be0e5c09 1136 init_path(&path);
eb60ceac 1137 ret = search_slot(root, &ins, &path);
be0e5c09 1138 if (ret) {
eb60ceac 1139 print_tree(root, root->node);
be0e5c09
CM
1140 printf("unable to find %d\n", num);
1141 exit(1);
1142 }
eb60ceac
CM
1143 release_path(root, &path);
1144 }
1145 close_ctree(root);
1146 root = open_ctree("dbfile");
1147 printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
1148 node_level(root->node->node.header.flags),
1149 root->node->node.header.nritems,
1150 NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
1151 printf("all searches good, deleting some items\n");
be0e5c09
CM
1152 i = 0;
1153 srand(55);
4920c9ac
CM
1154 for (i = 0 ; i < run_size/4; i++) {
1155 num = next_key(i, max_key);
1156 ins.objectid = num;
1157 init_path(&path);
eb60ceac 1158 ret = search_slot(root, &ins, &path);
4920c9ac
CM
1159 if (ret)
1160 continue;
eb60ceac 1161 ret = del_item(root, &path);
4920c9ac
CM
1162 if (ret != 0)
1163 BUG();
eb60ceac 1164 release_path(root, &path);
4920c9ac
CM
1165 tree_size--;
1166 }
1167 srand(128);
be0e5c09 1168 for (i = 0; i < run_size; i++) {
4920c9ac 1169 buf = malloc(64);
be0e5c09 1170 num = next_key(i, max_key);
4920c9ac 1171 sprintf(buf, "string-%d", num);
be0e5c09 1172 ins.objectid = num;
eb60ceac 1173 ret = insert_item(root, &ins, buf, strlen(buf));
4920c9ac
CM
1174 if (!ret)
1175 tree_size++;
1176 }
eb60ceac
CM
1177 close_ctree(root);
1178 root = open_ctree("dbfile");
1179 printf("starting search2\n");
1180 srand(128);
1181 for (i = 0; i < run_size; i++) {
1182 num = next_key(i, max_key);
1183 ins.objectid = num;
1184 init_path(&path);
1185 ret = search_slot(root, &ins, &path);
1186 if (ret) {
1187 print_tree(root, root->node);
1188 printf("unable to find %d\n", num);
1189 exit(1);
1190 }
1191 release_path(root, &path);
1192 }
1193 printf("starting big long delete run\n");
1194 while(root->node && root->node->node.header.nritems > 0) {
4920c9ac
CM
1195 struct leaf *leaf;
1196 int slot;
1197 ins.objectid = (u64)-1;
1198 init_path(&path);
eb60ceac 1199 ret = search_slot(root, &ins, &path);
4920c9ac
CM
1200 if (ret == 0)
1201 BUG();
1202
eb60ceac 1203 leaf = &path.nodes[0]->leaf;
4920c9ac
CM
1204 slot = path.slots[0];
1205 if (slot != leaf->header.nritems)
1206 BUG();
1207 while(path.slots[0] > 0) {
1208 path.slots[0] -= 1;
1209 slot = path.slots[0];
eb60ceac 1210 leaf = &path.nodes[0]->leaf;
4920c9ac
CM
1211
1212 if (comp_keys(&last, &leaf->items[slot].key) <= 0)
1213 BUG();
1214 memcpy(&last, &leaf->items[slot].key, sizeof(last));
eb60ceac
CM
1215 ret = del_item(root, &path);
1216 if (ret != 0) {
1217 printf("del_item returned %d\n", ret);
4920c9ac 1218 BUG();
eb60ceac 1219 }
4920c9ac
CM
1220 tree_size--;
1221 }
eb60ceac 1222 release_path(root, &path);
be0e5c09 1223 }
eb60ceac 1224 close_ctree(root);
4920c9ac 1225 printf("tree size is now %d\n", tree_size);
be0e5c09
CM
1226 return 0;
1227}