Btrfs: properly set new buffers for new blocks up to date
[linux-block.git] / fs / btrfs / ctree.c
CommitLineData
2e635a27 1#include <linux/module.h>
eb60ceac
CM
2#include "ctree.h"
3#include "disk-io.h"
9a8dd150 4
e089f05c
CM
/*
 * Forward declarations for helpers that are used before their
 * definitions appear in this file.
 */
static int split_node(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct btrfs_path *path,
		      int level);
static int split_leaf(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct btrfs_path *path,
		      int data_size);
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root,
			  struct buffer_head *dst,
			  struct buffer_head *src);
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct buffer_head *dst_buf,
			      struct buffer_head *src_buf);
static int del_ptr(struct btrfs_trans_handle *trans,
		   struct btrfs_root *root,
		   struct btrfs_path *path,
		   int level,
		   int slot);
d97e63b6 17
234b63a0 18inline void btrfs_init_path(struct btrfs_path *p)
be0e5c09
CM
19{
20 memset(p, 0, sizeof(*p));
21}
22
234b63a0 23void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
eb60ceac
CM
24{
25 int i;
234b63a0 26 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
eb60ceac
CM
27 if (!p->nodes[i])
28 break;
234b63a0 29 btrfs_block_release(root, p->nodes[i]);
eb60ceac 30 }
aa5d6bed 31 memset(p, 0, sizeof(*p));
eb60ceac
CM
32}
33
e089f05c 34static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
e20d96d6
CM
35 *root, struct buffer_head *buf, struct buffer_head
36 *parent, int parent_slot, struct buffer_head
e089f05c 37 **cow_ret)
02217ed2 38{
e20d96d6
CM
39 struct buffer_head *cow;
40 struct btrfs_node *cow_node;
02217ed2 41
d5719762 42 if (buffer_dirty(buf)) {
02217ed2
CM
43 *cow_ret = buf;
44 return 0;
45 }
e089f05c 46 cow = btrfs_alloc_free_block(trans, root);
e20d96d6
CM
47 cow_node = btrfs_buffer_node(cow);
48 memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize);
49 btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr);
02217ed2 50 *cow_ret = cow;
d5719762 51 mark_buffer_dirty(cow);
e089f05c 52 btrfs_inc_ref(trans, root, buf);
02217ed2
CM
53 if (buf == root->node) {
54 root->node = cow;
e20d96d6 55 get_bh(cow);
a28ec197 56 if (buf != root->commit_root)
e20d96d6 57 btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1);
234b63a0 58 btrfs_block_release(root, buf);
02217ed2 59 } else {
e20d96d6
CM
60 btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot,
61 cow->b_blocknr);
d5719762 62 mark_buffer_dirty(parent);
e20d96d6 63 btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1);
02217ed2 64 }
234b63a0 65 btrfs_block_release(root, buf);
02217ed2
CM
66 return 0;
67}
68
74123bd7
CM
69/*
70 * The leaf data grows from end-to-front in the node.
71 * this returns the address of the start of the last item,
72 * which is the stop of the leaf data stack
73 */
123abc88
CM
74static inline unsigned int leaf_data_end(struct btrfs_root *root,
75 struct btrfs_leaf *leaf)
be0e5c09 76{
7518a238 77 u32 nr = btrfs_header_nritems(&leaf->header);
be0e5c09 78 if (nr == 0)
123abc88 79 return BTRFS_LEAF_DATA_SIZE(root);
0783fcfc 80 return btrfs_item_offset(leaf->items + nr - 1);
be0e5c09
CM
81}
82
74123bd7
CM
83/*
84 * The space between the end of the leaf items and
85 * the start of the leaf data. IOW, how much room
86 * the leaf has left for both items and data
87 */
123abc88 88int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf)
be0e5c09 89{
123abc88 90 int data_end = leaf_data_end(root, leaf);
7518a238 91 int nritems = btrfs_header_nritems(&leaf->header);
be0e5c09 92 char *items_end = (char *)(leaf->items + nritems + 1);
123abc88 93 return (char *)(btrfs_leaf_data(leaf) + data_end) - (char *)items_end;
be0e5c09
CM
94}
95
74123bd7
CM
96/*
97 * compare two keys in a memcmp fashion
98 */
9aca1d51 99static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
be0e5c09 100{
e2fa7227
CM
101 struct btrfs_key k1;
102
103 btrfs_disk_key_to_cpu(&k1, disk);
104
105 if (k1.objectid > k2->objectid)
be0e5c09 106 return 1;
e2fa7227 107 if (k1.objectid < k2->objectid)
be0e5c09 108 return -1;
62e2749e
CM
109 if (k1.flags > k2->flags)
110 return 1;
111 if (k1.flags < k2->flags)
112 return -1;
a8a2ee0c
CM
113 if (k1.offset > k2->offset)
114 return 1;
115 if (k1.offset < k2->offset)
116 return -1;
be0e5c09
CM
117 return 0;
118}
74123bd7 119
123abc88
CM
120static int check_node(struct btrfs_root *root, struct btrfs_path *path,
121 int level)
aa5d6bed
CM
122{
123 int i;
234b63a0 124 struct btrfs_node *parent = NULL;
e20d96d6 125 struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]);
aa5d6bed 126 int parent_slot;
7518a238 127 u32 nritems = btrfs_header_nritems(&node->header);
aa5d6bed
CM
128
129 if (path->nodes[level + 1])
e20d96d6 130 parent = btrfs_buffer_node(path->nodes[level + 1]);
aa5d6bed 131 parent_slot = path->slots[level + 1];
7518a238
CM
132 BUG_ON(nritems == 0);
133 if (parent) {
e2fa7227 134 struct btrfs_disk_key *parent_key;
123abc88
CM
135 parent_key = &parent->ptrs[parent_slot].key;
136 BUG_ON(memcmp(parent_key, &node->ptrs[0].key,
e2fa7227 137 sizeof(struct btrfs_disk_key)));
1d4f8a0c 138 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
7518a238 139 btrfs_header_blocknr(&node->header));
aa5d6bed 140 }
123abc88 141 BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
7518a238 142 for (i = 0; nritems > 1 && i < nritems - 2; i++) {
e2fa7227 143 struct btrfs_key cpukey;
123abc88
CM
144 btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key);
145 BUG_ON(comp_keys(&node->ptrs[i].key, &cpukey) >= 0);
aa5d6bed
CM
146 }
147 return 0;
148}
149
123abc88
CM
150static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
151 int level)
aa5d6bed
CM
152{
153 int i;
e20d96d6 154 struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]);
234b63a0 155 struct btrfs_node *parent = NULL;
aa5d6bed 156 int parent_slot;
7518a238 157 u32 nritems = btrfs_header_nritems(&leaf->header);
aa5d6bed
CM
158
159 if (path->nodes[level + 1])
e20d96d6 160 parent = btrfs_buffer_node(path->nodes[level + 1]);
aa5d6bed 161 parent_slot = path->slots[level + 1];
123abc88 162 BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
7518a238
CM
163
164 if (nritems == 0)
165 return 0;
166
167 if (parent) {
e2fa7227 168 struct btrfs_disk_key *parent_key;
123abc88 169 parent_key = &parent->ptrs[parent_slot].key;
aa5d6bed 170 BUG_ON(memcmp(parent_key, &leaf->items[0].key,
e2fa7227 171 sizeof(struct btrfs_disk_key)));
1d4f8a0c 172 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
7518a238 173 btrfs_header_blocknr(&leaf->header));
aa5d6bed 174 }
7518a238 175 for (i = 0; nritems > 1 && i < nritems - 2; i++) {
e2fa7227
CM
176 struct btrfs_key cpukey;
177 btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key);
aa5d6bed 178 BUG_ON(comp_keys(&leaf->items[i].key,
e2fa7227 179 &cpukey) >= 0);
0783fcfc
CM
180 BUG_ON(btrfs_item_offset(leaf->items + i) !=
181 btrfs_item_end(leaf->items + i + 1));
aa5d6bed 182 if (i == 0) {
0783fcfc
CM
183 BUG_ON(btrfs_item_offset(leaf->items + i) +
184 btrfs_item_size(leaf->items + i) !=
123abc88 185 BTRFS_LEAF_DATA_SIZE(root));
aa5d6bed
CM
186 }
187 }
aa5d6bed
CM
188 return 0;
189}
190
123abc88
CM
/*
 * Run the sanity checker that matches the level: check_leaf() for
 * level 0, check_node() for everything above it.
 */
static int check_block(struct btrfs_root *root, struct btrfs_path *path,
		       int level)
{
	return level == 0 ? check_leaf(root, path, level)
			  : check_node(root, path, level);
}
198
74123bd7
CM
/*
 * Binary search for key in the array starting at p.
 *
 * The array holds 'max' entries that are item_size bytes apart, and
 * each entry begins with a struct btrfs_disk_key.
 *
 * Returns 0 with *slot set to the matching index when the key is
 * found.  Otherwise returns 1 with *slot set to the index where the
 * key would be inserted; that index equals max when the key is larger
 * than every entry.
 */
static int generic_bin_search(char *p, int item_size, struct btrfs_key *key,
			      int max, int *slot)
{
	int lo = 0;
	int hi = max;

	while (lo < hi) {
		int probe = (lo + hi) / 2;
		struct btrfs_disk_key *cur =
			(struct btrfs_disk_key *)(p + probe * item_size);
		int cmp = comp_keys(cur, key);

		if (cmp == 0) {
			*slot = probe;
			return 0;
		}
		if (cmp < 0)
			lo = probe + 1;
		else
			hi = probe;
	}
	*slot = lo;
	return 1;
}
234
97571fd0
CM
235/*
236 * simple bin_search frontend that does the right thing for
237 * leaves vs nodes
238 */
9aca1d51 239static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot)
be0e5c09 240{
7518a238 241 if (btrfs_is_leaf(c)) {
234b63a0 242 struct btrfs_leaf *l = (struct btrfs_leaf *)c;
0783fcfc
CM
243 return generic_bin_search((void *)l->items,
244 sizeof(struct btrfs_item),
7518a238
CM
245 key, btrfs_header_nritems(&c->header),
246 slot);
be0e5c09 247 } else {
123abc88
CM
248 return generic_bin_search((void *)c->ptrs,
249 sizeof(struct btrfs_key_ptr),
7518a238
CM
250 key, btrfs_header_nritems(&c->header),
251 slot);
be0e5c09
CM
252 }
253 return -1;
254}
255
e20d96d6
CM
256static struct buffer_head *read_node_slot(struct btrfs_root *root,
257 struct buffer_head *parent_buf,
bb803951
CM
258 int slot)
259{
e20d96d6 260 struct btrfs_node *node = btrfs_buffer_node(parent_buf);
bb803951
CM
261 if (slot < 0)
262 return NULL;
7518a238 263 if (slot >= btrfs_header_nritems(&node->header))
bb803951 264 return NULL;
1d4f8a0c 265 return read_tree_block(root, btrfs_node_blockptr(node, slot));
bb803951
CM
266}
267
e089f05c
CM
268static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
269 *root, struct btrfs_path *path, int level)
bb803951 270{
e20d96d6
CM
271 struct buffer_head *right_buf;
272 struct buffer_head *mid_buf;
273 struct buffer_head *left_buf;
274 struct buffer_head *parent_buf = NULL;
234b63a0
CM
275 struct btrfs_node *right = NULL;
276 struct btrfs_node *mid;
277 struct btrfs_node *left = NULL;
278 struct btrfs_node *parent = NULL;
bb803951
CM
279 int ret = 0;
280 int wret;
281 int pslot;
bb803951 282 int orig_slot = path->slots[level];
79f95c82 283 u64 orig_ptr;
bb803951
CM
284
285 if (level == 0)
286 return 0;
287
288 mid_buf = path->nodes[level];
e20d96d6 289 mid = btrfs_buffer_node(mid_buf);
1d4f8a0c 290 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
79f95c82 291
234b63a0 292 if (level < BTRFS_MAX_LEVEL - 1)
bb803951
CM
293 parent_buf = path->nodes[level + 1];
294 pslot = path->slots[level + 1];
295
40689478
CM
296 /*
297 * deal with the case where there is only one pointer in the root
298 * by promoting the node below to a root
299 */
bb803951 300 if (!parent_buf) {
e20d96d6
CM
301 struct buffer_head *child;
302 u64 blocknr = mid_buf->b_blocknr;
bb803951 303
7518a238 304 if (btrfs_header_nritems(&mid->header) != 1)
bb803951
CM
305 return 0;
306
307 /* promote the child to a root */
308 child = read_node_slot(root, mid_buf, 0);
309 BUG_ON(!child);
310 root->node = child;
311 path->nodes[level] = NULL;
312 /* once for the path */
234b63a0 313 btrfs_block_release(root, mid_buf);
bb803951 314 /* once for the root ptr */
234b63a0 315 btrfs_block_release(root, mid_buf);
e089f05c
CM
316 clean_tree_block(trans, root, mid_buf);
317 return btrfs_free_extent(trans, root, blocknr, 1, 1);
bb803951 318 }
e20d96d6 319 parent = btrfs_buffer_node(parent_buf);
bb803951 320
123abc88
CM
321 if (btrfs_header_nritems(&mid->header) >
322 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
bb803951
CM
323 return 0;
324
bb803951
CM
325 left_buf = read_node_slot(root, parent_buf, pslot - 1);
326 right_buf = read_node_slot(root, parent_buf, pslot + 1);
79f95c82
CM
327
328 /* first, try to make some room in the middle buffer */
bb803951 329 if (left_buf) {
e089f05c
CM
330 btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1,
331 &left_buf);
e20d96d6 332 left = btrfs_buffer_node(left_buf);
7518a238 333 orig_slot += btrfs_header_nritems(&left->header);
e089f05c 334 wret = push_node_left(trans, root, left_buf, mid_buf);
79f95c82
CM
335 if (wret < 0)
336 ret = wret;
bb803951 337 }
79f95c82
CM
338
339 /*
340 * then try to empty the right most buffer into the middle
341 */
bb803951 342 if (right_buf) {
e089f05c
CM
343 btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1,
344 &right_buf);
e20d96d6 345 right = btrfs_buffer_node(right_buf);
e089f05c 346 wret = push_node_left(trans, root, mid_buf, right_buf);
79f95c82
CM
347 if (wret < 0)
348 ret = wret;
7518a238 349 if (btrfs_header_nritems(&right->header) == 0) {
e20d96d6 350 u64 blocknr = right_buf->b_blocknr;
234b63a0 351 btrfs_block_release(root, right_buf);
e089f05c 352 clean_tree_block(trans, root, right_buf);
bb803951
CM
353 right_buf = NULL;
354 right = NULL;
e089f05c
CM
355 wret = del_ptr(trans, root, path, level + 1, pslot +
356 1);
bb803951
CM
357 if (wret)
358 ret = wret;
e089f05c 359 wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
bb803951
CM
360 if (wret)
361 ret = wret;
362 } else {
123abc88
CM
363 memcpy(&parent->ptrs[pslot + 1].key,
364 &right->ptrs[0].key,
e2fa7227 365 sizeof(struct btrfs_disk_key));
d5719762 366 mark_buffer_dirty(parent_buf);
bb803951
CM
367 }
368 }
7518a238 369 if (btrfs_header_nritems(&mid->header) == 1) {
79f95c82
CM
370 /*
371 * we're not allowed to leave a node with one item in the
372 * tree during a delete. A deletion from lower in the tree
373 * could try to delete the only pointer in this node.
374 * So, pull some keys from the left.
375 * There has to be a left pointer at this point because
376 * otherwise we would have pulled some pointers from the
377 * right
378 */
379 BUG_ON(!left_buf);
e089f05c 380 wret = balance_node_right(trans, root, mid_buf, left_buf);
79f95c82
CM
381 if (wret < 0)
382 ret = wret;
383 BUG_ON(wret == 1);
384 }
7518a238 385 if (btrfs_header_nritems(&mid->header) == 0) {
79f95c82 386 /* we've managed to empty the middle node, drop it */
e20d96d6 387 u64 blocknr = mid_buf->b_blocknr;
234b63a0 388 btrfs_block_release(root, mid_buf);
e089f05c 389 clean_tree_block(trans, root, mid_buf);
bb803951
CM
390 mid_buf = NULL;
391 mid = NULL;
e089f05c 392 wret = del_ptr(trans, root, path, level + 1, pslot);
bb803951
CM
393 if (wret)
394 ret = wret;
e089f05c 395 wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
bb803951
CM
396 if (wret)
397 ret = wret;
79f95c82
CM
398 } else {
399 /* update the parent key to reflect our changes */
123abc88 400 memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key,
e2fa7227 401 sizeof(struct btrfs_disk_key));
d5719762 402 mark_buffer_dirty(parent_buf);
79f95c82 403 }
bb803951 404
79f95c82 405 /* update the path */
bb803951 406 if (left_buf) {
7518a238 407 if (btrfs_header_nritems(&left->header) > orig_slot) {
e20d96d6 408 get_bh(left_buf);
bb803951
CM
409 path->nodes[level] = left_buf;
410 path->slots[level + 1] -= 1;
411 path->slots[level] = orig_slot;
412 if (mid_buf)
234b63a0 413 btrfs_block_release(root, mid_buf);
bb803951 414 } else {
7518a238 415 orig_slot -= btrfs_header_nritems(&left->header);
bb803951
CM
416 path->slots[level] = orig_slot;
417 }
418 }
79f95c82 419 /* double check we haven't messed things up */
123abc88 420 check_block(root, path, level);
e20d96d6
CM
421 if (orig_ptr !=
422 btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]),
423 path->slots[level]))
79f95c82 424 BUG();
bb803951
CM
425
426 if (right_buf)
234b63a0 427 btrfs_block_release(root, right_buf);
bb803951 428 if (left_buf)
234b63a0 429 btrfs_block_release(root, left_buf);
bb803951
CM
430 return ret;
431}
432
74123bd7
CM
433/*
434 * look for key in the tree. path is filled in with nodes along the way
435 * if key is found, we return zero and you can find the item in the leaf
436 * level of the path (level 0)
437 *
438 * If the key isn't found, the path points to the slot where it should
aa5d6bed
CM
439 * be inserted, and 1 is returned. If there are other errors during the
440 * search a negative error number is returned.
97571fd0
CM
441 *
442 * if ins_len > 0, nodes and leaves will be split as we walk down the
443 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
444 * possible)
74123bd7 445 */
e089f05c
CM
446int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
447 *root, struct btrfs_key *key, struct btrfs_path *p, int
448 ins_len, int cow)
be0e5c09 449{
e20d96d6
CM
450 struct buffer_head *b;
451 struct buffer_head *cow_buf;
234b63a0 452 struct btrfs_node *c;
be0e5c09
CM
453 int slot;
454 int ret;
455 int level;
5c680ed6 456
bb803951
CM
457again:
458 b = root->node;
e20d96d6 459 get_bh(b);
eb60ceac 460 while (b) {
e20d96d6
CM
461 c = btrfs_buffer_node(b);
462 level = btrfs_header_level(&c->header);
02217ed2
CM
463 if (cow) {
464 int wret;
e20d96d6
CM
465 wret = btrfs_cow_block(trans, root, b,
466 p->nodes[level + 1],
467 p->slots[level + 1],
e089f05c 468 &cow_buf);
02217ed2
CM
469 b = cow_buf;
470 }
471 BUG_ON(!cow && ins_len);
e20d96d6 472 c = btrfs_buffer_node(b);
eb60ceac 473 p->nodes[level] = b;
123abc88 474 ret = check_block(root, p, level);
aa5d6bed
CM
475 if (ret)
476 return -1;
be0e5c09 477 ret = bin_search(c, key, &slot);
7518a238 478 if (!btrfs_is_leaf(c)) {
be0e5c09
CM
479 if (ret && slot > 0)
480 slot -= 1;
481 p->slots[level] = slot;
7518a238 482 if (ins_len > 0 && btrfs_header_nritems(&c->header) ==
123abc88 483 BTRFS_NODEPTRS_PER_BLOCK(root)) {
e089f05c 484 int sret = split_node(trans, root, p, level);
5c680ed6
CM
485 BUG_ON(sret > 0);
486 if (sret)
487 return sret;
488 b = p->nodes[level];
e20d96d6 489 c = btrfs_buffer_node(b);
5c680ed6 490 slot = p->slots[level];
bb803951 491 } else if (ins_len < 0) {
e089f05c
CM
492 int sret = balance_level(trans, root, p,
493 level);
bb803951
CM
494 if (sret)
495 return sret;
496 b = p->nodes[level];
497 if (!b)
498 goto again;
e20d96d6 499 c = btrfs_buffer_node(b);
bb803951 500 slot = p->slots[level];
7518a238 501 BUG_ON(btrfs_header_nritems(&c->header) == 1);
5c680ed6 502 }
1d4f8a0c 503 b = read_tree_block(root, btrfs_node_blockptr(c, slot));
be0e5c09 504 } else {
234b63a0 505 struct btrfs_leaf *l = (struct btrfs_leaf *)c;
be0e5c09 506 p->slots[level] = slot;
123abc88 507 if (ins_len > 0 && btrfs_leaf_free_space(root, l) <
0783fcfc 508 sizeof(struct btrfs_item) + ins_len) {
e089f05c 509 int sret = split_leaf(trans, root, p, ins_len);
5c680ed6
CM
510 BUG_ON(sret > 0);
511 if (sret)
512 return sret;
513 }
be0e5c09
CM
514 return ret;
515 }
516 }
aa5d6bed 517 return 1;
be0e5c09
CM
518}
519
74123bd7
CM
520/*
521 * adjust the pointers going up the tree, starting at level
522 * making sure the right key of each node is points to 'key'.
523 * This is used after shifting pointers to the left, so it stops
524 * fixing up pointers when a given leaf/node is not in slot 0 of the
525 * higher levels
aa5d6bed
CM
526 *
527 * If this fails to write a tree block, it returns -1, but continues
528 * fixing up the blocks in ram so the tree is consistent.
74123bd7 529 */
e089f05c
CM
530static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root
531 *root, struct btrfs_path *path, struct btrfs_disk_key
532 *key, int level)
be0e5c09
CM
533{
534 int i;
aa5d6bed 535 int ret = 0;
234b63a0
CM
536 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
537 struct btrfs_node *t;
be0e5c09 538 int tslot = path->slots[i];
eb60ceac 539 if (!path->nodes[i])
be0e5c09 540 break;
e20d96d6 541 t = btrfs_buffer_node(path->nodes[i]);
123abc88 542 memcpy(&t->ptrs[tslot].key, key, sizeof(*key));
d5719762 543 mark_buffer_dirty(path->nodes[i]);
be0e5c09
CM
544 if (tslot != 0)
545 break;
546 }
aa5d6bed 547 return ret;
be0e5c09
CM
548}
549
74123bd7
CM
550/*
551 * try to push data from one node into the next node left in the
79f95c82 552 * tree.
aa5d6bed
CM
553 *
554 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
555 * error, and > 0 if there was no room in the left hand block.
74123bd7 556 */
e089f05c 557static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
e20d96d6
CM
558 *root, struct buffer_head *dst_buf, struct
559 buffer_head *src_buf)
be0e5c09 560{
e20d96d6
CM
561 struct btrfs_node *src = btrfs_buffer_node(src_buf);
562 struct btrfs_node *dst = btrfs_buffer_node(dst_buf);
be0e5c09 563 int push_items = 0;
bb803951
CM
564 int src_nritems;
565 int dst_nritems;
aa5d6bed 566 int ret = 0;
be0e5c09 567
7518a238
CM
568 src_nritems = btrfs_header_nritems(&src->header);
569 dst_nritems = btrfs_header_nritems(&dst->header);
123abc88 570 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
eb60ceac 571 if (push_items <= 0) {
be0e5c09 572 return 1;
eb60ceac 573 }
be0e5c09 574
bb803951 575 if (src_nritems < push_items)
79f95c82
CM
576 push_items = src_nritems;
577
123abc88
CM
578 memcpy(dst->ptrs + dst_nritems, src->ptrs,
579 push_items * sizeof(struct btrfs_key_ptr));
bb803951 580 if (push_items < src_nritems) {
123abc88 581 memmove(src->ptrs, src->ptrs + push_items,
e2fa7227 582 (src_nritems - push_items) *
123abc88 583 sizeof(struct btrfs_key_ptr));
bb803951 584 }
7518a238
CM
585 btrfs_set_header_nritems(&src->header, src_nritems - push_items);
586 btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
d5719762
CM
587 mark_buffer_dirty(src_buf);
588 mark_buffer_dirty(dst_buf);
79f95c82
CM
589 return ret;
590}
591
592/*
593 * try to push data from one node into the next node right in the
594 * tree.
595 *
596 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
597 * error, and > 0 if there was no room in the right hand block.
598 *
599 * this will only push up to 1/2 the contents of the left node over
600 */
e089f05c 601static int balance_node_right(struct btrfs_trans_handle *trans, struct
e20d96d6
CM
602 btrfs_root *root, struct buffer_head *dst_buf,
603 struct buffer_head *src_buf)
79f95c82 604{
e20d96d6
CM
605 struct btrfs_node *src = btrfs_buffer_node(src_buf);
606 struct btrfs_node *dst = btrfs_buffer_node(dst_buf);
79f95c82
CM
607 int push_items = 0;
608 int max_push;
609 int src_nritems;
610 int dst_nritems;
611 int ret = 0;
79f95c82 612
7518a238
CM
613 src_nritems = btrfs_header_nritems(&src->header);
614 dst_nritems = btrfs_header_nritems(&dst->header);
123abc88 615 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
79f95c82
CM
616 if (push_items <= 0) {
617 return 1;
618 }
619
620 max_push = src_nritems / 2 + 1;
621 /* don't try to empty the node */
622 if (max_push > src_nritems)
623 return 1;
624 if (max_push < push_items)
625 push_items = max_push;
626
123abc88
CM
627 memmove(dst->ptrs + push_items, dst->ptrs,
628 dst_nritems * sizeof(struct btrfs_key_ptr));
629 memcpy(dst->ptrs, src->ptrs + src_nritems - push_items,
630 push_items * sizeof(struct btrfs_key_ptr));
79f95c82 631
7518a238
CM
632 btrfs_set_header_nritems(&src->header, src_nritems - push_items);
633 btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
79f95c82 634
d5719762
CM
635 mark_buffer_dirty(src_buf);
636 mark_buffer_dirty(dst_buf);
aa5d6bed 637 return ret;
be0e5c09
CM
638}
639
97571fd0
CM
640/*
641 * helper function to insert a new root level in the tree.
642 * A new node is allocated, and a single item is inserted to
643 * point to the existing root
aa5d6bed
CM
644 *
645 * returns zero on success or < 0 on failure.
97571fd0 646 */
e089f05c
CM
647static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root
648 *root, struct btrfs_path *path, int level)
5c680ed6 649{
e20d96d6 650 struct buffer_head *t;
234b63a0
CM
651 struct btrfs_node *lower;
652 struct btrfs_node *c;
e2fa7227 653 struct btrfs_disk_key *lower_key;
5c680ed6
CM
654
655 BUG_ON(path->nodes[level]);
656 BUG_ON(path->nodes[level-1] != root->node);
657
e089f05c 658 t = btrfs_alloc_free_block(trans, root);
e20d96d6 659 c = btrfs_buffer_node(t);
123abc88 660 memset(c, 0, root->blocksize);
7518a238
CM
661 btrfs_set_header_nritems(&c->header, 1);
662 btrfs_set_header_level(&c->header, level);
e20d96d6 663 btrfs_set_header_blocknr(&c->header, t->b_blocknr);
7518a238 664 btrfs_set_header_parentid(&c->header,
e20d96d6
CM
665 btrfs_header_parentid(btrfs_buffer_header(root->node)));
666 lower = btrfs_buffer_node(path->nodes[level-1]);
7518a238 667 if (btrfs_is_leaf(lower))
234b63a0 668 lower_key = &((struct btrfs_leaf *)lower)->items[0].key;
5c680ed6 669 else
123abc88
CM
670 lower_key = &lower->ptrs[0].key;
671 memcpy(&c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key));
e20d96d6 672 btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr);
d5719762
CM
673
674 mark_buffer_dirty(t);
675
5c680ed6 676 /* the super has an extra ref to root->node */
234b63a0 677 btrfs_block_release(root, root->node);
5c680ed6 678 root->node = t;
e20d96d6 679 get_bh(t);
5c680ed6
CM
680 path->nodes[level] = t;
681 path->slots[level] = 0;
682 return 0;
683}
684
74123bd7
CM
685/*
686 * worker function to insert a single pointer in a node.
687 * the node should have enough room for the pointer already
97571fd0 688 *
74123bd7
CM
689 * slot and level indicate where you want the key to go, and
690 * blocknr is the block the key points to.
aa5d6bed
CM
691 *
692 * returns zero on success and < 0 on any error
74123bd7 693 */
e089f05c
CM
694static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
695 *root, struct btrfs_path *path, struct btrfs_disk_key
696 *key, u64 blocknr, int slot, int level)
74123bd7 697{
234b63a0 698 struct btrfs_node *lower;
74123bd7 699 int nritems;
5c680ed6
CM
700
701 BUG_ON(!path->nodes[level]);
e20d96d6 702 lower = btrfs_buffer_node(path->nodes[level]);
7518a238 703 nritems = btrfs_header_nritems(&lower->header);
74123bd7
CM
704 if (slot > nritems)
705 BUG();
123abc88 706 if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
74123bd7
CM
707 BUG();
708 if (slot != nritems) {
123abc88
CM
709 memmove(lower->ptrs + slot + 1, lower->ptrs + slot,
710 (nritems - slot) * sizeof(struct btrfs_key_ptr));
74123bd7 711 }
123abc88 712 memcpy(&lower->ptrs[slot].key, key, sizeof(struct btrfs_disk_key));
1d4f8a0c 713 btrfs_set_node_blockptr(lower, slot, blocknr);
7518a238 714 btrfs_set_header_nritems(&lower->header, nritems + 1);
d5719762 715 mark_buffer_dirty(path->nodes[level]);
74123bd7
CM
716 return 0;
717}
718
97571fd0
CM
719/*
720 * split the node at the specified level in path in two.
721 * The path is corrected to point to the appropriate node after the split
722 *
723 * Before splitting this tries to make some room in the node by pushing
724 * left and right, if either one works, it returns right away.
aa5d6bed
CM
725 *
726 * returns 0 on success and < 0 on failure
97571fd0 727 */
e089f05c
CM
728static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
729 *root, struct btrfs_path *path, int level)
be0e5c09 730{
e20d96d6 731 struct buffer_head *t;
234b63a0 732 struct btrfs_node *c;
e20d96d6 733 struct buffer_head *split_buffer;
234b63a0 734 struct btrfs_node *split;
be0e5c09 735 int mid;
5c680ed6 736 int ret;
aa5d6bed 737 int wret;
7518a238 738 u32 c_nritems;
eb60ceac 739
5c680ed6 740 t = path->nodes[level];
e20d96d6 741 c = btrfs_buffer_node(t);
5c680ed6
CM
742 if (t == root->node) {
743 /* trying to split the root, lets make a new one */
e089f05c 744 ret = insert_new_root(trans, root, path, level + 1);
5c680ed6
CM
745 if (ret)
746 return ret;
be0e5c09 747 }
7518a238 748 c_nritems = btrfs_header_nritems(&c->header);
e089f05c 749 split_buffer = btrfs_alloc_free_block(trans, root);
e20d96d6 750 split = btrfs_buffer_node(split_buffer);
7518a238 751 btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header));
e20d96d6 752 btrfs_set_header_blocknr(&split->header, split_buffer->b_blocknr);
7518a238 753 btrfs_set_header_parentid(&split->header,
e20d96d6 754 btrfs_header_parentid(btrfs_buffer_header(root->node)));
7518a238 755 mid = (c_nritems + 1) / 2;
123abc88
CM
756 memcpy(split->ptrs, c->ptrs + mid,
757 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
7518a238
CM
758 btrfs_set_header_nritems(&split->header, c_nritems - mid);
759 btrfs_set_header_nritems(&c->header, mid);
aa5d6bed
CM
760 ret = 0;
761
d5719762
CM
762 mark_buffer_dirty(t);
763 mark_buffer_dirty(split_buffer);
e089f05c 764 wret = insert_ptr(trans, root, path, &split->ptrs[0].key,
e20d96d6 765 split_buffer->b_blocknr, path->slots[level + 1] + 1,
123abc88 766 level + 1);
aa5d6bed
CM
767 if (wret)
768 ret = wret;
769
5de08d7d 770 if (path->slots[level] >= mid) {
5c680ed6 771 path->slots[level] -= mid;
234b63a0 772 btrfs_block_release(root, t);
5c680ed6
CM
773 path->nodes[level] = split_buffer;
774 path->slots[level + 1] += 1;
775 } else {
234b63a0 776 btrfs_block_release(root, split_buffer);
be0e5c09 777 }
aa5d6bed 778 return ret;
be0e5c09
CM
779}
780
74123bd7
CM
781/*
782 * how many bytes are required to store the items in a leaf. start
783 * and nr indicate which items in the leaf to check. This totals up the
784 * space used both by the item structs and the item data
785 */
234b63a0 786static int leaf_space_used(struct btrfs_leaf *l, int start, int nr)
be0e5c09
CM
787{
788 int data_len;
789 int end = start + nr - 1;
790
791 if (!nr)
792 return 0;
0783fcfc
CM
793 data_len = btrfs_item_end(l->items + start);
794 data_len = data_len - btrfs_item_offset(l->items + end);
795 data_len += sizeof(struct btrfs_item) * nr;
be0e5c09
CM
796 return data_len;
797}
798
00ec4c51
CM
799/*
800 * push some data in the path leaf to the right, trying to free up at
801 * least data_size bytes. returns zero if the push worked, nonzero otherwise
aa5d6bed
CM
802 *
803 * returns 1 if the push failed because the other node didn't have enough
804 * room, 0 if everything worked out and < 0 if there were major errors.
00ec4c51 805 */
e089f05c
CM
806static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
807 *root, struct btrfs_path *path, int data_size)
00ec4c51 808{
e20d96d6
CM
809 struct buffer_head *left_buf = path->nodes[0];
810 struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf);
234b63a0 811 struct btrfs_leaf *right;
e20d96d6
CM
812 struct buffer_head *right_buf;
813 struct buffer_head *upper;
814 struct btrfs_node *upper_node;
00ec4c51
CM
815 int slot;
816 int i;
817 int free_space;
818 int push_space = 0;
819 int push_items = 0;
0783fcfc 820 struct btrfs_item *item;
7518a238
CM
821 u32 left_nritems;
822 u32 right_nritems;
00ec4c51
CM
823
824 slot = path->slots[1];
825 if (!path->nodes[1]) {
826 return 1;
827 }
828 upper = path->nodes[1];
e20d96d6
CM
829 upper_node = btrfs_buffer_node(upper);
830 if (slot >= btrfs_header_nritems(&upper_node->header) - 1) {
00ec4c51
CM
831 return 1;
832 }
e20d96d6
CM
833 right_buf = read_tree_block(root,
834 btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1));
835 right = btrfs_buffer_leaf(right_buf);
123abc88 836 free_space = btrfs_leaf_free_space(root, right);
0783fcfc 837 if (free_space < data_size + sizeof(struct btrfs_item)) {
234b63a0 838 btrfs_block_release(root, right_buf);
00ec4c51
CM
839 return 1;
840 }
02217ed2 841 /* cow and double check */
e089f05c 842 btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf);
e20d96d6 843 right = btrfs_buffer_leaf(right_buf);
123abc88 844 free_space = btrfs_leaf_free_space(root, right);
0783fcfc 845 if (free_space < data_size + sizeof(struct btrfs_item)) {
234b63a0 846 btrfs_block_release(root, right_buf);
02217ed2
CM
847 return 1;
848 }
849
7518a238
CM
850 left_nritems = btrfs_header_nritems(&left->header);
851 for (i = left_nritems - 1; i >= 0; i--) {
00ec4c51
CM
852 item = left->items + i;
853 if (path->slots[0] == i)
854 push_space += data_size + sizeof(*item);
0783fcfc
CM
855 if (btrfs_item_size(item) + sizeof(*item) + push_space >
856 free_space)
00ec4c51
CM
857 break;
858 push_items++;
0783fcfc 859 push_space += btrfs_item_size(item) + sizeof(*item);
00ec4c51
CM
860 }
861 if (push_items == 0) {
234b63a0 862 btrfs_block_release(root, right_buf);
00ec4c51
CM
863 return 1;
864 }
7518a238 865 right_nritems = btrfs_header_nritems(&right->header);
00ec4c51 866 /* push left to right */
0783fcfc 867 push_space = btrfs_item_end(left->items + left_nritems - push_items);
123abc88 868 push_space -= leaf_data_end(root, left);
00ec4c51 869 /* make room in the right data area */
123abc88
CM
870 memmove(btrfs_leaf_data(right) + leaf_data_end(root, right) -
871 push_space, btrfs_leaf_data(right) + leaf_data_end(root, right),
872 BTRFS_LEAF_DATA_SIZE(root) - leaf_data_end(root, right));
00ec4c51 873 /* copy from the left data area */
123abc88
CM
874 memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - push_space,
875 btrfs_leaf_data(left) + leaf_data_end(root, left), push_space);
00ec4c51 876 memmove(right->items + push_items, right->items,
0783fcfc 877 right_nritems * sizeof(struct btrfs_item));
00ec4c51 878 /* copy the items from left to right */
7518a238 879 memcpy(right->items, left->items + left_nritems - push_items,
0783fcfc 880 push_items * sizeof(struct btrfs_item));
00ec4c51
CM
881
882 /* update the item pointers */
7518a238
CM
883 right_nritems += push_items;
884 btrfs_set_header_nritems(&right->header, right_nritems);
123abc88 885 push_space = BTRFS_LEAF_DATA_SIZE(root);
7518a238 886 for (i = 0; i < right_nritems; i++) {
0783fcfc
CM
887 btrfs_set_item_offset(right->items + i, push_space -
888 btrfs_item_size(right->items + i));
889 push_space = btrfs_item_offset(right->items + i);
00ec4c51 890 }
7518a238
CM
891 left_nritems -= push_items;
892 btrfs_set_header_nritems(&left->header, left_nritems);
00ec4c51 893
d5719762
CM
894 mark_buffer_dirty(left_buf);
895 mark_buffer_dirty(right_buf);
e20d96d6 896 memcpy(&upper_node->ptrs[slot + 1].key,
e2fa7227 897 &right->items[0].key, sizeof(struct btrfs_disk_key));
d5719762 898 mark_buffer_dirty(upper);
02217ed2 899
00ec4c51 900 /* then fixup the leaf pointer in the path */
7518a238
CM
901 if (path->slots[0] >= left_nritems) {
902 path->slots[0] -= left_nritems;
234b63a0 903 btrfs_block_release(root, path->nodes[0]);
00ec4c51
CM
904 path->nodes[0] = right_buf;
905 path->slots[1] += 1;
906 } else {
234b63a0 907 btrfs_block_release(root, right_buf);
00ec4c51
CM
908 }
909 return 0;
910}
74123bd7
CM
911/*
912 * push some data in the path leaf to the left, trying to free up at
913 * least data_size bytes. returns zero if the push worked, nonzero otherwise
914 */
e089f05c
CM
915static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
916 *root, struct btrfs_path *path, int data_size)
be0e5c09 917{
e20d96d6
CM
918 struct buffer_head *right_buf = path->nodes[0];
919 struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf);
920 struct buffer_head *t;
234b63a0 921 struct btrfs_leaf *left;
be0e5c09
CM
922 int slot;
923 int i;
924 int free_space;
925 int push_space = 0;
926 int push_items = 0;
0783fcfc 927 struct btrfs_item *item;
7518a238 928 u32 old_left_nritems;
aa5d6bed
CM
929 int ret = 0;
930 int wret;
be0e5c09
CM
931
932 slot = path->slots[1];
933 if (slot == 0) {
934 return 1;
935 }
936 if (!path->nodes[1]) {
937 return 1;
938 }
e20d96d6
CM
939 t = read_tree_block(root,
940 btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1));
941 left = btrfs_buffer_leaf(t);
123abc88 942 free_space = btrfs_leaf_free_space(root, left);
0783fcfc 943 if (free_space < data_size + sizeof(struct btrfs_item)) {
234b63a0 944 btrfs_block_release(root, t);
be0e5c09
CM
945 return 1;
946 }
02217ed2
CM
947
948 /* cow and double check */
e089f05c 949 btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t);
e20d96d6 950 left = btrfs_buffer_leaf(t);
123abc88 951 free_space = btrfs_leaf_free_space(root, left);
0783fcfc 952 if (free_space < data_size + sizeof(struct btrfs_item)) {
234b63a0 953 btrfs_block_release(root, t);
02217ed2
CM
954 return 1;
955 }
956
7518a238 957 for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
be0e5c09
CM
958 item = right->items + i;
959 if (path->slots[0] == i)
960 push_space += data_size + sizeof(*item);
0783fcfc
CM
961 if (btrfs_item_size(item) + sizeof(*item) + push_space >
962 free_space)
be0e5c09
CM
963 break;
964 push_items++;
0783fcfc 965 push_space += btrfs_item_size(item) + sizeof(*item);
be0e5c09
CM
966 }
967 if (push_items == 0) {
234b63a0 968 btrfs_block_release(root, t);
be0e5c09
CM
969 return 1;
970 }
971 /* push data from right to left */
7518a238 972 memcpy(left->items + btrfs_header_nritems(&left->header),
0783fcfc 973 right->items, push_items * sizeof(struct btrfs_item));
123abc88 974 push_space = BTRFS_LEAF_DATA_SIZE(root) -
0783fcfc 975 btrfs_item_offset(right->items + push_items -1);
123abc88
CM
976 memcpy(btrfs_leaf_data(left) + leaf_data_end(root, left) - push_space,
977 btrfs_leaf_data(right) +
978 btrfs_item_offset(right->items + push_items - 1),
be0e5c09 979 push_space);
7518a238 980 old_left_nritems = btrfs_header_nritems(&left->header);
eb60ceac
CM
981 BUG_ON(old_left_nritems < 0);
982
0783fcfc 983 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
123abc88
CM
984 u32 ioff = btrfs_item_offset(left->items + i);
985 btrfs_set_item_offset(left->items + i, ioff -
986 (BTRFS_LEAF_DATA_SIZE(root) -
0783fcfc
CM
987 btrfs_item_offset(left->items +
988 old_left_nritems - 1)));
be0e5c09 989 }
7518a238 990 btrfs_set_header_nritems(&left->header, old_left_nritems + push_items);
be0e5c09
CM
991
992 /* fixup right node */
0783fcfc 993 push_space = btrfs_item_offset(right->items + push_items - 1) -
123abc88
CM
994 leaf_data_end(root, right);
995 memmove(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
996 push_space, btrfs_leaf_data(right) +
997 leaf_data_end(root, right), push_space);
be0e5c09 998 memmove(right->items, right->items + push_items,
7518a238 999 (btrfs_header_nritems(&right->header) - push_items) *
0783fcfc 1000 sizeof(struct btrfs_item));
7518a238
CM
1001 btrfs_set_header_nritems(&right->header,
1002 btrfs_header_nritems(&right->header) -
1003 push_items);
123abc88 1004 push_space = BTRFS_LEAF_DATA_SIZE(root);
eb60ceac 1005
7518a238 1006 for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
0783fcfc
CM
1007 btrfs_set_item_offset(right->items + i, push_space -
1008 btrfs_item_size(right->items + i));
1009 push_space = btrfs_item_offset(right->items + i);
be0e5c09 1010 }
eb60ceac 1011
d5719762
CM
1012 mark_buffer_dirty(t);
1013 mark_buffer_dirty(right_buf);
eb60ceac 1014
e089f05c 1015 wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1);
aa5d6bed
CM
1016 if (wret)
1017 ret = wret;
be0e5c09
CM
1018
1019 /* then fixup the leaf pointer in the path */
1020 if (path->slots[0] < push_items) {
1021 path->slots[0] += old_left_nritems;
234b63a0 1022 btrfs_block_release(root, path->nodes[0]);
eb60ceac 1023 path->nodes[0] = t;
be0e5c09
CM
1024 path->slots[1] -= 1;
1025 } else {
234b63a0 1026 btrfs_block_release(root, t);
be0e5c09
CM
1027 path->slots[0] -= push_items;
1028 }
eb60ceac 1029 BUG_ON(path->slots[0] < 0);
aa5d6bed 1030 return ret;
be0e5c09
CM
1031}
1032
74123bd7
CM
1033/*
1034 * split the path's leaf in two, making sure there is at least data_size
1035 * available for the resulting leaf level of the path.
aa5d6bed
CM
1036 *
1037 * returns 0 if all went well and < 0 on failure.
74123bd7 1038 */
e089f05c
CM
1039static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1040 *root, struct btrfs_path *path, int data_size)
be0e5c09 1041{
e20d96d6 1042 struct buffer_head *l_buf;
234b63a0 1043 struct btrfs_leaf *l;
7518a238 1044 u32 nritems;
eb60ceac
CM
1045 int mid;
1046 int slot;
234b63a0 1047 struct btrfs_leaf *right;
e20d96d6 1048 struct buffer_head *right_buffer;
0783fcfc 1049 int space_needed = data_size + sizeof(struct btrfs_item);
be0e5c09
CM
1050 int data_copy_size;
1051 int rt_data_off;
1052 int i;
1053 int ret;
aa5d6bed
CM
1054 int wret;
1055
40689478 1056 /* first try to make some room by pushing left and right */
e089f05c 1057 wret = push_leaf_left(trans, root, path, data_size);
eaee50e8
CM
1058 if (wret < 0)
1059 return wret;
1060 if (wret) {
e089f05c 1061 wret = push_leaf_right(trans, root, path, data_size);
eaee50e8
CM
1062 if (wret < 0)
1063 return wret;
1064 }
aa5d6bed 1065 l_buf = path->nodes[0];
e20d96d6 1066 l = btrfs_buffer_leaf(l_buf);
aa5d6bed
CM
1067
1068 /* did the pushes work? */
123abc88
CM
1069 if (btrfs_leaf_free_space(root, l) >=
1070 sizeof(struct btrfs_item) + data_size)
aa5d6bed
CM
1071 return 0;
1072
5c680ed6 1073 if (!path->nodes[1]) {
e089f05c 1074 ret = insert_new_root(trans, root, path, 1);
5c680ed6
CM
1075 if (ret)
1076 return ret;
1077 }
eb60ceac 1078 slot = path->slots[0];
7518a238 1079 nritems = btrfs_header_nritems(&l->header);
eb60ceac 1080 mid = (nritems + 1)/ 2;
e089f05c 1081 right_buffer = btrfs_alloc_free_block(trans, root);
eb60ceac
CM
1082 BUG_ON(!right_buffer);
1083 BUG_ON(mid == nritems);
e20d96d6 1084 right = btrfs_buffer_leaf(right_buffer);
123abc88 1085 memset(&right->header, 0, sizeof(right->header));
be0e5c09 1086 if (mid <= slot) {
97571fd0 1087 /* FIXME, just alloc a new leaf here */
be0e5c09 1088 if (leaf_space_used(l, mid, nritems - mid) + space_needed >
123abc88 1089 BTRFS_LEAF_DATA_SIZE(root))
be0e5c09
CM
1090 BUG();
1091 } else {
97571fd0 1092 /* FIXME, just alloc a new leaf here */
be0e5c09 1093 if (leaf_space_used(l, 0, mid + 1) + space_needed >
123abc88 1094 BTRFS_LEAF_DATA_SIZE(root))
be0e5c09
CM
1095 BUG();
1096 }
7518a238 1097 btrfs_set_header_nritems(&right->header, nritems - mid);
e20d96d6 1098 btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr);
7518a238
CM
1099 btrfs_set_header_level(&right->header, 0);
1100 btrfs_set_header_parentid(&right->header,
e20d96d6 1101 btrfs_header_parentid(btrfs_buffer_header(root->node)));
123abc88
CM
1102 data_copy_size = btrfs_item_end(l->items + mid) -
1103 leaf_data_end(root, l);
be0e5c09 1104 memcpy(right->items, l->items + mid,
0783fcfc 1105 (nritems - mid) * sizeof(struct btrfs_item));
123abc88
CM
1106 memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1107 data_copy_size, btrfs_leaf_data(l) +
1108 leaf_data_end(root, l), data_copy_size);
1109 rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1110 btrfs_item_end(l->items + mid);
74123bd7 1111
0783fcfc 1112 for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
123abc88 1113 u32 ioff = btrfs_item_offset(right->items + i);
0783fcfc
CM
1114 btrfs_set_item_offset(right->items + i, ioff + rt_data_off);
1115 }
74123bd7 1116
7518a238 1117 btrfs_set_header_nritems(&l->header, mid);
aa5d6bed 1118 ret = 0;
e089f05c 1119 wret = insert_ptr(trans, root, path, &right->items[0].key,
e20d96d6 1120 right_buffer->b_blocknr, path->slots[1] + 1, 1);
aa5d6bed
CM
1121 if (wret)
1122 ret = wret;
d5719762
CM
1123 mark_buffer_dirty(right_buffer);
1124 mark_buffer_dirty(l_buf);
eb60ceac 1125 BUG_ON(path->slots[0] != slot);
be0e5c09 1126 if (mid <= slot) {
234b63a0 1127 btrfs_block_release(root, path->nodes[0]);
eb60ceac 1128 path->nodes[0] = right_buffer;
be0e5c09
CM
1129 path->slots[0] -= mid;
1130 path->slots[1] += 1;
eb60ceac 1131 } else
234b63a0 1132 btrfs_block_release(root, right_buffer);
eb60ceac 1133 BUG_ON(path->slots[0] < 0);
be0e5c09
CM
1134 return ret;
1135}
1136
74123bd7
CM
1137/*
1138 * Given a key and some data, insert an item into the tree.
1139 * This does all the path init required, making room in the tree if needed.
1140 */
e089f05c
CM
1141int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root
1142 *root, struct btrfs_path *path, struct btrfs_key
1143 *cpu_key, u32 data_size)
be0e5c09 1144{
aa5d6bed 1145 int ret = 0;
be0e5c09 1146 int slot;
eb60ceac 1147 int slot_orig;
234b63a0 1148 struct btrfs_leaf *leaf;
e20d96d6 1149 struct buffer_head *leaf_buf;
7518a238 1150 u32 nritems;
be0e5c09 1151 unsigned int data_end;
e2fa7227
CM
1152 struct btrfs_disk_key disk_key;
1153
1154 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
be0e5c09 1155
74123bd7 1156 /* create a root if there isn't one */
5c680ed6 1157 if (!root->node)
cfaa7295 1158 BUG();
e089f05c 1159 ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
eb60ceac 1160 if (ret == 0) {
62e2749e 1161 btrfs_release_path(root, path);
f0930a37 1162 return -EEXIST;
aa5d6bed 1163 }
ed2ff2cb
CM
1164 if (ret < 0)
1165 goto out;
be0e5c09 1166
62e2749e
CM
1167 slot_orig = path->slots[0];
1168 leaf_buf = path->nodes[0];
e20d96d6 1169 leaf = btrfs_buffer_leaf(leaf_buf);
74123bd7 1170
7518a238 1171 nritems = btrfs_header_nritems(&leaf->header);
123abc88 1172 data_end = leaf_data_end(root, leaf);
eb60ceac 1173
123abc88 1174 if (btrfs_leaf_free_space(root, leaf) <
234b63a0 1175 sizeof(struct btrfs_item) + data_size)
be0e5c09
CM
1176 BUG();
1177
62e2749e 1178 slot = path->slots[0];
eb60ceac 1179 BUG_ON(slot < 0);
be0e5c09
CM
1180 if (slot != nritems) {
1181 int i;
0783fcfc 1182 unsigned int old_data = btrfs_item_end(leaf->items + slot);
be0e5c09
CM
1183
1184 /*
1185 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1186 */
1187 /* first correct the data pointers */
0783fcfc 1188 for (i = slot; i < nritems; i++) {
123abc88 1189 u32 ioff = btrfs_item_offset(leaf->items + i);
0783fcfc
CM
1190 btrfs_set_item_offset(leaf->items + i,
1191 ioff - data_size);
1192 }
be0e5c09
CM
1193
1194 /* shift the items */
1195 memmove(leaf->items + slot + 1, leaf->items + slot,
0783fcfc 1196 (nritems - slot) * sizeof(struct btrfs_item));
be0e5c09
CM
1197
1198 /* shift the data */
123abc88
CM
1199 memmove(btrfs_leaf_data(leaf) + data_end - data_size,
1200 btrfs_leaf_data(leaf) +
be0e5c09
CM
1201 data_end, old_data - data_end);
1202 data_end = old_data;
1203 }
62e2749e 1204 /* setup the item for the new data */
e2fa7227
CM
1205 memcpy(&leaf->items[slot].key, &disk_key,
1206 sizeof(struct btrfs_disk_key));
0783fcfc
CM
1207 btrfs_set_item_offset(leaf->items + slot, data_end - data_size);
1208 btrfs_set_item_size(leaf->items + slot, data_size);
7518a238 1209 btrfs_set_header_nritems(&leaf->header, nritems + 1);
d5719762 1210 mark_buffer_dirty(leaf_buf);
aa5d6bed
CM
1211
1212 ret = 0;
8e19f2cd 1213 if (slot == 0)
e089f05c 1214 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
aa5d6bed 1215
123abc88 1216 if (btrfs_leaf_free_space(root, leaf) < 0)
be0e5c09 1217 BUG();
62e2749e 1218 check_leaf(root, path, 0);
ed2ff2cb 1219out:
62e2749e
CM
1220 return ret;
1221}
1222
1223/*
1224 * Given a key and some data, insert an item into the tree.
1225 * This does all the path init required, making room in the tree if needed.
1226 */
e089f05c
CM
1227int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
1228 *root, struct btrfs_key *cpu_key, void *data, u32
1229 data_size)
62e2749e
CM
1230{
1231 int ret = 0;
1232 struct btrfs_path path;
1233 u8 *ptr;
1234
1235 btrfs_init_path(&path);
e089f05c 1236 ret = btrfs_insert_empty_item(trans, root, &path, cpu_key, data_size);
62e2749e 1237 if (!ret) {
e20d96d6
CM
1238 ptr = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]),
1239 path.slots[0], u8);
62e2749e 1240 memcpy(ptr, data, data_size);
d5719762 1241 mark_buffer_dirty(path.nodes[0]);
62e2749e 1242 }
234b63a0 1243 btrfs_release_path(root, &path);
aa5d6bed 1244 return ret;
be0e5c09
CM
1245}
1246
74123bd7 1247/*
5de08d7d 1248 * delete the pointer from a given node.
74123bd7
CM
1249 *
1250 * If the delete empties a node, the node is removed from the tree,
1251 * continuing all the way the root if required. The root is converted into
1252 * a leaf if all the nodes are emptied.
1253 */
e089f05c
CM
1254static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1255 struct btrfs_path *path, int level, int slot)
be0e5c09 1256{
234b63a0 1257 struct btrfs_node *node;
e20d96d6 1258 struct buffer_head *parent = path->nodes[level];
7518a238 1259 u32 nritems;
aa5d6bed 1260 int ret = 0;
bb803951 1261 int wret;
be0e5c09 1262
e20d96d6 1263 node = btrfs_buffer_node(parent);
7518a238 1264 nritems = btrfs_header_nritems(&node->header);
bb803951 1265 if (slot != nritems -1) {
123abc88
CM
1266 memmove(node->ptrs + slot, node->ptrs + slot + 1,
1267 sizeof(struct btrfs_key_ptr) * (nritems - slot - 1));
bb803951 1268 }
7518a238
CM
1269 nritems--;
1270 btrfs_set_header_nritems(&node->header, nritems);
1271 if (nritems == 0 && parent == root->node) {
e20d96d6
CM
1272 struct btrfs_header *header = btrfs_buffer_header(root->node);
1273 BUG_ON(btrfs_header_level(header) != 1);
bb803951 1274 /* just turn the root into a leaf and break */
e20d96d6 1275 btrfs_set_header_level(header, 0);
bb803951 1276 } else if (slot == 0) {
e089f05c 1277 wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key,
123abc88 1278 level + 1);
0f70abe2
CM
1279 if (wret)
1280 ret = wret;
be0e5c09 1281 }
d5719762 1282 mark_buffer_dirty(parent);
aa5d6bed 1283 return ret;
be0e5c09
CM
1284}
1285
74123bd7
CM
1286/*
1287 * delete the item at the leaf level in path. If that empties
1288 * the leaf, remove it from the tree
1289 */
e089f05c
CM
1290int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1291 struct btrfs_path *path)
be0e5c09 1292{
be0e5c09 1293 int slot;
234b63a0 1294 struct btrfs_leaf *leaf;
e20d96d6 1295 struct buffer_head *leaf_buf;
be0e5c09
CM
1296 int doff;
1297 int dsize;
aa5d6bed
CM
1298 int ret = 0;
1299 int wret;
7518a238 1300 u32 nritems;
be0e5c09 1301
eb60ceac 1302 leaf_buf = path->nodes[0];
e20d96d6 1303 leaf = btrfs_buffer_leaf(leaf_buf);
4920c9ac 1304 slot = path->slots[0];
0783fcfc
CM
1305 doff = btrfs_item_offset(leaf->items + slot);
1306 dsize = btrfs_item_size(leaf->items + slot);
7518a238 1307 nritems = btrfs_header_nritems(&leaf->header);
be0e5c09 1308
7518a238 1309 if (slot != nritems - 1) {
be0e5c09 1310 int i;
123abc88
CM
1311 int data_end = leaf_data_end(root, leaf);
1312 memmove(btrfs_leaf_data(leaf) + data_end + dsize,
1313 btrfs_leaf_data(leaf) + data_end,
be0e5c09 1314 doff - data_end);
0783fcfc 1315 for (i = slot + 1; i < nritems; i++) {
123abc88 1316 u32 ioff = btrfs_item_offset(leaf->items + i);
0783fcfc
CM
1317 btrfs_set_item_offset(leaf->items + i, ioff + dsize);
1318 }
be0e5c09 1319 memmove(leaf->items + slot, leaf->items + slot + 1,
0783fcfc 1320 sizeof(struct btrfs_item) *
7518a238 1321 (nritems - slot - 1));
be0e5c09 1322 }
7518a238
CM
1323 btrfs_set_header_nritems(&leaf->header, nritems - 1);
1324 nritems--;
74123bd7 1325 /* delete the leaf if we've emptied it */
7518a238 1326 if (nritems == 0) {
eb60ceac 1327 if (leaf_buf == root->node) {
7518a238 1328 btrfs_set_header_level(&leaf->header, 0);
9a8dd150 1329 } else {
e089f05c
CM
1330 clean_tree_block(trans, root, leaf_buf);
1331 wret = del_ptr(trans, root, path, 1, path->slots[1]);
aa5d6bed
CM
1332 if (wret)
1333 ret = wret;
e089f05c 1334 wret = btrfs_free_extent(trans, root,
e20d96d6 1335 leaf_buf->b_blocknr, 1, 1);
0f70abe2
CM
1336 if (wret)
1337 ret = wret;
9a8dd150 1338 }
be0e5c09 1339 } else {
7518a238 1340 int used = leaf_space_used(leaf, 0, nritems);
aa5d6bed 1341 if (slot == 0) {
e089f05c
CM
1342 wret = fixup_low_keys(trans, root, path,
1343 &leaf->items[0].key, 1);
aa5d6bed
CM
1344 if (wret)
1345 ret = wret;
1346 }
aa5d6bed 1347
74123bd7 1348 /* delete the leaf if it is mostly empty */
123abc88 1349 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
be0e5c09
CM
1350 /* push_leaf_left fixes the path.
1351 * make sure the path still points to our leaf
1352 * for possible call to del_ptr below
1353 */
4920c9ac 1354 slot = path->slots[1];
e20d96d6 1355 get_bh(leaf_buf);
e089f05c 1356 wret = push_leaf_left(trans, root, path, 1);
aa5d6bed
CM
1357 if (wret < 0)
1358 ret = wret;
f0930a37 1359 if (path->nodes[0] == leaf_buf &&
7518a238 1360 btrfs_header_nritems(&leaf->header)) {
e089f05c 1361 wret = push_leaf_right(trans, root, path, 1);
aa5d6bed
CM
1362 if (wret < 0)
1363 ret = wret;
1364 }
7518a238 1365 if (btrfs_header_nritems(&leaf->header) == 0) {
e20d96d6 1366 u64 blocknr = leaf_buf->b_blocknr;
e089f05c
CM
1367 clean_tree_block(trans, root, leaf_buf);
1368 wret = del_ptr(trans, root, path, 1, slot);
aa5d6bed
CM
1369 if (wret)
1370 ret = wret;
234b63a0 1371 btrfs_block_release(root, leaf_buf);
e089f05c
CM
1372 wret = btrfs_free_extent(trans, root, blocknr,
1373 1, 1);
0f70abe2
CM
1374 if (wret)
1375 ret = wret;
5de08d7d 1376 } else {
d5719762 1377 mark_buffer_dirty(leaf_buf);
234b63a0 1378 btrfs_block_release(root, leaf_buf);
be0e5c09 1379 }
d5719762
CM
1380 } else {
1381 mark_buffer_dirty(leaf_buf);
be0e5c09
CM
1382 }
1383 }
aa5d6bed 1384 return ret;
be0e5c09
CM
1385}
1386
97571fd0
CM
1387/*
1388 * walk up the tree as far as required to find the next leaf.
0f70abe2
CM
1389 * returns 0 if it found something or 1 if there are no greater leaves.
1390 * returns < 0 on io errors.
97571fd0 1391 */
234b63a0 1392int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
d97e63b6
CM
1393{
1394 int slot;
1395 int level = 1;
1396 u64 blocknr;
e20d96d6
CM
1397 struct buffer_head *c;
1398 struct btrfs_node *c_node;
1399 struct buffer_head *next = NULL;
d97e63b6 1400
234b63a0 1401 while(level < BTRFS_MAX_LEVEL) {
d97e63b6 1402 if (!path->nodes[level])
0f70abe2 1403 return 1;
d97e63b6
CM
1404 slot = path->slots[level] + 1;
1405 c = path->nodes[level];
e20d96d6
CM
1406 c_node = btrfs_buffer_node(c);
1407 if (slot >= btrfs_header_nritems(&c_node->header)) {
d97e63b6
CM
1408 level++;
1409 continue;
1410 }
e20d96d6 1411 blocknr = btrfs_node_blockptr(c_node, slot);
cfaa7295 1412 if (next)
234b63a0 1413 btrfs_block_release(root, next);
d97e63b6
CM
1414 next = read_tree_block(root, blocknr);
1415 break;
1416 }
1417 path->slots[level] = slot;
1418 while(1) {
1419 level--;
1420 c = path->nodes[level];
234b63a0 1421 btrfs_block_release(root, c);
d97e63b6
CM
1422 path->nodes[level] = next;
1423 path->slots[level] = 0;
1424 if (!level)
1425 break;
1d4f8a0c 1426 next = read_tree_block(root,
e20d96d6 1427 btrfs_node_blockptr(btrfs_buffer_node(next), 0));
d97e63b6
CM
1428 }
1429 return 0;
1430}