// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"
#include "qgroup.h"

struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
struct kmem_cache *btrfs_delayed_data_ref_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;
/*
 * delayed back reference update tracking.  For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing.  This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 */

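/*
 * For orientation, a typical flow through this file looks roughly like the
 * following (a sketch, not copied from any single caller):
 *
 *	btrfs_add_delayed_tree_ref() / btrfs_add_delayed_data_ref()
 *		-> add_delayed_ref_head() + insert_delayed_ref()
 *	...later, when the transaction runs its delayed refs:
 *	btrfs_select_ref_head() -> process the head's refs
 *		-> btrfs_delete_ref_head()
 */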
/*
 * compare two delayed tree backrefs with same bytenr and type
 */
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
			  struct btrfs_delayed_tree_ref *ref2)
{
	if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

/*
 * compare two delayed data backrefs with same bytenr and type
 */
static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
			  struct btrfs_delayed_data_ref *ref2)
{
	if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
		if (ref1->objectid < ref2->objectid)
			return -1;
		if (ref1->objectid > ref2->objectid)
			return 1;
		if (ref1->offset < ref2->offset)
			return -1;
		if (ref1->offset > ref2->offset)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

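/*
 * comp_refs() below defines the total order used by the per-head rbtree:
 * refs sort by type first, then by the type-specific fields compared above,
 * and finally (when check_seq is set) by seq, so repeated modifications of
 * the same reference can coexist as separate nodes.
 */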
static int comp_refs(struct btrfs_delayed_ref_node *ref1,
		     struct btrfs_delayed_ref_node *ref2,
		     bool check_seq)
{
	int ret = 0;

	if (ref1->type < ref2->type)
		return -1;
	if (ref1->type > ref2->type)
		return 1;
	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
		ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
				     btrfs_delayed_node_to_tree_ref(ref2));
	else
		ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
				     btrfs_delayed_node_to_data_ref(ref2));
	if (ret)
		return ret;
	if (check_seq) {
		if (ref1->seq < ref2->seq)
			return -1;
		if (ref1->seq > ref2->seq)
			return 1;
	}
	return 0;
}

/* insert a new ref to head ref rbtree */
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root_cached *root,
						   struct rb_node *node)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_head *entry;
	struct btrfs_delayed_ref_head *ins;
	u64 bytenr;
	bool leftmost = true;

	ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
	bytenr = ins->bytenr;
	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
				 href_node);

		if (bytenr < entry->bytenr) {
			p = &(*p)->rb_left;
		} else if (bytenr > entry->bytenr) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			return entry;
		}
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color_cached(node, root, leftmost);
	return NULL;
}

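/*
 * Insert a delayed ref node into the per-head rbtree, keyed by comp_refs()
 * with seq included.  Like htree_insert() above, this tracks whether the new
 * node ever descended to the right so rb_insert_color_cached() knows whether
 * it becomes the new cached leftmost entry.
 */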
static struct btrfs_delayed_ref_node* tree_insert(struct rb_root_cached *root,
		struct btrfs_delayed_ref_node *ins)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_node *node = &ins->ref_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_node *entry;
	bool leftmost = true;

	while (*p) {
		int comp;

		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
				 ref_node);
		comp = comp_refs(ins, entry, true);
		if (comp < 0) {
			p = &(*p)->rb_left;
		} else if (comp > 0) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			return entry;
		}
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color_cached(node, root, leftmost);
	return NULL;
}

static struct btrfs_delayed_ref_head *find_first_ref_head(
		struct btrfs_delayed_ref_root *dr)
{
	struct rb_node *n;
	struct btrfs_delayed_ref_head *entry;

	n = rb_first_cached(&dr->href_root);
	if (!n)
		return NULL;

	entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

	return entry;
}

/*
 * Find a head entry based on bytenr. This returns the delayed ref head if it
 * was able to find one, or NULL if nothing was in that spot. If return_bigger
 * is given, the next bigger entry is returned if no exact match is found.
 */
static struct btrfs_delayed_ref_head *find_ref_head(
		struct btrfs_delayed_ref_root *dr, u64 bytenr,
		bool return_bigger)
{
	struct rb_root *root = &dr->href_root.rb_root;
	struct rb_node *n;
	struct btrfs_delayed_ref_head *entry;

	n = root->rb_node;
	entry = NULL;
	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

		if (bytenr < entry->bytenr)
			n = n->rb_left;
		else if (bytenr > entry->bytenr)
			n = n->rb_right;
		else
			return entry;
	}
	if (entry && return_bigger) {
		if (bytenr > entry->bytenr) {
			n = rb_next(&entry->href_node);
			if (!n)
				return NULL;
			entry = rb_entry(n, struct btrfs_delayed_ref_head,
					 href_node);
		}
		return entry;
	}
	return NULL;
}

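/*
 * Try to take the head's mutex without dropping delayed_refs->lock.  If that
 * fails, hold a reference on the head, drop the spinlock and sleep on the
 * mutex; by the time we acquire it, the head may already have been run and
 * unlinked from the rbtree, which RB_EMPTY_NODE() detects, and -EAGAIN tells
 * the caller to look the head up again.
 */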
int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
			   struct btrfs_delayed_ref_head *head)
{
	lockdep_assert_held(&delayed_refs->lock);
	if (mutex_trylock(&head->mutex))
		return 0;

	refcount_inc(&head->refs);
	spin_unlock(&delayed_refs->lock);

	mutex_lock(&head->mutex);
	spin_lock(&delayed_refs->lock);
	if (RB_EMPTY_NODE(&head->href_node)) {
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref_head(head);
		return -EAGAIN;
	}
	btrfs_put_delayed_ref_head(head);
	return 0;
}

static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
				    struct btrfs_delayed_ref_root *delayed_refs,
				    struct btrfs_delayed_ref_head *head,
				    struct btrfs_delayed_ref_node *ref)
{
	lockdep_assert_held(&head->lock);
	rb_erase_cached(&ref->ref_node, &head->ref_tree);
	RB_CLEAR_NODE(&ref->ref_node);
	if (!list_empty(&ref->add_list))
		list_del(&ref->add_list);
	ref->in_tree = 0;
	btrfs_put_delayed_ref(ref);
	atomic_dec(&delayed_refs->num_entries);
}

static bool merge_ref(struct btrfs_trans_handle *trans,
		      struct btrfs_delayed_ref_root *delayed_refs,
		      struct btrfs_delayed_ref_head *head,
		      struct btrfs_delayed_ref_node *ref,
		      u64 seq)
{
	struct btrfs_delayed_ref_node *next;
	struct rb_node *node = rb_next(&ref->ref_node);
	bool done = false;

	while (!done && node) {
		int mod;

		next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
		node = rb_next(node);
		if (seq && next->seq >= seq)
			break;
		if (comp_refs(ref, next, false))
			break;

		if (ref->action == next->action) {
			mod = next->ref_mod;
		} else {
			if (ref->ref_mod < next->ref_mod) {
				swap(ref, next);
				done = true;
			}
			mod = -next->ref_mod;
		}

		drop_delayed_ref(trans, delayed_refs, head, next);
		ref->ref_mod += mod;
		if (ref->ref_mod == 0) {
			drop_delayed_ref(trans, delayed_refs, head, ref);
			done = true;
		} else {
			/*
			 * Can't have multiples of the same ref on a tree block.
			 */
			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
		}
	}

	return done;
}

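/*
 * A worked example of the merge arithmetic above (illustrative numbers, not
 * from any real workload): ref_mod within a node is a positive count and the
 * action carries the sign, so an ADD with ref_mod == 1 followed by a DROP
 * with ref_mod == 1 cancels to 0 and both nodes are dropped, while an ADD
 * with ref_mod == 2 against a DROP with ref_mod == 1 collapses to a single
 * ADD with ref_mod == 1.
 */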
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
			      struct btrfs_delayed_ref_root *delayed_refs,
			      struct btrfs_delayed_ref_head *head)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_ref_node *ref;
	struct rb_node *node;
	u64 seq = 0;

	lockdep_assert_held(&head->lock);

	if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
		return;

	/* We don't have too many refs to merge for data. */
	if (head->is_data)
		return;

	spin_lock(&fs_info->tree_mod_seq_lock);
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		struct seq_list *elem;

		elem = list_first_entry(&fs_info->tree_mod_seq_list,
					struct seq_list, list);
		seq = elem->seq;
	}
	spin_unlock(&fs_info->tree_mod_seq_lock);

again:
	for (node = rb_first_cached(&head->ref_tree); node;
	     node = rb_next(node)) {
		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
		if (seq && ref->seq >= seq)
			continue;
		if (merge_ref(trans, delayed_refs, head, ref, seq))
			goto again;
	}
}

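/*
 * Note that merging is deliberately held back while tree_mod_seq_list is
 * non-empty: refs with a seq at or above the oldest outstanding sequence
 * number are skipped (both above and in merge_ref()) so whoever holds that
 * sequence number can still observe the individual modifications.
 */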
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
{
	struct seq_list *elem;
	int ret = 0;

	spin_lock(&fs_info->tree_mod_seq_lock);
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		elem = list_first_entry(&fs_info->tree_mod_seq_list,
					struct seq_list, list);
		if (seq >= elem->seq) {
			btrfs_debug(fs_info,
				"holding back delayed_ref %#x.%x, lowest is %#x.%x",
				(u32)(seq >> 32), (u32)seq,
				(u32)(elem->seq >> 32), (u32)elem->seq);
			ret = 1;
		}
	}

	spin_unlock(&fs_info->tree_mod_seq_lock);
	return ret;
}

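/*
 * Select the next head to run: find the first unprocessed head at or after
 * run_delayed_start, wrapping around to the start of the rbtree at most
 * once so a full pass visits each head one time.
 */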
struct btrfs_delayed_ref_head *btrfs_select_ref_head(
		struct btrfs_delayed_ref_root *delayed_refs)
{
	struct btrfs_delayed_ref_head *head;

again:
	head = find_ref_head(delayed_refs, delayed_refs->run_delayed_start,
			     true);
	if (!head && delayed_refs->run_delayed_start != 0) {
		delayed_refs->run_delayed_start = 0;
		head = find_first_ref_head(delayed_refs);
	}
	if (!head)
		return NULL;

	while (head->processing) {
		struct rb_node *node;

		node = rb_next(&head->href_node);
		if (!node) {
			if (delayed_refs->run_delayed_start == 0)
				return NULL;
			delayed_refs->run_delayed_start = 0;
			goto again;
		}
		head = rb_entry(node, struct btrfs_delayed_ref_head,
				href_node);
	}

	head->processing = 1;
	WARN_ON(delayed_refs->num_heads_ready == 0);
	delayed_refs->num_heads_ready--;
	delayed_refs->run_delayed_start = head->bytenr +
		head->num_bytes;
	return head;
}

void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
			   struct btrfs_delayed_ref_head *head)
{
	lockdep_assert_held(&delayed_refs->lock);
	lockdep_assert_held(&head->lock);

	rb_erase_cached(&head->href_node, &delayed_refs->href_root);
	RB_CLEAR_NODE(&head->href_node);
	atomic_dec(&delayed_refs->num_entries);
	delayed_refs->num_heads--;
	if (head->processing == 0)
		delayed_refs->num_heads_ready--;
}

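/*
 * Note the interplay with btrfs_select_ref_head() above: a head selected
 * for processing already had num_heads_ready decremented at selection time,
 * so only heads with processing == 0 adjust it again here.
 */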
/*
 * Helper to insert the ref_node into the per-head rbtree, or merge it with
 * an existing node for the same reference.
 *
 * Return 0 for insert.
 * Return >0 for merge.
 */
static int insert_delayed_ref(struct btrfs_trans_handle *trans,
			      struct btrfs_delayed_ref_root *root,
			      struct btrfs_delayed_ref_head *href,
			      struct btrfs_delayed_ref_node *ref)
{
	struct btrfs_delayed_ref_node *exist;
	int mod;
	int ret = 0;

	spin_lock(&href->lock);
	exist = tree_insert(&href->ref_tree, ref);
	if (!exist)
		goto inserted;

	/* Now we are sure we can merge */
	ret = 1;
	if (exist->action == ref->action) {
		mod = ref->ref_mod;
	} else {
		/* Need to change action */
		if (exist->ref_mod < ref->ref_mod) {
			exist->action = ref->action;
			mod = -exist->ref_mod;
			exist->ref_mod = ref->ref_mod;
			if (ref->action == BTRFS_ADD_DELAYED_REF)
				list_add_tail(&exist->add_list,
					      &href->ref_add_list);
			else if (ref->action == BTRFS_DROP_DELAYED_REF) {
				ASSERT(!list_empty(&exist->add_list));
				list_del(&exist->add_list);
			} else {
				ASSERT(0);
			}
		} else
			mod = -ref->ref_mod;
	}
	exist->ref_mod += mod;

	/* remove the existing node if its ref_mod drops to zero */
	if (exist->ref_mod == 0)
		drop_delayed_ref(trans, root, href, exist);
	spin_unlock(&href->lock);
	return ret;
inserted:
	if (ref->action == BTRFS_ADD_DELAYED_REF)
		list_add_tail(&ref->add_list, &href->ref_add_list);
	atomic_inc(&root->num_entries);
	spin_unlock(&href->lock);
	return ret;
}

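/*
 * Callers depend on the 0/1 return convention: when this returns > 0 the
 * new node was merged into an existing one, so the caller still owns the
 * ref it passed in and must free it, as the btrfs_add_delayed_*_ref()
 * functions below do.
 */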
/*
 * helper function to update the accounting in the head ref;
 * existing and update must have the same bytenr
 */
static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_delayed_ref_head *existing,
			 struct btrfs_delayed_ref_head *update,
			 int *old_ref_mod_ret)
{
	struct btrfs_delayed_ref_root *delayed_refs =
		&trans->transaction->delayed_refs;
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int old_ref_mod;

	BUG_ON(existing->is_data != update->is_data);

	spin_lock(&existing->lock);
	if (update->must_insert_reserved) {
		/* if the extent was freed and then
		 * reallocated before the delayed ref
		 * entries were processed, we can end up
		 * with an existing head ref without
		 * the must_insert_reserved flag set.
		 * Set it again here
		 */
		existing->must_insert_reserved = update->must_insert_reserved;

		/*
		 * update the num_bytes so we make sure the accounting
		 * is done correctly
		 */
		existing->num_bytes = update->num_bytes;

	}

	if (update->extent_op) {
		if (!existing->extent_op) {
			existing->extent_op = update->extent_op;
		} else {
			if (update->extent_op->update_key) {
				memcpy(&existing->extent_op->key,
				       &update->extent_op->key,
				       sizeof(update->extent_op->key));
				existing->extent_op->update_key = true;
			}
			if (update->extent_op->update_flags) {
				existing->extent_op->flags_to_set |=
					update->extent_op->flags_to_set;
				existing->extent_op->update_flags = true;
			}
			btrfs_free_delayed_extent_op(update->extent_op);
		}
	}
	/*
	 * update the reference mod on the head to reflect this new operation;
	 * we only need the lock here because the head could be processed
	 * concurrently, while for refs we just added we know we're a-ok.
	 */
	old_ref_mod = existing->total_ref_mod;
	if (old_ref_mod_ret)
		*old_ref_mod_ret = old_ref_mod;
	existing->ref_mod += update->ref_mod;
	existing->total_ref_mod += update->ref_mod;

	/*
	 * If we are going from a positive ref mod to a negative or vice
	 * versa we need to make sure to adjust pending_csums accordingly.
	 */
	if (existing->is_data) {
		u64 csum_leaves =
			btrfs_csum_bytes_to_leaves(fs_info,
						   existing->num_bytes);

		if (existing->total_ref_mod >= 0 && old_ref_mod < 0) {
			delayed_refs->pending_csums -= existing->num_bytes;
			btrfs_delayed_refs_rsv_release(fs_info, csum_leaves);
		}
		if (existing->total_ref_mod < 0 && old_ref_mod >= 0) {
			delayed_refs->pending_csums += existing->num_bytes;
			trans->delayed_ref_updates += csum_leaves;
		}
	}
	spin_unlock(&existing->lock);
}

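/*
 * Illustration of the sign-flip handling above (made-up numbers): if a data
 * extent's head goes from total_ref_mod == -1 to +1, its csum items no
 * longer need deleting, so pending_csums shrinks and the matching
 * delayed_refs_rsv space is released; the opposite transition adds the
 * bytes back and reserves leaves for the future csum deletion.
 */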
static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
				  struct btrfs_qgroup_extent_record *qrecord,
				  u64 bytenr, u64 num_bytes, u64 ref_root,
				  u64 reserved, int action, bool is_data,
				  bool is_system)
{
	int count_mod = 1;
	int must_insert_reserved = 0;

	/* If reserved is provided, it must be a data extent. */
	BUG_ON(!is_data && reserved);

	/*
	 * The head node stores the sum of all the mods, so dropping a ref
	 * should drop the sum in the head node by one.
	 */
	if (action == BTRFS_UPDATE_DELAYED_HEAD)
		count_mod = 0;
	else if (action == BTRFS_DROP_DELAYED_REF)
		count_mod = -1;

	/*
	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update the reserved
	 * accounting when the extent is finally added, or if a later
	 * modification deletes the delayed ref without ever inserting the
	 * extent into the extent allocation tree.  ref->must_insert_reserved
	 * is the flag used to record that accounting mods are required.
	 *
	 * Once we record must_insert_reserved, switch the action to
	 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
	 */
	if (action == BTRFS_ADD_DELAYED_EXTENT)
		must_insert_reserved = 1;
	else
		must_insert_reserved = 0;

	refcount_set(&head_ref->refs, 1);
	head_ref->bytenr = bytenr;
	head_ref->num_bytes = num_bytes;
	head_ref->ref_mod = count_mod;
	head_ref->must_insert_reserved = must_insert_reserved;
	head_ref->is_data = is_data;
	head_ref->is_system = is_system;
	head_ref->ref_tree = RB_ROOT_CACHED;
	INIT_LIST_HEAD(&head_ref->ref_add_list);
	RB_CLEAR_NODE(&head_ref->href_node);
	head_ref->processing = 0;
	head_ref->total_ref_mod = count_mod;
	head_ref->qgroup_reserved = 0;
	head_ref->qgroup_ref_root = 0;
	spin_lock_init(&head_ref->lock);
	mutex_init(&head_ref->mutex);

	if (qrecord) {
		if (ref_root && reserved) {
			head_ref->qgroup_ref_root = ref_root;
			head_ref->qgroup_reserved = reserved;
		}

		qrecord->bytenr = bytenr;
		qrecord->num_bytes = num_bytes;
		qrecord->old_roots = NULL;
	}
}

/*
 * helper function to actually insert a head node into the rbtree.
 * this does all the dirty work in terms of maintaining the correct
 * overall modification count.
 */
static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_trans_handle *trans,
		     struct btrfs_delayed_ref_head *head_ref,
		     struct btrfs_qgroup_extent_record *qrecord,
		     int action, int *qrecord_inserted_ret,
		     int *old_ref_mod, int *new_ref_mod)
{
	struct btrfs_delayed_ref_head *existing;
	struct btrfs_delayed_ref_root *delayed_refs;
	int qrecord_inserted = 0;

	delayed_refs = &trans->transaction->delayed_refs;

	/* Record qgroup extent info if provided */
	if (qrecord) {
		if (btrfs_qgroup_trace_extent_nolock(trans->fs_info,
					delayed_refs, qrecord))
			kfree(qrecord);
		else
			qrecord_inserted = 1;
	}

	trace_add_delayed_ref_head(trans->fs_info, head_ref, action);

	existing = htree_insert(&delayed_refs->href_root,
				&head_ref->href_node);
	if (existing) {
		WARN_ON(qrecord && head_ref->qgroup_ref_root
			&& head_ref->qgroup_reserved
			&& existing->qgroup_ref_root
			&& existing->qgroup_reserved);
		update_existing_head_ref(trans, existing, head_ref,
					 old_ref_mod);
		/*
		 * we've updated the existing ref, free the newly
		 * allocated ref
		 */
		kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
		head_ref = existing;
	} else {
		if (old_ref_mod)
			*old_ref_mod = 0;
		if (head_ref->is_data && head_ref->ref_mod < 0) {
			delayed_refs->pending_csums += head_ref->num_bytes;
			trans->delayed_ref_updates +=
				btrfs_csum_bytes_to_leaves(trans->fs_info,
							   head_ref->num_bytes);
		}
		delayed_refs->num_heads++;
		delayed_refs->num_heads_ready++;
		atomic_inc(&delayed_refs->num_entries);
		trans->delayed_ref_updates++;
	}
	if (qrecord_inserted_ret)
		*qrecord_inserted_ret = qrecord_inserted;
	if (new_ref_mod)
		*new_ref_mod = head_ref->total_ref_mod;

	return head_ref;
}

/*
 * init_delayed_ref_common - Initialize the structure which represents a
 *			     modification to an extent.
 *
 * @fs_info:    Internal structure of the mounted filesystem.
 *
 * @ref:	The structure which is going to be initialized.
 *
 * @bytenr:	The logical address of the extent for which a modification is
 *		going to be recorded.
 *
 * @num_bytes:  Size of the extent whose modification is being recorded.
 *
 * @ref_root:	The id of the root where this modification has originated, this
 *		can be either one of the well-known metadata trees or the
 *		subvolume id which references this extent.
 *
 * @action:	Can be one of BTRFS_ADD_DELAYED_REF/BTRFS_DROP_DELAYED_REF or
 *		BTRFS_ADD_DELAYED_EXTENT
 *
 * @ref_type:	Holds the type of the extent which is being recorded, can be
 *		one of BTRFS_SHARED_BLOCK_REF_KEY/BTRFS_TREE_BLOCK_REF_KEY
 *		when recording a metadata extent or BTRFS_SHARED_DATA_REF_KEY/
 *		BTRFS_EXTENT_DATA_REF_KEY when recording a data extent
 */
static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
				    struct btrfs_delayed_ref_node *ref,
				    u64 bytenr, u64 num_bytes, u64 ref_root,
				    int action, u8 ref_type)
{
	u64 seq = 0;

	if (action == BTRFS_ADD_DELAYED_EXTENT)
		action = BTRFS_ADD_DELAYED_REF;

	if (is_fstree(ref_root))
		seq = atomic64_read(&fs_info->tree_mod_seq);

	refcount_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = 1;
	ref->action = action;
	ref->is_head = 0;
	ref->in_tree = 1;
	ref->seq = seq;
	ref->type = ref_type;
	RB_CLEAR_NODE(&ref->ref_node);
	INIT_LIST_HEAD(&ref->add_list);
}

/*
 * add a delayed tree ref.  This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 */
int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes, u64 parent,
			       u64 ref_root, int level, int action,
			       struct btrfs_delayed_extent_op *extent_op,
			       int *old_ref_mod, int *new_ref_mod)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_qgroup_extent_record *record = NULL;
	int qrecord_inserted;
	bool is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
	int ret;
	u8 ref_type;

	BUG_ON(extent_op && extent_op->is_data);
	ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
		return -ENOMEM;
	}

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
	    is_fstree(ref_root)) {
		record = kmalloc(sizeof(*record), GFP_NOFS);
		if (!record) {
			kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
			kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
			return -ENOMEM;
		}
	}

	if (parent)
		ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
	else
		ref_type = BTRFS_TREE_BLOCK_REF_KEY;

	init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
				ref_root, action, ref_type);
	ref->root = ref_root;
	ref->parent = parent;
	ref->level = level;

	init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
			      ref_root, 0, action, false, is_system);
	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	head_ref = add_delayed_ref_head(trans, head_ref, record,
					action, &qrecord_inserted,
					old_ref_mod, new_ref_mod);

	ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
	spin_unlock(&delayed_refs->lock);

	/*
	 * Need to update the delayed_refs_rsv with any changes we may have
	 * made.
	 */
	btrfs_update_delayed_refs_rsv(trans);

	trace_add_delayed_tree_ref(fs_info, &ref->node, ref,
				   action == BTRFS_ADD_DELAYED_EXTENT ?
				   BTRFS_ADD_DELAYED_REF : action);
	if (ret > 0)
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);

	if (qrecord_inserted)
		btrfs_qgroup_trace_extent_post(fs_info, record);

	return 0;
}

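/*
 * A hypothetical call site for the function above (illustrative values
 * only): queueing an extra reference from subvolume 257 to a level-0 tree
 * block would look like
 *
 *	btrfs_add_delayed_tree_ref(trans, bytenr, nodesize, 0, 257, 0,
 *				   BTRFS_ADD_DELAYED_REF, NULL, NULL, NULL);
 */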
/*
 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
 */
int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes,
			       u64 parent, u64 ref_root,
			       u64 owner, u64 offset, u64 reserved, int action,
			       int *old_ref_mod, int *new_ref_mod)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_qgroup_extent_record *record = NULL;
	int qrecord_inserted;
	int ret;
	u8 ref_type;

	ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	if (parent)
		ref_type = BTRFS_SHARED_DATA_REF_KEY;
	else
		ref_type = BTRFS_EXTENT_DATA_REF_KEY;
	init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
				ref_root, action, ref_type);
	ref->root = ref_root;
	ref->parent = parent;
	ref->objectid = owner;
	ref->offset = offset;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
		return -ENOMEM;
	}

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
	    is_fstree(ref_root)) {
		record = kmalloc(sizeof(*record), GFP_NOFS);
		if (!record) {
			kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
			kmem_cache_free(btrfs_delayed_ref_head_cachep,
					head_ref);
			return -ENOMEM;
		}
	}

	init_delayed_ref_head(head_ref, record, bytenr, num_bytes, ref_root,
			      reserved, action, true, false);
	head_ref->extent_op = NULL;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	head_ref = add_delayed_ref_head(trans, head_ref, record,
					action, &qrecord_inserted,
					old_ref_mod, new_ref_mod);

	ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
	spin_unlock(&delayed_refs->lock);

	/*
	 * Need to update the delayed_refs_rsv with any changes we may have
	 * made.
	 */
	btrfs_update_delayed_refs_rsv(trans);

	trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref,
				   action == BTRFS_ADD_DELAYED_EXTENT ?
				   BTRFS_ADD_DELAYED_REF : action);
	if (ret > 0)
		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);

	if (qrecord_inserted)
		return btrfs_qgroup_trace_extent_post(fs_info, record);
	return 0;
}

int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
				struct btrfs_trans_handle *trans,
				u64 bytenr, u64 num_bytes,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref)
		return -ENOMEM;

	init_delayed_ref_head(head_ref, NULL, bytenr, num_bytes, 0, 0,
			      BTRFS_UPDATE_DELAYED_HEAD, extent_op->is_data,
			      false);
	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	add_delayed_ref_head(trans, head_ref, NULL, BTRFS_UPDATE_DELAYED_HEAD,
			     NULL, NULL, NULL);

	spin_unlock(&delayed_refs->lock);

	/*
	 * Need to update the delayed_refs_rsv with any changes we may have
	 * made.
	 */
	btrfs_update_delayed_refs_rsv(trans);
	return 0;
}

/*
 * this does a simple search for the head node for a given extent.
 * It must be called with the delayed ref spinlock held, and it returns
 * the head node if one was found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
{
	return find_ref_head(delayed_refs, bytenr, false);
}

void __cold btrfs_delayed_ref_exit(void)
{
	kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
	kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
	kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
	kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}

int __init btrfs_delayed_ref_init(void)
{
	btrfs_delayed_ref_head_cachep = kmem_cache_create(
				"btrfs_delayed_ref_head",
				sizeof(struct btrfs_delayed_ref_head), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_ref_head_cachep)
		goto fail;

	btrfs_delayed_tree_ref_cachep = kmem_cache_create(
				"btrfs_delayed_tree_ref",
				sizeof(struct btrfs_delayed_tree_ref), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_tree_ref_cachep)
		goto fail;

	btrfs_delayed_data_ref_cachep = kmem_cache_create(
				"btrfs_delayed_data_ref",
				sizeof(struct btrfs_delayed_data_ref), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_data_ref_cachep)
		goto fail;

	btrfs_delayed_extent_op_cachep = kmem_cache_create(
				"btrfs_delayed_extent_op",
				sizeof(struct btrfs_delayed_extent_op), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_extent_op_cachep)
		goto fail;

	return 0;
fail:
	btrfs_delayed_ref_exit();
	return -ENOMEM;
}