btrfs: Introduce init_delayed_ref_head
[linux-2.6-block.git] / fs / btrfs / delayed-ref.c
CommitLineData
c1d7c514 1// SPDX-License-Identifier: GPL-2.0
56bec294
CM
2/*
3 * Copyright (C) 2009 Oracle. All rights reserved.
56bec294
CM
4 */
5
6#include <linux/sched.h>
5a0e3ad6 7#include <linux/slab.h>
56bec294 8#include <linux/sort.h>
56bec294
CM
9#include "ctree.h"
10#include "delayed-ref.h"
11#include "transaction.h"
3368d001 12#include "qgroup.h"
56bec294 13
78a6184a
MX
14struct kmem_cache *btrfs_delayed_ref_head_cachep;
15struct kmem_cache *btrfs_delayed_tree_ref_cachep;
16struct kmem_cache *btrfs_delayed_data_ref_cachep;
17struct kmem_cache *btrfs_delayed_extent_op_cachep;
56bec294
CM
/*
 * Delayed back reference update tracking.  For subvolume trees we queue up
 * extent allocations and backref maintenance for delayed processing.  This
 * avoids deep call chains where we add extents in the middle of
 * btrfs_search_slot, and it allows us to buffer up frequently modified
 * backrefs in an rb tree instead of hammering updates on the extent
 * allocation tree.
 */
26
27/*
5d4f98a2
YZ
28 * compare two delayed tree backrefs with same bytenr and type
29 */
c7ad7c84
JB
30static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
31 struct btrfs_delayed_tree_ref *ref2)
5d4f98a2 32{
3b60d436 33 if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
41b0fc42
JB
34 if (ref1->root < ref2->root)
35 return -1;
36 if (ref1->root > ref2->root)
37 return 1;
38 } else {
39 if (ref1->parent < ref2->parent)
40 return -1;
41 if (ref1->parent > ref2->parent)
42 return 1;
43 }
5d4f98a2
YZ
44 return 0;
45}
46
47/*
48 * compare two delayed data backrefs with same bytenr and type
56bec294 49 */
c7ad7c84
JB
50static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
51 struct btrfs_delayed_data_ref *ref2)
56bec294 52{
5d4f98a2
YZ
53 if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
54 if (ref1->root < ref2->root)
55 return -1;
56 if (ref1->root > ref2->root)
57 return 1;
58 if (ref1->objectid < ref2->objectid)
59 return -1;
60 if (ref1->objectid > ref2->objectid)
61 return 1;
62 if (ref1->offset < ref2->offset)
63 return -1;
64 if (ref1->offset > ref2->offset)
65 return 1;
66 } else {
67 if (ref1->parent < ref2->parent)
68 return -1;
69 if (ref1->parent > ref2->parent)
70 return 1;
71 }
72 return 0;
73}
74
1d148e59
JB
75static int comp_refs(struct btrfs_delayed_ref_node *ref1,
76 struct btrfs_delayed_ref_node *ref2,
77 bool check_seq)
78{
79 int ret = 0;
80
81 if (ref1->type < ref2->type)
82 return -1;
83 if (ref1->type > ref2->type)
84 return 1;
85 if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
86 ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
87 ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
88 btrfs_delayed_node_to_tree_ref(ref2));
89 else
90 ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
91 btrfs_delayed_node_to_data_ref(ref2));
92 if (ret)
93 return ret;
94 if (check_seq) {
95 if (ref1->seq < ref2->seq)
96 return -1;
97 if (ref1->seq > ref2->seq)
98 return 1;
99 }
100 return 0;
101}
102
c46effa6
LB
103/* insert a new ref to head ref rbtree */
104static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
105 struct rb_node *node)
106{
107 struct rb_node **p = &root->rb_node;
108 struct rb_node *parent_node = NULL;
109 struct btrfs_delayed_ref_head *entry;
110 struct btrfs_delayed_ref_head *ins;
111 u64 bytenr;
112
113 ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
d278850e 114 bytenr = ins->bytenr;
c46effa6
LB
115 while (*p) {
116 parent_node = *p;
117 entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
118 href_node);
119
d278850e 120 if (bytenr < entry->bytenr)
c46effa6 121 p = &(*p)->rb_left;
d278850e 122 else if (bytenr > entry->bytenr)
c46effa6
LB
123 p = &(*p)->rb_right;
124 else
125 return entry;
126 }
127
128 rb_link_node(node, parent_node, p);
129 rb_insert_color(node, root);
130 return NULL;
131}
132
0e0adbcf
JB
133static struct btrfs_delayed_ref_node* tree_insert(struct rb_root *root,
134 struct btrfs_delayed_ref_node *ins)
135{
136 struct rb_node **p = &root->rb_node;
137 struct rb_node *node = &ins->ref_node;
138 struct rb_node *parent_node = NULL;
139 struct btrfs_delayed_ref_node *entry;
140
141 while (*p) {
142 int comp;
143
144 parent_node = *p;
145 entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
146 ref_node);
147 comp = comp_refs(ins, entry, true);
148 if (comp < 0)
149 p = &(*p)->rb_left;
150 else if (comp > 0)
151 p = &(*p)->rb_right;
152 else
153 return entry;
154 }
155
156 rb_link_node(node, parent_node, p);
157 rb_insert_color(node, root);
158 return NULL;
159}
160
56bec294 161/*
5d4f98a2 162 * find an head entry based on bytenr. This returns the delayed ref
d1270cd9
AJ
163 * head if it was able to find one, or NULL if nothing was in that spot.
164 * If return_bigger is given, the next bigger entry is returned if no exact
165 * match is found.
56bec294 166 */
c46effa6
LB
167static struct btrfs_delayed_ref_head *
168find_ref_head(struct rb_root *root, u64 bytenr,
85fdfdf6 169 int return_bigger)
56bec294 170{
d1270cd9 171 struct rb_node *n;
c46effa6 172 struct btrfs_delayed_ref_head *entry;
56bec294 173
d1270cd9
AJ
174 n = root->rb_node;
175 entry = NULL;
56bec294 176 while (n) {
c46effa6 177 entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
56bec294 178
d278850e 179 if (bytenr < entry->bytenr)
56bec294 180 n = n->rb_left;
d278850e 181 else if (bytenr > entry->bytenr)
56bec294
CM
182 n = n->rb_right;
183 else
184 return entry;
185 }
d1270cd9 186 if (entry && return_bigger) {
d278850e 187 if (bytenr > entry->bytenr) {
c46effa6 188 n = rb_next(&entry->href_node);
d1270cd9
AJ
189 if (!n)
190 n = rb_first(root);
c46effa6
LB
191 entry = rb_entry(n, struct btrfs_delayed_ref_head,
192 href_node);
6103fb43 193 return entry;
d1270cd9
AJ
194 }
195 return entry;
196 }
56bec294
CM
197 return NULL;
198}
199
c3e69d58
CM
200int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
201 struct btrfs_delayed_ref_head *head)
56bec294 202{
c3e69d58
CM
203 struct btrfs_delayed_ref_root *delayed_refs;
204
205 delayed_refs = &trans->transaction->delayed_refs;
a4666e68 206 lockdep_assert_held(&delayed_refs->lock);
c3e69d58
CM
207 if (mutex_trylock(&head->mutex))
208 return 0;
209
d278850e 210 refcount_inc(&head->refs);
c3e69d58
CM
211 spin_unlock(&delayed_refs->lock);
212
213 mutex_lock(&head->mutex);
214 spin_lock(&delayed_refs->lock);
d278850e 215 if (RB_EMPTY_NODE(&head->href_node)) {
c3e69d58 216 mutex_unlock(&head->mutex);
d278850e 217 btrfs_put_delayed_ref_head(head);
c3e69d58
CM
218 return -EAGAIN;
219 }
d278850e 220 btrfs_put_delayed_ref_head(head);
c3e69d58
CM
221 return 0;
222}
223
35a3621b 224static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
ae1e206b 225 struct btrfs_delayed_ref_root *delayed_refs,
d7df2c79 226 struct btrfs_delayed_ref_head *head,
ae1e206b
JB
227 struct btrfs_delayed_ref_node *ref)
228{
a4666e68 229 lockdep_assert_held(&head->lock);
0e0adbcf
JB
230 rb_erase(&ref->ref_node, &head->ref_tree);
231 RB_CLEAR_NODE(&ref->ref_node);
d278850e
JB
232 if (!list_empty(&ref->add_list))
233 list_del(&ref->add_list);
ae1e206b
JB
234 ref->in_tree = 0;
235 btrfs_put_delayed_ref(ref);
d7df2c79 236 atomic_dec(&delayed_refs->num_entries);
ae1e206b
JB
237 if (trans->delayed_ref_updates)
238 trans->delayed_ref_updates--;
239}
240
2c3cf7d5
FM
241static bool merge_ref(struct btrfs_trans_handle *trans,
242 struct btrfs_delayed_ref_root *delayed_refs,
243 struct btrfs_delayed_ref_head *head,
244 struct btrfs_delayed_ref_node *ref,
245 u64 seq)
246{
247 struct btrfs_delayed_ref_node *next;
0e0adbcf 248 struct rb_node *node = rb_next(&ref->ref_node);
2c3cf7d5
FM
249 bool done = false;
250
0e0adbcf 251 while (!done && node) {
2c3cf7d5 252 int mod;
2c3cf7d5 253
0e0adbcf
JB
254 next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
255 node = rb_next(node);
2c3cf7d5 256 if (seq && next->seq >= seq)
0e0adbcf 257 break;
1d148e59 258 if (comp_refs(ref, next, false))
0e0adbcf 259 break;
2c3cf7d5
FM
260
261 if (ref->action == next->action) {
262 mod = next->ref_mod;
263 } else {
264 if (ref->ref_mod < next->ref_mod) {
265 swap(ref, next);
266 done = true;
267 }
268 mod = -next->ref_mod;
269 }
270
271 drop_delayed_ref(trans, delayed_refs, head, next);
272 ref->ref_mod += mod;
273 if (ref->ref_mod == 0) {
274 drop_delayed_ref(trans, delayed_refs, head, ref);
275 done = true;
276 } else {
277 /*
278 * Can't have multiples of the same ref on a tree block.
279 */
280 WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
281 ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
282 }
2c3cf7d5
FM
283 }
284
285 return done;
286}
287
288void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
2c3cf7d5
FM
289 struct btrfs_delayed_ref_root *delayed_refs,
290 struct btrfs_delayed_ref_head *head)
291{
be97f133 292 struct btrfs_fs_info *fs_info = trans->fs_info;
2c3cf7d5 293 struct btrfs_delayed_ref_node *ref;
0e0adbcf 294 struct rb_node *node;
2c3cf7d5
FM
295 u64 seq = 0;
296
a4666e68 297 lockdep_assert_held(&head->lock);
2c3cf7d5 298
0e0adbcf 299 if (RB_EMPTY_ROOT(&head->ref_tree))
2c3cf7d5
FM
300 return;
301
302 /* We don't have too many refs to merge for data. */
303 if (head->is_data)
304 return;
305
306 spin_lock(&fs_info->tree_mod_seq_lock);
307 if (!list_empty(&fs_info->tree_mod_seq_list)) {
308 struct seq_list *elem;
309
310 elem = list_first_entry(&fs_info->tree_mod_seq_list,
311 struct seq_list, list);
312 seq = elem->seq;
313 }
314 spin_unlock(&fs_info->tree_mod_seq_lock);
315
0e0adbcf
JB
316again:
317 for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
318 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
2c3cf7d5 319 if (seq && ref->seq >= seq)
2c3cf7d5 320 continue;
0e0adbcf
JB
321 if (merge_ref(trans, delayed_refs, head, ref, seq))
322 goto again;
2c3cf7d5
FM
323 }
324}
325
41d0bd3b 326int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
00f04b88
AJ
327{
328 struct seq_list *elem;
097b8a7c
JS
329 int ret = 0;
330
331 spin_lock(&fs_info->tree_mod_seq_lock);
332 if (!list_empty(&fs_info->tree_mod_seq_list)) {
333 elem = list_first_entry(&fs_info->tree_mod_seq_list,
334 struct seq_list, list);
335 if (seq >= elem->seq) {
ab8d0fc4 336 btrfs_debug(fs_info,
41d0bd3b 337 "holding back delayed_ref %#x.%x, lowest is %#x.%x",
ab8d0fc4 338 (u32)(seq >> 32), (u32)seq,
41d0bd3b 339 (u32)(elem->seq >> 32), (u32)elem->seq);
097b8a7c
JS
340 ret = 1;
341 }
00f04b88 342 }
097b8a7c
JS
343
344 spin_unlock(&fs_info->tree_mod_seq_lock);
345 return ret;
00f04b88
AJ
346}
347
d7df2c79
JB
348struct btrfs_delayed_ref_head *
349btrfs_select_ref_head(struct btrfs_trans_handle *trans)
c3e69d58 350{
c3e69d58 351 struct btrfs_delayed_ref_root *delayed_refs;
d7df2c79
JB
352 struct btrfs_delayed_ref_head *head;
353 u64 start;
354 bool loop = false;
56bec294 355
c3e69d58 356 delayed_refs = &trans->transaction->delayed_refs;
c46effa6 357
c3e69d58 358again:
d7df2c79 359 start = delayed_refs->run_delayed_start;
85fdfdf6 360 head = find_ref_head(&delayed_refs->href_root, start, 1);
d7df2c79
JB
361 if (!head && !loop) {
362 delayed_refs->run_delayed_start = 0;
c3e69d58 363 start = 0;
d7df2c79 364 loop = true;
85fdfdf6 365 head = find_ref_head(&delayed_refs->href_root, start, 1);
d7df2c79
JB
366 if (!head)
367 return NULL;
368 } else if (!head && loop) {
369 return NULL;
c3e69d58 370 }
56bec294 371
d7df2c79
JB
372 while (head->processing) {
373 struct rb_node *node;
374
375 node = rb_next(&head->href_node);
376 if (!node) {
377 if (loop)
378 return NULL;
379 delayed_refs->run_delayed_start = 0;
380 start = 0;
381 loop = true;
382 goto again;
383 }
384 head = rb_entry(node, struct btrfs_delayed_ref_head,
385 href_node);
386 }
093486c4 387
d7df2c79
JB
388 head->processing = 1;
389 WARN_ON(delayed_refs->num_heads_ready == 0);
390 delayed_refs->num_heads_ready--;
d278850e
JB
391 delayed_refs->run_delayed_start = head->bytenr +
392 head->num_bytes;
d7df2c79 393 return head;
093486c4
MX
394}
395
c6fc2454
QW
396/*
397 * Helper to insert the ref_node to the tail or merge with tail.
398 *
399 * Return 0 for insert.
400 * Return >0 for merge.
401 */
0e0adbcf
JB
402static int insert_delayed_ref(struct btrfs_trans_handle *trans,
403 struct btrfs_delayed_ref_root *root,
404 struct btrfs_delayed_ref_head *href,
405 struct btrfs_delayed_ref_node *ref)
c6fc2454
QW
406{
407 struct btrfs_delayed_ref_node *exist;
408 int mod;
409 int ret = 0;
410
411 spin_lock(&href->lock);
0e0adbcf
JB
412 exist = tree_insert(&href->ref_tree, ref);
413 if (!exist)
414 goto inserted;
c6fc2454
QW
415
416 /* Now we are sure we can merge */
417 ret = 1;
418 if (exist->action == ref->action) {
419 mod = ref->ref_mod;
420 } else {
421 /* Need to change action */
422 if (exist->ref_mod < ref->ref_mod) {
423 exist->action = ref->action;
424 mod = -exist->ref_mod;
425 exist->ref_mod = ref->ref_mod;
1d57ee94
WX
426 if (ref->action == BTRFS_ADD_DELAYED_REF)
427 list_add_tail(&exist->add_list,
428 &href->ref_add_list);
429 else if (ref->action == BTRFS_DROP_DELAYED_REF) {
430 ASSERT(!list_empty(&exist->add_list));
431 list_del(&exist->add_list);
432 } else {
433 ASSERT(0);
434 }
c6fc2454
QW
435 } else
436 mod = -ref->ref_mod;
437 }
438 exist->ref_mod += mod;
439
440 /* remove existing tail if its ref_mod is zero */
441 if (exist->ref_mod == 0)
442 drop_delayed_ref(trans, root, href, exist);
443 spin_unlock(&href->lock);
444 return ret;
0e0adbcf 445inserted:
1d57ee94
WX
446 if (ref->action == BTRFS_ADD_DELAYED_REF)
447 list_add_tail(&ref->add_list, &href->ref_add_list);
c6fc2454
QW
448 atomic_inc(&root->num_entries);
449 trans->delayed_ref_updates++;
450 spin_unlock(&href->lock);
451 return ret;
452}
453
56bec294
CM
454/*
455 * helper function to update the accounting in the head ref
456 * existing and update must have the same bytenr
457 */
458static noinline void
1262133b 459update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
d278850e
JB
460 struct btrfs_delayed_ref_head *existing,
461 struct btrfs_delayed_ref_head *update,
7be07912 462 int *old_ref_mod_ret)
56bec294 463{
1262133b 464 int old_ref_mod;
56bec294 465
d278850e 466 BUG_ON(existing->is_data != update->is_data);
56bec294 467
d278850e
JB
468 spin_lock(&existing->lock);
469 if (update->must_insert_reserved) {
56bec294
CM
470 /* if the extent was freed and then
471 * reallocated before the delayed ref
472 * entries were processed, we can end up
473 * with an existing head ref without
474 * the must_insert_reserved flag set.
475 * Set it again here
476 */
d278850e 477 existing->must_insert_reserved = update->must_insert_reserved;
56bec294
CM
478
479 /*
480 * update the num_bytes so we make sure the accounting
481 * is done correctly
482 */
483 existing->num_bytes = update->num_bytes;
484
485 }
486
d278850e
JB
487 if (update->extent_op) {
488 if (!existing->extent_op) {
489 existing->extent_op = update->extent_op;
5d4f98a2 490 } else {
d278850e
JB
491 if (update->extent_op->update_key) {
492 memcpy(&existing->extent_op->key,
493 &update->extent_op->key,
494 sizeof(update->extent_op->key));
495 existing->extent_op->update_key = true;
5d4f98a2 496 }
d278850e
JB
497 if (update->extent_op->update_flags) {
498 existing->extent_op->flags_to_set |=
499 update->extent_op->flags_to_set;
500 existing->extent_op->update_flags = true;
5d4f98a2 501 }
d278850e 502 btrfs_free_delayed_extent_op(update->extent_op);
5d4f98a2
YZ
503 }
504 }
56bec294 505 /*
d7df2c79
JB
506 * update the reference mod on the head to reflect this new operation,
507 * only need the lock for this case cause we could be processing it
508 * currently, for refs we just added we know we're a-ok.
56bec294 509 */
d278850e 510 old_ref_mod = existing->total_ref_mod;
7be07912
OS
511 if (old_ref_mod_ret)
512 *old_ref_mod_ret = old_ref_mod;
56bec294 513 existing->ref_mod += update->ref_mod;
d278850e 514 existing->total_ref_mod += update->ref_mod;
1262133b
JB
515
516 /*
517 * If we are going to from a positive ref mod to a negative or vice
518 * versa we need to make sure to adjust pending_csums accordingly.
519 */
d278850e
JB
520 if (existing->is_data) {
521 if (existing->total_ref_mod >= 0 && old_ref_mod < 0)
1262133b 522 delayed_refs->pending_csums -= existing->num_bytes;
d278850e 523 if (existing->total_ref_mod < 0 && old_ref_mod >= 0)
1262133b
JB
524 delayed_refs->pending_csums += existing->num_bytes;
525 }
d278850e 526 spin_unlock(&existing->lock);
56bec294
CM
527}
528
a2e569b3
NB
529static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
530 struct btrfs_qgroup_extent_record *qrecord,
531 u64 bytenr, u64 num_bytes, u64 ref_root,
532 u64 reserved, int action, bool is_data,
533 bool is_system)
534{
535 int count_mod = 1;
536 int must_insert_reserved = 0;
537
538 /* If reserved is provided, it must be a data extent. */
539 BUG_ON(!is_data && reserved);
540
541 /*
542 * The head node stores the sum of all the mods, so dropping a ref
543 * should drop the sum in the head node by one.
544 */
545 if (action == BTRFS_UPDATE_DELAYED_HEAD)
546 count_mod = 0;
547 else if (action == BTRFS_DROP_DELAYED_REF)
548 count_mod = -1;
549
550 /*
551 * BTRFS_ADD_DELAYED_EXTENT means that we need to update the reserved
552 * accounting when the extent is finally added, or if a later
553 * modification deletes the delayed ref without ever inserting the
554 * extent into the extent allocation tree. ref->must_insert_reserved
555 * is the flag used to record that accounting mods are required.
556 *
557 * Once we record must_insert_reserved, switch the action to
558 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
559 */
560 if (action == BTRFS_ADD_DELAYED_EXTENT)
561 must_insert_reserved = 1;
562 else
563 must_insert_reserved = 0;
564
565 refcount_set(&head_ref->refs, 1);
566 head_ref->bytenr = bytenr;
567 head_ref->num_bytes = num_bytes;
568 head_ref->ref_mod = count_mod;
569 head_ref->must_insert_reserved = must_insert_reserved;
570 head_ref->is_data = is_data;
571 head_ref->is_system = is_system;
572 head_ref->ref_tree = RB_ROOT;
573 INIT_LIST_HEAD(&head_ref->ref_add_list);
574 RB_CLEAR_NODE(&head_ref->href_node);
575 head_ref->processing = 0;
576 head_ref->total_ref_mod = count_mod;
577 head_ref->qgroup_reserved = 0;
578 head_ref->qgroup_ref_root = 0;
579 spin_lock_init(&head_ref->lock);
580 mutex_init(&head_ref->mutex);
581
582 if (qrecord) {
583 if (ref_root && reserved) {
584 head_ref->qgroup_ref_root = ref_root;
585 head_ref->qgroup_reserved = reserved;
586 }
587
588 qrecord->bytenr = bytenr;
589 qrecord->num_bytes = num_bytes;
590 qrecord->old_roots = NULL;
591 }
592}
593
56bec294 594/*
5d4f98a2 595 * helper function to actually insert a head node into the rbtree.
56bec294 596 * this does all the dirty work in terms of maintaining the correct
5d4f98a2 597 * overall modification count.
56bec294 598 */
d7df2c79 599static noinline struct btrfs_delayed_ref_head *
1acda0c2 600add_delayed_ref_head(struct btrfs_trans_handle *trans,
d278850e 601 struct btrfs_delayed_ref_head *head_ref,
3368d001 602 struct btrfs_qgroup_extent_record *qrecord,
5846a3c2 603 u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
5e388e95
NB
604 int action, int is_data, int is_system,
605 int *qrecord_inserted_ret,
7be07912 606 int *old_ref_mod, int *new_ref_mod)
5e388e95 607
56bec294 608{
d7df2c79 609 struct btrfs_delayed_ref_head *existing;
56bec294
CM
610 struct btrfs_delayed_ref_root *delayed_refs;
611 int count_mod = 1;
612 int must_insert_reserved = 0;
fb235dc0 613 int qrecord_inserted = 0;
56bec294 614
5846a3c2
QW
615 /* If reserved is provided, it must be a data extent. */
616 BUG_ON(!is_data && reserved);
617
56bec294
CM
618 /*
619 * the head node stores the sum of all the mods, so dropping a ref
620 * should drop the sum in the head node by one.
621 */
5d4f98a2
YZ
622 if (action == BTRFS_UPDATE_DELAYED_HEAD)
623 count_mod = 0;
624 else if (action == BTRFS_DROP_DELAYED_REF)
625 count_mod = -1;
56bec294
CM
626
627 /*
628 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
629 * the reserved accounting when the extent is finally added, or
630 * if a later modification deletes the delayed ref without ever
631 * inserting the extent into the extent allocation tree.
632 * ref->must_insert_reserved is the flag used to record
633 * that accounting mods are required.
634 *
635 * Once we record must_insert_reserved, switch the action to
636 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
637 */
5d4f98a2 638 if (action == BTRFS_ADD_DELAYED_EXTENT)
56bec294 639 must_insert_reserved = 1;
5d4f98a2 640 else
56bec294 641 must_insert_reserved = 0;
56bec294
CM
642
643 delayed_refs = &trans->transaction->delayed_refs;
644
d278850e
JB
645 refcount_set(&head_ref->refs, 1);
646 head_ref->bytenr = bytenr;
647 head_ref->num_bytes = num_bytes;
648 head_ref->ref_mod = count_mod;
5d4f98a2
YZ
649 head_ref->must_insert_reserved = must_insert_reserved;
650 head_ref->is_data = is_data;
5e388e95 651 head_ref->is_system = is_system;
0e0adbcf 652 head_ref->ref_tree = RB_ROOT;
1d57ee94 653 INIT_LIST_HEAD(&head_ref->ref_add_list);
d278850e 654 RB_CLEAR_NODE(&head_ref->href_node);
d7df2c79 655 head_ref->processing = 0;
1262133b 656 head_ref->total_ref_mod = count_mod;
f64d5ca8
QW
657 head_ref->qgroup_reserved = 0;
658 head_ref->qgroup_ref_root = 0;
d278850e
JB
659 spin_lock_init(&head_ref->lock);
660 mutex_init(&head_ref->mutex);
5d4f98a2 661
3368d001
QW
662 /* Record qgroup extent info if provided */
663 if (qrecord) {
5846a3c2
QW
664 if (ref_root && reserved) {
665 head_ref->qgroup_ref_root = ref_root;
666 head_ref->qgroup_reserved = reserved;
667 }
668
3368d001
QW
669 qrecord->bytenr = bytenr;
670 qrecord->num_bytes = num_bytes;
671 qrecord->old_roots = NULL;
672
1acda0c2 673 if(btrfs_qgroup_trace_extent_nolock(trans->fs_info,
cb93b52c 674 delayed_refs, qrecord))
3368d001 675 kfree(qrecord);
fb235dc0
QW
676 else
677 qrecord_inserted = 1;
3368d001
QW
678 }
679
1acda0c2 680 trace_add_delayed_ref_head(trans->fs_info, head_ref, action);
1abe9b8a 681
d7df2c79
JB
682 existing = htree_insert(&delayed_refs->href_root,
683 &head_ref->href_node);
5d4f98a2 684 if (existing) {
5846a3c2
QW
685 WARN_ON(ref_root && reserved && existing->qgroup_ref_root
686 && existing->qgroup_reserved);
d278850e 687 update_existing_head_ref(delayed_refs, existing, head_ref,
7be07912 688 old_ref_mod);
5d4f98a2
YZ
689 /*
690 * we've updated the existing ref, free the newly
691 * allocated ref
692 */
78a6184a 693 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
d7df2c79 694 head_ref = existing;
5d4f98a2 695 } else {
7be07912
OS
696 if (old_ref_mod)
697 *old_ref_mod = 0;
1262133b
JB
698 if (is_data && count_mod < 0)
699 delayed_refs->pending_csums += num_bytes;
5d4f98a2
YZ
700 delayed_refs->num_heads++;
701 delayed_refs->num_heads_ready++;
d7df2c79 702 atomic_inc(&delayed_refs->num_entries);
5d4f98a2
YZ
703 trans->delayed_ref_updates++;
704 }
fb235dc0
QW
705 if (qrecord_inserted_ret)
706 *qrecord_inserted_ret = qrecord_inserted;
7be07912
OS
707 if (new_ref_mod)
708 *new_ref_mod = head_ref->total_ref_mod;
d7df2c79 709 return head_ref;
5d4f98a2
YZ
710}
711
cb49a87b
NB
712/*
713 * init_delayed_ref_common - Initialize the structure which represents a
714 * modification to a an extent.
715 *
716 * @fs_info: Internal to the mounted filesystem mount structure.
717 *
718 * @ref: The structure which is going to be initialized.
719 *
720 * @bytenr: The logical address of the extent for which a modification is
721 * going to be recorded.
722 *
723 * @num_bytes: Size of the extent whose modification is being recorded.
724 *
725 * @ref_root: The id of the root where this modification has originated, this
726 * can be either one of the well-known metadata trees or the
727 * subvolume id which references this extent.
728 *
729 * @action: Can be one of BTRFS_ADD_DELAYED_REF/BTRFS_DROP_DELAYED_REF or
730 * BTRFS_ADD_DELAYED_EXTENT
731 *
732 * @ref_type: Holds the type of the extent which is being recorded, can be
733 * one of BTRFS_SHARED_BLOCK_REF_KEY/BTRFS_TREE_BLOCK_REF_KEY
734 * when recording a metadata extent or BTRFS_SHARED_DATA_REF_KEY/
735 * BTRFS_EXTENT_DATA_REF_KEY when recording data extent
736 */
737static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
738 struct btrfs_delayed_ref_node *ref,
739 u64 bytenr, u64 num_bytes, u64 ref_root,
740 int action, u8 ref_type)
741{
742 u64 seq = 0;
743
744 if (action == BTRFS_ADD_DELAYED_EXTENT)
745 action = BTRFS_ADD_DELAYED_REF;
746
747 if (is_fstree(ref_root))
748 seq = atomic64_read(&fs_info->tree_mod_seq);
749
750 refcount_set(&ref->refs, 1);
751 ref->bytenr = bytenr;
752 ref->num_bytes = num_bytes;
753 ref->ref_mod = 1;
754 ref->action = action;
755 ref->is_head = 0;
756 ref->in_tree = 1;
757 ref->seq = seq;
758 ref->type = ref_type;
759 RB_CLEAR_NODE(&ref->ref_node);
760 INIT_LIST_HEAD(&ref->add_list);
761}
762
56bec294 763/*
5d4f98a2 764 * add a delayed tree ref. This does all of the accounting required
56bec294
CM
765 * to make sure the delayed ref is eventually processed before this
766 * transaction commits.
767 */
66d7e7f0
AJ
768int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
769 struct btrfs_trans_handle *trans,
5d4f98a2
YZ
770 u64 bytenr, u64 num_bytes, u64 parent,
771 u64 ref_root, int level, int action,
7be07912
OS
772 struct btrfs_delayed_extent_op *extent_op,
773 int *old_ref_mod, int *new_ref_mod)
56bec294 774{
5d4f98a2 775 struct btrfs_delayed_tree_ref *ref;
56bec294
CM
776 struct btrfs_delayed_ref_head *head_ref;
777 struct btrfs_delayed_ref_root *delayed_refs;
3368d001 778 struct btrfs_qgroup_extent_record *record = NULL;
fb235dc0 779 int qrecord_inserted;
5e388e95 780 int is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
70d64000
NB
781 int ret;
782 u8 ref_type;
56bec294 783
5d4f98a2 784 BUG_ON(extent_op && extent_op->is_data);
78a6184a 785 ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
56bec294
CM
786 if (!ref)
787 return -ENOMEM;
788
70d64000
NB
789 if (parent)
790 ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
791 else
792 ref_type = BTRFS_TREE_BLOCK_REF_KEY;
793 init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
794 ref_root, action, ref_type);
795 ref->root = ref_root;
796 ref->parent = parent;
797 ref->level = level;
798
78a6184a 799 head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
5a5003df
DC
800 if (!head_ref)
801 goto free_ref;
5d4f98a2 802
afcdd129
JB
803 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
804 is_fstree(ref_root)) {
3368d001 805 record = kmalloc(sizeof(*record), GFP_NOFS);
5a5003df
DC
806 if (!record)
807 goto free_head_ref;
3368d001
QW
808 }
809
5d4f98a2
YZ
810 head_ref->extent_op = extent_op;
811
812 delayed_refs = &trans->transaction->delayed_refs;
813 spin_lock(&delayed_refs->lock);
814
56bec294 815 /*
5d4f98a2
YZ
816 * insert both the head node and the new ref without dropping
817 * the spin lock
56bec294 818 */
1acda0c2
NB
819 head_ref = add_delayed_ref_head(trans, head_ref, record, bytenr,
820 num_bytes, 0, 0, action, 0,
5e388e95
NB
821 is_system, &qrecord_inserted,
822 old_ref_mod, new_ref_mod);
5d4f98a2 823
70d64000
NB
824
825 ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
5d4f98a2 826 spin_unlock(&delayed_refs->lock);
95a06077 827
70d64000
NB
828 trace_add_delayed_tree_ref(fs_info, &ref->node, ref,
829 action == BTRFS_ADD_DELAYED_EXTENT ?
830 BTRFS_ADD_DELAYED_REF : action);
831 if (ret > 0)
832 kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
833
fb235dc0 834 if (qrecord_inserted)
952bd3db
NB
835 btrfs_qgroup_trace_extent_post(fs_info, record);
836
5d4f98a2 837 return 0;
5a5003df
DC
838
839free_head_ref:
840 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
841free_ref:
842 kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
843
844 return -ENOMEM;
5d4f98a2
YZ
845}
846
847/*
848 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
849 */
66d7e7f0
AJ
850int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
851 struct btrfs_trans_handle *trans,
5d4f98a2
YZ
852 u64 bytenr, u64 num_bytes,
853 u64 parent, u64 ref_root,
7be07912
OS
854 u64 owner, u64 offset, u64 reserved, int action,
855 int *old_ref_mod, int *new_ref_mod)
5d4f98a2
YZ
856{
857 struct btrfs_delayed_data_ref *ref;
858 struct btrfs_delayed_ref_head *head_ref;
859 struct btrfs_delayed_ref_root *delayed_refs;
3368d001 860 struct btrfs_qgroup_extent_record *record = NULL;
fb235dc0 861 int qrecord_inserted;
cd7f9699
NB
862 int ret;
863 u8 ref_type;
5d4f98a2 864
78a6184a 865 ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
5d4f98a2
YZ
866 if (!ref)
867 return -ENOMEM;
56bec294 868
cd7f9699
NB
869 if (parent)
870 ref_type = BTRFS_SHARED_DATA_REF_KEY;
871 else
872 ref_type = BTRFS_EXTENT_DATA_REF_KEY;
873 init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
874 ref_root, action, ref_type);
875 ref->root = ref_root;
876 ref->parent = parent;
877 ref->objectid = owner;
878 ref->offset = offset;
879
880
78a6184a 881 head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
56bec294 882 if (!head_ref) {
78a6184a 883 kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
56bec294
CM
884 return -ENOMEM;
885 }
5d4f98a2 886
afcdd129
JB
887 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
888 is_fstree(ref_root)) {
3368d001
QW
889 record = kmalloc(sizeof(*record), GFP_NOFS);
890 if (!record) {
891 kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
892 kmem_cache_free(btrfs_delayed_ref_head_cachep,
893 head_ref);
894 return -ENOMEM;
895 }
896 }
897
fef394f7 898 head_ref->extent_op = NULL;
5d4f98a2 899
56bec294
CM
900 delayed_refs = &trans->transaction->delayed_refs;
901 spin_lock(&delayed_refs->lock);
902
903 /*
904 * insert both the head node and the new ref without dropping
905 * the spin lock
906 */
1acda0c2
NB
907 head_ref = add_delayed_ref_head(trans, head_ref, record, bytenr,
908 num_bytes, ref_root, reserved,
5e388e95 909 action, 1, 0, &qrecord_inserted,
7be07912 910 old_ref_mod, new_ref_mod);
56bec294 911
cd7f9699 912 ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
5d4f98a2 913 spin_unlock(&delayed_refs->lock);
95a06077 914
cd7f9699
NB
915 trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref,
916 action == BTRFS_ADD_DELAYED_EXTENT ?
917 BTRFS_ADD_DELAYED_REF : action);
918 if (ret > 0)
919 kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
920
921
fb235dc0
QW
922 if (qrecord_inserted)
923 return btrfs_qgroup_trace_extent_post(fs_info, record);
5d4f98a2
YZ
924 return 0;
925}
926
66d7e7f0
AJ
927int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
928 struct btrfs_trans_handle *trans,
5d4f98a2
YZ
929 u64 bytenr, u64 num_bytes,
930 struct btrfs_delayed_extent_op *extent_op)
931{
932 struct btrfs_delayed_ref_head *head_ref;
933 struct btrfs_delayed_ref_root *delayed_refs;
5d4f98a2 934
78a6184a 935 head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
5d4f98a2
YZ
936 if (!head_ref)
937 return -ENOMEM;
938
939 head_ref->extent_op = extent_op;
940
941 delayed_refs = &trans->transaction->delayed_refs;
942 spin_lock(&delayed_refs->lock);
943
5e388e95
NB
944 /*
945 * extent_ops just modify the flags of an extent and they don't result
946 * in ref count changes, hence it's safe to pass false/0 for is_system
947 * argument
948 */
1acda0c2
NB
949 add_delayed_ref_head(trans, head_ref, NULL, bytenr, num_bytes, 0, 0,
950 BTRFS_UPDATE_DELAYED_HEAD, extent_op->is_data,
951 0, NULL, NULL, NULL);
5d4f98a2 952
56bec294
CM
953 spin_unlock(&delayed_refs->lock);
954 return 0;
955}
956
1887be66
CM
957/*
958 * this does a simple search for the head node for a given extent.
959 * It must be called with the delayed ref spinlock held, and it returns
960 * the head node if any where found, or NULL if not.
961 */
962struct btrfs_delayed_ref_head *
f72ad18e 963btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
1887be66 964{
85fdfdf6 965 return find_ref_head(&delayed_refs->href_root, bytenr, 0);
1887be66 966}
78a6184a 967
e67c718b 968void __cold btrfs_delayed_ref_exit(void)
78a6184a 969{
5598e900
KM
970 kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
971 kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
972 kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
973 kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
78a6184a
MX
974}
975
f5c29bd9 976int __init btrfs_delayed_ref_init(void)
78a6184a
MX
977{
978 btrfs_delayed_ref_head_cachep = kmem_cache_create(
979 "btrfs_delayed_ref_head",
980 sizeof(struct btrfs_delayed_ref_head), 0,
fba4b697 981 SLAB_MEM_SPREAD, NULL);
78a6184a
MX
982 if (!btrfs_delayed_ref_head_cachep)
983 goto fail;
984
985 btrfs_delayed_tree_ref_cachep = kmem_cache_create(
986 "btrfs_delayed_tree_ref",
987 sizeof(struct btrfs_delayed_tree_ref), 0,
fba4b697 988 SLAB_MEM_SPREAD, NULL);
78a6184a
MX
989 if (!btrfs_delayed_tree_ref_cachep)
990 goto fail;
991
992 btrfs_delayed_data_ref_cachep = kmem_cache_create(
993 "btrfs_delayed_data_ref",
994 sizeof(struct btrfs_delayed_data_ref), 0,
fba4b697 995 SLAB_MEM_SPREAD, NULL);
78a6184a
MX
996 if (!btrfs_delayed_data_ref_cachep)
997 goto fail;
998
999 btrfs_delayed_extent_op_cachep = kmem_cache_create(
1000 "btrfs_delayed_extent_op",
1001 sizeof(struct btrfs_delayed_extent_op), 0,
fba4b697 1002 SLAB_MEM_SPREAD, NULL);
78a6184a
MX
1003 if (!btrfs_delayed_extent_op_cachep)
1004 goto fail;
1005
1006 return 0;
1007fail:
1008 btrfs_delayed_ref_exit();
1009 return -ENOMEM;
1010}