Merge branch 'linux-4.19' of git://github.com/skeggsb/linux into drm-fixes
[linux-2.6-block.git] / fs / btrfs / delayed-ref.c
CommitLineData
c1d7c514 1// SPDX-License-Identifier: GPL-2.0
56bec294
CM
2/*
3 * Copyright (C) 2009 Oracle. All rights reserved.
56bec294
CM
4 */
5
6#include <linux/sched.h>
5a0e3ad6 7#include <linux/slab.h>
56bec294 8#include <linux/sort.h>
56bec294
CM
9#include "ctree.h"
10#include "delayed-ref.h"
11#include "transaction.h"
3368d001 12#include "qgroup.h"
56bec294 13
78a6184a
MX
/*
 * Slab caches for the delayed-ref objects allocated in this file, one per
 * object type.  They are created in btrfs_delayed_ref_init() and torn down
 * in btrfs_delayed_ref_exit().
 */
struct kmem_cache *btrfs_delayed_ref_head_cachep;	/* struct btrfs_delayed_ref_head */
struct kmem_cache *btrfs_delayed_tree_ref_cachep;	/* struct btrfs_delayed_tree_ref */
struct kmem_cache *btrfs_delayed_data_ref_cachep;	/* struct btrfs_delayed_data_ref */
struct kmem_cache *btrfs_delayed_extent_op_cachep;	/* struct btrfs_delayed_extent_op */
56bec294
CM
18/*
19 * delayed back reference update tracking. For subvolume trees
20 * we queue up extent allocations and backref maintenance for
21 * delayed processing. This avoids deep call chains where we
22 * add extents in the middle of btrfs_search_slot, and it allows
23 * us to buffer up frequently modified backrefs in an rb tree instead
24 * of hammering updates on the extent allocation tree.
56bec294
CM
25 */
26
27/*
5d4f98a2
YZ
28 * compare two delayed tree backrefs with same bytenr and type
29 */
c7ad7c84
JB
30static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
31 struct btrfs_delayed_tree_ref *ref2)
5d4f98a2 32{
3b60d436 33 if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
41b0fc42
JB
34 if (ref1->root < ref2->root)
35 return -1;
36 if (ref1->root > ref2->root)
37 return 1;
38 } else {
39 if (ref1->parent < ref2->parent)
40 return -1;
41 if (ref1->parent > ref2->parent)
42 return 1;
43 }
5d4f98a2
YZ
44 return 0;
45}
46
47/*
48 * compare two delayed data backrefs with same bytenr and type
56bec294 49 */
c7ad7c84
JB
50static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
51 struct btrfs_delayed_data_ref *ref2)
56bec294 52{
5d4f98a2
YZ
53 if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
54 if (ref1->root < ref2->root)
55 return -1;
56 if (ref1->root > ref2->root)
57 return 1;
58 if (ref1->objectid < ref2->objectid)
59 return -1;
60 if (ref1->objectid > ref2->objectid)
61 return 1;
62 if (ref1->offset < ref2->offset)
63 return -1;
64 if (ref1->offset > ref2->offset)
65 return 1;
66 } else {
67 if (ref1->parent < ref2->parent)
68 return -1;
69 if (ref1->parent > ref2->parent)
70 return 1;
71 }
72 return 0;
73}
74
1d148e59
JB
75static int comp_refs(struct btrfs_delayed_ref_node *ref1,
76 struct btrfs_delayed_ref_node *ref2,
77 bool check_seq)
78{
79 int ret = 0;
80
81 if (ref1->type < ref2->type)
82 return -1;
83 if (ref1->type > ref2->type)
84 return 1;
85 if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
86 ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
87 ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
88 btrfs_delayed_node_to_tree_ref(ref2));
89 else
90 ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
91 btrfs_delayed_node_to_data_ref(ref2));
92 if (ret)
93 return ret;
94 if (check_seq) {
95 if (ref1->seq < ref2->seq)
96 return -1;
97 if (ref1->seq > ref2->seq)
98 return 1;
99 }
100 return 0;
101}
102
c46effa6
LB
103/* insert a new ref to head ref rbtree */
104static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
105 struct rb_node *node)
106{
107 struct rb_node **p = &root->rb_node;
108 struct rb_node *parent_node = NULL;
109 struct btrfs_delayed_ref_head *entry;
110 struct btrfs_delayed_ref_head *ins;
111 u64 bytenr;
112
113 ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
d278850e 114 bytenr = ins->bytenr;
c46effa6
LB
115 while (*p) {
116 parent_node = *p;
117 entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
118 href_node);
119
d278850e 120 if (bytenr < entry->bytenr)
c46effa6 121 p = &(*p)->rb_left;
d278850e 122 else if (bytenr > entry->bytenr)
c46effa6
LB
123 p = &(*p)->rb_right;
124 else
125 return entry;
126 }
127
128 rb_link_node(node, parent_node, p);
129 rb_insert_color(node, root);
130 return NULL;
131}
132
0e0adbcf
JB
133static struct btrfs_delayed_ref_node* tree_insert(struct rb_root *root,
134 struct btrfs_delayed_ref_node *ins)
135{
136 struct rb_node **p = &root->rb_node;
137 struct rb_node *node = &ins->ref_node;
138 struct rb_node *parent_node = NULL;
139 struct btrfs_delayed_ref_node *entry;
140
141 while (*p) {
142 int comp;
143
144 parent_node = *p;
145 entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
146 ref_node);
147 comp = comp_refs(ins, entry, true);
148 if (comp < 0)
149 p = &(*p)->rb_left;
150 else if (comp > 0)
151 p = &(*p)->rb_right;
152 else
153 return entry;
154 }
155
156 rb_link_node(node, parent_node, p);
157 rb_insert_color(node, root);
158 return NULL;
159}
160
56bec294 161/*
5d4f98a2 162 * find an head entry based on bytenr. This returns the delayed ref
d1270cd9
AJ
163 * head if it was able to find one, or NULL if nothing was in that spot.
164 * If return_bigger is given, the next bigger entry is returned if no exact
165 * match is found.
56bec294 166 */
c46effa6
LB
167static struct btrfs_delayed_ref_head *
168find_ref_head(struct rb_root *root, u64 bytenr,
85fdfdf6 169 int return_bigger)
56bec294 170{
d1270cd9 171 struct rb_node *n;
c46effa6 172 struct btrfs_delayed_ref_head *entry;
56bec294 173
d1270cd9
AJ
174 n = root->rb_node;
175 entry = NULL;
56bec294 176 while (n) {
c46effa6 177 entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
56bec294 178
d278850e 179 if (bytenr < entry->bytenr)
56bec294 180 n = n->rb_left;
d278850e 181 else if (bytenr > entry->bytenr)
56bec294
CM
182 n = n->rb_right;
183 else
184 return entry;
185 }
d1270cd9 186 if (entry && return_bigger) {
d278850e 187 if (bytenr > entry->bytenr) {
c46effa6 188 n = rb_next(&entry->href_node);
d1270cd9
AJ
189 if (!n)
190 n = rb_first(root);
c46effa6
LB
191 entry = rb_entry(n, struct btrfs_delayed_ref_head,
192 href_node);
6103fb43 193 return entry;
d1270cd9
AJ
194 }
195 return entry;
196 }
56bec294
CM
197 return NULL;
198}
199
c3e69d58
CM
200int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
201 struct btrfs_delayed_ref_head *head)
56bec294 202{
c3e69d58
CM
203 struct btrfs_delayed_ref_root *delayed_refs;
204
205 delayed_refs = &trans->transaction->delayed_refs;
a4666e68 206 lockdep_assert_held(&delayed_refs->lock);
c3e69d58
CM
207 if (mutex_trylock(&head->mutex))
208 return 0;
209
d278850e 210 refcount_inc(&head->refs);
c3e69d58
CM
211 spin_unlock(&delayed_refs->lock);
212
213 mutex_lock(&head->mutex);
214 spin_lock(&delayed_refs->lock);
d278850e 215 if (RB_EMPTY_NODE(&head->href_node)) {
c3e69d58 216 mutex_unlock(&head->mutex);
d278850e 217 btrfs_put_delayed_ref_head(head);
c3e69d58
CM
218 return -EAGAIN;
219 }
d278850e 220 btrfs_put_delayed_ref_head(head);
c3e69d58
CM
221 return 0;
222}
223
35a3621b 224static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
ae1e206b 225 struct btrfs_delayed_ref_root *delayed_refs,
d7df2c79 226 struct btrfs_delayed_ref_head *head,
ae1e206b
JB
227 struct btrfs_delayed_ref_node *ref)
228{
a4666e68 229 lockdep_assert_held(&head->lock);
0e0adbcf
JB
230 rb_erase(&ref->ref_node, &head->ref_tree);
231 RB_CLEAR_NODE(&ref->ref_node);
d278850e
JB
232 if (!list_empty(&ref->add_list))
233 list_del(&ref->add_list);
ae1e206b
JB
234 ref->in_tree = 0;
235 btrfs_put_delayed_ref(ref);
d7df2c79 236 atomic_dec(&delayed_refs->num_entries);
ae1e206b
JB
237 if (trans->delayed_ref_updates)
238 trans->delayed_ref_updates--;
239}
240
2c3cf7d5
FM
241static bool merge_ref(struct btrfs_trans_handle *trans,
242 struct btrfs_delayed_ref_root *delayed_refs,
243 struct btrfs_delayed_ref_head *head,
244 struct btrfs_delayed_ref_node *ref,
245 u64 seq)
246{
247 struct btrfs_delayed_ref_node *next;
0e0adbcf 248 struct rb_node *node = rb_next(&ref->ref_node);
2c3cf7d5
FM
249 bool done = false;
250
0e0adbcf 251 while (!done && node) {
2c3cf7d5 252 int mod;
2c3cf7d5 253
0e0adbcf
JB
254 next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
255 node = rb_next(node);
2c3cf7d5 256 if (seq && next->seq >= seq)
0e0adbcf 257 break;
1d148e59 258 if (comp_refs(ref, next, false))
0e0adbcf 259 break;
2c3cf7d5
FM
260
261 if (ref->action == next->action) {
262 mod = next->ref_mod;
263 } else {
264 if (ref->ref_mod < next->ref_mod) {
265 swap(ref, next);
266 done = true;
267 }
268 mod = -next->ref_mod;
269 }
270
271 drop_delayed_ref(trans, delayed_refs, head, next);
272 ref->ref_mod += mod;
273 if (ref->ref_mod == 0) {
274 drop_delayed_ref(trans, delayed_refs, head, ref);
275 done = true;
276 } else {
277 /*
278 * Can't have multiples of the same ref on a tree block.
279 */
280 WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
281 ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
282 }
2c3cf7d5
FM
283 }
284
285 return done;
286}
287
288void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
2c3cf7d5
FM
289 struct btrfs_delayed_ref_root *delayed_refs,
290 struct btrfs_delayed_ref_head *head)
291{
be97f133 292 struct btrfs_fs_info *fs_info = trans->fs_info;
2c3cf7d5 293 struct btrfs_delayed_ref_node *ref;
0e0adbcf 294 struct rb_node *node;
2c3cf7d5
FM
295 u64 seq = 0;
296
a4666e68 297 lockdep_assert_held(&head->lock);
2c3cf7d5 298
0e0adbcf 299 if (RB_EMPTY_ROOT(&head->ref_tree))
2c3cf7d5
FM
300 return;
301
302 /* We don't have too many refs to merge for data. */
303 if (head->is_data)
304 return;
305
306 spin_lock(&fs_info->tree_mod_seq_lock);
307 if (!list_empty(&fs_info->tree_mod_seq_list)) {
308 struct seq_list *elem;
309
310 elem = list_first_entry(&fs_info->tree_mod_seq_list,
311 struct seq_list, list);
312 seq = elem->seq;
313 }
314 spin_unlock(&fs_info->tree_mod_seq_lock);
315
0e0adbcf
JB
316again:
317 for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
318 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
2c3cf7d5 319 if (seq && ref->seq >= seq)
2c3cf7d5 320 continue;
0e0adbcf
JB
321 if (merge_ref(trans, delayed_refs, head, ref, seq))
322 goto again;
2c3cf7d5
FM
323 }
324}
325
41d0bd3b 326int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
00f04b88
AJ
327{
328 struct seq_list *elem;
097b8a7c
JS
329 int ret = 0;
330
331 spin_lock(&fs_info->tree_mod_seq_lock);
332 if (!list_empty(&fs_info->tree_mod_seq_list)) {
333 elem = list_first_entry(&fs_info->tree_mod_seq_list,
334 struct seq_list, list);
335 if (seq >= elem->seq) {
ab8d0fc4 336 btrfs_debug(fs_info,
41d0bd3b 337 "holding back delayed_ref %#x.%x, lowest is %#x.%x",
ab8d0fc4 338 (u32)(seq >> 32), (u32)seq,
41d0bd3b 339 (u32)(elem->seq >> 32), (u32)elem->seq);
097b8a7c
JS
340 ret = 1;
341 }
00f04b88 342 }
097b8a7c
JS
343
344 spin_unlock(&fs_info->tree_mod_seq_lock);
345 return ret;
00f04b88
AJ
346}
347
d7df2c79
JB
348struct btrfs_delayed_ref_head *
349btrfs_select_ref_head(struct btrfs_trans_handle *trans)
c3e69d58 350{
c3e69d58 351 struct btrfs_delayed_ref_root *delayed_refs;
d7df2c79
JB
352 struct btrfs_delayed_ref_head *head;
353 u64 start;
354 bool loop = false;
56bec294 355
c3e69d58 356 delayed_refs = &trans->transaction->delayed_refs;
c46effa6 357
c3e69d58 358again:
d7df2c79 359 start = delayed_refs->run_delayed_start;
85fdfdf6 360 head = find_ref_head(&delayed_refs->href_root, start, 1);
d7df2c79
JB
361 if (!head && !loop) {
362 delayed_refs->run_delayed_start = 0;
c3e69d58 363 start = 0;
d7df2c79 364 loop = true;
85fdfdf6 365 head = find_ref_head(&delayed_refs->href_root, start, 1);
d7df2c79
JB
366 if (!head)
367 return NULL;
368 } else if (!head && loop) {
369 return NULL;
c3e69d58 370 }
56bec294 371
d7df2c79
JB
372 while (head->processing) {
373 struct rb_node *node;
374
375 node = rb_next(&head->href_node);
376 if (!node) {
377 if (loop)
378 return NULL;
379 delayed_refs->run_delayed_start = 0;
380 start = 0;
381 loop = true;
382 goto again;
383 }
384 head = rb_entry(node, struct btrfs_delayed_ref_head,
385 href_node);
386 }
093486c4 387
d7df2c79
JB
388 head->processing = 1;
389 WARN_ON(delayed_refs->num_heads_ready == 0);
390 delayed_refs->num_heads_ready--;
d278850e
JB
391 delayed_refs->run_delayed_start = head->bytenr +
392 head->num_bytes;
d7df2c79 393 return head;
093486c4
MX
394}
395
c6fc2454
QW
396/*
397 * Helper to insert the ref_node to the tail or merge with tail.
398 *
399 * Return 0 for insert.
400 * Return >0 for merge.
401 */
0e0adbcf
JB
402static int insert_delayed_ref(struct btrfs_trans_handle *trans,
403 struct btrfs_delayed_ref_root *root,
404 struct btrfs_delayed_ref_head *href,
405 struct btrfs_delayed_ref_node *ref)
c6fc2454
QW
406{
407 struct btrfs_delayed_ref_node *exist;
408 int mod;
409 int ret = 0;
410
411 spin_lock(&href->lock);
0e0adbcf
JB
412 exist = tree_insert(&href->ref_tree, ref);
413 if (!exist)
414 goto inserted;
c6fc2454
QW
415
416 /* Now we are sure we can merge */
417 ret = 1;
418 if (exist->action == ref->action) {
419 mod = ref->ref_mod;
420 } else {
421 /* Need to change action */
422 if (exist->ref_mod < ref->ref_mod) {
423 exist->action = ref->action;
424 mod = -exist->ref_mod;
425 exist->ref_mod = ref->ref_mod;
1d57ee94
WX
426 if (ref->action == BTRFS_ADD_DELAYED_REF)
427 list_add_tail(&exist->add_list,
428 &href->ref_add_list);
429 else if (ref->action == BTRFS_DROP_DELAYED_REF) {
430 ASSERT(!list_empty(&exist->add_list));
431 list_del(&exist->add_list);
432 } else {
433 ASSERT(0);
434 }
c6fc2454
QW
435 } else
436 mod = -ref->ref_mod;
437 }
438 exist->ref_mod += mod;
439
440 /* remove existing tail if its ref_mod is zero */
441 if (exist->ref_mod == 0)
442 drop_delayed_ref(trans, root, href, exist);
443 spin_unlock(&href->lock);
444 return ret;
0e0adbcf 445inserted:
1d57ee94
WX
446 if (ref->action == BTRFS_ADD_DELAYED_REF)
447 list_add_tail(&ref->add_list, &href->ref_add_list);
c6fc2454
QW
448 atomic_inc(&root->num_entries);
449 trans->delayed_ref_updates++;
450 spin_unlock(&href->lock);
451 return ret;
452}
453
56bec294
CM
454/*
455 * helper function to update the accounting in the head ref
456 * existing and update must have the same bytenr
457 */
458static noinline void
1262133b 459update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
d278850e
JB
460 struct btrfs_delayed_ref_head *existing,
461 struct btrfs_delayed_ref_head *update,
7be07912 462 int *old_ref_mod_ret)
56bec294 463{
1262133b 464 int old_ref_mod;
56bec294 465
d278850e 466 BUG_ON(existing->is_data != update->is_data);
56bec294 467
d278850e
JB
468 spin_lock(&existing->lock);
469 if (update->must_insert_reserved) {
56bec294
CM
470 /* if the extent was freed and then
471 * reallocated before the delayed ref
472 * entries were processed, we can end up
473 * with an existing head ref without
474 * the must_insert_reserved flag set.
475 * Set it again here
476 */
d278850e 477 existing->must_insert_reserved = update->must_insert_reserved;
56bec294
CM
478
479 /*
480 * update the num_bytes so we make sure the accounting
481 * is done correctly
482 */
483 existing->num_bytes = update->num_bytes;
484
485 }
486
d278850e
JB
487 if (update->extent_op) {
488 if (!existing->extent_op) {
489 existing->extent_op = update->extent_op;
5d4f98a2 490 } else {
d278850e
JB
491 if (update->extent_op->update_key) {
492 memcpy(&existing->extent_op->key,
493 &update->extent_op->key,
494 sizeof(update->extent_op->key));
495 existing->extent_op->update_key = true;
5d4f98a2 496 }
d278850e
JB
497 if (update->extent_op->update_flags) {
498 existing->extent_op->flags_to_set |=
499 update->extent_op->flags_to_set;
500 existing->extent_op->update_flags = true;
5d4f98a2 501 }
d278850e 502 btrfs_free_delayed_extent_op(update->extent_op);
5d4f98a2
YZ
503 }
504 }
56bec294 505 /*
d7df2c79
JB
506 * update the reference mod on the head to reflect this new operation,
507 * only need the lock for this case cause we could be processing it
508 * currently, for refs we just added we know we're a-ok.
56bec294 509 */
d278850e 510 old_ref_mod = existing->total_ref_mod;
7be07912
OS
511 if (old_ref_mod_ret)
512 *old_ref_mod_ret = old_ref_mod;
56bec294 513 existing->ref_mod += update->ref_mod;
d278850e 514 existing->total_ref_mod += update->ref_mod;
1262133b
JB
515
516 /*
517 * If we are going to from a positive ref mod to a negative or vice
518 * versa we need to make sure to adjust pending_csums accordingly.
519 */
d278850e
JB
520 if (existing->is_data) {
521 if (existing->total_ref_mod >= 0 && old_ref_mod < 0)
1262133b 522 delayed_refs->pending_csums -= existing->num_bytes;
d278850e 523 if (existing->total_ref_mod < 0 && old_ref_mod >= 0)
1262133b
JB
524 delayed_refs->pending_csums += existing->num_bytes;
525 }
d278850e 526 spin_unlock(&existing->lock);
56bec294
CM
527}
528
a2e569b3
NB
529static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
530 struct btrfs_qgroup_extent_record *qrecord,
531 u64 bytenr, u64 num_bytes, u64 ref_root,
532 u64 reserved, int action, bool is_data,
533 bool is_system)
534{
535 int count_mod = 1;
536 int must_insert_reserved = 0;
537
538 /* If reserved is provided, it must be a data extent. */
539 BUG_ON(!is_data && reserved);
540
541 /*
542 * The head node stores the sum of all the mods, so dropping a ref
543 * should drop the sum in the head node by one.
544 */
545 if (action == BTRFS_UPDATE_DELAYED_HEAD)
546 count_mod = 0;
547 else if (action == BTRFS_DROP_DELAYED_REF)
548 count_mod = -1;
549
550 /*
551 * BTRFS_ADD_DELAYED_EXTENT means that we need to update the reserved
552 * accounting when the extent is finally added, or if a later
553 * modification deletes the delayed ref without ever inserting the
554 * extent into the extent allocation tree. ref->must_insert_reserved
555 * is the flag used to record that accounting mods are required.
556 *
557 * Once we record must_insert_reserved, switch the action to
558 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
559 */
560 if (action == BTRFS_ADD_DELAYED_EXTENT)
561 must_insert_reserved = 1;
562 else
563 must_insert_reserved = 0;
564
565 refcount_set(&head_ref->refs, 1);
566 head_ref->bytenr = bytenr;
567 head_ref->num_bytes = num_bytes;
568 head_ref->ref_mod = count_mod;
569 head_ref->must_insert_reserved = must_insert_reserved;
570 head_ref->is_data = is_data;
571 head_ref->is_system = is_system;
572 head_ref->ref_tree = RB_ROOT;
573 INIT_LIST_HEAD(&head_ref->ref_add_list);
574 RB_CLEAR_NODE(&head_ref->href_node);
575 head_ref->processing = 0;
576 head_ref->total_ref_mod = count_mod;
577 head_ref->qgroup_reserved = 0;
578 head_ref->qgroup_ref_root = 0;
579 spin_lock_init(&head_ref->lock);
580 mutex_init(&head_ref->mutex);
581
582 if (qrecord) {
583 if (ref_root && reserved) {
584 head_ref->qgroup_ref_root = ref_root;
585 head_ref->qgroup_reserved = reserved;
586 }
587
588 qrecord->bytenr = bytenr;
589 qrecord->num_bytes = num_bytes;
590 qrecord->old_roots = NULL;
591 }
592}
593
56bec294 594/*
5d4f98a2 595 * helper function to actually insert a head node into the rbtree.
56bec294 596 * this does all the dirty work in terms of maintaining the correct
5d4f98a2 597 * overall modification count.
56bec294 598 */
d7df2c79 599static noinline struct btrfs_delayed_ref_head *
1acda0c2 600add_delayed_ref_head(struct btrfs_trans_handle *trans,
d278850e 601 struct btrfs_delayed_ref_head *head_ref,
3368d001 602 struct btrfs_qgroup_extent_record *qrecord,
2335efaf 603 int action, int *qrecord_inserted_ret,
7be07912 604 int *old_ref_mod, int *new_ref_mod)
56bec294 605{
d7df2c79 606 struct btrfs_delayed_ref_head *existing;
56bec294 607 struct btrfs_delayed_ref_root *delayed_refs;
fb235dc0 608 int qrecord_inserted = 0;
56bec294 609
56bec294 610 delayed_refs = &trans->transaction->delayed_refs;
2335efaf 611
3368d001
QW
612 /* Record qgroup extent info if provided */
613 if (qrecord) {
eb86ec73 614 if (btrfs_qgroup_trace_extent_nolock(trans->fs_info,
cb93b52c 615 delayed_refs, qrecord))
3368d001 616 kfree(qrecord);
fb235dc0
QW
617 else
618 qrecord_inserted = 1;
3368d001
QW
619 }
620
1acda0c2 621 trace_add_delayed_ref_head(trans->fs_info, head_ref, action);
1abe9b8a 622
d7df2c79
JB
623 existing = htree_insert(&delayed_refs->href_root,
624 &head_ref->href_node);
5d4f98a2 625 if (existing) {
2335efaf
NB
626 WARN_ON(qrecord && head_ref->qgroup_ref_root
627 && head_ref->qgroup_reserved
628 && existing->qgroup_ref_root
5846a3c2 629 && existing->qgroup_reserved);
d278850e 630 update_existing_head_ref(delayed_refs, existing, head_ref,
7be07912 631 old_ref_mod);
5d4f98a2
YZ
632 /*
633 * we've updated the existing ref, free the newly
634 * allocated ref
635 */
78a6184a 636 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
d7df2c79 637 head_ref = existing;
5d4f98a2 638 } else {
7be07912
OS
639 if (old_ref_mod)
640 *old_ref_mod = 0;
2335efaf
NB
641 if (head_ref->is_data && head_ref->ref_mod < 0)
642 delayed_refs->pending_csums += head_ref->num_bytes;
5d4f98a2
YZ
643 delayed_refs->num_heads++;
644 delayed_refs->num_heads_ready++;
d7df2c79 645 atomic_inc(&delayed_refs->num_entries);
5d4f98a2
YZ
646 trans->delayed_ref_updates++;
647 }
fb235dc0
QW
648 if (qrecord_inserted_ret)
649 *qrecord_inserted_ret = qrecord_inserted;
7be07912
OS
650 if (new_ref_mod)
651 *new_ref_mod = head_ref->total_ref_mod;
2335efaf 652
d7df2c79 653 return head_ref;
5d4f98a2
YZ
654}
655
cb49a87b
NB
656/*
657 * init_delayed_ref_common - Initialize the structure which represents a
658 * modification to a an extent.
659 *
660 * @fs_info: Internal to the mounted filesystem mount structure.
661 *
662 * @ref: The structure which is going to be initialized.
663 *
664 * @bytenr: The logical address of the extent for which a modification is
665 * going to be recorded.
666 *
667 * @num_bytes: Size of the extent whose modification is being recorded.
668 *
669 * @ref_root: The id of the root where this modification has originated, this
670 * can be either one of the well-known metadata trees or the
671 * subvolume id which references this extent.
672 *
673 * @action: Can be one of BTRFS_ADD_DELAYED_REF/BTRFS_DROP_DELAYED_REF or
674 * BTRFS_ADD_DELAYED_EXTENT
675 *
676 * @ref_type: Holds the type of the extent which is being recorded, can be
677 * one of BTRFS_SHARED_BLOCK_REF_KEY/BTRFS_TREE_BLOCK_REF_KEY
678 * when recording a metadata extent or BTRFS_SHARED_DATA_REF_KEY/
679 * BTRFS_EXTENT_DATA_REF_KEY when recording data extent
680 */
681static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
682 struct btrfs_delayed_ref_node *ref,
683 u64 bytenr, u64 num_bytes, u64 ref_root,
684 int action, u8 ref_type)
685{
686 u64 seq = 0;
687
688 if (action == BTRFS_ADD_DELAYED_EXTENT)
689 action = BTRFS_ADD_DELAYED_REF;
690
691 if (is_fstree(ref_root))
692 seq = atomic64_read(&fs_info->tree_mod_seq);
693
694 refcount_set(&ref->refs, 1);
695 ref->bytenr = bytenr;
696 ref->num_bytes = num_bytes;
697 ref->ref_mod = 1;
698 ref->action = action;
699 ref->is_head = 0;
700 ref->in_tree = 1;
701 ref->seq = seq;
702 ref->type = ref_type;
703 RB_CLEAR_NODE(&ref->ref_node);
704 INIT_LIST_HEAD(&ref->add_list);
705}
706
56bec294 707/*
5d4f98a2 708 * add a delayed tree ref. This does all of the accounting required
56bec294
CM
709 * to make sure the delayed ref is eventually processed before this
710 * transaction commits.
711 */
44e1c47d 712int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
5d4f98a2
YZ
713 u64 bytenr, u64 num_bytes, u64 parent,
714 u64 ref_root, int level, int action,
7be07912
OS
715 struct btrfs_delayed_extent_op *extent_op,
716 int *old_ref_mod, int *new_ref_mod)
56bec294 717{
44e1c47d 718 struct btrfs_fs_info *fs_info = trans->fs_info;
5d4f98a2 719 struct btrfs_delayed_tree_ref *ref;
56bec294
CM
720 struct btrfs_delayed_ref_head *head_ref;
721 struct btrfs_delayed_ref_root *delayed_refs;
3368d001 722 struct btrfs_qgroup_extent_record *record = NULL;
fb235dc0 723 int qrecord_inserted;
2335efaf 724 bool is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
70d64000
NB
725 int ret;
726 u8 ref_type;
56bec294 727
5d4f98a2 728 BUG_ON(extent_op && extent_op->is_data);
78a6184a 729 ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
56bec294
CM
730 if (!ref)
731 return -ENOMEM;
732
7b4284de
NB
733 head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
734 if (!head_ref) {
735 kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
736 return -ENOMEM;
737 }
738
739 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
740 is_fstree(ref_root)) {
741 record = kmalloc(sizeof(*record), GFP_NOFS);
742 if (!record) {
743 kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
744 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
745 return -ENOMEM;
746 }
747 }
748
70d64000
NB
749 if (parent)
750 ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
751 else
752 ref_type = BTRFS_TREE_BLOCK_REF_KEY;
7b4284de 753
70d64000
NB
754 init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
755 ref_root, action, ref_type);
756 ref->root = ref_root;
757 ref->parent = parent;
758 ref->level = level;
759
2335efaf
NB
760 init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
761 ref_root, 0, action, false, is_system);
5d4f98a2
YZ
762 head_ref->extent_op = extent_op;
763
764 delayed_refs = &trans->transaction->delayed_refs;
765 spin_lock(&delayed_refs->lock);
766
56bec294 767 /*
5d4f98a2
YZ
768 * insert both the head node and the new ref without dropping
769 * the spin lock
56bec294 770 */
2335efaf
NB
771 head_ref = add_delayed_ref_head(trans, head_ref, record,
772 action, &qrecord_inserted,
5e388e95 773 old_ref_mod, new_ref_mod);
5d4f98a2 774
70d64000 775 ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
5d4f98a2 776 spin_unlock(&delayed_refs->lock);
95a06077 777
70d64000
NB
778 trace_add_delayed_tree_ref(fs_info, &ref->node, ref,
779 action == BTRFS_ADD_DELAYED_EXTENT ?
780 BTRFS_ADD_DELAYED_REF : action);
781 if (ret > 0)
782 kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
783
fb235dc0 784 if (qrecord_inserted)
952bd3db
NB
785 btrfs_qgroup_trace_extent_post(fs_info, record);
786
5d4f98a2
YZ
787 return 0;
788}
789
790/*
791 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
792 */
88a979c6 793int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
5d4f98a2
YZ
794 u64 bytenr, u64 num_bytes,
795 u64 parent, u64 ref_root,
7be07912
OS
796 u64 owner, u64 offset, u64 reserved, int action,
797 int *old_ref_mod, int *new_ref_mod)
5d4f98a2 798{
88a979c6 799 struct btrfs_fs_info *fs_info = trans->fs_info;
5d4f98a2
YZ
800 struct btrfs_delayed_data_ref *ref;
801 struct btrfs_delayed_ref_head *head_ref;
802 struct btrfs_delayed_ref_root *delayed_refs;
3368d001 803 struct btrfs_qgroup_extent_record *record = NULL;
fb235dc0 804 int qrecord_inserted;
cd7f9699
NB
805 int ret;
806 u8 ref_type;
5d4f98a2 807
78a6184a 808 ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
5d4f98a2
YZ
809 if (!ref)
810 return -ENOMEM;
56bec294 811
cd7f9699
NB
812 if (parent)
813 ref_type = BTRFS_SHARED_DATA_REF_KEY;
814 else
815 ref_type = BTRFS_EXTENT_DATA_REF_KEY;
816 init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
817 ref_root, action, ref_type);
818 ref->root = ref_root;
819 ref->parent = parent;
820 ref->objectid = owner;
821 ref->offset = offset;
822
823
78a6184a 824 head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
56bec294 825 if (!head_ref) {
78a6184a 826 kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
56bec294
CM
827 return -ENOMEM;
828 }
5d4f98a2 829
afcdd129
JB
830 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
831 is_fstree(ref_root)) {
3368d001
QW
832 record = kmalloc(sizeof(*record), GFP_NOFS);
833 if (!record) {
834 kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
835 kmem_cache_free(btrfs_delayed_ref_head_cachep,
836 head_ref);
837 return -ENOMEM;
838 }
839 }
840
2335efaf
NB
841 init_delayed_ref_head(head_ref, record, bytenr, num_bytes, ref_root,
842 reserved, action, true, false);
fef394f7 843 head_ref->extent_op = NULL;
5d4f98a2 844
56bec294
CM
845 delayed_refs = &trans->transaction->delayed_refs;
846 spin_lock(&delayed_refs->lock);
847
848 /*
849 * insert both the head node and the new ref without dropping
850 * the spin lock
851 */
2335efaf
NB
852 head_ref = add_delayed_ref_head(trans, head_ref, record,
853 action, &qrecord_inserted,
7be07912 854 old_ref_mod, new_ref_mod);
56bec294 855
cd7f9699 856 ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
5d4f98a2 857 spin_unlock(&delayed_refs->lock);
95a06077 858
cd7f9699
NB
859 trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref,
860 action == BTRFS_ADD_DELAYED_EXTENT ?
861 BTRFS_ADD_DELAYED_REF : action);
862 if (ret > 0)
863 kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
864
865
fb235dc0
QW
866 if (qrecord_inserted)
867 return btrfs_qgroup_trace_extent_post(fs_info, record);
5d4f98a2
YZ
868 return 0;
869}
870
66d7e7f0
AJ
871int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
872 struct btrfs_trans_handle *trans,
5d4f98a2
YZ
873 u64 bytenr, u64 num_bytes,
874 struct btrfs_delayed_extent_op *extent_op)
875{
876 struct btrfs_delayed_ref_head *head_ref;
877 struct btrfs_delayed_ref_root *delayed_refs;
5d4f98a2 878
78a6184a 879 head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
5d4f98a2
YZ
880 if (!head_ref)
881 return -ENOMEM;
882
2335efaf
NB
883 init_delayed_ref_head(head_ref, NULL, bytenr, num_bytes, 0, 0,
884 BTRFS_UPDATE_DELAYED_HEAD, extent_op->is_data,
885 false);
5d4f98a2
YZ
886 head_ref->extent_op = extent_op;
887
888 delayed_refs = &trans->transaction->delayed_refs;
889 spin_lock(&delayed_refs->lock);
890
2335efaf
NB
891 add_delayed_ref_head(trans, head_ref, NULL, BTRFS_UPDATE_DELAYED_HEAD,
892 NULL, NULL, NULL);
5d4f98a2 893
56bec294
CM
894 spin_unlock(&delayed_refs->lock);
895 return 0;
896}
897
1887be66
CM
898/*
899 * this does a simple search for the head node for a given extent.
900 * It must be called with the delayed ref spinlock held, and it returns
901 * the head node if any where found, or NULL if not.
902 */
903struct btrfs_delayed_ref_head *
f72ad18e 904btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
1887be66 905{
85fdfdf6 906 return find_ref_head(&delayed_refs->href_root, bytenr, 0);
1887be66 907}
78a6184a 908
e67c718b 909void __cold btrfs_delayed_ref_exit(void)
78a6184a 910{
5598e900
KM
911 kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
912 kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
913 kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
914 kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
78a6184a
MX
915}
916
f5c29bd9 917int __init btrfs_delayed_ref_init(void)
78a6184a
MX
918{
919 btrfs_delayed_ref_head_cachep = kmem_cache_create(
920 "btrfs_delayed_ref_head",
921 sizeof(struct btrfs_delayed_ref_head), 0,
fba4b697 922 SLAB_MEM_SPREAD, NULL);
78a6184a
MX
923 if (!btrfs_delayed_ref_head_cachep)
924 goto fail;
925
926 btrfs_delayed_tree_ref_cachep = kmem_cache_create(
927 "btrfs_delayed_tree_ref",
928 sizeof(struct btrfs_delayed_tree_ref), 0,
fba4b697 929 SLAB_MEM_SPREAD, NULL);
78a6184a
MX
930 if (!btrfs_delayed_tree_ref_cachep)
931 goto fail;
932
933 btrfs_delayed_data_ref_cachep = kmem_cache_create(
934 "btrfs_delayed_data_ref",
935 sizeof(struct btrfs_delayed_data_ref), 0,
fba4b697 936 SLAB_MEM_SPREAD, NULL);
78a6184a
MX
937 if (!btrfs_delayed_data_ref_cachep)
938 goto fail;
939
940 btrfs_delayed_extent_op_cachep = kmem_cache_create(
941 "btrfs_delayed_extent_op",
942 sizeof(struct btrfs_delayed_extent_op), 0,
fba4b697 943 SLAB_MEM_SPREAD, NULL);
78a6184a
MX
944 if (!btrfs_delayed_extent_op_cachep)
945 goto fail;
946
947 return 0;
948fail:
949 btrfs_delayed_ref_exit();
950 return -ENOMEM;
951}