btrfs: replace GPL boilerplate by SPDX -- headers
[linux-2.6-block.git] / fs / btrfs / delayed-ref.c
CommitLineData
56bec294
CM
1/*
2 * Copyright (C) 2009 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
5a0e3ad6 20#include <linux/slab.h>
56bec294 21#include <linux/sort.h>
56bec294
CM
22#include "ctree.h"
23#include "delayed-ref.h"
24#include "transaction.h"
3368d001 25#include "qgroup.h"
56bec294 26
78a6184a
MX
/*
 * Slab caches used by the delayed ref code.  All four are created in
 * btrfs_delayed_ref_init() and destroyed in btrfs_delayed_ref_exit().
 * The head, tree-ref and data-ref caches back the allocations made by
 * the btrfs_add_delayed_*() entry points in this file.
 */
27struct kmem_cache *btrfs_delayed_ref_head_cachep;
28struct kmem_cache *btrfs_delayed_tree_ref_cachep;
29struct kmem_cache *btrfs_delayed_data_ref_cachep;
30struct kmem_cache *btrfs_delayed_extent_op_cachep;
56bec294
CM
31/*
32 * delayed back reference update tracking. For subvolume trees
33 * we queue up extent allocations and backref maintenance for
34 * delayed processing. This avoids deep call chains where we
35 * add extents in the middle of btrfs_search_slot, and it allows
36 * us to buffer up frequently modified backrefs in an rb tree instead
37 * of hammering updates on the extent allocation tree.
56bec294
CM
38 */
39
40/*
5d4f98a2
YZ
41 * compare two delayed tree backrefs with same bytenr and type
42 */
c7ad7c84
JB
43static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
44 struct btrfs_delayed_tree_ref *ref2)
5d4f98a2 45{
3b60d436 46 if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
41b0fc42
JB
47 if (ref1->root < ref2->root)
48 return -1;
49 if (ref1->root > ref2->root)
50 return 1;
51 } else {
52 if (ref1->parent < ref2->parent)
53 return -1;
54 if (ref1->parent > ref2->parent)
55 return 1;
56 }
5d4f98a2
YZ
57 return 0;
58}
59
60/*
61 * compare two delayed data backrefs with same bytenr and type
56bec294 62 */
c7ad7c84
JB
63static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
64 struct btrfs_delayed_data_ref *ref2)
56bec294 65{
5d4f98a2
YZ
66 if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
67 if (ref1->root < ref2->root)
68 return -1;
69 if (ref1->root > ref2->root)
70 return 1;
71 if (ref1->objectid < ref2->objectid)
72 return -1;
73 if (ref1->objectid > ref2->objectid)
74 return 1;
75 if (ref1->offset < ref2->offset)
76 return -1;
77 if (ref1->offset > ref2->offset)
78 return 1;
79 } else {
80 if (ref1->parent < ref2->parent)
81 return -1;
82 if (ref1->parent > ref2->parent)
83 return 1;
84 }
85 return 0;
86}
87
1d148e59
JB
88static int comp_refs(struct btrfs_delayed_ref_node *ref1,
89 struct btrfs_delayed_ref_node *ref2,
90 bool check_seq)
91{
92 int ret = 0;
93
94 if (ref1->type < ref2->type)
95 return -1;
96 if (ref1->type > ref2->type)
97 return 1;
98 if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
99 ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
100 ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
101 btrfs_delayed_node_to_tree_ref(ref2));
102 else
103 ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
104 btrfs_delayed_node_to_data_ref(ref2));
105 if (ret)
106 return ret;
107 if (check_seq) {
108 if (ref1->seq < ref2->seq)
109 return -1;
110 if (ref1->seq > ref2->seq)
111 return 1;
112 }
113 return 0;
114}
115
c46effa6
LB
116/* insert a new ref to head ref rbtree */
117static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
118 struct rb_node *node)
119{
120 struct rb_node **p = &root->rb_node;
121 struct rb_node *parent_node = NULL;
122 struct btrfs_delayed_ref_head *entry;
123 struct btrfs_delayed_ref_head *ins;
124 u64 bytenr;
125
126 ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
d278850e 127 bytenr = ins->bytenr;
c46effa6
LB
128 while (*p) {
129 parent_node = *p;
130 entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
131 href_node);
132
d278850e 133 if (bytenr < entry->bytenr)
c46effa6 134 p = &(*p)->rb_left;
d278850e 135 else if (bytenr > entry->bytenr)
c46effa6
LB
136 p = &(*p)->rb_right;
137 else
138 return entry;
139 }
140
141 rb_link_node(node, parent_node, p);
142 rb_insert_color(node, root);
143 return NULL;
144}
145
0e0adbcf
JB
146static struct btrfs_delayed_ref_node* tree_insert(struct rb_root *root,
147 struct btrfs_delayed_ref_node *ins)
148{
149 struct rb_node **p = &root->rb_node;
150 struct rb_node *node = &ins->ref_node;
151 struct rb_node *parent_node = NULL;
152 struct btrfs_delayed_ref_node *entry;
153
154 while (*p) {
155 int comp;
156
157 parent_node = *p;
158 entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
159 ref_node);
160 comp = comp_refs(ins, entry, true);
161 if (comp < 0)
162 p = &(*p)->rb_left;
163 else if (comp > 0)
164 p = &(*p)->rb_right;
165 else
166 return entry;
167 }
168
169 rb_link_node(node, parent_node, p);
170 rb_insert_color(node, root);
171 return NULL;
172}
173
56bec294 174/*
5d4f98a2 175 * find an head entry based on bytenr. This returns the delayed ref
d1270cd9
AJ
176 * head if it was able to find one, or NULL if nothing was in that spot.
177 * If return_bigger is given, the next bigger entry is returned if no exact
178 * match is found.
56bec294 179 */
c46effa6
LB
180static struct btrfs_delayed_ref_head *
181find_ref_head(struct rb_root *root, u64 bytenr,
85fdfdf6 182 int return_bigger)
56bec294 183{
d1270cd9 184 struct rb_node *n;
c46effa6 185 struct btrfs_delayed_ref_head *entry;
56bec294 186
d1270cd9
AJ
187 n = root->rb_node;
188 entry = NULL;
56bec294 189 while (n) {
c46effa6 190 entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
56bec294 191
d278850e 192 if (bytenr < entry->bytenr)
56bec294 193 n = n->rb_left;
d278850e 194 else if (bytenr > entry->bytenr)
56bec294
CM
195 n = n->rb_right;
196 else
197 return entry;
198 }
d1270cd9 199 if (entry && return_bigger) {
d278850e 200 if (bytenr > entry->bytenr) {
c46effa6 201 n = rb_next(&entry->href_node);
d1270cd9
AJ
202 if (!n)
203 n = rb_first(root);
c46effa6
LB
204 entry = rb_entry(n, struct btrfs_delayed_ref_head,
205 href_node);
6103fb43 206 return entry;
d1270cd9
AJ
207 }
208 return entry;
209 }
56bec294
CM
210 return NULL;
211}
212
c3e69d58
CM
213int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
214 struct btrfs_delayed_ref_head *head)
56bec294 215{
c3e69d58
CM
216 struct btrfs_delayed_ref_root *delayed_refs;
217
218 delayed_refs = &trans->transaction->delayed_refs;
a4666e68 219 lockdep_assert_held(&delayed_refs->lock);
c3e69d58
CM
220 if (mutex_trylock(&head->mutex))
221 return 0;
222
d278850e 223 refcount_inc(&head->refs);
c3e69d58
CM
224 spin_unlock(&delayed_refs->lock);
225
226 mutex_lock(&head->mutex);
227 spin_lock(&delayed_refs->lock);
d278850e 228 if (RB_EMPTY_NODE(&head->href_node)) {
c3e69d58 229 mutex_unlock(&head->mutex);
d278850e 230 btrfs_put_delayed_ref_head(head);
c3e69d58
CM
231 return -EAGAIN;
232 }
d278850e 233 btrfs_put_delayed_ref_head(head);
c3e69d58
CM
234 return 0;
235}
236
35a3621b 237static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
ae1e206b 238 struct btrfs_delayed_ref_root *delayed_refs,
d7df2c79 239 struct btrfs_delayed_ref_head *head,
ae1e206b
JB
240 struct btrfs_delayed_ref_node *ref)
241{
a4666e68 242 lockdep_assert_held(&head->lock);
0e0adbcf
JB
243 rb_erase(&ref->ref_node, &head->ref_tree);
244 RB_CLEAR_NODE(&ref->ref_node);
d278850e
JB
245 if (!list_empty(&ref->add_list))
246 list_del(&ref->add_list);
ae1e206b
JB
247 ref->in_tree = 0;
248 btrfs_put_delayed_ref(ref);
d7df2c79 249 atomic_dec(&delayed_refs->num_entries);
ae1e206b
JB
250 if (trans->delayed_ref_updates)
251 trans->delayed_ref_updates--;
252}
253
2c3cf7d5
FM
254static bool merge_ref(struct btrfs_trans_handle *trans,
255 struct btrfs_delayed_ref_root *delayed_refs,
256 struct btrfs_delayed_ref_head *head,
257 struct btrfs_delayed_ref_node *ref,
258 u64 seq)
259{
260 struct btrfs_delayed_ref_node *next;
0e0adbcf 261 struct rb_node *node = rb_next(&ref->ref_node);
2c3cf7d5
FM
262 bool done = false;
263
0e0adbcf 264 while (!done && node) {
2c3cf7d5 265 int mod;
2c3cf7d5 266
0e0adbcf
JB
267 next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
268 node = rb_next(node);
2c3cf7d5 269 if (seq && next->seq >= seq)
0e0adbcf 270 break;
1d148e59 271 if (comp_refs(ref, next, false))
0e0adbcf 272 break;
2c3cf7d5
FM
273
274 if (ref->action == next->action) {
275 mod = next->ref_mod;
276 } else {
277 if (ref->ref_mod < next->ref_mod) {
278 swap(ref, next);
279 done = true;
280 }
281 mod = -next->ref_mod;
282 }
283
284 drop_delayed_ref(trans, delayed_refs, head, next);
285 ref->ref_mod += mod;
286 if (ref->ref_mod == 0) {
287 drop_delayed_ref(trans, delayed_refs, head, ref);
288 done = true;
289 } else {
290 /*
291 * Can't have multiples of the same ref on a tree block.
292 */
293 WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
294 ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
295 }
2c3cf7d5
FM
296 }
297
298 return done;
299}
300
301void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
302 struct btrfs_fs_info *fs_info,
303 struct btrfs_delayed_ref_root *delayed_refs,
304 struct btrfs_delayed_ref_head *head)
305{
306 struct btrfs_delayed_ref_node *ref;
0e0adbcf 307 struct rb_node *node;
2c3cf7d5
FM
308 u64 seq = 0;
309
a4666e68 310 lockdep_assert_held(&head->lock);
2c3cf7d5 311
0e0adbcf 312 if (RB_EMPTY_ROOT(&head->ref_tree))
2c3cf7d5
FM
313 return;
314
315 /* We don't have too many refs to merge for data. */
316 if (head->is_data)
317 return;
318
319 spin_lock(&fs_info->tree_mod_seq_lock);
320 if (!list_empty(&fs_info->tree_mod_seq_list)) {
321 struct seq_list *elem;
322
323 elem = list_first_entry(&fs_info->tree_mod_seq_list,
324 struct seq_list, list);
325 seq = elem->seq;
326 }
327 spin_unlock(&fs_info->tree_mod_seq_lock);
328
0e0adbcf
JB
329again:
330 for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
331 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
2c3cf7d5 332 if (seq && ref->seq >= seq)
2c3cf7d5 333 continue;
0e0adbcf
JB
334 if (merge_ref(trans, delayed_refs, head, ref, seq))
335 goto again;
2c3cf7d5
FM
336 }
337}
338
097b8a7c
JS
339int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
340 struct btrfs_delayed_ref_root *delayed_refs,
00f04b88
AJ
341 u64 seq)
342{
343 struct seq_list *elem;
097b8a7c
JS
344 int ret = 0;
345
346 spin_lock(&fs_info->tree_mod_seq_lock);
347 if (!list_empty(&fs_info->tree_mod_seq_list)) {
348 elem = list_first_entry(&fs_info->tree_mod_seq_list,
349 struct seq_list, list);
350 if (seq >= elem->seq) {
ab8d0fc4
JM
351 btrfs_debug(fs_info,
352 "holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)",
353 (u32)(seq >> 32), (u32)seq,
354 (u32)(elem->seq >> 32), (u32)elem->seq,
355 delayed_refs);
097b8a7c
JS
356 ret = 1;
357 }
00f04b88 358 }
097b8a7c
JS
359
360 spin_unlock(&fs_info->tree_mod_seq_lock);
361 return ret;
00f04b88
AJ
362}
363
d7df2c79
JB
364struct btrfs_delayed_ref_head *
365btrfs_select_ref_head(struct btrfs_trans_handle *trans)
c3e69d58 366{
c3e69d58 367 struct btrfs_delayed_ref_root *delayed_refs;
d7df2c79
JB
368 struct btrfs_delayed_ref_head *head;
369 u64 start;
370 bool loop = false;
56bec294 371
c3e69d58 372 delayed_refs = &trans->transaction->delayed_refs;
c46effa6 373
c3e69d58 374again:
d7df2c79 375 start = delayed_refs->run_delayed_start;
85fdfdf6 376 head = find_ref_head(&delayed_refs->href_root, start, 1);
d7df2c79
JB
377 if (!head && !loop) {
378 delayed_refs->run_delayed_start = 0;
c3e69d58 379 start = 0;
d7df2c79 380 loop = true;
85fdfdf6 381 head = find_ref_head(&delayed_refs->href_root, start, 1);
d7df2c79
JB
382 if (!head)
383 return NULL;
384 } else if (!head && loop) {
385 return NULL;
c3e69d58 386 }
56bec294 387
d7df2c79
JB
388 while (head->processing) {
389 struct rb_node *node;
390
391 node = rb_next(&head->href_node);
392 if (!node) {
393 if (loop)
394 return NULL;
395 delayed_refs->run_delayed_start = 0;
396 start = 0;
397 loop = true;
398 goto again;
399 }
400 head = rb_entry(node, struct btrfs_delayed_ref_head,
401 href_node);
402 }
093486c4 403
d7df2c79
JB
404 head->processing = 1;
405 WARN_ON(delayed_refs->num_heads_ready == 0);
406 delayed_refs->num_heads_ready--;
d278850e
JB
407 delayed_refs->run_delayed_start = head->bytenr +
408 head->num_bytes;
d7df2c79 409 return head;
093486c4
MX
410}
411
c6fc2454
QW
412/*
413 * Helper to insert the ref_node to the tail or merge with tail.
414 *
415 * Return 0 for insert.
416 * Return >0 for merge.
417 */
0e0adbcf
JB
418static int insert_delayed_ref(struct btrfs_trans_handle *trans,
419 struct btrfs_delayed_ref_root *root,
420 struct btrfs_delayed_ref_head *href,
421 struct btrfs_delayed_ref_node *ref)
c6fc2454
QW
422{
423 struct btrfs_delayed_ref_node *exist;
424 int mod;
425 int ret = 0;
426
427 spin_lock(&href->lock);
0e0adbcf
JB
428 exist = tree_insert(&href->ref_tree, ref);
429 if (!exist)
430 goto inserted;
c6fc2454
QW
431
432 /* Now we are sure we can merge */
433 ret = 1;
434 if (exist->action == ref->action) {
435 mod = ref->ref_mod;
436 } else {
437 /* Need to change action */
438 if (exist->ref_mod < ref->ref_mod) {
439 exist->action = ref->action;
440 mod = -exist->ref_mod;
441 exist->ref_mod = ref->ref_mod;
1d57ee94
WX
442 if (ref->action == BTRFS_ADD_DELAYED_REF)
443 list_add_tail(&exist->add_list,
444 &href->ref_add_list);
445 else if (ref->action == BTRFS_DROP_DELAYED_REF) {
446 ASSERT(!list_empty(&exist->add_list));
447 list_del(&exist->add_list);
448 } else {
449 ASSERT(0);
450 }
c6fc2454
QW
451 } else
452 mod = -ref->ref_mod;
453 }
454 exist->ref_mod += mod;
455
456 /* remove existing tail if its ref_mod is zero */
457 if (exist->ref_mod == 0)
458 drop_delayed_ref(trans, root, href, exist);
459 spin_unlock(&href->lock);
460 return ret;
0e0adbcf 461inserted:
1d57ee94
WX
462 if (ref->action == BTRFS_ADD_DELAYED_REF)
463 list_add_tail(&ref->add_list, &href->ref_add_list);
c6fc2454
QW
464 atomic_inc(&root->num_entries);
465 trans->delayed_ref_updates++;
466 spin_unlock(&href->lock);
467 return ret;
468}
469
56bec294
CM
470/*
471 * helper function to update the accounting in the head ref
472 * existing and update must have the same bytenr
473 */
474static noinline void
1262133b 475update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
d278850e
JB
476 struct btrfs_delayed_ref_head *existing,
477 struct btrfs_delayed_ref_head *update,
7be07912 478 int *old_ref_mod_ret)
56bec294 479{
1262133b 480 int old_ref_mod;
56bec294 481
d278850e 482 BUG_ON(existing->is_data != update->is_data);
56bec294 483
d278850e
JB
484 spin_lock(&existing->lock);
485 if (update->must_insert_reserved) {
56bec294
CM
486 /* if the extent was freed and then
487 * reallocated before the delayed ref
488 * entries were processed, we can end up
489 * with an existing head ref without
490 * the must_insert_reserved flag set.
491 * Set it again here
492 */
d278850e 493 existing->must_insert_reserved = update->must_insert_reserved;
56bec294
CM
494
495 /*
496 * update the num_bytes so we make sure the accounting
497 * is done correctly
498 */
499 existing->num_bytes = update->num_bytes;
500
501 }
502
d278850e
JB
503 if (update->extent_op) {
504 if (!existing->extent_op) {
505 existing->extent_op = update->extent_op;
5d4f98a2 506 } else {
d278850e
JB
507 if (update->extent_op->update_key) {
508 memcpy(&existing->extent_op->key,
509 &update->extent_op->key,
510 sizeof(update->extent_op->key));
511 existing->extent_op->update_key = true;
5d4f98a2 512 }
d278850e
JB
513 if (update->extent_op->update_flags) {
514 existing->extent_op->flags_to_set |=
515 update->extent_op->flags_to_set;
516 existing->extent_op->update_flags = true;
5d4f98a2 517 }
d278850e 518 btrfs_free_delayed_extent_op(update->extent_op);
5d4f98a2
YZ
519 }
520 }
56bec294 521 /*
d7df2c79
JB
522 * update the reference mod on the head to reflect this new operation,
523 * only need the lock for this case cause we could be processing it
524 * currently, for refs we just added we know we're a-ok.
56bec294 525 */
d278850e 526 old_ref_mod = existing->total_ref_mod;
7be07912
OS
527 if (old_ref_mod_ret)
528 *old_ref_mod_ret = old_ref_mod;
56bec294 529 existing->ref_mod += update->ref_mod;
d278850e 530 existing->total_ref_mod += update->ref_mod;
1262133b
JB
531
532 /*
533 * If we are going to from a positive ref mod to a negative or vice
534 * versa we need to make sure to adjust pending_csums accordingly.
535 */
d278850e
JB
536 if (existing->is_data) {
537 if (existing->total_ref_mod >= 0 && old_ref_mod < 0)
1262133b 538 delayed_refs->pending_csums -= existing->num_bytes;
d278850e 539 if (existing->total_ref_mod < 0 && old_ref_mod >= 0)
1262133b
JB
540 delayed_refs->pending_csums += existing->num_bytes;
541 }
d278850e 542 spin_unlock(&existing->lock);
56bec294
CM
543}
544
545/*
5d4f98a2 546 * helper function to actually insert a head node into the rbtree.
56bec294 547 * this does all the dirty work in terms of maintaining the correct
5d4f98a2 548 * overall modification count.
56bec294 549 */
d7df2c79
JB
550static noinline struct btrfs_delayed_ref_head *
551add_delayed_ref_head(struct btrfs_fs_info *fs_info,
552 struct btrfs_trans_handle *trans,
d278850e 553 struct btrfs_delayed_ref_head *head_ref,
3368d001 554 struct btrfs_qgroup_extent_record *qrecord,
5846a3c2 555 u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
7be07912
OS
556 int action, int is_data, int *qrecord_inserted_ret,
557 int *old_ref_mod, int *new_ref_mod)
56bec294 558{
d7df2c79 559 struct btrfs_delayed_ref_head *existing;
56bec294
CM
560 struct btrfs_delayed_ref_root *delayed_refs;
561 int count_mod = 1;
562 int must_insert_reserved = 0;
fb235dc0 563 int qrecord_inserted = 0;
56bec294 564
5846a3c2
QW
565 /* If reserved is provided, it must be a data extent. */
566 BUG_ON(!is_data && reserved);
567
56bec294
CM
568 /*
569 * the head node stores the sum of all the mods, so dropping a ref
570 * should drop the sum in the head node by one.
571 */
5d4f98a2
YZ
572 if (action == BTRFS_UPDATE_DELAYED_HEAD)
573 count_mod = 0;
574 else if (action == BTRFS_DROP_DELAYED_REF)
575 count_mod = -1;
56bec294
CM
576
577 /*
578 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
579 * the reserved accounting when the extent is finally added, or
580 * if a later modification deletes the delayed ref without ever
581 * inserting the extent into the extent allocation tree.
582 * ref->must_insert_reserved is the flag used to record
583 * that accounting mods are required.
584 *
585 * Once we record must_insert_reserved, switch the action to
586 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
587 */
5d4f98a2 588 if (action == BTRFS_ADD_DELAYED_EXTENT)
56bec294 589 must_insert_reserved = 1;
5d4f98a2 590 else
56bec294 591 must_insert_reserved = 0;
56bec294
CM
592
593 delayed_refs = &trans->transaction->delayed_refs;
594
d278850e
JB
595 refcount_set(&head_ref->refs, 1);
596 head_ref->bytenr = bytenr;
597 head_ref->num_bytes = num_bytes;
598 head_ref->ref_mod = count_mod;
5d4f98a2
YZ
599 head_ref->must_insert_reserved = must_insert_reserved;
600 head_ref->is_data = is_data;
0e0adbcf 601 head_ref->ref_tree = RB_ROOT;
1d57ee94 602 INIT_LIST_HEAD(&head_ref->ref_add_list);
d278850e 603 RB_CLEAR_NODE(&head_ref->href_node);
d7df2c79 604 head_ref->processing = 0;
1262133b 605 head_ref->total_ref_mod = count_mod;
f64d5ca8
QW
606 head_ref->qgroup_reserved = 0;
607 head_ref->qgroup_ref_root = 0;
d278850e
JB
608 spin_lock_init(&head_ref->lock);
609 mutex_init(&head_ref->mutex);
5d4f98a2 610
3368d001
QW
611 /* Record qgroup extent info if provided */
612 if (qrecord) {
5846a3c2
QW
613 if (ref_root && reserved) {
614 head_ref->qgroup_ref_root = ref_root;
615 head_ref->qgroup_reserved = reserved;
616 }
617
3368d001
QW
618 qrecord->bytenr = bytenr;
619 qrecord->num_bytes = num_bytes;
620 qrecord->old_roots = NULL;
621
50b3e040 622 if(btrfs_qgroup_trace_extent_nolock(fs_info,
cb93b52c 623 delayed_refs, qrecord))
3368d001 624 kfree(qrecord);
fb235dc0
QW
625 else
626 qrecord_inserted = 1;
3368d001
QW
627 }
628
d278850e 629 trace_add_delayed_ref_head(fs_info, head_ref, action);
1abe9b8a 630
d7df2c79
JB
631 existing = htree_insert(&delayed_refs->href_root,
632 &head_ref->href_node);
5d4f98a2 633 if (existing) {
5846a3c2
QW
634 WARN_ON(ref_root && reserved && existing->qgroup_ref_root
635 && existing->qgroup_reserved);
d278850e 636 update_existing_head_ref(delayed_refs, existing, head_ref,
7be07912 637 old_ref_mod);
5d4f98a2
YZ
638 /*
639 * we've updated the existing ref, free the newly
640 * allocated ref
641 */
78a6184a 642 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
d7df2c79 643 head_ref = existing;
5d4f98a2 644 } else {
7be07912
OS
645 if (old_ref_mod)
646 *old_ref_mod = 0;
1262133b
JB
647 if (is_data && count_mod < 0)
648 delayed_refs->pending_csums += num_bytes;
5d4f98a2
YZ
649 delayed_refs->num_heads++;
650 delayed_refs->num_heads_ready++;
d7df2c79 651 atomic_inc(&delayed_refs->num_entries);
5d4f98a2
YZ
652 trans->delayed_ref_updates++;
653 }
fb235dc0
QW
654 if (qrecord_inserted_ret)
655 *qrecord_inserted_ret = qrecord_inserted;
7be07912
OS
656 if (new_ref_mod)
657 *new_ref_mod = head_ref->total_ref_mod;
d7df2c79 658 return head_ref;
5d4f98a2
YZ
659}
660
661/*
662 * helper to insert a delayed tree ref into the rbtree.
663 */
d7df2c79
JB
664static noinline void
665add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
666 struct btrfs_trans_handle *trans,
667 struct btrfs_delayed_ref_head *head_ref,
668 struct btrfs_delayed_ref_node *ref, u64 bytenr,
669 u64 num_bytes, u64 parent, u64 ref_root, int level,
b06c4bf5 670 int action)
5d4f98a2 671{
5d4f98a2
YZ
672 struct btrfs_delayed_tree_ref *full_ref;
673 struct btrfs_delayed_ref_root *delayed_refs;
00f04b88 674 u64 seq = 0;
c6fc2454 675 int ret;
5d4f98a2
YZ
676
677 if (action == BTRFS_ADD_DELAYED_EXTENT)
678 action = BTRFS_ADD_DELAYED_REF;
679
fcebe456
JB
680 if (is_fstree(ref_root))
681 seq = atomic64_read(&fs_info->tree_mod_seq);
5d4f98a2
YZ
682 delayed_refs = &trans->transaction->delayed_refs;
683
684 /* first set the basic ref node struct up */
6df8cdf5 685 refcount_set(&ref->refs, 1);
5d4f98a2 686 ref->bytenr = bytenr;
56bec294 687 ref->num_bytes = num_bytes;
5d4f98a2
YZ
688 ref->ref_mod = 1;
689 ref->action = action;
690 ref->is_head = 0;
691 ref->in_tree = 1;
00f04b88 692 ref->seq = seq;
0e0adbcf 693 RB_CLEAR_NODE(&ref->ref_node);
1d57ee94 694 INIT_LIST_HEAD(&ref->add_list);
00f04b88 695
5d4f98a2 696 full_ref = btrfs_delayed_node_to_tree_ref(ref);
eebe063b
AJ
697 full_ref->parent = parent;
698 full_ref->root = ref_root;
699 if (parent)
5d4f98a2 700 ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
eebe063b 701 else
5d4f98a2 702 ref->type = BTRFS_TREE_BLOCK_REF_KEY;
5d4f98a2 703 full_ref->level = level;
56bec294 704
bc074524 705 trace_add_delayed_tree_ref(fs_info, ref, full_ref, action);
1abe9b8a 706
0e0adbcf 707 ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
c6fc2454
QW
708
709 /*
710 * XXX: memory should be freed at the same level allocated.
711 * But bad practice is anywhere... Follow it now. Need cleanup.
712 */
713 if (ret > 0)
78a6184a 714 kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
5d4f98a2
YZ
715}
716
717/*
718 * helper to insert a delayed data ref into the rbtree.
719 */
d7df2c79
JB
720static noinline void
721add_delayed_data_ref(struct btrfs_fs_info *fs_info,
722 struct btrfs_trans_handle *trans,
723 struct btrfs_delayed_ref_head *head_ref,
724 struct btrfs_delayed_ref_node *ref, u64 bytenr,
725 u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
b06c4bf5 726 u64 offset, int action)
5d4f98a2 727{
5d4f98a2
YZ
728 struct btrfs_delayed_data_ref *full_ref;
729 struct btrfs_delayed_ref_root *delayed_refs;
00f04b88 730 u64 seq = 0;
c6fc2454 731 int ret;
5d4f98a2
YZ
732
733 if (action == BTRFS_ADD_DELAYED_EXTENT)
734 action = BTRFS_ADD_DELAYED_REF;
735
736 delayed_refs = &trans->transaction->delayed_refs;
737
fcebe456
JB
738 if (is_fstree(ref_root))
739 seq = atomic64_read(&fs_info->tree_mod_seq);
740
5d4f98a2 741 /* first set the basic ref node struct up */
6df8cdf5 742 refcount_set(&ref->refs, 1);
5d4f98a2
YZ
743 ref->bytenr = bytenr;
744 ref->num_bytes = num_bytes;
745 ref->ref_mod = 1;
746 ref->action = action;
747 ref->is_head = 0;
748 ref->in_tree = 1;
00f04b88 749 ref->seq = seq;
0e0adbcf 750 RB_CLEAR_NODE(&ref->ref_node);
1d57ee94 751 INIT_LIST_HEAD(&ref->add_list);
00f04b88 752
5d4f98a2 753 full_ref = btrfs_delayed_node_to_data_ref(ref);
eebe063b
AJ
754 full_ref->parent = parent;
755 full_ref->root = ref_root;
756 if (parent)
5d4f98a2 757 ref->type = BTRFS_SHARED_DATA_REF_KEY;
eebe063b 758 else
5d4f98a2 759 ref->type = BTRFS_EXTENT_DATA_REF_KEY;
66d7e7f0 760
5d4f98a2
YZ
761 full_ref->objectid = owner;
762 full_ref->offset = offset;
56bec294 763
bc074524 764 trace_add_delayed_data_ref(fs_info, ref, full_ref, action);
1abe9b8a 765
0e0adbcf 766 ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
c6fc2454 767 if (ret > 0)
78a6184a 768 kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
56bec294
CM
769}
770
771/*
5d4f98a2 772 * add a delayed tree ref. This does all of the accounting required
56bec294
CM
773 * to make sure the delayed ref is eventually processed before this
774 * transaction commits.
775 */
66d7e7f0
AJ
776int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
777 struct btrfs_trans_handle *trans,
5d4f98a2
YZ
778 u64 bytenr, u64 num_bytes, u64 parent,
779 u64 ref_root, int level, int action,
7be07912
OS
780 struct btrfs_delayed_extent_op *extent_op,
781 int *old_ref_mod, int *new_ref_mod)
56bec294 782{
5d4f98a2 783 struct btrfs_delayed_tree_ref *ref;
56bec294
CM
784 struct btrfs_delayed_ref_head *head_ref;
785 struct btrfs_delayed_ref_root *delayed_refs;
3368d001 786 struct btrfs_qgroup_extent_record *record = NULL;
fb235dc0 787 int qrecord_inserted;
56bec294 788
5d4f98a2 789 BUG_ON(extent_op && extent_op->is_data);
78a6184a 790 ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
56bec294
CM
791 if (!ref)
792 return -ENOMEM;
793
78a6184a 794 head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
5a5003df
DC
795 if (!head_ref)
796 goto free_ref;
5d4f98a2 797
afcdd129
JB
798 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
799 is_fstree(ref_root)) {
3368d001 800 record = kmalloc(sizeof(*record), GFP_NOFS);
5a5003df
DC
801 if (!record)
802 goto free_head_ref;
3368d001
QW
803 }
804
5d4f98a2
YZ
805 head_ref->extent_op = extent_op;
806
807 delayed_refs = &trans->transaction->delayed_refs;
808 spin_lock(&delayed_refs->lock);
809
56bec294 810 /*
5d4f98a2
YZ
811 * insert both the head node and the new ref without dropping
812 * the spin lock
56bec294 813 */
d278850e 814 head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
fb235dc0 815 bytenr, num_bytes, 0, 0, action, 0,
7be07912
OS
816 &qrecord_inserted, old_ref_mod,
817 new_ref_mod);
5d4f98a2 818
d7df2c79 819 add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
b06c4bf5 820 num_bytes, parent, ref_root, level, action);
5d4f98a2 821 spin_unlock(&delayed_refs->lock);
95a06077 822
fb235dc0 823 if (qrecord_inserted)
952bd3db
NB
824 btrfs_qgroup_trace_extent_post(fs_info, record);
825
5d4f98a2 826 return 0;
5a5003df
DC
827
828free_head_ref:
829 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
830free_ref:
831 kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
832
833 return -ENOMEM;
5d4f98a2
YZ
834}
835
836/*
837 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
838 */
66d7e7f0
AJ
839int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
840 struct btrfs_trans_handle *trans,
5d4f98a2
YZ
841 u64 bytenr, u64 num_bytes,
842 u64 parent, u64 ref_root,
7be07912
OS
843 u64 owner, u64 offset, u64 reserved, int action,
844 int *old_ref_mod, int *new_ref_mod)
5d4f98a2
YZ
845{
846 struct btrfs_delayed_data_ref *ref;
847 struct btrfs_delayed_ref_head *head_ref;
848 struct btrfs_delayed_ref_root *delayed_refs;
3368d001 849 struct btrfs_qgroup_extent_record *record = NULL;
fb235dc0 850 int qrecord_inserted;
5d4f98a2 851
78a6184a 852 ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
5d4f98a2
YZ
853 if (!ref)
854 return -ENOMEM;
56bec294 855
78a6184a 856 head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
56bec294 857 if (!head_ref) {
78a6184a 858 kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
56bec294
CM
859 return -ENOMEM;
860 }
5d4f98a2 861
afcdd129
JB
862 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
863 is_fstree(ref_root)) {
3368d001
QW
864 record = kmalloc(sizeof(*record), GFP_NOFS);
865 if (!record) {
866 kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
867 kmem_cache_free(btrfs_delayed_ref_head_cachep,
868 head_ref);
869 return -ENOMEM;
870 }
871 }
872
fef394f7 873 head_ref->extent_op = NULL;
5d4f98a2 874
56bec294
CM
875 delayed_refs = &trans->transaction->delayed_refs;
876 spin_lock(&delayed_refs->lock);
877
878 /*
879 * insert both the head node and the new ref without dropping
880 * the spin lock
881 */
d278850e 882 head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
5846a3c2 883 bytenr, num_bytes, ref_root, reserved,
7be07912
OS
884 action, 1, &qrecord_inserted,
885 old_ref_mod, new_ref_mod);
56bec294 886
d7df2c79 887 add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
66d7e7f0 888 num_bytes, parent, ref_root, owner, offset,
b06c4bf5 889 action);
5d4f98a2 890 spin_unlock(&delayed_refs->lock);
95a06077 891
fb235dc0
QW
892 if (qrecord_inserted)
893 return btrfs_qgroup_trace_extent_post(fs_info, record);
5d4f98a2
YZ
894 return 0;
895}
896
66d7e7f0
AJ
897int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
898 struct btrfs_trans_handle *trans,
5d4f98a2
YZ
899 u64 bytenr, u64 num_bytes,
900 struct btrfs_delayed_extent_op *extent_op)
901{
902 struct btrfs_delayed_ref_head *head_ref;
903 struct btrfs_delayed_ref_root *delayed_refs;
5d4f98a2 904
78a6184a 905 head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
5d4f98a2
YZ
906 if (!head_ref)
907 return -ENOMEM;
908
909 head_ref->extent_op = extent_op;
910
911 delayed_refs = &trans->transaction->delayed_refs;
912 spin_lock(&delayed_refs->lock);
913
d278850e 914 add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr,
5846a3c2 915 num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
7be07912 916 extent_op->is_data, NULL, NULL, NULL);
5d4f98a2 917
56bec294
CM
918 spin_unlock(&delayed_refs->lock);
919 return 0;
920}
921
1887be66
CM
922/*
923 * this does a simple search for the head node for a given extent.
924 * It must be called with the delayed ref spinlock held, and it returns
925 * the head node if any where found, or NULL if not.
926 */
927struct btrfs_delayed_ref_head *
f72ad18e 928btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
1887be66 929{
85fdfdf6 930 return find_ref_head(&delayed_refs->href_root, bytenr, 0);
1887be66 931}
78a6184a 932
e67c718b 933void __cold btrfs_delayed_ref_exit(void)
78a6184a 934{
5598e900
KM
935 kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
936 kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
937 kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
938 kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
78a6184a
MX
939}
940
f5c29bd9 941int __init btrfs_delayed_ref_init(void)
78a6184a
MX
942{
943 btrfs_delayed_ref_head_cachep = kmem_cache_create(
944 "btrfs_delayed_ref_head",
945 sizeof(struct btrfs_delayed_ref_head), 0,
fba4b697 946 SLAB_MEM_SPREAD, NULL);
78a6184a
MX
947 if (!btrfs_delayed_ref_head_cachep)
948 goto fail;
949
950 btrfs_delayed_tree_ref_cachep = kmem_cache_create(
951 "btrfs_delayed_tree_ref",
952 sizeof(struct btrfs_delayed_tree_ref), 0,
fba4b697 953 SLAB_MEM_SPREAD, NULL);
78a6184a
MX
954 if (!btrfs_delayed_tree_ref_cachep)
955 goto fail;
956
957 btrfs_delayed_data_ref_cachep = kmem_cache_create(
958 "btrfs_delayed_data_ref",
959 sizeof(struct btrfs_delayed_data_ref), 0,
fba4b697 960 SLAB_MEM_SPREAD, NULL);
78a6184a
MX
961 if (!btrfs_delayed_data_ref_cachep)
962 goto fail;
963
964 btrfs_delayed_extent_op_cachep = kmem_cache_create(
965 "btrfs_delayed_extent_op",
966 sizeof(struct btrfs_delayed_extent_op), 0,
fba4b697 967 SLAB_MEM_SPREAD, NULL);
78a6184a
MX
968 if (!btrfs_delayed_extent_op_cachep)
969 goto fail;
970
971 return 0;
972fail:
973 btrfs_delayed_ref_exit();
974 return -ENOMEM;
975}