Btrfs: don't access non-existent key when csum tree is empty
fs/btrfs/transaction.c (linux-2.6-block.git)
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02110-1301, USA.
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include <linux/uuid.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "locking.h"
#include "tree-log.h"
#include "inode-map.h"
#include "volumes.h"
#include "dev-replace.h"

#define BTRFS_ROOT_TRANS_TAG 0

static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
        [TRANS_STATE_RUNNING]           = 0U,
        [TRANS_STATE_BLOCKED]           = (__TRANS_USERSPACE |
                                           __TRANS_START),
        [TRANS_STATE_COMMIT_START]      = (__TRANS_USERSPACE |
                                           __TRANS_START |
                                           __TRANS_ATTACH),
        [TRANS_STATE_COMMIT_DOING]      = (__TRANS_USERSPACE |
                                           __TRANS_START |
                                           __TRANS_ATTACH |
                                           __TRANS_JOIN),
        [TRANS_STATE_UNBLOCKED]         = (__TRANS_USERSPACE |
                                           __TRANS_START |
                                           __TRANS_ATTACH |
                                           __TRANS_JOIN |
                                           __TRANS_JOIN_NOLOCK),
        [TRANS_STATE_COMPLETED]         = (__TRANS_USERSPACE |
                                           __TRANS_START |
                                           __TRANS_ATTACH |
                                           __TRANS_JOIN |
                                           __TRANS_JOIN_NOLOCK),
};
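
/*
 * Note for readers: join_transaction() below refuses a handle of a given
 * type while the running transaction is in state S whenever
 * (btrfs_blocked_trans_types[S] & type) is non-zero.  So once a commit
 * reaches TRANS_STATE_COMMIT_DOING, even TRANS_JOIN is refused and only
 * TRANS_JOIN_NOLOCK can still get in.
 */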

void btrfs_put_transaction(struct btrfs_transaction *transaction)
{
        WARN_ON(atomic_read(&transaction->use_count) == 0);
        if (atomic_dec_and_test(&transaction->use_count)) {
                BUG_ON(!list_empty(&transaction->list));
                WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
                while (!list_empty(&transaction->pending_chunks)) {
                        struct extent_map *em;

                        em = list_first_entry(&transaction->pending_chunks,
                                              struct extent_map, list);
                        list_del_init(&em->list);
                        free_extent_map(em);
                }
                kmem_cache_free(btrfs_transaction_cachep, transaction);
        }
}

static noinline void switch_commit_roots(struct btrfs_transaction *trans,
                                         struct btrfs_fs_info *fs_info)
{
        struct btrfs_root *root, *tmp;

        down_write(&fs_info->commit_root_sem);
        list_for_each_entry_safe(root, tmp, &trans->switch_commits,
                                 dirty_list) {
                list_del_init(&root->dirty_list);
                free_extent_buffer(root->commit_root);
                root->commit_root = btrfs_root_node(root);
                if (is_fstree(root->objectid))
                        btrfs_unpin_free_ino(root);
        }
        up_write(&fs_info->commit_root_sem);
}

static inline void extwriter_counter_inc(struct btrfs_transaction *trans,
                                         unsigned int type)
{
        if (type & TRANS_EXTWRITERS)
                atomic_inc(&trans->num_extwriters);
}

static inline void extwriter_counter_dec(struct btrfs_transaction *trans,
                                         unsigned int type)
{
        if (type & TRANS_EXTWRITERS)
                atomic_dec(&trans->num_extwriters);
}

static inline void extwriter_counter_init(struct btrfs_transaction *trans,
                                          unsigned int type)
{
        atomic_set(&trans->num_extwriters, ((type & TRANS_EXTWRITERS) ? 1 : 0));
}

static inline int extwriter_counter_read(struct btrfs_transaction *trans)
{
        return atomic_read(&trans->num_extwriters);
}
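
/*
 * Note: num_extwriters only counts handles of the TRANS_EXTWRITERS types
 * (assumed here, per transaction.h, to be the external entry points
 * USERSPACE, START and ATTACH).  The commit path can therefore wait for
 * external writers to drain separately from num_writers, which also
 * counts internal JOIN/JOIN_NOLOCK handles.
 */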

/*
 * either allocate a new transaction or hop into the existing one
 */
static noinline int join_transaction(struct btrfs_root *root, unsigned int type)
{
        struct btrfs_transaction *cur_trans;
        struct btrfs_fs_info *fs_info = root->fs_info;

        spin_lock(&fs_info->trans_lock);
loop:
        /* The file system has been taken offline. No new transactions. */
        if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
                spin_unlock(&fs_info->trans_lock);
                return -EROFS;
        }

        cur_trans = fs_info->running_transaction;
        if (cur_trans) {
                if (cur_trans->aborted) {
                        spin_unlock(&fs_info->trans_lock);
                        return cur_trans->aborted;
                }
                if (btrfs_blocked_trans_types[cur_trans->state] & type) {
                        spin_unlock(&fs_info->trans_lock);
                        return -EBUSY;
                }
                atomic_inc(&cur_trans->use_count);
                atomic_inc(&cur_trans->num_writers);
                extwriter_counter_inc(cur_trans, type);
                spin_unlock(&fs_info->trans_lock);
                return 0;
        }
        spin_unlock(&fs_info->trans_lock);

        /*
         * If we are ATTACH, we just want to catch the current transaction,
         * and commit it. If there is no transaction, just return ENOENT.
         */
        if (type == TRANS_ATTACH)
                return -ENOENT;

        /*
         * JOIN_NOLOCK only happens during the transaction commit, so
         * it is impossible that ->running_transaction is NULL
         */
        BUG_ON(type == TRANS_JOIN_NOLOCK);

        cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
        if (!cur_trans)
                return -ENOMEM;

        spin_lock(&fs_info->trans_lock);
        if (fs_info->running_transaction) {
                /*
                 * someone started a transaction after we unlocked.  Make sure
                 * to redo the checks above
                 */
                kmem_cache_free(btrfs_transaction_cachep, cur_trans);
                goto loop;
        } else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
                spin_unlock(&fs_info->trans_lock);
                kmem_cache_free(btrfs_transaction_cachep, cur_trans);
                return -EROFS;
        }

        atomic_set(&cur_trans->num_writers, 1);
        extwriter_counter_init(cur_trans, type);
        init_waitqueue_head(&cur_trans->writer_wait);
        init_waitqueue_head(&cur_trans->commit_wait);
        cur_trans->state = TRANS_STATE_RUNNING;
        /*
         * One for this trans handle, one so it will live on until we
         * commit the transaction.
         */
        atomic_set(&cur_trans->use_count, 2);
        cur_trans->start_time = get_seconds();

        cur_trans->delayed_refs.href_root = RB_ROOT;
        atomic_set(&cur_trans->delayed_refs.num_entries, 0);
        cur_trans->delayed_refs.num_heads_ready = 0;
        cur_trans->delayed_refs.num_heads = 0;
        cur_trans->delayed_refs.flushing = 0;
        cur_trans->delayed_refs.run_delayed_start = 0;

        /*
         * although the tree mod log is per file system and not per transaction,
         * the log must never go across transaction boundaries.
         */
        smp_mb();
        if (!list_empty(&fs_info->tree_mod_seq_list))
                WARN(1, KERN_ERR "BTRFS: tree_mod_seq_list not empty when "
                        "creating a fresh transaction\n");
        if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log))
                WARN(1, KERN_ERR "BTRFS: tree_mod_log rb tree not empty when "
                        "creating a fresh transaction\n");
        atomic64_set(&fs_info->tree_mod_seq, 0);

        spin_lock_init(&cur_trans->delayed_refs.lock);

        INIT_LIST_HEAD(&cur_trans->pending_snapshots);
        INIT_LIST_HEAD(&cur_trans->ordered_operations);
        INIT_LIST_HEAD(&cur_trans->pending_chunks);
        INIT_LIST_HEAD(&cur_trans->switch_commits);
        list_add_tail(&cur_trans->list, &fs_info->trans_list);
        extent_io_tree_init(&cur_trans->dirty_pages,
                             fs_info->btree_inode->i_mapping);
        fs_info->generation++;
        cur_trans->transid = fs_info->generation;
        fs_info->running_transaction = cur_trans;
        cur_trans->aborted = 0;
        spin_unlock(&fs_info->trans_lock);

        return 0;
}
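
/*
 * The drop-lock/allocate/retake pattern above is why the "loop" label
 * exists: kmem_cache_alloc() can sleep, so by the time trans_lock is
 * retaken another task may already have installed a running transaction,
 * and all of the entry checks have to be redone.
 */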

/*
 * this does all the record keeping required to make sure that a reference
 * counted root is properly recorded in a given transaction.  This is required
 * to make sure the old root from before we joined the transaction is deleted
 * when the transaction commits
 */
static int record_root_in_trans(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root)
{
        if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
            root->last_trans < trans->transid) {
                WARN_ON(root == root->fs_info->extent_root);
                WARN_ON(root->commit_root != root->node);

                /*
                 * see below for IN_TRANS_SETUP usage rules
                 * we have the reloc mutex held now, so there
                 * is only one writer in this function
                 */
                set_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);

                /* make sure readers find IN_TRANS_SETUP before
                 * they find our root->last_trans update
                 */
                smp_wmb();

                spin_lock(&root->fs_info->fs_roots_radix_lock);
                if (root->last_trans == trans->transid) {
                        spin_unlock(&root->fs_info->fs_roots_radix_lock);
                        return 0;
                }
                radix_tree_tag_set(&root->fs_info->fs_roots_radix,
                           (unsigned long)root->root_key.objectid,
                           BTRFS_ROOT_TRANS_TAG);
                spin_unlock(&root->fs_info->fs_roots_radix_lock);
                root->last_trans = trans->transid;

                /* this is pretty tricky.  We don't want to
                 * take the relocation lock in btrfs_record_root_in_trans
                 * unless we're really doing the first setup for this root in
                 * this transaction.
                 *
                 * Normally we'd use root->last_trans as a flag to decide
                 * if we want to take the expensive mutex.
                 *
                 * But, we have to set root->last_trans before we
                 * init the relocation root, otherwise, we trip over warnings
                 * in ctree.c.  The solution used here is to flag ourselves
                 * with root IN_TRANS_SETUP.  When this is 1, we're still
                 * fixing up the reloc trees and everyone must wait.
                 *
                 * When this is zero, they can trust root->last_trans and fly
                 * through btrfs_record_root_in_trans without having to take the
                 * lock.  smp_wmb() makes sure that all the writes above are
                 * done before we pop in the zero below
                 */
                btrfs_init_reloc_root(trans, root);
                smp_mb__before_clear_bit();
                clear_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);
        }
        return 0;
}


int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root)
{
        if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
                return 0;

        /*
         * see record_root_in_trans for comments about IN_TRANS_SETUP usage
         * and barriers
         */
        smp_rmb();
        if (root->last_trans == trans->transid &&
            !test_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state))
                return 0;

        mutex_lock(&root->fs_info->reloc_mutex);
        record_root_in_trans(trans, root);
        mutex_unlock(&root->fs_info->reloc_mutex);

        return 0;
}

static inline int is_transaction_blocked(struct btrfs_transaction *trans)
{
        return (trans->state >= TRANS_STATE_BLOCKED &&
                trans->state < TRANS_STATE_UNBLOCKED &&
                !trans->aborted);
}
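
/*
 * "Blocked" thus covers the window [TRANS_STATE_BLOCKED,
 * TRANS_STATE_UNBLOCKED): a commit has been requested or is writing the
 * bulk of its metadata, and throttled transaction starters sit in
 * wait_current_trans() below until the state passes UNBLOCKED or the
 * transaction aborts.
 */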

/* wait for commit against the current transaction to become unblocked.
 * when this is done, it is safe to start a new transaction, but the current
 * transaction might not be fully on disk.
 */
static void wait_current_trans(struct btrfs_root *root)
{
        struct btrfs_transaction *cur_trans;

        spin_lock(&root->fs_info->trans_lock);
        cur_trans = root->fs_info->running_transaction;
        if (cur_trans && is_transaction_blocked(cur_trans)) {
                atomic_inc(&cur_trans->use_count);
                spin_unlock(&root->fs_info->trans_lock);

                wait_event(root->fs_info->transaction_wait,
                           cur_trans->state >= TRANS_STATE_UNBLOCKED ||
                           cur_trans->aborted);
                btrfs_put_transaction(cur_trans);
        } else {
                spin_unlock(&root->fs_info->trans_lock);
        }
}

static int may_wait_transaction(struct btrfs_root *root, int type)
{
        if (root->fs_info->log_root_recovering)
                return 0;

        if (type == TRANS_USERSPACE)
                return 1;

        if (type == TRANS_START &&
            !atomic_read(&root->fs_info->open_ioctl_trans))
                return 1;

        return 0;
}

static inline bool need_reserve_reloc_root(struct btrfs_root *root)
{
        if (!root->fs_info->reloc_ctl ||
            !test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
            root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
            root->reloc_root)
                return false;

        return true;
}
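
/*
 * When need_reserve_reloc_root() returns true, start_transaction() below
 * pads its reservation by one tree node (root->nodesize) so that creating
 * the relocation root for this tree cannot exhaust the transaction's
 * reserved metadata space.
 */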

static struct btrfs_trans_handle *
start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
                  enum btrfs_reserve_flush_enum flush)
{
        struct btrfs_trans_handle *h;
        struct btrfs_transaction *cur_trans;
        u64 num_bytes = 0;
        u64 qgroup_reserved = 0;
        bool reloc_reserved = false;
        int ret;

        if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
                return ERR_PTR(-EROFS);

        if (current->journal_info &&
            current->journal_info != (void *)BTRFS_SEND_TRANS_STUB) {
                WARN_ON(type & TRANS_EXTWRITERS);
                h = current->journal_info;
                h->use_count++;
                WARN_ON(h->use_count > 2);
                h->orig_rsv = h->block_rsv;
                h->block_rsv = NULL;
                goto got_it;
        }

        /*
         * Do the reservation before we join the transaction so we can do all
         * the appropriate flushing if need be.
         */
        if (num_items > 0 && root != root->fs_info->chunk_root) {
                if (root->fs_info->quota_enabled &&
                    is_fstree(root->root_key.objectid)) {
                        qgroup_reserved = num_items * root->leafsize;
                        ret = btrfs_qgroup_reserve(root, qgroup_reserved);
                        if (ret)
                                return ERR_PTR(ret);
                }

                num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
                /*
                 * Do the reservation for the relocation root creation
                 */
                if (unlikely(need_reserve_reloc_root(root))) {
                        num_bytes += root->nodesize;
                        reloc_reserved = true;
                }

                ret = btrfs_block_rsv_add(root,
                                          &root->fs_info->trans_block_rsv,
                                          num_bytes, flush);
                if (ret)
                        goto reserve_fail;
        }
again:
        h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
        if (!h) {
                ret = -ENOMEM;
                goto alloc_fail;
        }

        /*
         * If we are JOIN_NOLOCK we're already committing a transaction and
         * waiting on this guy, so we don't need to do the sb_start_intwrite
         * because we're already holding a ref.  We need this because we could
         * have raced in and done an fsync() on a file, which can kick off a
         * commit, and then we deadlock with somebody doing a freeze.
         *
         * If we are ATTACH, it means we just want to catch the current
         * transaction and commit it, so we needn't do sb_start_intwrite().
         */
        if (type & __TRANS_FREEZABLE)
                sb_start_intwrite(root->fs_info->sb);

        if (may_wait_transaction(root, type))
                wait_current_trans(root);

        do {
                ret = join_transaction(root, type);
                if (ret == -EBUSY) {
                        wait_current_trans(root);
                        if (unlikely(type == TRANS_ATTACH))
                                ret = -ENOENT;
                }
        } while (ret == -EBUSY);

        if (ret < 0) {
                /* We must get the transaction if we are JOIN_NOLOCK. */
                BUG_ON(type == TRANS_JOIN_NOLOCK);
                goto join_fail;
        }

        cur_trans = root->fs_info->running_transaction;

        h->transid = cur_trans->transid;
        h->transaction = cur_trans;
        h->blocks_used = 0;
        h->bytes_reserved = 0;
        h->root = root;
        h->delayed_ref_updates = 0;
        h->use_count = 1;
        h->adding_csums = 0;
        h->block_rsv = NULL;
        h->orig_rsv = NULL;
        h->aborted = 0;
        h->qgroup_reserved = 0;
        h->delayed_ref_elem.seq = 0;
        h->type = type;
        h->allocating_chunk = false;
        h->reloc_reserved = false;
        h->sync = false;
        INIT_LIST_HEAD(&h->qgroup_ref_list);
        INIT_LIST_HEAD(&h->new_bgs);

        smp_mb();
        if (cur_trans->state >= TRANS_STATE_BLOCKED &&
            may_wait_transaction(root, type)) {
                btrfs_commit_transaction(h, root);
                goto again;
        }

        if (num_bytes) {
                trace_btrfs_space_reservation(root->fs_info, "transaction",
                                              h->transid, num_bytes, 1);
                h->block_rsv = &root->fs_info->trans_block_rsv;
                h->bytes_reserved = num_bytes;
                h->reloc_reserved = reloc_reserved;
        }
        h->qgroup_reserved = qgroup_reserved;

got_it:
        btrfs_record_root_in_trans(h, root);

        if (!current->journal_info && type != TRANS_USERSPACE)
                current->journal_info = h;
        return h;

join_fail:
        if (type & __TRANS_FREEZABLE)
                sb_end_intwrite(root->fs_info->sb);
        kmem_cache_free(btrfs_trans_handle_cachep, h);
alloc_fail:
        if (num_bytes)
                btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
                                        num_bytes);
reserve_fail:
        if (qgroup_reserved)
                btrfs_qgroup_free(root, qgroup_reserved);
        return ERR_PTR(ret);
}

struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
                                                   int num_items)
{
        return start_transaction(root, num_items, TRANS_START,
                                 BTRFS_RESERVE_FLUSH_ALL);
}
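
/*
 * A minimal usage sketch (hypothetical caller, not from this file):
 *
 *      trans = btrfs_start_transaction(root, 1);
 *      if (IS_ERR(trans))
 *              return PTR_ERR(trans);
 *      ...dirty at most one item's worth of metadata...
 *      return btrfs_end_transaction(trans, root);
 *
 * num_items sizes the block reservation via
 * btrfs_calc_trans_metadata_size(), so callers should not dirty more
 * metadata than they declared.
 */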

struct btrfs_trans_handle *btrfs_start_transaction_lflush(
                                        struct btrfs_root *root, int num_items)
{
        return start_transaction(root, num_items, TRANS_START,
                                 BTRFS_RESERVE_FLUSH_LIMIT);
}

struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
{
        return start_transaction(root, 0, TRANS_JOIN, 0);
}

struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
{
        return start_transaction(root, 0, TRANS_JOIN_NOLOCK, 0);
}

struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
{
        return start_transaction(root, 0, TRANS_USERSPACE, 0);
}

/*
 * btrfs_attach_transaction() - catch the running transaction
 *
 * It is used when we want to commit the current transaction, but
 * don't want to start a new one.
 *
 * Note: If this function returns -ENOENT, it just means there is no
 * running transaction. But it is possible that the inactive transaction
 * is still in memory, not fully on disk. If you want to make sure there
 * is no inactive transaction in the fs when -ENOENT is returned, you
 * should invoke
 *     btrfs_attach_transaction_barrier()
 */
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root)
{
        return start_transaction(root, 0, TRANS_ATTACH, 0);
}

/*
 * btrfs_attach_transaction_barrier() - catch the running transaction
 *
 * It is similar to the above function, the difference is that this one
 * will wait for all inactive transactions until they fully complete.
 */
struct btrfs_trans_handle *
btrfs_attach_transaction_barrier(struct btrfs_root *root)
{
        struct btrfs_trans_handle *trans;

        trans = start_transaction(root, 0, TRANS_ATTACH, 0);
        if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT)
                btrfs_wait_for_commit(root, 0);

        return trans;
}

/* wait for a transaction commit to be fully complete */
static noinline void wait_for_commit(struct btrfs_root *root,
                                    struct btrfs_transaction *commit)
{
        wait_event(commit->commit_wait, commit->state == TRANS_STATE_COMPLETED);
}

int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
{
        struct btrfs_transaction *cur_trans = NULL, *t;
        int ret = 0;

        if (transid) {
                if (transid <= root->fs_info->last_trans_committed)
                        goto out;

                ret = -EINVAL;
                /* find specified transaction */
                spin_lock(&root->fs_info->trans_lock);
                list_for_each_entry(t, &root->fs_info->trans_list, list) {
                        if (t->transid == transid) {
                                cur_trans = t;
                                atomic_inc(&cur_trans->use_count);
                                ret = 0;
                                break;
                        }
                        if (t->transid > transid) {
                                ret = 0;
                                break;
                        }
                }
                spin_unlock(&root->fs_info->trans_lock);
                /* The specified transaction doesn't exist */
                if (!cur_trans)
                        goto out;
        } else {
                /* find newest transaction that is committing | committed */
                spin_lock(&root->fs_info->trans_lock);
                list_for_each_entry_reverse(t, &root->fs_info->trans_list,
                                            list) {
                        if (t->state >= TRANS_STATE_COMMIT_START) {
                                if (t->state == TRANS_STATE_COMPLETED)
                                        break;
                                cur_trans = t;
                                atomic_inc(&cur_trans->use_count);
                                break;
                        }
                }
                spin_unlock(&root->fs_info->trans_lock);
                if (!cur_trans)
                        goto out;  /* nothing committing|committed */
        }

        wait_for_commit(root, cur_trans);
        btrfs_put_transaction(cur_trans);
out:
        return ret;
}

void btrfs_throttle(struct btrfs_root *root)
{
        if (!atomic_read(&root->fs_info->open_ioctl_trans))
                wait_current_trans(root);
}

static int should_end_transaction(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root)
{
        if (root->fs_info->global_block_rsv.space_info->full &&
            btrfs_check_space_for_delayed_refs(trans, root))
                return 1;

        return !!btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
}
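
/*
 * Reading of the above: end the transaction early either when delayed
 * refs need room while the global reserve's space_info is already full,
 * or when the global block reserve fails its fullness check (the factor
 * of 5 is assumed, per btrfs_block_rsv_check(), to mean less than
 * roughly half of the reserve's size is currently covered).
 */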

int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root)
{
        struct btrfs_transaction *cur_trans = trans->transaction;
        int updates;
        int err;

        smp_mb();
        if (cur_trans->state >= TRANS_STATE_BLOCKED ||
            cur_trans->delayed_refs.flushing)
                return 1;

        updates = trans->delayed_ref_updates;
        trans->delayed_ref_updates = 0;
        if (updates) {
                err = btrfs_run_delayed_refs(trans, root, updates);
                if (err) /* Error code will also eval true */
                        return err;
        }

        return should_end_transaction(trans, root);
}

static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, int throttle)
{
        struct btrfs_transaction *cur_trans = trans->transaction;
        struct btrfs_fs_info *info = root->fs_info;
        unsigned long cur = trans->delayed_ref_updates;
        int lock = (trans->type != TRANS_JOIN_NOLOCK);
        int err = 0;

        if (trans->use_count > 1) {
                trans->use_count--;
                trans->block_rsv = trans->orig_rsv;
                return 0;
        }

        /*
         * do the qgroup accounting as early as possible
         */
        err = btrfs_delayed_refs_qgroup_accounting(trans, info);

        btrfs_trans_release_metadata(trans, root);
        trans->block_rsv = NULL;

        if (trans->qgroup_reserved) {
                /*
                 * the same root has to be passed here between start_transaction
                 * and end_transaction. Subvolume quota depends on this.
                 */
                btrfs_qgroup_free(trans->root, trans->qgroup_reserved);
                trans->qgroup_reserved = 0;
        }

        if (!list_empty(&trans->new_bgs))
                btrfs_create_pending_block_groups(trans, root);

        trans->delayed_ref_updates = 0;
        if (!trans->sync && btrfs_should_throttle_delayed_refs(trans, root)) {
                cur = max_t(unsigned long, cur, 32);
                trans->delayed_ref_updates = 0;
                btrfs_run_delayed_refs(trans, root, cur);
        }

        btrfs_trans_release_metadata(trans, root);
        trans->block_rsv = NULL;

        if (!list_empty(&trans->new_bgs))
                btrfs_create_pending_block_groups(trans, root);

        if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
            should_end_transaction(trans, root) &&
            ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
                spin_lock(&info->trans_lock);
                if (cur_trans->state == TRANS_STATE_RUNNING)
                        cur_trans->state = TRANS_STATE_BLOCKED;
                spin_unlock(&info->trans_lock);
        }

        if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
                if (throttle)
                        return btrfs_commit_transaction(trans, root);
                else
                        wake_up_process(info->transaction_kthread);
        }

        if (trans->type & __TRANS_FREEZABLE)
                sb_end_intwrite(root->fs_info->sb);

        WARN_ON(cur_trans != info->running_transaction);
        WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
        atomic_dec(&cur_trans->num_writers);
        extwriter_counter_dec(cur_trans, trans->type);

        smp_mb();
        if (waitqueue_active(&cur_trans->writer_wait))
                wake_up(&cur_trans->writer_wait);
        btrfs_put_transaction(cur_trans);

        if (current->journal_info == trans)
                current->journal_info = NULL;

        if (throttle)
                btrfs_run_delayed_iputs(root);

        if (trans->aborted ||
            test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
                wake_up_process(info->transaction_kthread);
                err = -EIO;
        }
        assert_qgroups_uptodate(trans);

        kmem_cache_free(btrfs_trans_handle_cachep, trans);
        return err;
}

int btrfs_end_transaction(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root)
{
        return __btrfs_end_transaction(trans, root, 0);
}

int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root)
{
        return __btrfs_end_transaction(trans, root, 1);
}

/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are sent to disk but does not wait on them
 */
int btrfs_write_marked_extents(struct btrfs_root *root,
                               struct extent_io_tree *dirty_pages, int mark)
{
        int err = 0;
        int werr = 0;
        struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
        struct extent_state *cached_state = NULL;
        u64 start = 0;
        u64 end;

        while (!find_first_extent_bit(dirty_pages, start, &start, &end,
                                      mark, &cached_state)) {
                convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT,
                                   mark, &cached_state, GFP_NOFS);
                cached_state = NULL;
                err = filemap_fdatawrite_range(mapping, start, end);
                if (err)
                        werr = err;
                cond_resched();
                start = end + 1;
        }
        if (err)
                werr = err;
        return werr;
}
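
/*
 * Note the handoff between the two passes: btrfs_write_marked_extents()
 * converts the given mark to EXTENT_NEED_WAIT as it issues writeback,
 * and btrfs_wait_marked_extents() below consumes exactly that bit, so
 * the wait pass only touches ranges the write pass actually submitted.
 */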

/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are on disk for transaction or log commit.  We wait
 * on all the pages and clear them from the dirty pages state tree
 */
int btrfs_wait_marked_extents(struct btrfs_root *root,
                              struct extent_io_tree *dirty_pages, int mark)
{
        int err = 0;
        int werr = 0;
        struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
        struct extent_state *cached_state = NULL;
        u64 start = 0;
        u64 end;

        while (!find_first_extent_bit(dirty_pages, start, &start, &end,
                                      EXTENT_NEED_WAIT, &cached_state)) {
                clear_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT,
                                 0, 0, &cached_state, GFP_NOFS);
                err = filemap_fdatawait_range(mapping, start, end);
                if (err)
                        werr = err;
                cond_resched();
                start = end + 1;
        }
        if (err)
                werr = err;
        return werr;
}

/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are on disk for transaction or log commit
 */
static int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
                                struct extent_io_tree *dirty_pages, int mark)
{
        int ret;
        int ret2;
        struct blk_plug plug;

        blk_start_plug(&plug);
        ret = btrfs_write_marked_extents(root, dirty_pages, mark);
        blk_finish_plug(&plug);
        ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);

        if (ret)
                return ret;
        if (ret2)
                return ret2;
        return 0;
}
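
/*
 * The blk_plug around the write pass lets the block layer batch the
 * btree writeback into larger requests before the wait pass blocks on
 * it; if both passes fail, the write pass's error is the one reported.
 */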

int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root)
{
        if (!trans || !trans->transaction) {
                struct inode *btree_inode;
                btree_inode = root->fs_info->btree_inode;
                return filemap_write_and_wait(btree_inode->i_mapping);
        }
        return btrfs_write_and_wait_marked_extents(root,
                                           &trans->transaction->dirty_pages,
                                           EXTENT_DIRTY);
}

/*
 * this is used to update the root pointer in the tree of tree roots.
 *
 * But, in the case of the extent allocation tree, updating the root
 * pointer may allocate blocks which may change the root of the extent
 * allocation tree.
 *
 * So, this loops and repeats and makes sure the cowonly root didn't
 * change while the root pointer was being updated in the metadata.
 */
static int update_cowonly_root(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root)
{
        int ret;
        u64 old_root_bytenr;
        u64 old_root_used;
        struct btrfs_root *tree_root = root->fs_info->tree_root;

        old_root_used = btrfs_root_used(&root->root_item);
        btrfs_write_dirty_block_groups(trans, root);

        while (1) {
                old_root_bytenr = btrfs_root_bytenr(&root->root_item);
                if (old_root_bytenr == root->node->start &&
                    old_root_used == btrfs_root_used(&root->root_item))
                        break;

                btrfs_set_root_node(&root->root_item, root->node);
                ret = btrfs_update_root(trans, tree_root,
                                        &root->root_key,
                                        &root->root_item);
                if (ret)
                        return ret;

                old_root_used = btrfs_root_used(&root->root_item);
                ret = btrfs_write_dirty_block_groups(trans, root);
                if (ret)
                        return ret;
        }

        return 0;
}

/*
 * update all the cowonly tree roots on disk
 *
 * The error handling in this function may not be obvious. Any of the
 * failures will cause the file system to go offline. We still need
 * to clean up the delayed refs.
 */
static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
                                         struct btrfs_root *root)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct list_head *next;
        struct extent_buffer *eb;
        int ret;

        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        if (ret)
                return ret;

        eb = btrfs_lock_root_node(fs_info->tree_root);
        ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
                              0, &eb);
        btrfs_tree_unlock(eb);
        free_extent_buffer(eb);

        if (ret)
                return ret;

        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        if (ret)
                return ret;

        ret = btrfs_run_dev_stats(trans, root->fs_info);
        if (ret)
                return ret;
        ret = btrfs_run_dev_replace(trans, root->fs_info);
        if (ret)
                return ret;
        ret = btrfs_run_qgroups(trans, root->fs_info);
        if (ret)
                return ret;

        /* run_qgroups might have added some more refs */
        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        if (ret)
                return ret;

        while (!list_empty(&fs_info->dirty_cowonly_roots)) {
                next = fs_info->dirty_cowonly_roots.next;
                list_del_init(next);
                root = list_entry(next, struct btrfs_root, dirty_list);

                if (root != fs_info->extent_root)
                        list_add_tail(&root->dirty_list,
                                      &trans->transaction->switch_commits);
                ret = update_cowonly_root(trans, root);
                if (ret)
                        return ret;
        }

        list_add_tail(&fs_info->extent_root->dirty_list,
                      &trans->transaction->switch_commits);
        btrfs_after_dev_replace_commit(fs_info);

        return 0;
}
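
/*
 * Ordering note: the extent root is deliberately added to switch_commits
 * only after the dirty_cowonly_roots loop has drained, since updating
 * any of the other cowonly roots can re-dirty the extent allocation
 * tree (see the comment above update_cowonly_root()).
 */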

/*
 * dead roots are old snapshots that need to be deleted.  This adds the
 * given root to the list of dead roots that need to be deleted
 */
void btrfs_add_dead_root(struct btrfs_root *root)
{
        spin_lock(&root->fs_info->trans_lock);
        if (list_empty(&root->root_list))
                list_add_tail(&root->root_list, &root->fs_info->dead_roots);
        spin_unlock(&root->fs_info->trans_lock);
}

/*
 * update all the fs-tree roots on disk
 */
static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root)
{
        struct btrfs_root *gang[8];
        struct btrfs_fs_info *fs_info = root->fs_info;
        int i;
        int ret;
        int err = 0;

        spin_lock(&fs_info->fs_roots_radix_lock);
        while (1) {
                ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
                                                 (void **)gang, 0,
                                                 ARRAY_SIZE(gang),
                                                 BTRFS_ROOT_TRANS_TAG);
                if (ret == 0)
                        break;
                for (i = 0; i < ret; i++) {
                        root = gang[i];
                        radix_tree_tag_clear(&fs_info->fs_roots_radix,
                                        (unsigned long)root->root_key.objectid,
                                        BTRFS_ROOT_TRANS_TAG);
                        spin_unlock(&fs_info->fs_roots_radix_lock);

                        btrfs_free_log(trans, root);
                        btrfs_update_reloc_root(trans, root);
                        btrfs_orphan_commit_root(trans, root);

                        btrfs_save_ino_cache(root, trans);

                        /* see comments in should_cow_block() */
                        clear_bit(BTRFS_ROOT_FORCE_COW, &root->state);
                        smp_mb__after_clear_bit();

                        if (root->commit_root != root->node) {
                                list_add_tail(&root->dirty_list,
                                        &trans->transaction->switch_commits);
                                btrfs_set_root_node(&root->root_item,
                                                    root->node);
                        }

                        err = btrfs_update_root(trans, fs_info->tree_root,
                                                &root->root_key,
                                                &root->root_item);
                        spin_lock(&fs_info->fs_roots_radix_lock);
                        if (err)
                                break;
                }
        }
        spin_unlock(&fs_info->fs_roots_radix_lock);
        return err;
}

/*
 * defrag a given btree.
 * Every leaf in the btree is read and defragged.
 */
int btrfs_defrag_root(struct btrfs_root *root)
{
        struct btrfs_fs_info *info = root->fs_info;
        struct btrfs_trans_handle *trans;
        int ret;

        if (test_and_set_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state))
                return 0;

        while (1) {
                trans = btrfs_start_transaction(root, 0);
                if (IS_ERR(trans))
                        return PTR_ERR(trans);

                ret = btrfs_defrag_leaves(trans, root);

                btrfs_end_transaction(trans, root);
                btrfs_btree_balance_dirty(info->tree_root);
                cond_resched();

                if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN)
                        break;

                if (btrfs_defrag_cancelled(root->fs_info)) {
                        pr_debug("BTRFS: defrag_root cancelled\n");
                        ret = -EAGAIN;
                        break;
                }
        }
        clear_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state);
        return ret;
}

/*
 * new snapshots need to be created at a very specific time in the
 * transaction commit.  This does the actual creation.
 *
 * Note:
 * If an error that may affect the commit of the current transaction
 * happens, we should return the error number. If an error happens that
 * only affects the creation of the pending snapshots, just return 0.
 */
static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
                                   struct btrfs_fs_info *fs_info,
                                   struct btrfs_pending_snapshot *pending)
{
        struct btrfs_key key;
        struct btrfs_root_item *new_root_item;
        struct btrfs_root *tree_root = fs_info->tree_root;
        struct btrfs_root *root = pending->root;
        struct btrfs_root *parent_root;
        struct btrfs_block_rsv *rsv;
        struct inode *parent_inode;
        struct btrfs_path *path;
        struct btrfs_dir_item *dir_item;
        struct dentry *dentry;
        struct extent_buffer *tmp;
        struct extent_buffer *old;
        struct timespec cur_time = CURRENT_TIME;
        int ret = 0;
        u64 to_reserve = 0;
        u64 index = 0;
        u64 objectid;
        u64 root_flags;
        uuid_le new_uuid;

        path = btrfs_alloc_path();
        if (!path) {
                pending->error = -ENOMEM;
                return 0;
        }

        new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
        if (!new_root_item) {
                pending->error = -ENOMEM;
                goto root_item_alloc_fail;
        }

        pending->error = btrfs_find_free_objectid(tree_root, &objectid);
        if (pending->error)
                goto no_free_objectid;

        btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);

        if (to_reserve > 0) {
                pending->error = btrfs_block_rsv_add(root,
                                                     &pending->block_rsv,
                                                     to_reserve,
                                                     BTRFS_RESERVE_NO_FLUSH);
                if (pending->error)
                        goto no_free_objectid;
        }

        pending->error = btrfs_qgroup_inherit(trans, fs_info,
                                              root->root_key.objectid,
                                              objectid, pending->inherit);
        if (pending->error)
                goto no_free_objectid;

        key.objectid = objectid;
        key.offset = (u64)-1;
        key.type = BTRFS_ROOT_ITEM_KEY;

        rsv = trans->block_rsv;
        trans->block_rsv = &pending->block_rsv;
        trans->bytes_reserved = trans->block_rsv->reserved;

        dentry = pending->dentry;
        parent_inode = pending->dir;
        parent_root = BTRFS_I(parent_inode)->root;
        record_root_in_trans(trans, parent_root);

        /*
         * insert the directory item
         */
        ret = btrfs_set_inode_index(parent_inode, &index);
        BUG_ON(ret); /* -ENOMEM */

        /* check if there is a file/dir which has the same name. */
        dir_item = btrfs_lookup_dir_item(NULL, parent_root, path,
                                         btrfs_ino(parent_inode),
                                         dentry->d_name.name,
                                         dentry->d_name.len, 0);
        if (dir_item != NULL && !IS_ERR(dir_item)) {
                pending->error = -EEXIST;
                goto dir_item_existed;
        } else if (IS_ERR(dir_item)) {
                ret = PTR_ERR(dir_item);
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }
        btrfs_release_path(path);

        /*
         * pull in the delayed directory update and the delayed inode item;
         * otherwise we corrupt the FS during snapshot
         */
        ret = btrfs_run_delayed_items(trans, root);
        if (ret) {      /* Transaction aborted */
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }

        record_root_in_trans(trans, root);
        btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
        memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
        btrfs_check_and_init_root_item(new_root_item);

        root_flags = btrfs_root_flags(new_root_item);
        if (pending->readonly)
                root_flags |= BTRFS_ROOT_SUBVOL_RDONLY;
        else
                root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
        btrfs_set_root_flags(new_root_item, root_flags);

        btrfs_set_root_generation_v2(new_root_item,
                        trans->transid);
        uuid_le_gen(&new_uuid);
        memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
        memcpy(new_root_item->parent_uuid, root->root_item.uuid,
                        BTRFS_UUID_SIZE);
        if (!(root_flags & BTRFS_ROOT_SUBVOL_RDONLY)) {
                memset(new_root_item->received_uuid, 0,
                       sizeof(new_root_item->received_uuid));
                memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
                memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
                btrfs_set_root_stransid(new_root_item, 0);
                btrfs_set_root_rtransid(new_root_item, 0);
        }
        btrfs_set_stack_timespec_sec(&new_root_item->otime, cur_time.tv_sec);
        btrfs_set_stack_timespec_nsec(&new_root_item->otime, cur_time.tv_nsec);
        btrfs_set_root_otransid(new_root_item, trans->transid);

        old = btrfs_lock_root_node(root);
        ret = btrfs_cow_block(trans, root, old, NULL, 0, &old);
        if (ret) {
                btrfs_tree_unlock(old);
                free_extent_buffer(old);
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }

        btrfs_set_lock_blocking(old);

        ret = btrfs_copy_root(trans, root, old, &tmp, objectid);
        /* clean up in any case */
        btrfs_tree_unlock(old);
        free_extent_buffer(old);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }

        /* see comments in should_cow_block() */
        set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
        smp_wmb();

        btrfs_set_root_node(new_root_item, tmp);
        /* record when the snapshot was created in key.offset */
        key.offset = trans->transid;
        ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
        btrfs_tree_unlock(tmp);
        free_extent_buffer(tmp);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }

        /*
         * insert root back/forward references
         */
        ret = btrfs_add_root_ref(trans, tree_root, objectid,
                                 parent_root->root_key.objectid,
                                 btrfs_ino(parent_inode), index,
                                 dentry->d_name.name, dentry->d_name.len);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }

        key.offset = (u64)-1;
        pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
        if (IS_ERR(pending->snap)) {
                ret = PTR_ERR(pending->snap);
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }

        ret = btrfs_reloc_post_snapshot(trans, pending);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }

        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }

        ret = btrfs_insert_dir_item(trans, parent_root,
                                    dentry->d_name.name, dentry->d_name.len,
                                    parent_inode, &key,
                                    BTRFS_FT_DIR, index);
        /* We have checked the name at the beginning, so it is impossible. */
        BUG_ON(ret == -EEXIST || ret == -EOVERFLOW);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }

        btrfs_i_size_write(parent_inode, parent_inode->i_size +
                                         dentry->d_name.len * 2);
        parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
        ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }
        ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, new_uuid.b,
                                  BTRFS_UUID_KEY_SUBVOL, objectid);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }
        if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) {
                ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
                                          new_root_item->received_uuid,
                                          BTRFS_UUID_KEY_RECEIVED_SUBVOL,
                                          objectid);
                if (ret && ret != -EEXIST) {
                        btrfs_abort_transaction(trans, root, ret);
                        goto fail;
                }
        }
fail:
        pending->error = ret;
dir_item_existed:
        trans->block_rsv = rsv;
        trans->bytes_reserved = 0;
no_free_objectid:
        kfree(new_root_item);
root_item_alloc_fail:
        btrfs_free_path(path);
        return ret;
}
1367
1368 /*
1369  * create all the snapshots we've scheduled for creation
1370  */
1371 static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
1372                                              struct btrfs_fs_info *fs_info)
1373 {
1374         struct btrfs_pending_snapshot *pending, *next;
1375         struct list_head *head = &trans->transaction->pending_snapshots;
1376         int ret = 0;
1377
1378         list_for_each_entry_safe(pending, next, head, list) {
1379                 list_del(&pending->list);
1380                 ret = create_pending_snapshot(trans, fs_info, pending);
1381                 if (ret)
1382                         break;
1383         }
1384         return ret;
1385 }
1386
1387 static void update_super_roots(struct btrfs_root *root)
1388 {
1389         struct btrfs_root_item *root_item;
1390         struct btrfs_super_block *super;
1391
1392         super = root->fs_info->super_copy;
1393
1394         root_item = &root->fs_info->chunk_root->root_item;
1395         super->chunk_root = root_item->bytenr;
1396         super->chunk_root_generation = root_item->generation;
1397         super->chunk_root_level = root_item->level;
1398
1399         root_item = &root->fs_info->tree_root->root_item;
1400         super->root = root_item->bytenr;
1401         super->generation = root_item->generation;
1402         super->root_level = root_item->level;
1403         if (btrfs_test_opt(root, SPACE_CACHE))
1404                 super->cache_generation = root_item->generation;
1405         if (root->fs_info->update_uuid_tree_gen)
1406                 super->uuid_tree_generation = root_item->generation;
1407 }
1408
1409 int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
1410 {
1411         struct btrfs_transaction *trans;
1412         int ret = 0;
1413
1414         spin_lock(&info->trans_lock);
1415         trans = info->running_transaction;
1416         if (trans)
1417                 ret = (trans->state >= TRANS_STATE_COMMIT_START);
1418         spin_unlock(&info->trans_lock);
1419         return ret;
1420 }
1421
1422 int btrfs_transaction_blocked(struct btrfs_fs_info *info)
1423 {
1424         struct btrfs_transaction *trans;
1425         int ret = 0;
1426
1427         spin_lock(&info->trans_lock);
1428         trans = info->running_transaction;
1429         if (trans)
1430                 ret = is_transaction_blocked(trans);
1431         spin_unlock(&info->trans_lock);
1432         return ret;
1433 }
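
/*
 * A minimal usage sketch (hypothetical helper, not part of this file): both
 * queries above only sample ->running_transaction under trans_lock, so the
 * answer may already be stale by the time the caller acts on it; treat it
 * as a hint rather than a guarantee.
 */
static inline bool example_commit_in_flight(struct btrfs_fs_info *info)
{
        return btrfs_transaction_in_commit(info) ||
               btrfs_transaction_blocked(info);
}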
1434
1435 /*
1436  * wait for the current transaction commit to start and block subsequent
1437  * transaction joins
1438  */
1439 static void wait_current_trans_commit_start(struct btrfs_root *root,
1440                                             struct btrfs_transaction *trans)
1441 {
1442         wait_event(root->fs_info->transaction_blocked_wait,
1443                    trans->state >= TRANS_STATE_COMMIT_START ||
1444                    trans->aborted);
1445 }
1446
1447 /*
1448  * wait for the current transaction to start and then become unblocked.
1449  * caller holds ref.
1450  */
1451 static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
1452                                          struct btrfs_transaction *trans)
1453 {
1454         wait_event(root->fs_info->transaction_wait,
1455                    trans->state >= TRANS_STATE_UNBLOCKED ||
1456                    trans->aborted);
1457 }
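
/*
 * Note: both waits above are paired with wake_up() calls in
 * btrfs_commit_transaction(): transaction_blocked_wait is woken once the
 * commit reaches TRANS_STATE_COMMIT_START, and transaction_wait once it
 * reaches TRANS_STATE_UNBLOCKED.
 */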
1458
1459 /*
1460  * commit transactions asynchronously. once btrfs_commit_transaction_async
1461  * returns, any subsequent transaction will not be allowed to join.
1462  */
1463 struct btrfs_async_commit {
1464         struct btrfs_trans_handle *newtrans;
1465         struct btrfs_root *root;
1466         struct work_struct work;
1467 };
1468
1469 static void do_async_commit(struct work_struct *work)
1470 {
1471         struct btrfs_async_commit *ac =
1472                 container_of(work, struct btrfs_async_commit, work);
1473
1474         /*
1475          * We've got freeze protection passed with the transaction.
1476          * Tell lockdep about it.
1477          */
1478         if (ac->newtrans->type & __TRANS_FREEZABLE)
1479                 rwsem_acquire_read(
1480                      &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
1481                      0, 1, _THIS_IP_);
1482
1483         current->journal_info = ac->newtrans;
1484
1485         btrfs_commit_transaction(ac->newtrans, ac->root);
1486         kfree(ac);
1487 }
1488
1489 int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1490                                    struct btrfs_root *root,
1491                                    int wait_for_unblock)
1492 {
1493         struct btrfs_async_commit *ac;
1494         struct btrfs_transaction *cur_trans;
1495
1496         ac = kmalloc(sizeof(*ac), GFP_NOFS);
1497         if (!ac)
1498                 return -ENOMEM;
1499
1500         INIT_WORK(&ac->work, do_async_commit);
1501         ac->root = root;
1502         ac->newtrans = btrfs_join_transaction(root);
1503         if (IS_ERR(ac->newtrans)) {
1504                 int err = PTR_ERR(ac->newtrans);
1505                 kfree(ac);
1506                 return err;
1507         }
1508
1509         /* take transaction reference */
1510         cur_trans = trans->transaction;
1511         atomic_inc(&cur_trans->use_count);
1512
1513         btrfs_end_transaction(trans, root);
1514
1515         /*
1516          * Tell lockdep we've released the freeze rwsem, since the
1517          * async commit thread will be the one to unlock it.
1518          */
1519         if (ac->newtrans->type & __TRANS_FREEZABLE)
1520                 rwsem_release(
1521                         &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
1522                         1, _THIS_IP_);
1523
1524         schedule_work(&ac->work);
1525
1526         /* wait for transaction to start and unblock */
1527         if (wait_for_unblock)
1528                 wait_current_trans_commit_start_and_unblock(root, cur_trans);
1529         else
1530                 wait_current_trans_commit_start(root, cur_trans);
1531
1532         if (current->journal_info == trans)
1533                 current->journal_info = NULL;
1534
1535         btrfs_put_transaction(cur_trans);
1536         return 0;
1537 }
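
/*
 * Usage sketch (hypothetical caller, for illustration only): kick off a
 * commit in the background and return as soon as new transactions are
 * allowed to start again, instead of blocking for the whole commit.
 */
static int example_commit_async(struct btrfs_root *root)
{
        struct btrfs_trans_handle *trans;

        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans))
                return PTR_ERR(trans);
        /* wait_for_unblock == 1: wait until the commit reaches
         * TRANS_STATE_UNBLOCKED before returning. */
        return btrfs_commit_transaction_async(trans, root, 1);
}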
1538
1539
1540 static void cleanup_transaction(struct btrfs_trans_handle *trans,
1541                                 struct btrfs_root *root, int err)
1542 {
1543         struct btrfs_transaction *cur_trans = trans->transaction;
1544         DEFINE_WAIT(wait);
1545
1546         WARN_ON(trans->use_count > 1);
1547
1548         btrfs_abort_transaction(trans, root, err);
1549
1550         spin_lock(&root->fs_info->trans_lock);
1551
1552         /*
1553          * If the transaction is removed from the list, it means this
1554          * transaction has already been committed successfully, so it is
1555          * a bug to reach this cleanup function for it.
1556          */
1557         BUG_ON(list_empty(&cur_trans->list));
1558
1559         list_del_init(&cur_trans->list);
1560         if (cur_trans == root->fs_info->running_transaction) {
1561                 cur_trans->state = TRANS_STATE_COMMIT_DOING;
1562                 spin_unlock(&root->fs_info->trans_lock);
1563                 wait_event(cur_trans->writer_wait,
1564                            atomic_read(&cur_trans->num_writers) == 1);
1565
1566                 spin_lock(&root->fs_info->trans_lock);
1567         }
1568         spin_unlock(&root->fs_info->trans_lock);
1569
1570         btrfs_cleanup_one_transaction(trans->transaction, root);
1571
1572         spin_lock(&root->fs_info->trans_lock);
1573         if (cur_trans == root->fs_info->running_transaction)
1574                 root->fs_info->running_transaction = NULL;
1575         spin_unlock(&root->fs_info->trans_lock);
1576
1577         if (trans->type & __TRANS_FREEZABLE)
1578                 sb_end_intwrite(root->fs_info->sb);
1579         btrfs_put_transaction(cur_trans);
1580         btrfs_put_transaction(cur_trans);
1581
1582         trace_btrfs_transaction_commit(root);
1583
1584         if (current->journal_info == trans)
1585                 current->journal_info = NULL;
1586         btrfs_scrub_cancel(root->fs_info);
1587
1588         kmem_cache_free(btrfs_trans_handle_cachep, trans);
1589 }
1590
1591 static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
1592                                           struct btrfs_root *root)
1593 {
1594         int ret;
1595
1596         ret = btrfs_run_delayed_items(trans, root);
1597         /*
1598          * running the delayed items may have added new refs. account
1599          * them now so that they hinder processing of more delayed refs
1600          * as little as possible.
1601          */
1602         if (ret) {
1603                 btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
1604                 return ret;
1605         }
1606
1607         ret = btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
1608         if (ret)
1609                 return ret;
1610
1611         /*
1612          * rename doesn't use btrfs_join_transaction, so once we
1613          * set the transaction to blocked above, we aren't going
1614          * to get any new ordered operations.  We can safely run
1615          * it here and know for sure that nothing new will be added
1616          * to the list.
1617          */
1618         ret = btrfs_run_ordered_operations(trans, root, 1);
1619
1620         return ret;
1621 }
1622
1623 static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
1624 {
1625         if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
1626                 return btrfs_start_delalloc_roots(fs_info, 1, -1);
1627         return 0;
1628 }
1629
1630 static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
1631 {
1632         if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
1633                 btrfs_wait_ordered_roots(fs_info, -1);
1634 }
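
/*
 * The two helpers above are intended to be used as a pair, as
 * btrfs_commit_transaction() does below: start delalloc writeback early,
 * overlap it with other commit work, and only wait for the ordered
 * extents at the end.  Both are no-ops unless FLUSHONCOMMIT is set.
 * A minimal sketch of the pattern (hypothetical, for illustration):
 */
static int example_flush_on_commit(struct btrfs_fs_info *fs_info)
{
        int ret = btrfs_start_delalloc_flush(fs_info);

        if (ret)
                return ret;
        /* ... other commit preparation can overlap with writeback ... */
        btrfs_wait_delalloc_flush(fs_info);
        return 0;
}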
1635
1636 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1637                              struct btrfs_root *root)
1638 {
1639         struct btrfs_transaction *cur_trans = trans->transaction;
1640         struct btrfs_transaction *prev_trans = NULL;
1641         int ret;
1642
1643         ret = btrfs_run_ordered_operations(trans, root, 0);
1644         if (ret) {
1645                 btrfs_abort_transaction(trans, root, ret);
1646                 btrfs_end_transaction(trans, root);
1647                 return ret;
1648         }
1649
1650         /* Stop the commit early if ->aborted is set */
1651         if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1652                 ret = cur_trans->aborted;
1653                 btrfs_end_transaction(trans, root);
1654                 return ret;
1655         }
1656
1657         /* make a pass through all the delayed refs we have so far;
1658          * any running procs may add more while we are here
1659          */
1660         ret = btrfs_run_delayed_refs(trans, root, 0);
1661         if (ret) {
1662                 btrfs_end_transaction(trans, root);
1663                 return ret;
1664         }
1665
1666         btrfs_trans_release_metadata(trans, root);
1667         trans->block_rsv = NULL;
1668         if (trans->qgroup_reserved) {
1669                 btrfs_qgroup_free(root, trans->qgroup_reserved);
1670                 trans->qgroup_reserved = 0;
1671         }
1672
1673         cur_trans = trans->transaction;
1674
1675         /*
1676          * set the flushing flag so procs in this transaction have to
1677          * start sending their work down.
1678          */
1679         cur_trans->delayed_refs.flushing = 1;
1680         smp_wmb();
1681
1682         if (!list_empty(&trans->new_bgs))
1683                 btrfs_create_pending_block_groups(trans, root);
1684
1685         ret = btrfs_run_delayed_refs(trans, root, 0);
1686         if (ret) {
1687                 btrfs_end_transaction(trans, root);
1688                 return ret;
1689         }
1690
1691         spin_lock(&root->fs_info->trans_lock);
1692         if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
1693                 spin_unlock(&root->fs_info->trans_lock);
1694                 atomic_inc(&cur_trans->use_count);
1695                 ret = btrfs_end_transaction(trans, root);
1696
1697                 wait_for_commit(root, cur_trans);
1698
1699                 btrfs_put_transaction(cur_trans);
1700
1701                 return ret;
1702         }
1703
1704         cur_trans->state = TRANS_STATE_COMMIT_START;
1705         wake_up(&root->fs_info->transaction_blocked_wait);
1706
1707         if (cur_trans->list.prev != &root->fs_info->trans_list) {
1708                 prev_trans = list_entry(cur_trans->list.prev,
1709                                         struct btrfs_transaction, list);
1710                 if (prev_trans->state != TRANS_STATE_COMPLETED) {
1711                         atomic_inc(&prev_trans->use_count);
1712                         spin_unlock(&root->fs_info->trans_lock);
1713
1714                         wait_for_commit(root, prev_trans);
1715
1716                         btrfs_put_transaction(prev_trans);
1717                 } else {
1718                         spin_unlock(&root->fs_info->trans_lock);
1719                 }
1720         } else {
1721                 spin_unlock(&root->fs_info->trans_lock);
1722         }
1723
1724         extwriter_counter_dec(cur_trans, trans->type);
1725
1726         ret = btrfs_start_delalloc_flush(root->fs_info);
1727         if (ret)
1728                 goto cleanup_transaction;
1729
1730         ret = btrfs_flush_all_pending_stuffs(trans, root);
1731         if (ret)
1732                 goto cleanup_transaction;
1733
1734         wait_event(cur_trans->writer_wait,
1735                    extwriter_counter_read(cur_trans) == 0);
1736
1737         /* some pending stuff might be added after the previous flush. */
1738         ret = btrfs_flush_all_pending_stuffs(trans, root);
1739         if (ret)
1740                 goto cleanup_transaction;
1741
1742         btrfs_wait_delalloc_flush(root->fs_info);
1743
1744         btrfs_scrub_pause(root);
1745         /*
1746          * Ok now we need to make sure to block out any other joins while we
1747          * commit the transaction.  We could have started a join before setting
1748          * COMMIT_DOING, so make sure to wait for num_writers to drop to 1 again.
1749          */
1750         spin_lock(&root->fs_info->trans_lock);
1751         cur_trans->state = TRANS_STATE_COMMIT_DOING;
1752         spin_unlock(&root->fs_info->trans_lock);
1753         wait_event(cur_trans->writer_wait,
1754                    atomic_read(&cur_trans->num_writers) == 1);
1755
1756         /* ->aborted might be set after the previous check, so check it */
1757         if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1758                 ret = cur_trans->aborted;
1759                 goto scrub_continue;
1760         }
1761         /*
1762          * the reloc mutex makes sure that we stop
1763          * the balancing code from coming in and moving
1764          * extents around in the middle of the commit
1765          */
1766         mutex_lock(&root->fs_info->reloc_mutex);
1767
1768         /*
1769          * We needn't worry about the delayed items because we will
1770          * deal with them in create_pending_snapshot(), which is the
1771          * core function of the snapshot creation.
1772          */
1773         ret = create_pending_snapshots(trans, root->fs_info);
1774         if (ret) {
1775                 mutex_unlock(&root->fs_info->reloc_mutex);
1776                 goto scrub_continue;
1777         }
1778
1779         /*
1780          * We insert the dir indexes of the snapshots and update the inode
1781          * of the snapshots' parents after the snapshot creation, so there
1782          * are some delayed items which are not dealt with. Now deal with
1783          * them.
1784          *
1785          * We needn't worry that this operation will corrupt the snapshots,
1786          * because all the trees which are snapshotted will be forced to COW
1787          * the nodes and leaves.
1788          */
1789         ret = btrfs_run_delayed_items(trans, root);
1790         if (ret) {
1791                 mutex_unlock(&root->fs_info->reloc_mutex);
1792                 goto scrub_continue;
1793         }
1794
1795         ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1796         if (ret) {
1797                 mutex_unlock(&root->fs_info->reloc_mutex);
1798                 goto scrub_continue;
1799         }
1800
1801         /*
1802          * make sure none of the code above managed to slip in a
1803          * delayed item
1804          */
1805         btrfs_assert_delayed_root_empty(root);
1806
1807         WARN_ON(cur_trans != trans->transaction);
1808
1809         /* commit_cowonly_roots is responsible for getting the
1810          * various roots consistent with each other.  Every pointer
1811          * in the tree of tree roots has to point to the most up to date
1812          * root for every subvolume and other tree.  So, we have to keep
1813          * the tree logging code from jumping in and changing any
1814          * of the trees.
1815          *
1816          * At this point in the commit, there can't be any tree-log
1817          * writers, but a little lower down we drop the trans mutex
1818          * and let new people in.  By holding the tree_log_mutex
1819          * from now until after the super is written, we avoid races
1820          * with the tree-log code.
1821          */
1822         mutex_lock(&root->fs_info->tree_log_mutex);
1823
1824         ret = commit_fs_roots(trans, root);
1825         if (ret) {
1826                 mutex_unlock(&root->fs_info->tree_log_mutex);
1827                 mutex_unlock(&root->fs_info->reloc_mutex);
1828                 goto scrub_continue;
1829         }
1830
1831         /*
1832          * Since the transaction is done, we should set the inode map cache flag
1833          * before any other coming transaction.
1834          */
1835         if (btrfs_test_opt(root, CHANGE_INODE_CACHE))
1836                 btrfs_set_opt(root->fs_info->mount_opt, INODE_MAP_CACHE);
1837         else
1838                 btrfs_clear_opt(root->fs_info->mount_opt, INODE_MAP_CACHE);
1839
1840         /* commit_fs_roots gets rid of all the tree log roots; it is now
1841          * safe to free the log root tree
1842          */
1843         btrfs_free_log_root_tree(trans, root->fs_info);
1844
1845         ret = commit_cowonly_roots(trans, root);
1846         if (ret) {
1847                 mutex_unlock(&root->fs_info->tree_log_mutex);
1848                 mutex_unlock(&root->fs_info->reloc_mutex);
1849                 goto scrub_continue;
1850         }
1851
1852         /*
1853          * The tasks which save the space cache and inode cache may also
1854          * update ->aborted, check it.
1855          */
1856         if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1857                 ret = cur_trans->aborted;
1858                 mutex_unlock(&root->fs_info->tree_log_mutex);
1859                 mutex_unlock(&root->fs_info->reloc_mutex);
1860                 goto scrub_continue;
1861         }
1862
1863         btrfs_prepare_extent_commit(trans, root);
1864
1865         cur_trans = root->fs_info->running_transaction;
1866
1867         btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1868                             root->fs_info->tree_root->node);
1869         list_add_tail(&root->fs_info->tree_root->dirty_list,
1870                       &cur_trans->switch_commits);
1871
1872         btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
1873                             root->fs_info->chunk_root->node);
1874         list_add_tail(&root->fs_info->chunk_root->dirty_list,
1875                       &cur_trans->switch_commits);
1876
1877         switch_commit_roots(cur_trans, root->fs_info);
1878
1879         assert_qgroups_uptodate(trans);
1880         update_super_roots(root);
1881
1882         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
1883         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
1884         memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy,
1885                sizeof(*root->fs_info->super_copy));
1886
1887         spin_lock(&root->fs_info->trans_lock);
1888         cur_trans->state = TRANS_STATE_UNBLOCKED;
1889         root->fs_info->running_transaction = NULL;
1890         spin_unlock(&root->fs_info->trans_lock);
1891         mutex_unlock(&root->fs_info->reloc_mutex);
1892
1893         wake_up(&root->fs_info->transaction_wait);
1894
1895         ret = btrfs_write_and_wait_transaction(trans, root);
1896         if (ret) {
1897                 btrfs_error(root->fs_info, ret,
1898                             "Error while writing out transaction");
1899                 mutex_unlock(&root->fs_info->tree_log_mutex);
1900                 goto scrub_continue;
1901         }
1902
1903         ret = write_ctree_super(trans, root, 0);
1904         if (ret) {
1905                 mutex_unlock(&root->fs_info->tree_log_mutex);
1906                 goto scrub_continue;
1907         }
1908
1909         /*
1910          * the super is written, we can safely allow the tree-loggers
1911          * to go about their business
1912          */
1913         mutex_unlock(&root->fs_info->tree_log_mutex);
1914
1915         btrfs_finish_extent_commit(trans, root);
1916
1917         root->fs_info->last_trans_committed = cur_trans->transid;
1918         /*
1919          * We needn't acquire the lock here because there is no other task
1920          * which can change it.
1921          */
1922         cur_trans->state = TRANS_STATE_COMPLETED;
1923         wake_up(&cur_trans->commit_wait);
1924
1925         spin_lock(&root->fs_info->trans_lock);
1926         list_del_init(&cur_trans->list);
1927         spin_unlock(&root->fs_info->trans_lock);
1928
1929         btrfs_put_transaction(cur_trans);
1930         btrfs_put_transaction(cur_trans);
1931
1932         if (trans->type & __TRANS_FREEZABLE)
1933                 sb_end_intwrite(root->fs_info->sb);
1934
1935         trace_btrfs_transaction_commit(root);
1936
1937         btrfs_scrub_continue(root);
1938
1939         if (current->journal_info == trans)
1940                 current->journal_info = NULL;
1941
1942         kmem_cache_free(btrfs_trans_handle_cachep, trans);
1943
1944         if (current != root->fs_info->transaction_kthread)
1945                 btrfs_run_delayed_iputs(root);
1946
1947         return ret;
1948
1949 scrub_continue:
1950         btrfs_scrub_continue(root);
1951 cleanup_transaction:
1952         btrfs_trans_release_metadata(trans, root);
1953         trans->block_rsv = NULL;
1954         if (trans->qgroup_reserved) {
1955                 btrfs_qgroup_free(root, trans->qgroup_reserved);
1956                 trans->qgroup_reserved = 0;
1957         }
1958         btrfs_warn(root->fs_info, "Skipping commit of aborted transaction.");
1959         if (current->journal_info == trans)
1960                 current->journal_info = NULL;
1961         cleanup_transaction(trans, root, ret);
1962
1963         return ret;
1964 }
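
/*
 * Typical caller pattern (hypothetical, for illustration only): start a
 * transaction, make the tree modifications, then force a full commit.
 * btrfs_commit_transaction() frees the handle on both success and error,
 * so the caller must not touch it afterwards.
 */
static int example_modify_and_commit(struct btrfs_root *root)
{
        struct btrfs_trans_handle *trans;

        trans = btrfs_start_transaction(root, 1);
        if (IS_ERR(trans))
                return PTR_ERR(trans);
        /* ... tree modifications would go here ... */
        return btrfs_commit_transaction(trans, root);
}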
1965
1966 /*
1967  * return < 0 if error
1968  * 0 if there are no more dead_roots at the time of call
1969  * 1 there are more to be processed, call me again
1970  *
1971  * The return value indicates there are certainly more snapshots to delete, but
1972  * if a new one arrives during processing, it may return 0. We don't mind,
1973  * because btrfs_commit_super will poke the cleaner thread and it will process it a
1974  * few seconds later.
1975  */
1976 int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
1977 {
1978         int ret;
1979         struct btrfs_fs_info *fs_info = root->fs_info;
1980
1981         spin_lock(&fs_info->trans_lock);
1982         if (list_empty(&fs_info->dead_roots)) {
1983                 spin_unlock(&fs_info->trans_lock);
1984                 return 0;
1985         }
1986         root = list_first_entry(&fs_info->dead_roots,
1987                         struct btrfs_root, root_list);
1988         list_del_init(&root->root_list);
1989         spin_unlock(&fs_info->trans_lock);
1990
1991         pr_debug("BTRFS: cleaner removing %llu\n", root->objectid);
1992
1993         btrfs_kill_all_delayed_nodes(root);
1994
1995         if (btrfs_header_backref_rev(root->node) <
1996                         BTRFS_MIXED_BACKREF_REV)
1997                 ret = btrfs_drop_snapshot(root, NULL, 0, 0);
1998         else
1999                 ret = btrfs_drop_snapshot(root, NULL, 1, 0);
2000         /*
2001          * If we encounter a transaction abort during snapshot cleaning, we
2002          * don't want to crash here
2003          */
2004         return (ret < 0) ? 0 : 1;
2005 }
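
/*
 * Usage sketch (hypothetical): drain the dead roots the way a cleaner
 * loop would, calling again while the function reports more work and
 * stopping once it returns 0.
 */
static void example_drain_dead_roots(struct btrfs_root *root)
{
        while (btrfs_clean_one_deleted_snapshot(root) > 0)
                cond_resched();
}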