Btrfs: don't access non-existent key when csum tree is empty
fs/btrfs/transaction.c (linux-2.6-block.git)
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02110-1301, USA.
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include <linux/uuid.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "locking.h"
#include "tree-log.h"
#include "inode-map.h"
#include "volumes.h"
#include "dev-replace.h"

#define BTRFS_ROOT_TRANS_TAG 0

static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
        [TRANS_STATE_RUNNING]           = 0U,
        [TRANS_STATE_BLOCKED]           = (__TRANS_USERSPACE |
                                           __TRANS_START),
        [TRANS_STATE_COMMIT_START]      = (__TRANS_USERSPACE |
                                           __TRANS_START |
                                           __TRANS_ATTACH),
        [TRANS_STATE_COMMIT_DOING]      = (__TRANS_USERSPACE |
                                           __TRANS_START |
                                           __TRANS_ATTACH |
                                           __TRANS_JOIN),
        [TRANS_STATE_UNBLOCKED]         = (__TRANS_USERSPACE |
                                           __TRANS_START |
                                           __TRANS_ATTACH |
                                           __TRANS_JOIN |
                                           __TRANS_JOIN_NOLOCK),
        [TRANS_STATE_COMPLETED]         = (__TRANS_USERSPACE |
                                           __TRANS_START |
                                           __TRANS_ATTACH |
                                           __TRANS_JOIN |
                                           __TRANS_JOIN_NOLOCK),
};
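
/*
 * Note for readers: join_transaction() below refuses a handle of a given
 * type while the running transaction is in state S whenever
 * (btrfs_blocked_trans_types[S] & type) is non-zero.  So once a commit
 * reaches TRANS_STATE_COMMIT_DOING, even TRANS_JOIN is refused and only
 * TRANS_JOIN_NOLOCK can still get in.
 */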

void btrfs_put_transaction(struct btrfs_transaction *transaction)
{
        WARN_ON(atomic_read(&transaction->use_count) == 0);
        if (atomic_dec_and_test(&transaction->use_count)) {
                BUG_ON(!list_empty(&transaction->list));
                WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
                while (!list_empty(&transaction->pending_chunks)) {
                        struct extent_map *em;

                        em = list_first_entry(&transaction->pending_chunks,
                                              struct extent_map, list);
                        list_del_init(&em->list);
                        free_extent_map(em);
                }
                kmem_cache_free(btrfs_transaction_cachep, transaction);
        }
}

static noinline void switch_commit_roots(struct btrfs_transaction *trans,
                                         struct btrfs_fs_info *fs_info)
{
        struct btrfs_root *root, *tmp;

        down_write(&fs_info->commit_root_sem);
        list_for_each_entry_safe(root, tmp, &trans->switch_commits,
                                 dirty_list) {
                list_del_init(&root->dirty_list);
                free_extent_buffer(root->commit_root);
                root->commit_root = btrfs_root_node(root);
                if (is_fstree(root->objectid))
                        btrfs_unpin_free_ino(root);
        }
        up_write(&fs_info->commit_root_sem);
}

static inline void extwriter_counter_inc(struct btrfs_transaction *trans,
                                         unsigned int type)
{
        if (type & TRANS_EXTWRITERS)
                atomic_inc(&trans->num_extwriters);
}

static inline void extwriter_counter_dec(struct btrfs_transaction *trans,
                                         unsigned int type)
{
        if (type & TRANS_EXTWRITERS)
                atomic_dec(&trans->num_extwriters);
}

static inline void extwriter_counter_init(struct btrfs_transaction *trans,
                                          unsigned int type)
{
        atomic_set(&trans->num_extwriters, ((type & TRANS_EXTWRITERS) ? 1 : 0));
}

static inline int extwriter_counter_read(struct btrfs_transaction *trans)
{
        return atomic_read(&trans->num_extwriters);
}
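
/*
 * Note: num_extwriters only counts handles of the TRANS_EXTWRITERS types
 * (assumed here, per transaction.h, to be the external entry points
 * USERSPACE, START and ATTACH).  The commit path can therefore wait for
 * external writers to drain separately from num_writers, which also
 * counts internal JOIN/JOIN_NOLOCK handles.
 */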

/*
 * either allocate a new transaction or hop into the existing one
 */
static noinline int join_transaction(struct btrfs_root *root, unsigned int type)
{
        struct btrfs_transaction *cur_trans;
        struct btrfs_fs_info *fs_info = root->fs_info;

        spin_lock(&fs_info->trans_lock);
loop:
        /* The file system has been taken offline. No new transactions. */
        if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
                spin_unlock(&fs_info->trans_lock);
                return -EROFS;
        }

        cur_trans = fs_info->running_transaction;
        if (cur_trans) {
                if (cur_trans->aborted) {
                        spin_unlock(&fs_info->trans_lock);
                        return cur_trans->aborted;
                }
                if (btrfs_blocked_trans_types[cur_trans->state] & type) {
                        spin_unlock(&fs_info->trans_lock);
                        return -EBUSY;
                }
                atomic_inc(&cur_trans->use_count);
                atomic_inc(&cur_trans->num_writers);
                extwriter_counter_inc(cur_trans, type);
                spin_unlock(&fs_info->trans_lock);
                return 0;
        }
        spin_unlock(&fs_info->trans_lock);

        /*
         * If we are ATTACH, we just want to catch the current transaction,
         * and commit it. If there is no transaction, just return ENOENT.
         */
        if (type == TRANS_ATTACH)
                return -ENOENT;

        /*
         * JOIN_NOLOCK only happens during the transaction commit, so
         * it is impossible that ->running_transaction is NULL
         */
        BUG_ON(type == TRANS_JOIN_NOLOCK);

        cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
        if (!cur_trans)
                return -ENOMEM;

        spin_lock(&fs_info->trans_lock);
        if (fs_info->running_transaction) {
                /*
                 * someone started a transaction after we unlocked.  Make sure
                 * to redo the checks above
                 */
                kmem_cache_free(btrfs_transaction_cachep, cur_trans);
                goto loop;
        } else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
                spin_unlock(&fs_info->trans_lock);
                kmem_cache_free(btrfs_transaction_cachep, cur_trans);
                return -EROFS;
        }

        atomic_set(&cur_trans->num_writers, 1);
        extwriter_counter_init(cur_trans, type);
        init_waitqueue_head(&cur_trans->writer_wait);
        init_waitqueue_head(&cur_trans->commit_wait);
        cur_trans->state = TRANS_STATE_RUNNING;
        /*
         * One for this trans handle, one so it will live on until we
         * commit the transaction.
         */
        atomic_set(&cur_trans->use_count, 2);
        cur_trans->start_time = get_seconds();

        cur_trans->delayed_refs.href_root = RB_ROOT;
        atomic_set(&cur_trans->delayed_refs.num_entries, 0);
        cur_trans->delayed_refs.num_heads_ready = 0;
        cur_trans->delayed_refs.num_heads = 0;
        cur_trans->delayed_refs.flushing = 0;
        cur_trans->delayed_refs.run_delayed_start = 0;

        /*
         * although the tree mod log is per file system and not per transaction,
         * the log must never go across transaction boundaries.
         */
        smp_mb();
        if (!list_empty(&fs_info->tree_mod_seq_list))
                WARN(1, KERN_ERR "BTRFS: tree_mod_seq_list not empty when "
                        "creating a fresh transaction\n");
        if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log))
                WARN(1, KERN_ERR "BTRFS: tree_mod_log rb tree not empty when "
                        "creating a fresh transaction\n");
        atomic64_set(&fs_info->tree_mod_seq, 0);

        spin_lock_init(&cur_trans->delayed_refs.lock);

        INIT_LIST_HEAD(&cur_trans->pending_snapshots);
        INIT_LIST_HEAD(&cur_trans->ordered_operations);
        INIT_LIST_HEAD(&cur_trans->pending_chunks);
        INIT_LIST_HEAD(&cur_trans->switch_commits);
        list_add_tail(&cur_trans->list, &fs_info->trans_list);
        extent_io_tree_init(&cur_trans->dirty_pages,
                             fs_info->btree_inode->i_mapping);
        fs_info->generation++;
        cur_trans->transid = fs_info->generation;
        fs_info->running_transaction = cur_trans;
        cur_trans->aborted = 0;
        spin_unlock(&fs_info->trans_lock);

        return 0;
}
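
/*
 * The drop-lock/allocate/retake pattern above is why the "loop" label
 * exists: kmem_cache_alloc() can sleep, so by the time trans_lock is
 * retaken another task may already have installed a running transaction,
 * and all of the entry checks have to be redone.
 */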

/*
 * this does all the record keeping required to make sure that a reference
 * counted root is properly recorded in a given transaction.  This is required
 * to make sure the old root from before we joined the transaction is deleted
 * when the transaction commits
 */
static int record_root_in_trans(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root)
{
        if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
            root->last_trans < trans->transid) {
                WARN_ON(root == root->fs_info->extent_root);
                WARN_ON(root->commit_root != root->node);

                /*
                 * see below for IN_TRANS_SETUP usage rules
                 * we have the reloc mutex held now, so there
                 * is only one writer in this function
                 */
                set_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);

                /* make sure readers find IN_TRANS_SETUP before
                 * they find our root->last_trans update
                 */
                smp_wmb();

                spin_lock(&root->fs_info->fs_roots_radix_lock);
                if (root->last_trans == trans->transid) {
                        spin_unlock(&root->fs_info->fs_roots_radix_lock);
                        return 0;
                }
                radix_tree_tag_set(&root->fs_info->fs_roots_radix,
                           (unsigned long)root->root_key.objectid,
                           BTRFS_ROOT_TRANS_TAG);
                spin_unlock(&root->fs_info->fs_roots_radix_lock);
                root->last_trans = trans->transid;

                /* this is pretty tricky.  We don't want to
                 * take the relocation lock in btrfs_record_root_in_trans
                 * unless we're really doing the first setup for this root in
                 * this transaction.
                 *
                 * Normally we'd use root->last_trans as a flag to decide
                 * if we want to take the expensive mutex.
                 *
                 * But, we have to set root->last_trans before we
                 * init the relocation root, otherwise, we trip over warnings
                 * in ctree.c.  The solution used here is to flag ourselves
                 * with root IN_TRANS_SETUP.  When this is 1, we're still
                 * fixing up the reloc trees and everyone must wait.
                 *
                 * When this is zero, they can trust root->last_trans and fly
                 * through btrfs_record_root_in_trans without having to take the
                 * lock.  smp_wmb() makes sure that all the writes above are
                 * done before we pop in the zero below
                 */
                btrfs_init_reloc_root(trans, root);
                smp_mb__before_clear_bit();
                clear_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);
        }
        return 0;
}


int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root)
{
        if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
                return 0;

        /*
         * see record_root_in_trans for comments about IN_TRANS_SETUP usage
         * and barriers
         */
        smp_rmb();
        if (root->last_trans == trans->transid &&
            !test_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state))
                return 0;

        mutex_lock(&root->fs_info->reloc_mutex);
        record_root_in_trans(trans, root);
        mutex_unlock(&root->fs_info->reloc_mutex);

        return 0;
}

static inline int is_transaction_blocked(struct btrfs_transaction *trans)
{
        return (trans->state >= TRANS_STATE_BLOCKED &&
                trans->state < TRANS_STATE_UNBLOCKED &&
                !trans->aborted);
}
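
/*
 * "Blocked" thus covers the window [TRANS_STATE_BLOCKED,
 * TRANS_STATE_UNBLOCKED): a commit has been requested or is writing the
 * bulk of its metadata, and throttled transaction starters sit in
 * wait_current_trans() below until the state passes UNBLOCKED or the
 * transaction aborts.
 */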

/* wait for commit against the current transaction to become unblocked.
 * when this is done, it is safe to start a new transaction, but the current
 * transaction might not be fully on disk.
 */
static void wait_current_trans(struct btrfs_root *root)
{
        struct btrfs_transaction *cur_trans;

        spin_lock(&root->fs_info->trans_lock);
        cur_trans = root->fs_info->running_transaction;
        if (cur_trans && is_transaction_blocked(cur_trans)) {
                atomic_inc(&cur_trans->use_count);
                spin_unlock(&root->fs_info->trans_lock);

                wait_event(root->fs_info->transaction_wait,
                           cur_trans->state >= TRANS_STATE_UNBLOCKED ||
                           cur_trans->aborted);
                btrfs_put_transaction(cur_trans);
        } else {
                spin_unlock(&root->fs_info->trans_lock);
        }
}

static int may_wait_transaction(struct btrfs_root *root, int type)
{
        if (root->fs_info->log_root_recovering)
                return 0;

        if (type == TRANS_USERSPACE)
                return 1;

        if (type == TRANS_START &&
            !atomic_read(&root->fs_info->open_ioctl_trans))
                return 1;

        return 0;
}

static inline bool need_reserve_reloc_root(struct btrfs_root *root)
{
        if (!root->fs_info->reloc_ctl ||
            !test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
            root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
            root->reloc_root)
                return false;

        return true;
}
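
/*
 * When need_reserve_reloc_root() returns true, start_transaction() below
 * pads its reservation by one tree node (root->nodesize) so that creating
 * the relocation root for this tree cannot exhaust the transaction's
 * reserved metadata space.
 */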

static struct btrfs_trans_handle *
start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
                  enum btrfs_reserve_flush_enum flush)
{
        struct btrfs_trans_handle *h;
        struct btrfs_transaction *cur_trans;
        u64 num_bytes = 0;
        u64 qgroup_reserved = 0;
        bool reloc_reserved = false;
        int ret;

        if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
                return ERR_PTR(-EROFS);

        if (current->journal_info &&
            current->journal_info != (void *)BTRFS_SEND_TRANS_STUB) {
                WARN_ON(type & TRANS_EXTWRITERS);
                h = current->journal_info;
                h->use_count++;
                WARN_ON(h->use_count > 2);
                h->orig_rsv = h->block_rsv;
                h->block_rsv = NULL;
                goto got_it;
        }

        /*
         * Do the reservation before we join the transaction so we can do all
         * the appropriate flushing if need be.
         */
        if (num_items > 0 && root != root->fs_info->chunk_root) {
                if (root->fs_info->quota_enabled &&
                    is_fstree(root->root_key.objectid)) {
                        qgroup_reserved = num_items * root->leafsize;
                        ret = btrfs_qgroup_reserve(root, qgroup_reserved);
                        if (ret)
                                return ERR_PTR(ret);
                }

                num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
                /*
                 * Do the reservation for the relocation root creation
                 */
                if (unlikely(need_reserve_reloc_root(root))) {
                        num_bytes += root->nodesize;
                        reloc_reserved = true;
                }

                ret = btrfs_block_rsv_add(root,
                                          &root->fs_info->trans_block_rsv,
                                          num_bytes, flush);
                if (ret)
                        goto reserve_fail;
        }
again:
        h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
        if (!h) {
                ret = -ENOMEM;
                goto alloc_fail;
        }

        /*
         * If we are JOIN_NOLOCK we're already committing a transaction and
         * waiting on this guy, so we don't need to do the sb_start_intwrite
         * because we're already holding a ref.  We need this because we could
         * have raced in and done an fsync() on a file, which can kick off a
         * commit, and then we deadlock with somebody doing a freeze.
         *
         * If we are ATTACH, it means we just want to catch the current
         * transaction and commit it, so we needn't do sb_start_intwrite().
         */
        if (type & __TRANS_FREEZABLE)
                sb_start_intwrite(root->fs_info->sb);

        if (may_wait_transaction(root, type))
                wait_current_trans(root);

        do {
                ret = join_transaction(root, type);
                if (ret == -EBUSY) {
                        wait_current_trans(root);
                        if (unlikely(type == TRANS_ATTACH))
                                ret = -ENOENT;
                }
        } while (ret == -EBUSY);

        if (ret < 0) {
                /* We must get the transaction if we are JOIN_NOLOCK. */
                BUG_ON(type == TRANS_JOIN_NOLOCK);
                goto join_fail;
        }

        cur_trans = root->fs_info->running_transaction;

        h->transid = cur_trans->transid;
        h->transaction = cur_trans;
        h->blocks_used = 0;
        h->bytes_reserved = 0;
        h->root = root;
        h->delayed_ref_updates = 0;
        h->use_count = 1;
        h->adding_csums = 0;
        h->block_rsv = NULL;
        h->orig_rsv = NULL;
        h->aborted = 0;
        h->qgroup_reserved = 0;
        h->delayed_ref_elem.seq = 0;
        h->type = type;
        h->allocating_chunk = false;
        h->reloc_reserved = false;
        h->sync = false;
        INIT_LIST_HEAD(&h->qgroup_ref_list);
        INIT_LIST_HEAD(&h->new_bgs);

        smp_mb();
        if (cur_trans->state >= TRANS_STATE_BLOCKED &&
            may_wait_transaction(root, type)) {
                btrfs_commit_transaction(h, root);
                goto again;
        }

        if (num_bytes) {
                trace_btrfs_space_reservation(root->fs_info, "transaction",
                                              h->transid, num_bytes, 1);
                h->block_rsv = &root->fs_info->trans_block_rsv;
                h->bytes_reserved = num_bytes;
                h->reloc_reserved = reloc_reserved;
        }
        h->qgroup_reserved = qgroup_reserved;

got_it:
        btrfs_record_root_in_trans(h, root);

        if (!current->journal_info && type != TRANS_USERSPACE)
                current->journal_info = h;
        return h;

join_fail:
        if (type & __TRANS_FREEZABLE)
                sb_end_intwrite(root->fs_info->sb);
        kmem_cache_free(btrfs_trans_handle_cachep, h);
alloc_fail:
        if (num_bytes)
                btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
                                        num_bytes);
reserve_fail:
        if (qgroup_reserved)
                btrfs_qgroup_free(root, qgroup_reserved);
        return ERR_PTR(ret);
}

struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
                                                   int num_items)
{
        return start_transaction(root, num_items, TRANS_START,
                                 BTRFS_RESERVE_FLUSH_ALL);
}
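
/*
 * A minimal usage sketch (hypothetical caller, not from this file):
 *
 *      trans = btrfs_start_transaction(root, 1);
 *      if (IS_ERR(trans))
 *              return PTR_ERR(trans);
 *      ...dirty at most one item's worth of metadata...
 *      return btrfs_end_transaction(trans, root);
 *
 * num_items sizes the block reservation via
 * btrfs_calc_trans_metadata_size(), so callers should not dirty more
 * metadata than they declared.
 */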

struct btrfs_trans_handle *btrfs_start_transaction_lflush(
                                        struct btrfs_root *root, int num_items)
{
        return start_transaction(root, num_items, TRANS_START,
                                 BTRFS_RESERVE_FLUSH_LIMIT);
}

struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
{
        return start_transaction(root, 0, TRANS_JOIN, 0);
}

struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
{
        return start_transaction(root, 0, TRANS_JOIN_NOLOCK, 0);
}

struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
{
        return start_transaction(root, 0, TRANS_USERSPACE, 0);
}

/*
 * btrfs_attach_transaction() - catch the running transaction
 *
 * It is used when we want to commit the current transaction, but
 * don't want to start a new one.
 *
 * Note: If this function returns -ENOENT, it just means there is no
 * running transaction. But it is possible that the inactive transaction
 * is still in memory, not fully on disk. If you want to make sure there
 * is no inactive transaction in the fs when -ENOENT is returned, you
 * should invoke
 *     btrfs_attach_transaction_barrier()
 */
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root)
{
        return start_transaction(root, 0, TRANS_ATTACH, 0);
}

/*
 * btrfs_attach_transaction_barrier() - catch the running transaction
 *
 * It is similar to the above function, the difference is that this one
 * will wait for all inactive transactions until they fully complete.
 */
struct btrfs_trans_handle *
btrfs_attach_transaction_barrier(struct btrfs_root *root)
{
        struct btrfs_trans_handle *trans;

        trans = start_transaction(root, 0, TRANS_ATTACH, 0);
        if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT)
                btrfs_wait_for_commit(root, 0);

        return trans;
}

/* wait for a transaction commit to be fully complete */
static noinline void wait_for_commit(struct btrfs_root *root,
                                    struct btrfs_transaction *commit)
{
        wait_event(commit->commit_wait, commit->state == TRANS_STATE_COMPLETED);
}

int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
{
        struct btrfs_transaction *cur_trans = NULL, *t;
        int ret = 0;

        if (transid) {
                if (transid <= root->fs_info->last_trans_committed)
                        goto out;

                ret = -EINVAL;
                /* find specified transaction */
                spin_lock(&root->fs_info->trans_lock);
                list_for_each_entry(t, &root->fs_info->trans_list, list) {
                        if (t->transid == transid) {
                                cur_trans = t;
                                atomic_inc(&cur_trans->use_count);
                                ret = 0;
                                break;
                        }
                        if (t->transid > transid) {
                                ret = 0;
                                break;
                        }
                }
                spin_unlock(&root->fs_info->trans_lock);
                /* The specified transaction doesn't exist */
                if (!cur_trans)
                        goto out;
        } else {
                /* find newest transaction that is committing | committed */
                spin_lock(&root->fs_info->trans_lock);
                list_for_each_entry_reverse(t, &root->fs_info->trans_list,
                                            list) {
                        if (t->state >= TRANS_STATE_COMMIT_START) {
                                if (t->state == TRANS_STATE_COMPLETED)
                                        break;
                                cur_trans = t;
                                atomic_inc(&cur_trans->use_count);
                                break;
                        }
                }
                spin_unlock(&root->fs_info->trans_lock);
                if (!cur_trans)
                        goto out;  /* nothing committing|committed */
        }

        wait_for_commit(root, cur_trans);
        btrfs_put_transaction(cur_trans);
out:
        return ret;
}

void btrfs_throttle(struct btrfs_root *root)
{
        if (!atomic_read(&root->fs_info->open_ioctl_trans))
                wait_current_trans(root);
}

static int should_end_transaction(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root)
{
        if (root->fs_info->global_block_rsv.space_info->full &&
            btrfs_check_space_for_delayed_refs(trans, root))
                return 1;

        return !!btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
}
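
/*
 * Reading of the above: end the transaction early either when delayed
 * refs need room while the global reserve's space_info is already full,
 * or when the global block reserve fails its fullness check (the factor
 * of 5 is assumed, per btrfs_block_rsv_check(), to mean less than
 * roughly half of the reserve's size is currently covered).
 */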

int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root)
{
        struct btrfs_transaction *cur_trans = trans->transaction;
        int updates;
        int err;

        smp_mb();
        if (cur_trans->state >= TRANS_STATE_BLOCKED ||
            cur_trans->delayed_refs.flushing)
                return 1;

        updates = trans->delayed_ref_updates;
        trans->delayed_ref_updates = 0;
        if (updates) {
                err = btrfs_run_delayed_refs(trans, root, updates);
                if (err) /* Error code will also eval true */
                        return err;
        }

        return should_end_transaction(trans, root);
}

static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, int throttle)
{
        struct btrfs_transaction *cur_trans = trans->transaction;
        struct btrfs_fs_info *info = root->fs_info;
        unsigned long cur = trans->delayed_ref_updates;
        int lock = (trans->type != TRANS_JOIN_NOLOCK);
        int err = 0;

        if (trans->use_count > 1) {
                trans->use_count--;
                trans->block_rsv = trans->orig_rsv;
                return 0;
        }

        /*
         * do the qgroup accounting as early as possible
         */
        err = btrfs_delayed_refs_qgroup_accounting(trans, info);

        btrfs_trans_release_metadata(trans, root);
        trans->block_rsv = NULL;

        if (trans->qgroup_reserved) {
                /*
                 * the same root has to be passed here between start_transaction
                 * and end_transaction. Subvolume quota depends on this.
                 */
                btrfs_qgroup_free(trans->root, trans->qgroup_reserved);
                trans->qgroup_reserved = 0;
        }

        if (!list_empty(&trans->new_bgs))
                btrfs_create_pending_block_groups(trans, root);

        trans->delayed_ref_updates = 0;
        if (!trans->sync && btrfs_should_throttle_delayed_refs(trans, root)) {
                cur = max_t(unsigned long, cur, 32);
                trans->delayed_ref_updates = 0;
                btrfs_run_delayed_refs(trans, root, cur);
        }

        btrfs_trans_release_metadata(trans, root);
        trans->block_rsv = NULL;

        if (!list_empty(&trans->new_bgs))
                btrfs_create_pending_block_groups(trans, root);

        if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
            should_end_transaction(trans, root) &&
            ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
                spin_lock(&info->trans_lock);
                if (cur_trans->state == TRANS_STATE_RUNNING)
                        cur_trans->state = TRANS_STATE_BLOCKED;
                spin_unlock(&info->trans_lock);
        }

        if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
                if (throttle)
                        return btrfs_commit_transaction(trans, root);
                else
                        wake_up_process(info->transaction_kthread);
        }

        if (trans->type & __TRANS_FREEZABLE)
                sb_end_intwrite(root->fs_info->sb);

        WARN_ON(cur_trans != info->running_transaction);
        WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
        atomic_dec(&cur_trans->num_writers);
        extwriter_counter_dec(cur_trans, trans->type);

        smp_mb();
        if (waitqueue_active(&cur_trans->writer_wait))
                wake_up(&cur_trans->writer_wait);
        btrfs_put_transaction(cur_trans);

        if (current->journal_info == trans)
                current->journal_info = NULL;

        if (throttle)
                btrfs_run_delayed_iputs(root);

        if (trans->aborted ||
            test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
                wake_up_process(info->transaction_kthread);
                err = -EIO;
        }
        assert_qgroups_uptodate(trans);

        kmem_cache_free(btrfs_trans_handle_cachep, trans);
        return err;
}

int btrfs_end_transaction(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root)
{
        return __btrfs_end_transaction(trans, root, 0);
}

int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root)
{
        return __btrfs_end_transaction(trans, root, 1);
}

/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are sent to disk but does not wait on them
 */
int btrfs_write_marked_extents(struct btrfs_root *root,
                               struct extent_io_tree *dirty_pages, int mark)
{
        int err = 0;
        int werr = 0;
        struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
        struct extent_state *cached_state = NULL;
        u64 start = 0;
        u64 end;

        while (!find_first_extent_bit(dirty_pages, start, &start, &end,
                                      mark, &cached_state)) {
                convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT,
                                   mark, &cached_state, GFP_NOFS);
                cached_state = NULL;
                err = filemap_fdatawrite_range(mapping, start, end);
                if (err)
                        werr = err;
                cond_resched();
                start = end + 1;
        }
        if (err)
                werr = err;
        return werr;
}
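
/*
 * Note the handoff between the two passes: btrfs_write_marked_extents()
 * converts the given mark to EXTENT_NEED_WAIT as it issues writeback,
 * and btrfs_wait_marked_extents() below consumes exactly that bit, so
 * the wait pass only touches ranges the write pass actually submitted.
 */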

/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are on disk for transaction or log commit.  We wait
 * on all the pages and clear them from the dirty pages state tree
 */
int btrfs_wait_marked_extents(struct btrfs_root *root,
                              struct extent_io_tree *dirty_pages, int mark)
{
        int err = 0;
        int werr = 0;
        struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
        struct extent_state *cached_state = NULL;
        u64 start = 0;
        u64 end;

        while (!find_first_extent_bit(dirty_pages, start, &start, &end,
                                      EXTENT_NEED_WAIT, &cached_state)) {
                clear_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT,
                                 0, 0, &cached_state, GFP_NOFS);
                err = filemap_fdatawait_range(mapping, start, end);
                if (err)
                        werr = err;
                cond_resched();
                start = end + 1;
        }
        if (err)
                werr = err;
        return werr;
}

/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are on disk for transaction or log commit
 */
static int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
                                struct extent_io_tree *dirty_pages, int mark)
{
        int ret;
        int ret2;
        struct blk_plug plug;

        blk_start_plug(&plug);
        ret = btrfs_write_marked_extents(root, dirty_pages, mark);
        blk_finish_plug(&plug);
        ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);

        if (ret)
                return ret;
        if (ret2)
                return ret2;
        return 0;
}
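
/*
 * The blk_plug around the write pass lets the block layer batch the
 * btree writeback into larger requests before the wait pass blocks on
 * it; if both passes fail, the write pass's error is the one reported.
 */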

int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root)
{
        if (!trans || !trans->transaction) {
                struct inode *btree_inode;
                btree_inode = root->fs_info->btree_inode;
                return filemap_write_and_wait(btree_inode->i_mapping);
        }
        return btrfs_write_and_wait_marked_extents(root,
                                           &trans->transaction->dirty_pages,
                                           EXTENT_DIRTY);
}

/*
 * this is used to update the root pointer in the tree of tree roots.
 *
 * But, in the case of the extent allocation tree, updating the root
 * pointer may allocate blocks which may change the root of the extent
 * allocation tree.
 *
 * So, this loops and repeats and makes sure the cowonly root didn't
 * change while the root pointer was being updated in the metadata.
 */
static int update_cowonly_root(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root)
{
        int ret;
        u64 old_root_bytenr;
        u64 old_root_used;
        struct btrfs_root *tree_root = root->fs_info->tree_root;

        old_root_used = btrfs_root_used(&root->root_item);
        btrfs_write_dirty_block_groups(trans, root);

        while (1) {
                old_root_bytenr = btrfs_root_bytenr(&root->root_item);
                if (old_root_bytenr == root->node->start &&
                    old_root_used == btrfs_root_used(&root->root_item))
                        break;

                btrfs_set_root_node(&root->root_item, root->node);
                ret = btrfs_update_root(trans, tree_root,
                                        &root->root_key,
                                        &root->root_item);
                if (ret)
                        return ret;

                old_root_used = btrfs_root_used(&root->root_item);
                ret = btrfs_write_dirty_block_groups(trans, root);
                if (ret)
                        return ret;
        }

        return 0;
}

/*
 * update all the cowonly tree roots on disk
 *
 * The error handling in this function may not be obvious. Any of the
 * failures will cause the file system to go offline. We still need
 * to clean up the delayed refs.
 */
static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
                                         struct btrfs_root *root)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct list_head *next;
        struct extent_buffer *eb;
        int ret;

        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        if (ret)
                return ret;

        eb = btrfs_lock_root_node(fs_info->tree_root);
        ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
                              0, &eb);
        btrfs_tree_unlock(eb);
        free_extent_buffer(eb);

        if (ret)
                return ret;

        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        if (ret)
                return ret;

        ret = btrfs_run_dev_stats(trans, root->fs_info);
        if (ret)
                return ret;
        ret = btrfs_run_dev_replace(trans, root->fs_info);
        if (ret)
                return ret;
        ret = btrfs_run_qgroups(trans, root->fs_info);
        if (ret)
                return ret;

        /* run_qgroups might have added some more refs */
        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        if (ret)
                return ret;

        while (!list_empty(&fs_info->dirty_cowonly_roots)) {
                next = fs_info->dirty_cowonly_roots.next;
                list_del_init(next);
                root = list_entry(next, struct btrfs_root, dirty_list);

                if (root != fs_info->extent_root)
                        list_add_tail(&root->dirty_list,
                                      &trans->transaction->switch_commits);
                ret = update_cowonly_root(trans, root);
                if (ret)
                        return ret;
        }

        list_add_tail(&fs_info->extent_root->dirty_list,
                      &trans->transaction->switch_commits);
        btrfs_after_dev_replace_commit(fs_info);

        return 0;
}
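
/*
 * Ordering note: the extent root is deliberately added to switch_commits
 * only after the dirty_cowonly_roots loop has drained, since updating
 * any of the other cowonly roots can re-dirty the extent allocation
 * tree (see the comment above update_cowonly_root()).
 */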

/*
 * dead roots are old snapshots that need to be deleted.  This adds the
 * given root to the list of dead roots that need to be deleted
 */
void btrfs_add_dead_root(struct btrfs_root *root)
{
        spin_lock(&root->fs_info->trans_lock);
        if (list_empty(&root->root_list))
                list_add_tail(&root->root_list, &root->fs_info->dead_roots);
        spin_unlock(&root->fs_info->trans_lock);
}

/*
 * update all the fs-tree roots on disk
 */
static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root)
{
        struct btrfs_root *gang[8];
        struct btrfs_fs_info *fs_info = root->fs_info;
        int i;
        int ret;
        int err = 0;

        spin_lock(&fs_info->fs_roots_radix_lock);
        while (1) {
                ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
                                                 (void **)gang, 0,
                                                 ARRAY_SIZE(gang),
                                                 BTRFS_ROOT_TRANS_TAG);
                if (ret == 0)
                        break;
                for (i = 0; i < ret; i++) {
                        root = gang[i];
                        radix_tree_tag_clear(&fs_info->fs_roots_radix,
                                        (unsigned long)root->root_key.objectid,
                                        BTRFS_ROOT_TRANS_TAG);
                        spin_unlock(&fs_info->fs_roots_radix_lock);

                        btrfs_free_log(trans, root);
                        btrfs_update_reloc_root(trans, root);
                        btrfs_orphan_commit_root(trans, root);

                        btrfs_save_ino_cache(root, trans);

                        /* see comments in should_cow_block() */
                        clear_bit(BTRFS_ROOT_FORCE_COW, &root->state);
                        smp_mb__after_clear_bit();

                        if (root->commit_root != root->node) {
                                list_add_tail(&root->dirty_list,
                                        &trans->transaction->switch_commits);
                                btrfs_set_root_node(&root->root_item,
                                                    root->node);
                        }

                        err = btrfs_update_root(trans, fs_info->tree_root,
                                                &root->root_key,
                                                &root->root_item);
                        spin_lock(&fs_info->fs_roots_radix_lock);
                        if (err)
                                break;
                }
        }
        spin_unlock(&fs_info->fs_roots_radix_lock);
        return err;
}

/*
 * defrag a given btree.
 * Every leaf in the btree is read and defragged.
 */
int btrfs_defrag_root(struct btrfs_root *root)
{
        struct btrfs_fs_info *info = root->fs_info;
        struct btrfs_trans_handle *trans;
        int ret;

        if (test_and_set_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state))
                return 0;

        while (1) {
                trans = btrfs_start_transaction(root, 0);
                if (IS_ERR(trans))
                        return PTR_ERR(trans);

                ret = btrfs_defrag_leaves(trans, root);

                btrfs_end_transaction(trans, root);
                btrfs_btree_balance_dirty(info->tree_root);
                cond_resched();

                if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN)
                        break;

                if (btrfs_defrag_cancelled(root->fs_info)) {
                        pr_debug("BTRFS: defrag_root cancelled\n");
                        ret = -EAGAIN;
                        break;
                }
        }
        clear_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state);
        return ret;
}

/*
 * new snapshots need to be created at a very specific time in the
 * transaction commit.  This does the actual creation.
 *
 * Note:
 * If an error that may affect the commit of the current transaction
 * happens, we should return the error number. If an error happens that
 * only affects the creation of the pending snapshots, just return 0.
 */
static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
                                   struct btrfs_fs_info *fs_info,
                                   struct btrfs_pending_snapshot *pending)
{
        struct btrfs_key key;
        struct btrfs_root_item *new_root_item;
        struct btrfs_root *tree_root = fs_info->tree_root;
        struct btrfs_root *root = pending->root;
        struct btrfs_root *parent_root;
        struct btrfs_block_rsv *rsv;
        struct inode *parent_inode;
        struct btrfs_path *path;
        struct btrfs_dir_item *dir_item;
        struct dentry *dentry;
        struct extent_buffer *tmp;
        struct extent_buffer *old;
        struct timespec cur_time = CURRENT_TIME;
        int ret = 0;
        u64 to_reserve = 0;
        u64 index = 0;
        u64 objectid;
        u64 root_flags;
        uuid_le new_uuid;

        path = btrfs_alloc_path();
        if (!path) {
                pending->error = -ENOMEM;
                return 0;
        }

        new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
        if (!new_root_item) {
                pending->error = -ENOMEM;
                goto root_item_alloc_fail;
        }

        pending->error = btrfs_find_free_objectid(tree_root, &objectid);
        if (pending->error)
                goto no_free_objectid;

        btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);

        if (to_reserve > 0) {
                pending->error = btrfs_block_rsv_add(root,
                                                     &pending->block_rsv,
                                                     to_reserve,
                                                     BTRFS_RESERVE_NO_FLUSH);
                if (pending->error)
                        goto no_free_objectid;
        }

        pending->error = btrfs_qgroup_inherit(trans, fs_info,
                                              root->root_key.objectid,
                                              objectid, pending->inherit);
        if (pending->error)
                goto no_free_objectid;

        key.objectid = objectid;
        key.offset = (u64)-1;
        key.type = BTRFS_ROOT_ITEM_KEY;

        rsv = trans->block_rsv;
        trans->block_rsv = &pending->block_rsv;
        trans->bytes_reserved = trans->block_rsv->reserved;

        dentry = pending->dentry;
        parent_inode = pending->dir;
        parent_root = BTRFS_I(parent_inode)->root;
        record_root_in_trans(trans, parent_root);

        /*
         * insert the directory item
         */
        ret = btrfs_set_inode_index(parent_inode, &index);
        BUG_ON(ret); /* -ENOMEM */

        /* check if there is a file/dir which has the same name. */
        dir_item = btrfs_lookup_dir_item(NULL, parent_root, path,
                                         btrfs_ino(parent_inode),
                                         dentry->d_name.name,
                                         dentry->d_name.len, 0);
        if (dir_item != NULL && !IS_ERR(dir_item)) {
                pending->error = -EEXIST;
                goto dir_item_existed;
        } else if (IS_ERR(dir_item)) {
                ret = PTR_ERR(dir_item);
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }
        btrfs_release_path(path);

        /*
         * pull in the delayed directory update and the delayed inode item;
         * otherwise we corrupt the FS during snapshot
         */
        ret = btrfs_run_delayed_items(trans, root);
        if (ret) {      /* Transaction aborted */
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }

        record_root_in_trans(trans, root);
        btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
        memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
        btrfs_check_and_init_root_item(new_root_item);

        root_flags = btrfs_root_flags(new_root_item);
        if (pending->readonly)
                root_flags |= BTRFS_ROOT_SUBVOL_RDONLY;
        else
                root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
        btrfs_set_root_flags(new_root_item, root_flags);

        btrfs_set_root_generation_v2(new_root_item,
                        trans->transid);
        uuid_le_gen(&new_uuid);
        memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
        memcpy(new_root_item->parent_uuid, root->root_item.uuid,
                        BTRFS_UUID_SIZE);
        if (!(root_flags & BTRFS_ROOT_SUBVOL_RDONLY)) {
                memset(new_root_item->received_uuid, 0,
                       sizeof(new_root_item->received_uuid));
                memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
                memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
                btrfs_set_root_stransid(new_root_item, 0);
                btrfs_set_root_rtransid(new_root_item, 0);
        }
        btrfs_set_stack_timespec_sec(&new_root_item->otime, cur_time.tv_sec);
        btrfs_set_stack_timespec_nsec(&new_root_item->otime, cur_time.tv_nsec);
        btrfs_set_root_otransid(new_root_item, trans->transid);

        old = btrfs_lock_root_node(root);
        ret = btrfs_cow_block(trans, root, old, NULL, 0, &old);
        if (ret) {
                btrfs_tree_unlock(old);
                free_extent_buffer(old);
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }

        btrfs_set_lock_blocking(old);

        ret = btrfs_copy_root(trans, root, old, &tmp, objectid);
        /* clean up in any case */
        btrfs_tree_unlock(old);
        free_extent_buffer(old);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }

        /* see comments in should_cow_block() */
        set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
        smp_wmb();

        btrfs_set_root_node(new_root_item, tmp);
        /* record when the snapshot was created in key.offset */
        key.offset = trans->transid;
        ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
        btrfs_tree_unlock(tmp);
        free_extent_buffer(tmp);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }

        /*
         * insert root back/forward references
         */
        ret = btrfs_add_root_ref(trans, tree_root, objectid,
                                 parent_root->root_key.objectid,
                                 btrfs_ino(parent_inode), index,
                                 dentry->d_name.name, dentry->d_name.len);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }

        key.offset = (u64)-1;
        pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
        if (IS_ERR(pending->snap)) {
                ret = PTR_ERR(pending->snap);
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }

        ret = btrfs_reloc_post_snapshot(trans, pending);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }

        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }

        ret = btrfs_insert_dir_item(trans, parent_root,
                                    dentry->d_name.name, dentry->d_name.len,
                                    parent_inode, &key,
                                    BTRFS_FT_DIR, index);
        /* We have checked the name at the beginning, so it is impossible. */
        BUG_ON(ret == -EEXIST || ret == -EOVERFLOW);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }

        btrfs_i_size_write(parent_inode, parent_inode->i_size +
                                         dentry->d_name.len * 2);
        parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
        ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }
        ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, new_uuid.b,
                                  BTRFS_UUID_KEY_SUBVOL, objectid);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }
        if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) {
                ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
                                          new_root_item->received_uuid,
                                          BTRFS_UUID_KEY_RECEIVED_SUBVOL,
                                          objectid);
                if (ret && ret != -EEXIST) {
                        btrfs_abort_transaction(trans, root, ret);
                        goto fail;
                }
        }
fail:
        pending->error = ret;
dir_item_existed:
        trans->block_rsv = rsv;
        trans->bytes_reserved = 0;
no_free_objectid:
        kfree(new_root_item);
root_item_alloc_fail:
        btrfs_free_path(path);
        return ret;
}
1367
1368 /*
1369  * create all the snapshots we've scheduled for creation
1370  */
1371 static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
1372                                              struct btrfs_fs_info *fs_info)
1373 {
1374         struct btrfs_pending_snapshot *pending, *next;
1375         struct list_head *head = &trans->transaction->pending_snapshots;
1376         int ret = 0;
1377
1378         list_for_each_entry_safe(pending, next, head, list) {
1379                 list_del(&pending->list);
1380                 ret = create_pending_snapshot(trans, fs_info, pending);
1381                 if (ret)
1382                         break;
1383         }
1384         return ret;
1385 }
1386
1387 static void update_super_roots(struct btrfs_root *root)
1388 {
1389         struct btrfs_root_item *root_item;
1390         struct btrfs_super_block *super;
1391
1392         super = root->fs_info->super_copy;
1393
1394         root_item = &root->fs_info->chunk_root->root_item;
1395         super->chunk_root = root_item->bytenr;
1396         super->chunk_root_generation = root_item->generation;
1397         super->chunk_root_level = root_item->level;
1398
1399         root_item = &root->fs_info->tree_root->root_item;
1400         super->root = root_item->bytenr;
1401         super->generation = root_item->generation;
1402         super->root_level = root_item->level;
1403         if (btrfs_test_opt(root, SPACE_CACHE))
1404                 super->cache_generation = root_item->generation;
1405         if (root->fs_info->update_uuid_tree_gen)
1406                 super->uuid_tree_generation = root_item->generation;
1407 }
1408
1409 int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
1410 {
1411         struct btrfs_transaction *trans;
1412         int ret = 0;
1413
1414         spin_lock(&info->trans_lock);
1415         trans = info->running_transaction;
1416         if (trans)
1417                 ret = (trans->state >= TRANS_STATE_COMMIT_START);
1418         spin_unlock(&info->trans_lock);
1419         return ret;
1420 }
1421
1422 int btrfs_transaction_blocked(struct btrfs_fs_info *info)
1423 {
1424         struct btrfs_transaction *trans;
1425         int ret = 0;
1426
1427         spin_lock(&info->trans_lock);
1428         trans = info->running_transaction;
1429         if (trans)
1430                 ret = is_transaction_blocked(trans);
1431         spin_unlock(&info->trans_lock);
1432         return ret;
1433 }
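
/*
 * A minimal usage sketch (hypothetical helper, not part of this file): both
 * queries above only sample ->running_transaction under trans_lock, so the
 * answer may already be stale by the time the caller acts on it; treat it
 * as a hint rather than a guarantee.
 */
static inline bool example_commit_in_flight(struct btrfs_fs_info *info)
{
        return btrfs_transaction_in_commit(info) ||
               btrfs_transaction_blocked(info);
}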
1434
1435 /*
1436  * wait for the current transaction commit to start and block subsequent
1437  * transaction joins
1438  */
1439 static void wait_current_trans_commit_start(struct btrfs_root *root,
1440                                             struct btrfs_transaction *trans)
1441 {
1442         wait_event(root->fs_info->transaction_blocked_wait,
1443                    trans->state >= TRANS_STATE_COMMIT_START ||
1444                    trans->aborted);
1445 }
1446
1447 /*
1448  * wait for the current transaction to start and then become unblocked.
1449  * caller holds ref.
1450  */
1451 static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
1452                                          struct btrfs_transaction *trans)
1453 {
1454         wait_event(root->fs_info->transaction_wait,
1455                    trans->state >= TRANS_STATE_UNBLOCKED ||
1456                    trans->aborted);
1457 }
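
/*
 * Note: both waits above are paired with wake_up() calls in
 * btrfs_commit_transaction(): transaction_blocked_wait is woken once the
 * commit reaches TRANS_STATE_COMMIT_START, and transaction_wait once it
 * reaches TRANS_STATE_UNBLOCKED.
 */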
1458
1459 /*
1460  * commit transactions asynchronously. once btrfs_commit_transaction_async
1461  * returns, any subsequent transaction will not be allowed to join.
1462  */
1463 struct btrfs_async_commit {
1464         struct btrfs_trans_handle *newtrans;
1465         struct btrfs_root *root;
1466         struct work_struct work;
1467 };
1468
1469 static void do_async_commit(struct work_struct *work)
1470 {
1471         struct btrfs_async_commit *ac =
1472                 container_of(work, struct btrfs_async_commit, work);
1473
1474         /*
1475          * We've got freeze protection passed with the transaction.
1476          * Tell lockdep about it.
1477          */
1478         if (ac->newtrans->type & __TRANS_FREEZABLE)
1479                 rwsem_acquire_read(
1480                      &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
1481                      0, 1, _THIS_IP_);
1482
1483         current->journal_info = ac->newtrans;
1484
1485         btrfs_commit_transaction(ac->newtrans, ac->root);
1486         kfree(ac);
1487 }
1488
1489 int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1490                                    struct btrfs_root *root,
1491                                    int wait_for_unblock)
1492 {
1493         struct btrfs_async_commit *ac;
1494         struct btrfs_transaction *cur_trans;
1495
1496         ac = kmalloc(sizeof(*ac), GFP_NOFS);
1497         if (!ac)
1498                 return -ENOMEM;
1499
1500         INIT_WORK(&ac->work, do_async_commit);
1501         ac->root = root;
1502         ac->newtrans = btrfs_join_transaction(root);
1503         if (IS_ERR(ac->newtrans)) {
1504                 int err = PTR_ERR(ac->newtrans);
1505                 kfree(ac);
1506                 return err;
1507         }
1508
1509         /* take transaction reference */
1510         cur_trans = trans->transaction;
1511         atomic_inc(&cur_trans->use_count);
1512
1513         btrfs_end_transaction(trans, root);
1514
1515         /*
1516          * Tell lockdep we've released the freeze rwsem, since the
1517          * async commit thread will be the one to unlock it.
1518          */
1519         if (ac->newtrans->type & __TRANS_FREEZABLE)
1520                 rwsem_release(
1521                         &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
1522                         1, _THIS_IP_);
1523
1524         schedule_work(&ac->work);
1525
1526         /* wait for transaction to start and unblock */
1527         if (wait_for_unblock)
1528                 wait_current_trans_commit_start_and_unblock(root, cur_trans);
1529         else
1530                 wait_current_trans_commit_start(root, cur_trans);
1531
1532         if (current->journal_info == trans)
1533                 current->journal_info = NULL;
1534
1535         btrfs_put_transaction(cur_trans);
1536         return 0;
1537 }
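
/*
 * Usage sketch (hypothetical caller, for illustration only): kick off a
 * commit in the background and return as soon as new transactions are
 * allowed to start again, instead of blocking for the whole commit.
 */
static int example_commit_async(struct btrfs_root *root)
{
        struct btrfs_trans_handle *trans;

        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans))
                return PTR_ERR(trans);
        /* wait_for_unblock == 1: wait until the commit reaches
         * TRANS_STATE_UNBLOCKED before returning. */
        return btrfs_commit_transaction_async(trans, root, 1);
}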
1538
1539
1540 static void cleanup_transaction(struct btrfs_trans_handle *trans,
1541                                 struct btrfs_root *root, int err)
1542 {
1543         struct btrfs_transaction *cur_trans = trans->transaction;
1544         DEFINE_WAIT(wait);
1545
1546         WARN_ON(trans->use_count > 1);
1547
1548         btrfs_abort_transaction(trans, root, err);
1549
1550         spin_lock(&root->fs_info->trans_lock);
1551
1552         /*
1553          * If the transaction is removed from the list, it means this
1554          * transaction has already been committed successfully, so it is
1555          * a bug to reach this cleanup function for it.
1556          */
1557         BUG_ON(list_empty(&cur_trans->list));
1558
1559         list_del_init(&cur_trans->list);
1560         if (cur_trans == root->fs_info->running_transaction) {
1561                 cur_trans->state = TRANS_STATE_COMMIT_DOING;
1562                 spin_unlock(&root->fs_info->trans_lock);
1563                 wait_event(cur_trans->writer_wait,
1564                            atomic_read(&cur_trans->num_writers) == 1);
1565
1566                 spin_lock(&root->fs_info->trans_lock);
1567         }
1568         spin_unlock(&root->fs_info->trans_lock);
1569
1570         btrfs_cleanup_one_transaction(trans->transaction, root);
1571
1572         spin_lock(&root->fs_info->trans_lock);
1573         if (cur_trans == root->fs_info->running_transaction)
1574                 root->fs_info->running_transaction = NULL;
1575         spin_unlock(&root->fs_info->trans_lock);
1576
1577         if (trans->type & __TRANS_FREEZABLE)
1578                 sb_end_intwrite(root->fs_info->sb);
1579         btrfs_put_transaction(cur_trans);
1580         btrfs_put_transaction(cur_trans);
1581
1582         trace_btrfs_transaction_commit(root);
1583
1584         if (current->journal_info == trans)
1585                 current->journal_info = NULL;
1586         btrfs_scrub_cancel(root->fs_info);
1587
1588         kmem_cache_free(btrfs_trans_handle_cachep, trans);
1589 }
1590
1591 static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
1592                                           struct btrfs_root *root)
1593 {
1594         int ret;
1595
1596         ret = btrfs_run_delayed_items(trans, root);
1597         /*
1598          * running the delayed items may have added new refs. account
1599          * them now so that they hinder processing of more delayed refs
1600          * as little as possible.
1601          */
1602         if (ret) {
1603                 btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
1604                 return ret;
1605         }
1606
1607         ret = btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
1608         if (ret)
1609                 return ret;
1610
1611         /*
1612          * rename doesn't use btrfs_join_transaction, so once we
1613          * set the transaction to blocked above, we aren't going
1614          * to get any new ordered operations.  We can safely run
1615          * it here and know for sure that nothing new will be added
1616          * to the list.
1617          */
1618         ret = btrfs_run_ordered_operations(trans, root, 1);
1619
1620         return ret;
1621 }
1622
1623 static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
1624 {
1625         if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
1626                 return btrfs_start_delalloc_roots(fs_info, 1, -1);
1627         return 0;
1628 }
1629
1630 static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
1631 {
1632         if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
1633                 btrfs_wait_ordered_roots(fs_info, -1);
1634 }
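
/*
 * The two helpers above are intended to be used as a pair, as
 * btrfs_commit_transaction() does below: start delalloc writeback early,
 * overlap it with other commit work, and only wait for the ordered
 * extents at the end.  Both are no-ops unless FLUSHONCOMMIT is set.
 * A minimal sketch of the pattern (hypothetical, for illustration):
 */
static int example_flush_on_commit(struct btrfs_fs_info *fs_info)
{
        int ret = btrfs_start_delalloc_flush(fs_info);

        if (ret)
                return ret;
        /* ... other commit preparation can overlap with writeback ... */
        btrfs_wait_delalloc_flush(fs_info);
        return 0;
}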
1635
1636 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1637                              struct btrfs_root *root)
1638 {
1639         struct btrfs_transaction *cur_trans = trans->transaction;
1640         struct btrfs_transaction *prev_trans = NULL;
1641         int ret;
1642
1643         ret = btrfs_run_ordered_operations(trans, root, 0);
1644         if (ret) {
1645                 btrfs_abort_transaction(trans, root, ret);
1646                 btrfs_end_transaction(trans, root);
1647                 return ret;
1648         }
1649
1650         /* Stop the commit early if ->aborted is set */
1651         if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1652                 ret = cur_trans->aborted;
1653                 btrfs_end_transaction(trans, root);
1654                 return ret;
1655         }
1656
1657         /* make a pass through all the delayed refs we have so far;
1658          * any running procs may add more while we are here
1659          */
1660         ret = btrfs_run_delayed_refs(trans, root, 0);
1661         if (ret) {
1662                 btrfs_end_transaction(trans, root);
1663                 return ret;
1664         }
1665
1666         btrfs_trans_release_metadata(trans, root);
1667         trans->block_rsv = NULL;
1668         if (trans->qgroup_reserved) {
1669                 btrfs_qgroup_free(root, trans->qgroup_reserved);
1670                 trans->qgroup_reserved = 0;
1671         }
1672
1673         cur_trans = trans->transaction;
1674
1675         /*
1676          * set the flushing flag so procs in this transaction have to
1677          * start sending their work down.
1678          */
1679         cur_trans->delayed_refs.flushing = 1;
1680         smp_wmb();
1681
1682         if (!list_empty(&trans->new_bgs))
1683                 btrfs_create_pending_block_groups(trans, root);
1684
1685         ret = btrfs_run_delayed_refs(trans, root, 0);
1686         if (ret) {
1687                 btrfs_end_transaction(trans, root);
1688                 return ret;
1689         }
1690
1691         spin_lock(&root->fs_info->trans_lock);
1692         if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
1693                 spin_unlock(&root->fs_info->trans_lock);
1694                 atomic_inc(&cur_trans->use_count);
1695                 ret = btrfs_end_transaction(trans, root);
1696
1697                 wait_for_commit(root, cur_trans);
1698
1699                 btrfs_put_transaction(cur_trans);
1700
1701                 return ret;
1702         }
1703
1704         cur_trans->state = TRANS_STATE_COMMIT_START;
1705         wake_up(&root->fs_info->transaction_blocked_wait);
1706
1707         if (cur_trans->list.prev != &root->fs_info->trans_list) {
1708                 prev_trans = list_entry(cur_trans->list.prev,
1709                                         struct btrfs_transaction, list);
1710                 if (prev_trans->state != TRANS_STATE_COMPLETED) {
1711                         atomic_inc(&prev_trans->use_count);
1712                         spin_unlock(&root->fs_info->trans_lock);
1713
1714                         wait_for_commit(root, prev_trans);
1715
1716                         btrfs_put_transaction(prev_trans);
1717                 } else {
1718                         spin_unlock(&root->fs_info->trans_lock);
1719                 }
1720         } else {
1721                 spin_unlock(&root->fs_info->trans_lock);
1722         }
1723
1724         extwriter_counter_dec(cur_trans, trans->type);
1725
1726         ret = btrfs_start_delalloc_flush(root->fs_info);
1727         if (ret)
1728                 goto cleanup_transaction;
1729
1730         ret = btrfs_flush_all_pending_stuffs(trans, root);
1731         if (ret)
1732                 goto cleanup_transaction;
1733
1734         wait_event(cur_trans->writer_wait,
1735                    extwriter_counter_read(cur_trans) == 0);
1736
1737         /* some pending stuff might be added after the previous flush. */
1738         ret = btrfs_flush_all_pending_stuffs(trans, root);
1739         if (ret)
1740                 goto cleanup_transaction;
1741
1742         btrfs_wait_delalloc_flush(root->fs_info);
1743
1744         btrfs_scrub_pause(root);
1745         /*
1746          * Ok now we need to make sure to block out any other joins while we
1747          * commit the transaction.  We could have started a join before setting
1748          * COMMIT_DOING, so make sure to wait for num_writers to drop to 1 again.
1749          */
1750         spin_lock(&root->fs_info->trans_lock);
1751         cur_trans->state = TRANS_STATE_COMMIT_DOING;
1752         spin_unlock(&root->fs_info->trans_lock);
1753         wait_event(cur_trans->writer_wait,
1754                    atomic_read(&cur_trans->num_writers) == 1);
1755
1756         /* ->aborted might be set after the previous check, so check it */
1757         if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1758                 ret = cur_trans->aborted;
1759                 goto scrub_continue;
1760         }
1761         /*
1762          * the reloc mutex makes sure that we stop
1763          * the balancing code from coming in and moving
1764          * extents around in the middle of the commit
1765          */
1766         mutex_lock(&root->fs_info->reloc_mutex);
1767
1768         /*
1769          * We needn't worry about the delayed items because we will
1770          * deal with them in create_pending_snapshot(), which is the
1771          * core function of the snapshot creation.
1772          */
1773         ret = create_pending_snapshots(trans, root->fs_info);
1774         if (ret) {
1775                 mutex_unlock(&root->fs_info->reloc_mutex);
1776                 goto scrub_continue;
1777         }
1778
1779         /*
1780          * We insert the dir indexes of the snapshots and update the inode
1781          * of the snapshots' parents after the snapshot creation, so there
1782          * are some delayed items which are not dealt with. Now deal with
1783          * them.
1784          *
1785          * We needn't worry that this operation will corrupt the snapshots,
1786          * because all the trees which are snapshotted will be forced to COW
1787          * the nodes and leaves.
1788          */
1789         ret = btrfs_run_delayed_items(trans, root);
1790         if (ret) {
1791                 mutex_unlock(&root->fs_info->reloc_mutex);
1792                 goto scrub_continue;
1793         }
1794
1795         ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1796         if (ret) {
1797                 mutex_unlock(&root->fs_info->reloc_mutex);
1798                 goto scrub_continue;
1799         }
1800
1801         /*
1802          * make sure none of the code above managed to slip in a
1803          * delayed item
1804          */
1805         btrfs_assert_delayed_root_empty(root);
1806
1807         WARN_ON(cur_trans != trans->transaction);
1808
1809         /* commit_cowonly_roots is responsible for getting the
1810          * various roots consistent with each other.  Every pointer
1811          * in the tree of tree roots has to point to the most up to date
1812          * root for every subvolume and other tree.  So, we have to keep
1813          * the tree logging code from jumping in and changing any
1814          * of the trees.
1815          *
1816          * At this point in the commit, there can't be any tree-log
1817          * writers, but a little lower down we drop the trans mutex
1818          * and let new people in.  By holding the tree_log_mutex
1819          * from now until after the super is written, we avoid races
1820          * with the tree-log code.
1821          */
1822         mutex_lock(&root->fs_info->tree_log_mutex);
1823
1824         ret = commit_fs_roots(trans, root);
1825         if (ret) {
1826                 mutex_unlock(&root->fs_info->tree_log_mutex);
1827                 mutex_unlock(&root->fs_info->reloc_mutex);
1828                 goto scrub_continue;
1829         }
1830
1831         /*
1832          * Since the transaction is done, we should set the inode map cache flag
1833          * before any other coming transaction.
1834          */
1835         if (btrfs_test_opt(root, CHANGE_INODE_CACHE))
1836                 btrfs_set_opt(root->fs_info->mount_opt, INODE_MAP_CACHE);
1837         else
1838                 btrfs_clear_opt(root->fs_info->mount_opt, INODE_MAP_CACHE);
1839
1840         /* commit_fs_roots gets rid of all the tree log roots; it is now
1841          * safe to free the log root tree
1842          */
1843         btrfs_free_log_root_tree(trans, root->fs_info);
1844
1845         ret = commit_cowonly_roots(trans, root);
1846         if (ret) {
1847                 mutex_unlock(&root->fs_info->tree_log_mutex);
1848                 mutex_unlock(&root->fs_info->reloc_mutex);
1849                 goto scrub_continue;
1850         }
1851
1852         /*
1853          * The tasks which save the space cache and inode cache may also
1854          * update ->aborted, check it.
1855          */
1856         if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1857                 ret = cur_trans->aborted;
1858                 mutex_unlock(&root->fs_info->tree_log_mutex);
1859                 mutex_unlock(&root->fs_info->reloc_mutex);
1860                 goto scrub_continue;
1861         }
1862
1863         btrfs_prepare_extent_commit(trans, root);
1864
1865         cur_trans = root->fs_info->running_transaction;
1866
1867         btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1868                             root->fs_info->tree_root->node);
1869         list_add_tail(&root->fs_info->tree_root->dirty_list,
1870                       &cur_trans->switch_commits);
1871
1872         btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
1873                             root->fs_info->chunk_root->node);
1874         list_add_tail(&root->fs_info->chunk_root->dirty_list,
1875                       &cur_trans->switch_commits);
1876
1877         switch_commit_roots(cur_trans, root->fs_info);
1878
1879         assert_qgroups_uptodate(trans);
1880         update_super_roots(root);
1881
1882         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
1883         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
1884         memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy,
1885                sizeof(*root->fs_info->super_copy));
1886
1887         spin_lock(&root->fs_info->trans_lock);
1888         cur_trans->state = TRANS_STATE_UNBLOCKED;
1889         root->fs_info->running_transaction = NULL;
1890         spin_unlock(&root->fs_info->trans_lock);
1891         mutex_unlock(&root->fs_info->reloc_mutex);
1892
1893         wake_up(&root->fs_info->transaction_wait);
1894
1895         ret = btrfs_write_and_wait_transaction(trans, root);
1896         if (ret) {
1897                 btrfs_error(root->fs_info, ret,
1898                             "Error while writing out transaction");
1899                 mutex_unlock(&root->fs_info->tree_log_mutex);
1900                 goto scrub_continue;
1901         }
1902
1903         ret = write_ctree_super(trans, root, 0);
1904         if (ret) {
1905                 mutex_unlock(&root->fs_info->tree_log_mutex);
1906                 goto scrub_continue;
1907         }
1908
1909         /*
1910          * the super is written, we can safely allow the tree-loggers
1911          * to go about their business
1912          */
1913         mutex_unlock(&root->fs_info->tree_log_mutex);
1914
1915         btrfs_finish_extent_commit(trans, root);
1916
1917         root->fs_info->last_trans_committed = cur_trans->transid;
1918         /*
1919          * We needn't acquire the lock here because there is no other task
1920          * which can change it.
1921          */
1922         cur_trans->state = TRANS_STATE_COMPLETED;
1923         wake_up(&cur_trans->commit_wait);
1924
1925         spin_lock(&root->fs_info->trans_lock);
1926         list_del_init(&cur_trans->list);
1927         spin_unlock(&root->fs_info->trans_lock);
1928
1929         btrfs_put_transaction(cur_trans);
1930         btrfs_put_transaction(cur_trans);
1931
1932         if (trans->type & __TRANS_FREEZABLE)
1933                 sb_end_intwrite(root->fs_info->sb);
1934
1935         trace_btrfs_transaction_commit(root);
1936
1937         btrfs_scrub_continue(root);
1938
1939         if (current->journal_info == trans)
1940                 current->journal_info = NULL;
1941
1942         kmem_cache_free(btrfs_trans_handle_cachep, trans);
1943
1944         if (current != root->fs_info->transaction_kthread)
1945                 btrfs_run_delayed_iputs(root);
1946
1947         return ret;
1948
1949 scrub_continue:
1950         btrfs_scrub_continue(root);
1951 cleanup_transaction:
1952         btrfs_trans_release_metadata(trans, root);
1953         trans->block_rsv = NULL;
1954         if (trans->qgroup_reserved) {
1955                 btrfs_qgroup_free(root, trans->qgroup_reserved);
1956                 trans->qgroup_reserved = 0;
1957         }
1958         btrfs_warn(root->fs_info, "Skipping commit of aborted transaction.");
1959         if (current->journal_info == trans)
1960                 current->journal_info = NULL;
1961         cleanup_transaction(trans, root, ret);
1962
1963         return ret;
1964 }
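
/*
 * Typical caller pattern (hypothetical, for illustration only): start a
 * transaction, make the tree modifications, then force a full commit.
 * btrfs_commit_transaction() frees the handle on both success and error,
 * so the caller must not touch it afterwards.
 */
static int example_modify_and_commit(struct btrfs_root *root)
{
        struct btrfs_trans_handle *trans;

        trans = btrfs_start_transaction(root, 1);
        if (IS_ERR(trans))
                return PTR_ERR(trans);
        /* ... tree modifications would go here ... */
        return btrfs_commit_transaction(trans, root);
}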
1965
1966 /*
1967  * return < 0 if error
1968  * 0 if there are no more dead_roots at the time of call
1969  * 1 there are more to be processed, call me again
1970  *
1971  * The return value indicates there are certainly more snapshots to delete, but
1972  * if a new one arrives during processing, it may return 0. We don't mind,
1973  * because btrfs_commit_super will poke the cleaner thread and it will process it a
1974  * few seconds later.
1975  */
1976 int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
1977 {
1978         int ret;
1979         struct btrfs_fs_info *fs_info = root->fs_info;
1980
1981         spin_lock(&fs_info->trans_lock);
1982         if (list_empty(&fs_info->dead_roots)) {
1983                 spin_unlock(&fs_info->trans_lock);
1984                 return 0;
1985         }
1986         root = list_first_entry(&fs_info->dead_roots,
1987                         struct btrfs_root, root_list);
1988         list_del_init(&root->root_list);
1989         spin_unlock(&fs_info->trans_lock);
1990
1991         pr_debug("BTRFS: cleaner removing %llu\n", root->objectid);
1992
1993         btrfs_kill_all_delayed_nodes(root);
1994
1995         if (btrfs_header_backref_rev(root->node) <
1996                         BTRFS_MIXED_BACKREF_REV)
1997                 ret = btrfs_drop_snapshot(root, NULL, 0, 0);
1998         else
1999                 ret = btrfs_drop_snapshot(root, NULL, 1, 0);
2000         /*
2001          * If we encounter a transaction abort during snapshot cleaning, we
2002          * don't want to crash here
2003          */
2004         return (ret < 0) ? 0 : 1;
2005 }
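
/*
 * Usage sketch (hypothetical): drain the dead roots the way a cleaner
 * loop would, calling again while the function reports more work and
 * stopping once it returns 0.
 */
static void example_drain_dead_roots(struct btrfs_root *root)
{
        while (btrfs_clean_one_deleted_snapshot(root) > 0)
                cond_resched();
}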