2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
20 #include <linux/sched.h>
21 #include <linux/writeback.h>
22 #include <linux/pagemap.h>
23 #include <linux/blkdev.h>
26 #include "transaction.h"
// Tag used on entries of fs_info->fs_roots_radix.  In this file it is set by
// record_root_in_trans() and looked up and cleared by commit_fs_roots().
30 #define BTRFS_ROOT_TRANS_TAG 0
// Drop one reference on a transaction.
//
// Warns when the count is already zero.  Once the count reaches zero the
// transaction is unlinked from the list it sits on, zeroed, and handed back
// to btrfs_transaction_cachep.
//
// NOTE(review): every caller visible in this file holds
// fs_info->trans_mutex around this call - confirm that is a hard requirement.
32 static noinline void put_transaction(struct btrfs_transaction *transaction)
34 WARN_ON(transaction->use_count == 0);
35 transaction->use_count--;
36 if (transaction->use_count == 0) {
37 list_del_init(&transaction->list);
// wipe the struct before it goes back to the cache
38 memset(transaction, 0, sizeof(*transaction));
39 kmem_cache_free(btrfs_transaction_cachep, transaction);
// Make root->commit_root follow the current root node: the reference held
// on the old commit root is released and the result of btrfs_root_node()
// is stored in its place.
// NOTE(review): btrfs_root_node() presumably returns a referenced buffer for
// the current root->node - confirm against its definition.
43 static noinline void switch_commit_root(struct btrfs_root *root)
45 free_extent_buffer(root->commit_root);
46 root->commit_root = btrfs_root_node(root);
50 * either allocate a new transaction or hop into the existing one
// Join the running transaction of root->fs_info, creating it when needed.
//
// New-transaction path, as far as it is visible here: an object is taken from
// btrfs_transaction_cachep, fs_info->generation is bumped and becomes the
// transid, the counters start at num_writers = 1, num_joined = 0 and
// use_count = 1, the wait queues, delayed ref bookkeeping, pending snapshot
// list and dirty_pages tree are initialised, the transaction is appended to
// fs_info->trans_list and finally published as fs_info->running_transaction
// under new_trans_lock.
//
// Existing-transaction path: num_writers and num_joined are incremented.
//
// NOTE(review): the branch statements that separate the two paths, the
// allocation flags and the return statement are not present in this copy of
// the file - the split described above follows the one-line description
// that precedes this function; confirm against the full source.
52 static noinline int join_transaction(struct btrfs_root *root)
54 struct btrfs_transaction *cur_trans;
55 cur_trans = root->fs_info->running_transaction;
57 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
// the freshly incremented generation number becomes the transaction id
60 root->fs_info->generation++;
61 cur_trans->num_writers = 1;
62 cur_trans->num_joined = 0;
63 cur_trans->transid = root->fs_info->generation;
64 init_waitqueue_head(&cur_trans->writer_wait);
65 init_waitqueue_head(&cur_trans->commit_wait);
66 cur_trans->in_commit = 0;
67 cur_trans->blocked = 0;
// initial reference; start_transaction() takes a further one per handle
68 cur_trans->use_count = 1;
69 cur_trans->commit_done = 0;
70 cur_trans->start_time = get_seconds();
// start with an empty delayed ref tree and zeroed counters
72 cur_trans->delayed_refs.root.rb_node = NULL;
73 cur_trans->delayed_refs.num_entries = 0;
74 cur_trans->delayed_refs.num_heads_ready = 0;
75 cur_trans->delayed_refs.num_heads = 0;
76 cur_trans->delayed_refs.flushing = 0;
77 cur_trans->delayed_refs.run_delayed_start = 0;
78 spin_lock_init(&cur_trans->delayed_refs.lock);
80 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
81 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
82 extent_io_tree_init(&cur_trans->dirty_pages,
83 root->fs_info->btree_inode->i_mapping,
// publish under new_trans_lock, the same lock
// btrfs_transaction_in_commit() takes to read running_transaction
85 spin_lock(&root->fs_info->new_trans_lock);
86 root->fs_info->running_transaction = cur_trans;
87 spin_unlock(&root->fs_info->new_trans_lock);
// one more writer has joined the transaction
89 cur_trans->num_writers++;
90 cur_trans->num_joined++;
97 * this does all the record keeping required to make sure that a reference
98 * counted root is properly recorded in a given transaction. This is required
99 * to make sure the old root from before we joined the transaction is deleted
100 * when the transaction commits
// Record a reference counted root in the transaction the handle belongs to.
//
// Only acts when root->ref_cows is set and the root has not been recorded
// in this transaction yet (root->last_trans < trans->transid).  In that case
// the root is tagged with BTRFS_ROOT_TRANS_TAG in fs_info->fs_roots_radix,
// root->last_trans is advanced to trans->transid and
// btrfs_init_reloc_root() is called for it.
//
// NOTE(review): the callers visible in this file hold fs_info->trans_mutex;
// confirm that this is required.  The return statement is not present in
// this copy of the file.
102 static noinline int record_root_in_trans(struct btrfs_trans_handle *trans,
103 struct btrfs_root *root)
105 if (root->ref_cows && root->last_trans < trans->transid) {
// sanity checks: the extent root is not expected here, the root item must
// still be referenced, and commit_root must still equal the current node
106 WARN_ON(root == root->fs_info->extent_root);
107 WARN_ON(root->root_item.refs == 0);
108 WARN_ON(root->commit_root != root->node);
// the tag is what commit_fs_roots() later searches for
110 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
111 (unsigned long)root->root_key.objectid,
112 BTRFS_ROOT_TRANS_TAG);
113 root->last_trans = trans->transid;
114 btrfs_init_reloc_root(trans, root);
// Locked wrapper around record_root_in_trans(): takes fs_info->trans_mutex,
// skips the work when root->last_trans already equals trans->transid, and
// otherwise records the root before dropping the mutex again.
//
// NOTE(review): the lines between the signature and the mutex_lock() call,
// and the return statements, are missing from this copy - there may be an
// additional early exit that is not visible here.
119 int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
120 struct btrfs_root *root)
125 mutex_lock(&root->fs_info->trans_mutex);
// already recorded in this transaction
126 if (root->last_trans == trans->transid) {
127 mutex_unlock(&root->fs_info->trans_mutex);
131 record_root_in_trans(trans, root);
132 mutex_unlock(&root->fs_info->trans_mutex);
136 /* wait for commit against the current transaction to become unblocked
137 * when this is done, it is safe to start a new transaction, but the current
138 * transaction might not be fully on disk.
// Wait until the running transaction is no longer blocked.
//
// Must be entered with fs_info->trans_mutex held: the mutex is dropped
// around the sleep and re-taken afterwards.  A use_count reference is held
// on the transaction across the wait and released with put_transaction().
// Does nothing when there is no running transaction or it is not blocked.
//
// NOTE(review): the wait entry declaration, the loop construct and the call
// that actually sleeps are not present in this copy of the file.
140 static void wait_current_trans(struct btrfs_root *root)
142 struct btrfs_transaction *cur_trans;
144 cur_trans = root->fs_info->running_transaction;
145 if (cur_trans && cur_trans->blocked) {
// keep the transaction alive while the mutex is dropped below
147 cur_trans->use_count++;
149 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
150 TASK_UNINTERRUPTIBLE);
// re-check after queueing on the wait queue
151 if (cur_trans->blocked) {
152 mutex_unlock(&root->fs_info->trans_mutex);
154 mutex_lock(&root->fs_info->trans_mutex);
155 finish_wait(&root->fs_info->transaction_wait,
158 finish_wait(&root->fs_info->transaction_wait,
163 put_transaction(cur_trans);
// Common implementation behind the three public start/join entry points.
//
// Allocates a handle from btrfs_trans_handle_cachep (GFP_NOFS), then under
// fs_info->trans_mutex: optionally waits for a blocked transaction, joins or
// creates the running transaction via join_transaction(), fills in the
// handle, takes a use_count reference on the transaction for the handle and
// records root in the transaction.
//
// wait selects whether wait_current_trans() is called first (never while
// fs_info->log_root_recovering is set):
//   0 - never wait
//   1 - wait unless fs_info->open_ioctl_trans is set
//   2 - always wait
//
// num_blocks is stored in the handle as blocks_reserved.
//
// NOTE(review): the checks on the allocation result and on ret, some handle
// field assignments and the return statement are not visible in this copy.
167 static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
168 int num_blocks, int wait)
170 struct btrfs_trans_handle *h =
171 kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
174 mutex_lock(&root->fs_info->trans_mutex);
175 if (!root->fs_info->log_root_recovering &&
176 ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2))
177 wait_current_trans(root);
178 ret = join_transaction(root);
// bind the handle to whatever transaction is running now
181 h->transid = root->fs_info->running_transaction->transid;
182 h->transaction = root->fs_info->running_transaction;
183 h->blocks_reserved = num_blocks;
186 h->alloc_exclude_nr = 0;
187 h->alloc_exclude_start = 0;
188 h->delayed_ref_updates = 0;
// per-handle reference; __btrfs_end_transaction() calls put_transaction()
190 root->fs_info->running_transaction->use_count++;
191 record_root_in_trans(h, root);
192 mutex_unlock(&root->fs_info->trans_mutex);
// Start a transaction with wait mode 1: start_transaction() first waits for
// a blocked running transaction unless fs_info->open_ioctl_trans is set or
// log recovery is in progress.
// NOTE(review): the line declaring num_blocks is missing from this copy.
196 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
199 return start_transaction(root, num_blocks, 1);
// Join the running transaction with wait mode 0: start_transaction() never
// calls wait_current_trans() first, even when the transaction is blocked.
// NOTE(review): the line declaring num_blocks is missing from this copy.
201 struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
204 return start_transaction(root, num_blocks, 0);
// Start a transaction with wait mode 2: start_transaction() waits for a
// blocked running transaction regardless of fs_info->open_ioctl_trans
// (but still not while log recovery is in progress).
// NOTE(review): the line declaring num_blocks is missing from this copy.
207 struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
210 return start_transaction(r, num_blocks, 2);
213 /* wait for a transaction commit to be fully complete */
// Sleep until commit->commit_done is set.
//
// Takes fs_info->trans_mutex itself, so the caller must not hold it.  The
// flag is tested under the mutex; the mutex is dropped around the sleep and
// re-taken before the flag is tested again.  Waiters queue on
// commit->commit_wait in TASK_UNINTERRUPTIBLE state.
//
// NOTE(review): the caller visible in btrfs_commit_transaction() takes a
// use_count reference on the transaction before calling this.  The wait
// entry declaration, the sleep call, the statement guarded by the inner
// check and the return statement are missing from this copy.
214 static noinline int wait_for_commit(struct btrfs_root *root,
215 struct btrfs_transaction *commit)
218 mutex_lock(&root->fs_info->trans_mutex);
219 while (!commit->commit_done) {
220 prepare_to_wait(&commit->commit_wait, &wait,
221 TASK_UNINTERRUPTIBLE);
// re-check after queueing on the wait queue
222 if (commit->commit_done)
224 mutex_unlock(&root->fs_info->trans_mutex);
226 mutex_lock(&root->fs_info->trans_mutex);
228 mutex_unlock(&root->fs_info->trans_mutex);
229 finish_wait(&commit->commit_wait, &wait);
235 * rate limit against the drop_snapshot code. This helps to slow down new
236 * operations if the drop_snapshot code isn't able to keep up.
// Rate limit the caller against the snapshot dropping code.
//
// Visible behaviour: when info->throttles is non-zero, the current
// info->throttle_gen is sampled and the task queues (uninterruptibly) on
// info->transaction_throttle, repeating for as long as throttle_gen still
// equals the sampled value.  When info->throttles is found to be zero
// inside the loop, finish_wait() is called right away.  Afterwards
// fs_info->total_ref_cache_size is compared against 1MB, 5MB and 10MB.
//
// NOTE(review): the second half of each size check, what is done when a
// check fires, the role of harder_count, the sleep call inside the loop
// and the early-exit statement are all missing from this copy of the file.
238 static void throttle_on_drops(struct btrfs_root *root)
240 struct btrfs_fs_info *info = root->fs_info;
241 int harder_count = 0;
244 if (atomic_read(&info->throttles)) {
// remember the generation so a change can be detected below
247 thr = atomic_read(&info->throttle_gen);
250 prepare_to_wait(&info->transaction_throttle,
251 &wait, TASK_UNINTERRUPTIBLE);
252 if (!atomic_read(&info->throttles)) {
253 finish_wait(&info->transaction_throttle, &wait);
257 finish_wait(&info->transaction_throttle, &wait);
258 } while (thr == atomic_read(&info->throttle_gen));
// escalating thresholds on the size of the reference cache
261 if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 &&
265 if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 &&
269 if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 &&
// Block the caller while the running transaction is blocked.
//
// Takes fs_info->trans_mutex and calls wait_current_trans(), except when
// fs_info->open_ioctl_trans is set, in which case nothing is waited for.
276 void btrfs_throttle(struct btrfs_root *root)
278 mutex_lock(&root->fs_info->trans_mutex);
279 if (!root->fs_info->open_ioctl_trans)
280 wait_current_trans(root);
281 mutex_unlock(&root->fs_info->trans_mutex);
// Common implementation of btrfs_end_transaction() and
// btrfs_end_transaction_throttle().
//
// First deals with delayed refs queued through this handle: the
// delayed_ref_updates count of the handle is read and reset, and
// btrfs_run_delayed_refs() is called with that count.
// Then, under info->trans_mutex, the writer count of the running
// transaction is dropped, anyone sleeping on writer_wait is woken and one
// reference on the transaction is released.  After the mutex is dropped the
// handle itself is zeroed and returned to btrfs_trans_handle_cachep, so the
// caller must not touch trans afterwards.
//
// NOTE(review): the loop structure around the delayed ref processing, the
// first half of the num_heads_ready condition, the statement executed when
// delayed_refs.flushing is set, any use of the throttle argument and the
// return statement are missing from this copy of the file.
284 static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
285 struct btrfs_root *root, int throttle)
287 struct btrfs_transaction *cur_trans;
288 struct btrfs_fs_info *info = root->fs_info;
// take over the pending count and reset it on the handle
292 unsigned long cur = trans->delayed_ref_updates;
293 trans->delayed_ref_updates = 0;
295 trans->transaction->delayed_refs.num_heads_ready > 64) {
296 trans->delayed_ref_updates = 0;
299 * do a full flush if the transaction is trying
302 if (trans->transaction->delayed_refs.flushing)
304 btrfs_run_delayed_refs(trans, root, cur);
311 mutex_lock(&info->trans_mutex);
312 cur_trans = info->running_transaction;
// the handle is expected to belong to the running transaction
313 WARN_ON(cur_trans != trans->transaction);
314 WARN_ON(cur_trans->num_writers < 1);
315 cur_trans->num_writers--;
// btrfs_commit_transaction() sleeps on writer_wait until writers are gone
317 if (waitqueue_active(&cur_trans->writer_wait))
318 wake_up(&cur_trans->writer_wait);
// presumably pairs with the use_count++ in start_transaction()
319 put_transaction(cur_trans);
320 mutex_unlock(&info->trans_mutex);
321 memset(trans, 0, sizeof(*trans));
322 kmem_cache_free(btrfs_trans_handle_cachep, trans);
// End a transaction handle without throttling: forwards to
// __btrfs_end_transaction() with throttle = 0.  The handle is freed by that
// call and must not be used afterwards.
327 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
328 struct btrfs_root *root)
330 return __btrfs_end_transaction(trans, root, 0);
// End a transaction handle with throttling requested: forwards to
// __btrfs_end_transaction() with throttle = 1.  The handle is freed by that
// call and must not be used afterwards.
333 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
334 struct btrfs_root *root)
336 return __btrfs_end_transaction(trans, root, 1);
340 * when btree blocks are allocated, they have some corresponding bits set for
341 * them in one of two extent_io trees. This is used to make sure all of
342 * those extents are on disk for transaction or log commit
// Write out, and wait for, the btree pages covered by ranges marked in the
// given extent_io tree.
//
// Two passes are visible:
//   1. Ranges are looked up with find_first_extent_bit() from the running
//      start offset.  Each page of a range is fetched from the btree inode
//      mapping, locked through btree_lock_page_hook() and written with
//      write_one_page(); pages that lost their mapping or are already
//      under writeback get separate handling.
//   2. Ranges are looked up again from offset 0 and cleared with
//      clear_extent_dirty().  Each page is waited on with
//      wait_on_page_writeback(); pages found dirty again are written once
//      more first.
//
// NOTE(review): local declarations, both outer loop constructs, the bit
// mask passed to find_first_extent_bit(), the null checks on the looked up
// page, error propagation and the return statement are missing from this
// copy of the file.
344 int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
345 struct extent_io_tree *dirty_pages)
351 struct inode *btree_inode = root->fs_info->btree_inode;
357 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
361 while (start <= end) {
// step through the range one page cache page at a time
364 index = start >> PAGE_CACHE_SHIFT;
365 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
366 page = find_get_page(btree_inode->i_mapping, index);
370 btree_lock_page_hook(page);
// page is no longer attached to a mapping
371 if (!page->mapping) {
373 page_cache_release(page);
377 if (PageWriteback(page)) {
379 wait_on_page_writeback(page);
382 page_cache_release(page);
386 err = write_one_page(page, 0);
389 page_cache_release(page);
// second pass restarts the search from offset 0
393 ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
398 clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
399 while (start <= end) {
400 index = start >> PAGE_CACHE_SHIFT;
401 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
402 page = find_get_page(btree_inode->i_mapping, index);
// redirtied since the first pass: write it again
405 if (PageDirty(page)) {
406 btree_lock_page_hook(page);
407 wait_on_page_writeback(page);
408 err = write_one_page(page, 0);
412 wait_on_page_writeback(page);
413 page_cache_release(page);
// Write the btree blocks belonging to a transaction and wait for them.
//
// With a usable handle (trans and trans->transaction both non-NULL) only
// the ranges recorded in the dirty_pages tree of that transaction are
// written, via btrfs_write_and_wait_marked_extents().  Without one, the
// whole btree inode mapping is flushed with filemap_write_and_wait().
// The result of whichever call is made is returned.
422 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
423 struct btrfs_root *root)
425 if (!trans || !trans->transaction) {
426 struct inode *btree_inode;
427 btree_inode = root->fs_info->btree_inode;
428 return filemap_write_and_wait(btree_inode->i_mapping);
430 return btrfs_write_and_wait_marked_extents(root,
431 &trans->transaction->dirty_pages);
435 * this is used to update the root pointer in the tree of tree roots.
437 * But, in the case of the extent allocation tree, updating the root
438 * pointer may allocate blocks which may change the root of the extent
441 * So, this loops and repeats and makes sure the cowonly root didn't
442 * change while the root pointer was being updated in the metadata.
// Bring the root item of one cow-only root up to date in the tree of tree
// roots.
//
// Dirty block groups are written first.  The bytenr stored in
// root->root_item is then compared with root->node->start; when they
// differ the root item is pointed at the current node, stored through
// btrfs_update_root() in tree_root, and dirty block groups are written
// again.  Finally the commit root is switched, except for the extent root
// (commit_cowonly_roots() switches that one itself under
// extent_commit_sem).
//
// NOTE(review): the loop construct, the statement taken when the bytenr
// already matches, the remaining btrfs_update_root() arguments, error
// checks and the return statement are missing from this copy of the file.
444 static int update_cowonly_root(struct btrfs_trans_handle *trans,
445 struct btrfs_root *root)
449 struct btrfs_root *tree_root = root->fs_info->tree_root;
451 btrfs_write_dirty_block_groups(trans, root);
// has the root node moved since the root item was last written?
454 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
455 if (old_root_bytenr == root->node->start)
458 btrfs_set_root_node(&root->root_item, root->node);
459 ret = btrfs_update_root(trans, tree_root,
464 ret = btrfs_write_dirty_block_groups(trans, root);
468 if (root != root->fs_info->extent_root)
469 switch_commit_root(root);
475 * update all the cowonly tree roots on disk
// Update every dirty cow-only root on disk as part of a commit.
//
// Visible sequence: all delayed refs are run (count (unsigned long)-1), the
// root node of tree_root is locked, passed through btrfs_cow_block() and
// released, delayed refs are run again, and then each entry of
// fs_info->dirty_cowonly_roots is handed to update_cowonly_root().
// Last, the commit root of the extent root is switched while holding
// fs_info->extent_commit_sem for writing.
//
// NOTE(review): the statement that removes an entry from
// dirty_cowonly_roots inside the loop, the checks on ret and the return
// statement are not present in this copy of the file.
477 static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
478 struct btrfs_root *root)
480 struct btrfs_fs_info *fs_info = root->fs_info;
481 struct list_head *next;
482 struct extent_buffer *eb;
485 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
// cow the root node of the tree of tree roots
488 eb = btrfs_lock_root_node(fs_info->tree_root);
489 btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb);
490 btrfs_tree_unlock(eb);
491 free_extent_buffer(eb);
493 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
496 while (!list_empty(&fs_info->dirty_cowonly_roots)) {
497 next = fs_info->dirty_cowonly_roots.next;
// note: the root parameter is reused as the loop variable from here on
499 root = list_entry(next, struct btrfs_root, dirty_list);
501 update_cowonly_root(trans, root);
// update_cowonly_root() skips the extent root, so switch it here
504 down_write(&fs_info->extent_commit_sem);
505 switch_commit_root(fs_info->extent_root);
506 up_write(&fs_info->extent_commit_sem);
512 * dead roots are old snapshots that need to be deleted. This adds the
513 * given root to the fs_info->dead_roots list while holding trans_mutex.
// Queue a root for later deletion: under fs_info->trans_mutex the root is
// linked through root->root_list onto the front of fs_info->dead_roots.
// btrfs_clean_old_snapshots() later takes the whole list over.
// NOTE(review): the return statement is not present in this copy.
516 int btrfs_add_dead_root(struct btrfs_root *root)
518 mutex_lock(&root->fs_info->trans_mutex);
519 list_add(&root->root_list, &root->fs_info->dead_roots);
520 mutex_unlock(&root->fs_info->trans_mutex);
525 * update the root items of all fs tree roots tagged in this transaction
// Write back the root items of all roots recorded in this transaction.
//
// Roots are found by looking up BTRFS_ROOT_TRANS_TAG in
// fs_info->fs_roots_radix, in batches held in the gang array (8 slots).
// For each root the tag is cleared, its log is freed with
// btrfs_free_log(), btrfs_update_reloc_root() is called, and when the root
// node differs from the commit root the commit root is switched and the
// root item is pointed at the new node.  The root item is then stored in
// fs_info->tree_root through btrfs_update_root().
//
// NOTE(review): the outer loop, the remaining lookup and update arguments,
// the assignment of root from the gang array, error handling and the
// return statement are missing from this copy of the file.
527 static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
528 struct btrfs_root *root)
530 struct btrfs_root *gang[8];
531 struct btrfs_fs_info *fs_info = root->fs_info;
537 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
540 BTRFS_ROOT_TRANS_TAG);
// ret is the number of roots found in this batch
543 for (i = 0; i < ret; i++) {
// undo the tagging done by record_root_in_trans()
545 radix_tree_tag_clear(&fs_info->fs_roots_radix,
546 (unsigned long)root->root_key.objectid,
547 BTRFS_ROOT_TRANS_TAG);
549 btrfs_free_log(trans, root);
550 btrfs_update_reloc_root(trans, root);
// only roots whose node changed need a new commit root
552 if (root->commit_root != root->node) {
553 switch_commit_root(root);
554 btrfs_set_root_node(&root->root_item,
558 err = btrfs_update_root(trans, fs_info->tree_root,
569 * defrag a given btree. If cacheonly == 1, this won't read from the disk,
570 * otherwise every leaf in the btree is read and defragged.
// Defragment one btree, working in repeated short transactions.
//
// root->defrag_running is tested on entry and used as a busy flag: it is
// set before btrfs_defrag_leaves() runs and cleared before the final
// btrfs_end_transaction().  After each btrfs_defrag_leaves() call the
// transaction is ended, btrfs_btree_balance_dirty() is called with the
// blocks_used count sampled from the handle, and a new transaction is
// started.  The test on fs_info->closing and on ret != -EAGAIN decides
// when to stop.  cacheonly is passed straight to btrfs_defrag_leaves().
//
// NOTE(review): the loop construct, the statements guarded by the two
// visible conditions and the return statements are missing from this copy
// of the file.
572 int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
574 struct btrfs_fs_info *info = root->fs_info;
576 struct btrfs_trans_handle *trans;
580 if (root->defrag_running)
582 trans = btrfs_start_transaction(root, 1);
584 root->defrag_running = 1;
585 ret = btrfs_defrag_leaves(trans, root, cacheonly);
// sample the count before the handle is freed by btrfs_end_transaction()
586 nr = trans->blocks_used;
587 btrfs_end_transaction(trans, root);
588 btrfs_btree_balance_dirty(info->tree_root, nr);
591 trans = btrfs_start_transaction(root, 1);
592 if (root->fs_info->closing || ret != -EAGAIN)
595 root->defrag_running = 0;
597 btrfs_end_transaction(trans, root);
603 * when dropping snapshots, we generate a ton of delayed refs, and it makes
604 * sense not to join the transaction while it is trying to flush the current
605 * queue of delayed refs out.
607 * This is used by the drop snapshot code only
// Wait while the running transaction is flushing its delayed refs.
//
// Under info->trans_mutex, loops for as long as there is a running
// transaction whose delayed_refs.flushing flag is set.  Each round queues
// the task on info->transaction_wait (uninterruptible), drops the mutex,
// and re-takes it before finish_wait() and the next test of the condition.
//
// NOTE(review): the wait entry declaration, the call that actually sleeps
// and the return statement are missing from this copy of the file.
609 static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
613 mutex_lock(&info->trans_mutex);
614 while (info->running_transaction &&
615 info->running_transaction->delayed_refs.flushing) {
616 prepare_to_wait(&info->transaction_wait, &wait,
617 TASK_UNINTERRUPTIBLE);
618 mutex_unlock(&info->trans_mutex);
622 mutex_lock(&info->trans_mutex);
623 finish_wait(&info->transaction_wait, &wait);
625 mutex_unlock(&info->trans_mutex);
630 * Given a dead root that needs to be deleted, call btrfs_drop_snapshot on it
// Drop one dead root (an old snapshot) and remove its root item.
//
// Visible sequence: wait_transaction_pre_flush() is called so that work is
// not piled onto a transaction that is flushing delayed refs, a
// transaction is started on tree_root, and the handle is released again
// if the joined transaction turns out to be flushing.  btrfs_drop_snapshot()
// then does the dropping, the root item is written back with
// btrfs_update_root(), and the transaction is ended followed by
// btrfs_btree_balance_dirty() using the blocks_used count sampled from the
// handle.  Finally btrfs_del_root() removes the root item, the transaction
// is ended, and the references on root->node and root->commit_root are
// released.
//
// NOTE(review): the loop construct, the error checks and the exit
// conditions are missing from this copy of the file, so it cannot be seen
// here which of these steps repeat.
633 int btrfs_drop_dead_root(struct btrfs_root *root)
635 struct btrfs_trans_handle *trans;
636 struct btrfs_root *tree_root = root->fs_info->tree_root;
642 * we don't want to jump in and create a bunch of
643 * delayed refs if the transaction is starting to close
645 wait_transaction_pre_flush(tree_root->fs_info);
646 trans = btrfs_start_transaction(tree_root, 1);
649 * we've joined a transaction, make sure it isn't
652 if (trans->transaction->delayed_refs.flushing) {
653 btrfs_end_transaction(trans, tree_root);
657 ret = btrfs_drop_snapshot(trans, root);
661 ret = btrfs_update_root(trans, tree_root,
667 nr = trans->blocks_used;
668 ret = btrfs_end_transaction(trans, tree_root);
671 btrfs_btree_balance_dirty(tree_root, nr);
676 ret = btrfs_del_root(trans, tree_root, &root->root_key);
679 nr = trans->blocks_used;
680 ret = btrfs_end_transaction(trans, tree_root);
683 free_extent_buffer(root->node);
684 free_extent_buffer(root->commit_root);
687 btrfs_btree_balance_dirty(tree_root, nr);
693 * new snapshots need to be created at a very specific time in the
694 * transaction commit. This does the actual creation
// Create the root item and root node for one pending snapshot.
//
// A temporary root item is allocated (GFP_NOFS) and a free objectid is
// obtained from tree_root.  The source root is recorded in the
// transaction, its last_snapshot field is set to trans->transid, and its
// root item is copied as the starting point for the snapshot.  The root
// node of the source is locked, cowed and copied with btrfs_copy_root();
// the copy becomes the node of the new root item, which is inserted into
// the tree of tree roots under the key
// (objectid, BTRFS_ROOT_ITEM_KEY, trans->transid).
// The key, with its offset changed to (u64)-1, is saved in
// pending->root_key for finish_pending_snapshot().
//
// NOTE(review): the error paths after the allocation, objectid lookup and
// root insertion, the last btrfs_insert_root() argument and the return
// statement are missing from this copy of the file.
696 static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
697 struct btrfs_fs_info *fs_info,
698 struct btrfs_pending_snapshot *pending)
700 struct btrfs_key key;
701 struct btrfs_root_item *new_root_item;
702 struct btrfs_root *tree_root = fs_info->tree_root;
703 struct btrfs_root *root = pending->root;
704 struct extent_buffer *tmp;
705 struct extent_buffer *old;
709 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
710 if (!new_root_item) {
714 ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid);
718 record_root_in_trans(trans, root);
// stamp the source root before copying its item, so the copy carries it too
719 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
720 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
722 key.objectid = objectid;
723 /* record when the snapshot was created in key.offset */
724 key.offset = trans->transid;
725 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
// cow the source root node, then copy it for the snapshot
727 old = btrfs_lock_root_node(root);
728 btrfs_cow_block(trans, root, old, NULL, 0, &old);
729 btrfs_set_lock_blocking(old);
731 btrfs_copy_root(trans, root, old, &tmp, objectid);
732 btrfs_tree_unlock(old);
733 free_extent_buffer(old);
735 btrfs_set_root_node(new_root_item, tmp);
736 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
738 btrfs_tree_unlock(tmp);
739 free_extent_buffer(tmp);
// the saved key uses offset -1 instead of the creation transid
743 key.offset = (u64)-1;
744 memcpy(&pending->root_key, &key, sizeof(key));
746 kfree(new_root_item);
// Make a snapshot created by create_pending_snapshot() visible in its
// parent directory.
//
// Joins the running transaction on the root of the parent directory, then:
// picks a directory index with btrfs_set_inode_index(), inserts a dir item
// of type BTRFS_FT_DIR that points at pending->root_key, grows the size of
// the parent inode by twice the name length and writes the inode back,
// adds a root ref in the tree of tree roots, and finally looks up the new
// inode and instantiates pending->dentry with it.
//
// NOTE(review): the result of btrfs_set_inode_index() is overwritten by
// the following btrfs_insert_dir_item() call with no check in between.
// NOTE(review): the transaction is joined on parent_root but ended with
// fs_info->fs_root - confirm that the mismatch is intended.
// NOTE(review): some local declarations, call arguments, error checks and
// the return statement are missing from this copy of the file.
750 static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
751 struct btrfs_pending_snapshot *pending)
756 struct btrfs_trans_handle *trans;
757 struct inode *parent_inode;
759 struct btrfs_root *parent_root;
761 parent_inode = pending->dentry->d_parent->d_inode;
762 parent_root = BTRFS_I(parent_inode)->root;
763 trans = btrfs_join_transaction(parent_root, 1);
766 * insert the directory item
768 namelen = strlen(pending->name);
769 ret = btrfs_set_inode_index(parent_inode, &index);
770 ret = btrfs_insert_dir_item(trans, parent_root,
771 pending->name, namelen,
773 &pending->root_key, BTRFS_FT_DIR, index);
// directory size accounting: two times the name length per entry
778 btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2);
779 ret = btrfs_update_inode(trans, parent_root, parent_inode);
782 ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
783 pending->root_key.objectid,
784 parent_root->root_key.objectid,
785 parent_inode->i_ino, index, pending->name,
790 inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
791 d_instantiate(pending->dentry, inode);
793 btrfs_end_transaction(trans, fs_info->fs_root);
798 * create all the snapshots we've scheduled for creation
// Call create_pending_snapshot() for every entry on the pending_snapshots
// list of the transaction.  Entries stay on the list; they are consumed
// later by finish_pending_snapshots().
// NOTE(review): the handling of ret inside the loop and the return
// statement are missing from this copy of the file.
800 static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
801 struct btrfs_fs_info *fs_info)
803 struct btrfs_pending_snapshot *pending;
804 struct list_head *head = &trans->transaction->pending_snapshots;
807 list_for_each_entry(pending, head, list) {
808 ret = create_pending_snapshot(trans, fs_info, pending);
// Drain the pending_snapshots list of the transaction: each entry is passed
// to finish_pending_snapshot(), unlinked from the list, and its name
// string is freed.
// NOTE(review): the handling of ret, any further freeing of the entry and
// the return statement are missing from this copy of the file.
814 static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans,
815 struct btrfs_fs_info *fs_info)
817 struct btrfs_pending_snapshot *pending;
818 struct list_head *head = &trans->transaction->pending_snapshots;
821 while (!list_empty(head)) {
// always work on the first remaining entry
822 pending = list_entry(head->next,
823 struct btrfs_pending_snapshot, list);
824 ret = finish_pending_snapshot(fs_info, pending);
826 list_del(&pending->list);
827 kfree(pending->name);
// Copy the location of the chunk root and of the tree of tree roots into
// the in-memory super block (fs_info->super_copy).
//
// For both roots the bytenr, generation and level fields are taken from
// the root item and assigned field by field, with no conversion.
// btrfs_commit_transaction() calls this right after it has refreshed both
// root items with btrfs_set_root_node().
833 static void update_super_roots(struct btrfs_root *root)
835 struct btrfs_root_item *root_item;
836 struct btrfs_super_block *super;
838 super = &root->fs_info->super_copy;
// chunk tree root
840 root_item = &root->fs_info->chunk_root->root_item;
841 super->chunk_root = root_item->bytenr;
842 super->chunk_root_generation = root_item->generation;
843 super->chunk_root_level = root_item->level;
// tree of tree roots
845 root_item = &root->fs_info->tree_root->root_item;
846 super->root = root_item->bytenr;
847 super->generation = root_item->generation;
848 super->root_level = root_item->level;
// Report the in_commit flag of the running transaction.
//
// info->running_transaction is read under info->new_trans_lock, the lock
// that join_transaction() and btrfs_commit_transaction() hold when they
// change that pointer.  When no transaction is running, ret keeps its
// initial value.
// NOTE(review): the declaration of ret and the return statement are
// missing from this copy of the file.
851 int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
854 spin_lock(&info->new_trans_lock);
855 if (info->running_transaction)
856 ret = info->running_transaction->in_commit;
857 spin_unlock(&info->new_trans_lock);
// Commit the transaction that the handle belongs to.
//
// Visible phases, in order:
//   1. Run ordered operations and make two passes over the delayed refs,
//      setting delayed_refs.flushing in between.
//   2. Under fs_info->trans_mutex: when another task is already committing
//      this transaction (in_commit set), end our handle, wait for that
//      commit with wait_for_commit() and drop the extra reference.
//   3. Otherwise mark the transaction in_commit and blocked, and wait for
//      the previous transaction on fs_info->trans_list when its commit is
//      not done yet.
//   4. Loop, sleeping on writer_wait with the mutex dropped, until this
//      task is the only writer left (and, when should_grow is set, until
//      num_joined stops changing).  Ordered extents and ordered operations
//      are flushed in each round.
//   5. Create pending snapshots, run all delayed refs, and with
//      fs_info->tree_log_mutex held: commit the fs roots, free the log
//      root tree, commit the cow-only roots, clear
//      fs_info->running_transaction, refresh the tree root and chunk root
//      items, and copy super_copy into super_for_commit.
//   6. Unblock the transaction, drop trans_mutex, write the btree blocks
//      and the super block, then release tree_log_mutex.
//   7. Finish the extent commit and the pending snapshots, set
//      commit_done, record last_trans_committed, wake commit waiters, drop
//      two references on the transaction and free the handle.
//
// The handle is freed on the paths visible here, so the caller must not
// use trans afterwards.
//
// NOTE(review): several declarations (ret, wait, should_grow), the BUG_ON
// or error checks after most calls, the opening of the do-loop and all
// return statements are missing from this copy of the file.
861 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
862 struct btrfs_root *root)
864 unsigned long joined = 0;
865 unsigned long timeout = 1;
866 struct btrfs_transaction *cur_trans;
867 struct btrfs_transaction *prev_trans = NULL;
871 unsigned long now = get_seconds();
872 int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);
874 btrfs_run_ordered_operations(root, 0);
876 /* make a pass through all the delayed refs we have so far
877 * any runnings procs may add more while we are here
879 ret = btrfs_run_delayed_refs(trans, root, 0);
882 cur_trans = trans->transaction;
884 * set the flushing flag so procs in this transaction have to
885 * start sending their work down.
887 cur_trans->delayed_refs.flushing = 1;
889 ret = btrfs_run_delayed_refs(trans, root, 0);
892 mutex_lock(&root->fs_info->trans_mutex);
// another task already owns this commit: wait for it instead
893 if (cur_trans->in_commit) {
894 cur_trans->use_count++;
895 mutex_unlock(&root->fs_info->trans_mutex);
896 btrfs_end_transaction(trans, root);
898 ret = wait_for_commit(root, cur_trans);
901 mutex_lock(&root->fs_info->trans_mutex);
902 put_transaction(cur_trans);
903 mutex_unlock(&root->fs_info->trans_mutex);
// from here on this task owns the commit
908 trans->transaction->in_commit = 1;
909 trans->transaction->blocked = 1;
// is there an older transaction ahead of us on trans_list?
910 if (cur_trans->list.prev != &root->fs_info->trans_list) {
911 prev_trans = list_entry(cur_trans->list.prev,
912 struct btrfs_transaction, list);
913 if (!prev_trans->commit_done) {
914 prev_trans->use_count++;
915 mutex_unlock(&root->fs_info->trans_mutex);
917 wait_for_commit(root, prev_trans);
919 mutex_lock(&root->fs_info->trans_mutex);
920 put_transaction(prev_trans);
// transaction is less than a second old (or the clock went backwards)
924 if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
928 int snap_pending = 0;
929 joined = cur_trans->num_joined;
930 if (!list_empty(&trans->transaction->pending_snapshots))
933 WARN_ON(cur_trans != trans->transaction);
934 prepare_to_wait(&cur_trans->writer_wait, &wait,
935 TASK_UNINTERRUPTIBLE);
// other writers still active: sleep until woken rather than for one tick
937 if (cur_trans->num_writers > 1)
938 timeout = MAX_SCHEDULE_TIMEOUT;
939 else if (should_grow)
942 mutex_unlock(&root->fs_info->trans_mutex);
944 if (flush_on_commit) {
945 btrfs_start_delalloc_inodes(root);
946 ret = btrfs_wait_ordered_extents(root, 0);
948 } else if (snap_pending) {
949 ret = btrfs_wait_ordered_extents(root, 1);
954 * rename don't use btrfs_join_transaction, so, once we
955 * set the transaction to blocked above, we aren't going
956 * to get any new ordered operations. We can safely run
957 * it here and no for sure that nothing new will be added
960 btrfs_run_ordered_operations(root, 1);
963 if (cur_trans->num_writers > 1 || should_grow)
964 schedule_timeout(timeout);
966 mutex_lock(&root->fs_info->trans_mutex);
967 finish_wait(&cur_trans->writer_wait, &wait);
968 } while (cur_trans->num_writers > 1 ||
969 (should_grow && cur_trans->num_joined != joined));
971 ret = create_pending_snapshots(trans, root->fs_info);
974 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
977 WARN_ON(cur_trans != trans->transaction);
979 /* btrfs_commit_tree_roots is responsible for getting the
980 * various roots consistent with each other. Every pointer
981 * in the tree of tree roots has to point to the most up to date
982 * root for every subvolume and other tree. So, we have to keep
983 * the tree logging code from jumping in and changing any
986 * At this point in the commit, there can't be any tree-log
987 * writers, but a little lower down we drop the trans mutex
988 * and let new people in. By holding the tree_log_mutex
989 * from now until after the super is written, we avoid races
990 * with the tree-log code.
992 mutex_lock(&root->fs_info->tree_log_mutex);
994 ret = commit_fs_roots(trans, root);
997 /* commit_fs_roots gets rid of all the tree log roots, it is now
998 * safe to free the root of tree log roots
1000 btrfs_free_log_root_tree(trans, root->fs_info);
1002 ret = commit_cowonly_roots(trans, root);
1005 btrfs_prepare_extent_commit(trans, root);
// detach the transaction so that join_transaction() starts a new one
1007 cur_trans = root->fs_info->running_transaction;
1008 spin_lock(&root->fs_info->new_trans_lock);
1009 root->fs_info->running_transaction = NULL;
1010 spin_unlock(&root->fs_info->new_trans_lock);
1012 btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1013 root->fs_info->tree_root->node);
1014 switch_commit_root(root->fs_info->tree_root);
1016 btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
1017 root->fs_info->chunk_root->node);
1018 switch_commit_root(root->fs_info->chunk_root);
1020 update_super_roots(root);
1022 if (!root->fs_info->log_root_recovering) {
1023 btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
1024 btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0);
// snapshot of the super block that is used for this commit
1027 memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
1028 sizeof(root->fs_info->super_copy));
// let tasks sleeping in wait_current_trans() continue
1030 trans->transaction->blocked = 0;
1032 wake_up(&root->fs_info->transaction_wait);
1034 mutex_unlock(&root->fs_info->trans_mutex);
1035 ret = btrfs_write_and_wait_transaction(trans, root);
1037 write_ctree_super(trans, root, 0);
1040 * the super is written, we can safely allow the tree-loggers
1041 * to go about their business
1043 mutex_unlock(&root->fs_info->tree_log_mutex);
1045 btrfs_finish_extent_commit(trans, root);
1047 /* do the directory inserts of any pending snapshot creations */
1048 finish_pending_snapshots(trans, root->fs_info);
1050 mutex_lock(&root->fs_info->trans_mutex);
1052 cur_trans->commit_done = 1;
1054 root->fs_info->last_trans_committed = cur_trans->transid;
// releases tasks blocked in wait_for_commit()
1056 wake_up(&cur_trans->commit_wait);
// two references go away here - presumably the one taken for this handle
// in start_transaction() and the initial one from join_transaction()
1058 put_transaction(cur_trans);
1059 put_transaction(cur_trans);
1061 mutex_unlock(&root->fs_info->trans_mutex);
1063 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1068 * interface function to delete all the snapshots we have scheduled for deletion
// Delete every root queued on fs_info->dead_roots.
//
// The whole list is moved onto a local list while holding
// fs_info->trans_mutex, which leaves dead_roots empty for new additions by
// btrfs_add_dead_root().  Each root is then unlinked from the local list
// and passed to btrfs_drop_snapshot() without the mutex held.
//
// NOTE(review): btrfs_drop_snapshot() is called here as (root, 0) while
// btrfs_drop_dead_root() calls it as (trans, root) - the two argument lists
// do not agree; confirm which prototype is current.
// NOTE(review): the declaration of the local list and the end of the
// function are not present in this copy of the file.
1070 int btrfs_clean_old_snapshots(struct btrfs_root *root)
1073 struct btrfs_fs_info *fs_info = root->fs_info;
1075 mutex_lock(&fs_info->trans_mutex);
1076 list_splice_init(&fs_info->dead_roots, &list);
1077 mutex_unlock(&fs_info->trans_mutex);
1079 while (!list_empty(&list)) {
// note: the root parameter is reused as the loop variable
1080 root = list_entry(list.next, struct btrfs_root, root_list);
1081 list_del_init(&root->root_list);
1082 btrfs_drop_snapshot(root, 0);