Btrfs: hash the btree inode during fill_super
[linux-2.6-block.git] / fs / btrfs / transaction.c
CommitLineData
6cbd5570
CM
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
79154b1b 19#include <linux/fs.h>
34088780 20#include <linux/sched.h>
d3c2fdcf 21#include <linux/writeback.h>
5f39d397 22#include <linux/pagemap.h>
5f2cc086 23#include <linux/blkdev.h>
79154b1b
CM
24#include "ctree.h"
25#include "disk-io.h"
26#include "transaction.h"
925baedd 27#include "locking.h"
e02119d5 28#include "tree-log.h"
79154b1b 29
0f7d52f4
CM
30#define BTRFS_ROOT_TRANS_TAG 0
31
80b6794d 32static noinline void put_transaction(struct btrfs_transaction *transaction)
79154b1b 33{
2c90e5d6 34 WARN_ON(transaction->use_count == 0);
79154b1b 35 transaction->use_count--;
78fae27e 36 if (transaction->use_count == 0) {
8fd17795 37 list_del_init(&transaction->list);
2c90e5d6
CM
38 memset(transaction, 0, sizeof(*transaction));
39 kmem_cache_free(btrfs_transaction_cachep, transaction);
78fae27e 40 }
79154b1b
CM
41}
42
817d52f8
JB
43static noinline void switch_commit_root(struct btrfs_root *root)
44{
817d52f8
JB
45 free_extent_buffer(root->commit_root);
46 root->commit_root = btrfs_root_node(root);
817d52f8
JB
47}
48
d352ac68
CM
49/*
50 * either allocate a new transaction or hop into the existing one
51 */
80b6794d 52static noinline int join_transaction(struct btrfs_root *root)
79154b1b
CM
53{
54 struct btrfs_transaction *cur_trans;
55 cur_trans = root->fs_info->running_transaction;
56 if (!cur_trans) {
2c90e5d6
CM
57 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
58 GFP_NOFS);
79154b1b 59 BUG_ON(!cur_trans);
0f7d52f4 60 root->fs_info->generation++;
15ee9bc7
JB
61 cur_trans->num_writers = 1;
62 cur_trans->num_joined = 0;
0f7d52f4 63 cur_trans->transid = root->fs_info->generation;
79154b1b
CM
64 init_waitqueue_head(&cur_trans->writer_wait);
65 init_waitqueue_head(&cur_trans->commit_wait);
66 cur_trans->in_commit = 0;
f9295749 67 cur_trans->blocked = 0;
d5719762 68 cur_trans->use_count = 1;
79154b1b 69 cur_trans->commit_done = 0;
08607c1b 70 cur_trans->start_time = get_seconds();
56bec294
CM
71
72 cur_trans->delayed_refs.root.rb_node = NULL;
73 cur_trans->delayed_refs.num_entries = 0;
c3e69d58
CM
74 cur_trans->delayed_refs.num_heads_ready = 0;
75 cur_trans->delayed_refs.num_heads = 0;
56bec294 76 cur_trans->delayed_refs.flushing = 0;
c3e69d58 77 cur_trans->delayed_refs.run_delayed_start = 0;
56bec294
CM
78 spin_lock_init(&cur_trans->delayed_refs.lock);
79
3063d29f 80 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
8fd17795 81 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
d1310b2e 82 extent_io_tree_init(&cur_trans->dirty_pages,
5f39d397
CM
83 root->fs_info->btree_inode->i_mapping,
84 GFP_NOFS);
48ec2cf8
CM
85 spin_lock(&root->fs_info->new_trans_lock);
86 root->fs_info->running_transaction = cur_trans;
87 spin_unlock(&root->fs_info->new_trans_lock);
15ee9bc7
JB
88 } else {
89 cur_trans->num_writers++;
90 cur_trans->num_joined++;
79154b1b 91 }
15ee9bc7 92
79154b1b
CM
93 return 0;
94}
95
d352ac68 96/*
d397712b
CM
97 * this does all the record keeping required to make sure that a reference
98 * counted root is properly recorded in a given transaction. This is required
99 * to make sure the old root from before we joined the transaction is deleted
100 * when the transaction commits
d352ac68 101 */
5d4f98a2
YZ
102static noinline int record_root_in_trans(struct btrfs_trans_handle *trans,
103 struct btrfs_root *root)
6702ed49 104{
5d4f98a2 105 if (root->ref_cows && root->last_trans < trans->transid) {
6702ed49 106 WARN_ON(root == root->fs_info->extent_root);
5d4f98a2
YZ
107 WARN_ON(root->commit_root != root->node);
108
109 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
110 (unsigned long)root->root_key.objectid,
111 BTRFS_ROOT_TRANS_TAG);
112 root->last_trans = trans->transid;
113 btrfs_init_reloc_root(trans, root);
114 }
115 return 0;
116}
bcc63abb 117
5d4f98a2
YZ
118int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
119 struct btrfs_root *root)
120{
121 if (!root->ref_cows)
122 return 0;
bcc63abb 123
5d4f98a2
YZ
124 mutex_lock(&root->fs_info->trans_mutex);
125 if (root->last_trans == trans->transid) {
126 mutex_unlock(&root->fs_info->trans_mutex);
127 return 0;
6702ed49 128 }
5d4f98a2
YZ
129
130 record_root_in_trans(trans, root);
131 mutex_unlock(&root->fs_info->trans_mutex);
6702ed49
CM
132 return 0;
133}
134
d352ac68
CM
135/* wait for commit against the current transaction to become unblocked
136 * when this is done, it is safe to start a new transaction, but the current
137 * transaction might not be fully on disk.
138 */
37d1aeee 139static void wait_current_trans(struct btrfs_root *root)
79154b1b 140{
f9295749 141 struct btrfs_transaction *cur_trans;
79154b1b 142
f9295749 143 cur_trans = root->fs_info->running_transaction;
37d1aeee 144 if (cur_trans && cur_trans->blocked) {
f9295749
CM
145 DEFINE_WAIT(wait);
146 cur_trans->use_count++;
d397712b 147 while (1) {
f9295749
CM
148 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
149 TASK_UNINTERRUPTIBLE);
150 if (cur_trans->blocked) {
151 mutex_unlock(&root->fs_info->trans_mutex);
152 schedule();
153 mutex_lock(&root->fs_info->trans_mutex);
154 finish_wait(&root->fs_info->transaction_wait,
155 &wait);
156 } else {
157 finish_wait(&root->fs_info->transaction_wait,
158 &wait);
159 break;
160 }
161 }
162 put_transaction(cur_trans);
163 }
37d1aeee
CM
164}
165
e02119d5 166static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
9ca9ee09 167 int num_blocks, int wait)
37d1aeee
CM
168{
169 struct btrfs_trans_handle *h =
170 kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
171 int ret;
172
173 mutex_lock(&root->fs_info->trans_mutex);
4bef0848
CM
174 if (!root->fs_info->log_root_recovering &&
175 ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2))
37d1aeee 176 wait_current_trans(root);
79154b1b
CM
177 ret = join_transaction(root);
178 BUG_ON(ret);
0f7d52f4 179
6702ed49 180 h->transid = root->fs_info->running_transaction->transid;
79154b1b
CM
181 h->transaction = root->fs_info->running_transaction;
182 h->blocks_reserved = num_blocks;
183 h->blocks_used = 0;
d2fb3437 184 h->block_group = 0;
26b8003f
CM
185 h->alloc_exclude_nr = 0;
186 h->alloc_exclude_start = 0;
56bec294 187 h->delayed_ref_updates = 0;
b7ec40d7 188
79154b1b 189 root->fs_info->running_transaction->use_count++;
5d4f98a2 190 record_root_in_trans(h, root);
79154b1b
CM
191 mutex_unlock(&root->fs_info->trans_mutex);
192 return h;
193}
194
f9295749
CM
/*
 * Start a transaction, waiting for a blocked commit first unless an
 * ioctl-driven transaction is open (wait mode 1).
 */
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
						   int num_blocks)
{
	return start_transaction(root, num_blocks, 1);
}
/*
 * Join the running transaction without waiting, even if it is blocked
 * for commit (wait mode 0).
 */
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
						  int num_blocks)
{
	return start_transaction(root, num_blocks, 0);
}
205
9ca9ee09
SW
/*
 * Start a transaction on behalf of the TRANS_START ioctl; always waits
 * for a blocked commit (wait mode 2).
 */
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
							 int num_blocks)
{
	return start_transaction(r, num_blocks, 2);
}
211
d352ac68 212/* wait for a transaction commit to be fully complete */
89ce8a63
CM
213static noinline int wait_for_commit(struct btrfs_root *root,
214 struct btrfs_transaction *commit)
215{
216 DEFINE_WAIT(wait);
217 mutex_lock(&root->fs_info->trans_mutex);
d397712b 218 while (!commit->commit_done) {
89ce8a63
CM
219 prepare_to_wait(&commit->commit_wait, &wait,
220 TASK_UNINTERRUPTIBLE);
221 if (commit->commit_done)
222 break;
223 mutex_unlock(&root->fs_info->trans_mutex);
224 schedule();
225 mutex_lock(&root->fs_info->trans_mutex);
226 }
227 mutex_unlock(&root->fs_info->trans_mutex);
228 finish_wait(&commit->commit_wait, &wait);
229 return 0;
230}
231
#if 0
/*
 * rate limit against the drop_snapshot code.  This helps to slow down new
 * operations if the drop_snapshot code isn't able to keep up.
 *
 * NOTE: currently compiled out (#if 0); kept for reference only.
 */
static void throttle_on_drops(struct btrfs_root *root)
{
	struct btrfs_fs_info *info = root->fs_info;
	int harder_count = 0;

harder:
	if (atomic_read(&info->throttles)) {
		DEFINE_WAIT(wait);
		int thr;
		thr = atomic_read(&info->throttle_gen);

		do {
			prepare_to_wait(&info->transaction_throttle,
					&wait, TASK_UNINTERRUPTIBLE);
			if (!atomic_read(&info->throttles)) {
				finish_wait(&info->transaction_throttle, &wait);
				break;
			}
			schedule();
			finish_wait(&info->transaction_throttle, &wait);
		} while (thr == atomic_read(&info->throttle_gen));
		harder_count++;

		/* keep throttling harder while the ref cache stays large */
		if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 &&
		    harder_count < 2)
			goto harder;

		if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 &&
		    harder_count < 10)
			goto harder;

		if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 &&
		    harder_count < 20)
			goto harder;
	}
}
#endif
ab78c84d 274
37d1aeee
CM
275void btrfs_throttle(struct btrfs_root *root)
276{
277 mutex_lock(&root->fs_info->trans_mutex);
9ca9ee09
SW
278 if (!root->fs_info->open_ioctl_trans)
279 wait_current_trans(root);
37d1aeee 280 mutex_unlock(&root->fs_info->trans_mutex);
37d1aeee
CM
281}
282
89ce8a63
CM
283static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
284 struct btrfs_root *root, int throttle)
79154b1b
CM
285{
286 struct btrfs_transaction *cur_trans;
ab78c84d 287 struct btrfs_fs_info *info = root->fs_info;
c3e69d58
CM
288 int count = 0;
289
290 while (count < 4) {
291 unsigned long cur = trans->delayed_ref_updates;
292 trans->delayed_ref_updates = 0;
293 if (cur &&
294 trans->transaction->delayed_refs.num_heads_ready > 64) {
295 trans->delayed_ref_updates = 0;
b7ec40d7
CM
296
297 /*
298 * do a full flush if the transaction is trying
299 * to close
300 */
301 if (trans->transaction->delayed_refs.flushing)
302 cur = 0;
c3e69d58
CM
303 btrfs_run_delayed_refs(trans, root, cur);
304 } else {
305 break;
306 }
307 count++;
56bec294
CM
308 }
309
ab78c84d
CM
310 mutex_lock(&info->trans_mutex);
311 cur_trans = info->running_transaction;
ccd467d6 312 WARN_ON(cur_trans != trans->transaction);
d5719762 313 WARN_ON(cur_trans->num_writers < 1);
ccd467d6 314 cur_trans->num_writers--;
89ce8a63 315
79154b1b
CM
316 if (waitqueue_active(&cur_trans->writer_wait))
317 wake_up(&cur_trans->writer_wait);
79154b1b 318 put_transaction(cur_trans);
ab78c84d 319 mutex_unlock(&info->trans_mutex);
d6025579 320 memset(trans, 0, sizeof(*trans));
2c90e5d6 321 kmem_cache_free(btrfs_trans_handle_cachep, trans);
ab78c84d 322
79154b1b
CM
323 return 0;
324}
325
89ce8a63
CM
/* End a trans handle without throttling. */
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 0);
}
331
/* End a trans handle, requesting throttling of heavy delayed-ref work. */
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 1);
}
337
d352ac68
CM
338/*
339 * when btree blocks are allocated, they have some corresponding bits set for
340 * them in one of two extent_io trees. This is used to make sure all of
341 * those extents are on disk for transaction or log commit
342 */
d0c803c4
CM
343int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
344 struct extent_io_tree *dirty_pages)
79154b1b 345{
7c4452b9 346 int ret;
777e6bd7 347 int err = 0;
7c4452b9
CM
348 int werr = 0;
349 struct page *page;
7c4452b9 350 struct inode *btree_inode = root->fs_info->btree_inode;
777e6bd7 351 u64 start = 0;
5f39d397
CM
352 u64 end;
353 unsigned long index;
7c4452b9 354
d397712b 355 while (1) {
777e6bd7 356 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
5f39d397
CM
357 EXTENT_DIRTY);
358 if (ret)
7c4452b9 359 break;
d397712b 360 while (start <= end) {
777e6bd7
CM
361 cond_resched();
362
5f39d397 363 index = start >> PAGE_CACHE_SHIFT;
35ebb934 364 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
4bef0848 365 page = find_get_page(btree_inode->i_mapping, index);
7c4452b9
CM
366 if (!page)
367 continue;
4bef0848
CM
368
369 btree_lock_page_hook(page);
370 if (!page->mapping) {
371 unlock_page(page);
372 page_cache_release(page);
373 continue;
374 }
375
6702ed49
CM
376 if (PageWriteback(page)) {
377 if (PageDirty(page))
378 wait_on_page_writeback(page);
379 else {
380 unlock_page(page);
381 page_cache_release(page);
382 continue;
383 }
384 }
7c4452b9
CM
385 err = write_one_page(page, 0);
386 if (err)
387 werr = err;
388 page_cache_release(page);
389 }
390 }
d397712b 391 while (1) {
777e6bd7
CM
392 ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
393 EXTENT_DIRTY);
394 if (ret)
395 break;
396
397 clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
d397712b 398 while (start <= end) {
777e6bd7
CM
399 index = start >> PAGE_CACHE_SHIFT;
400 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
401 page = find_get_page(btree_inode->i_mapping, index);
402 if (!page)
403 continue;
404 if (PageDirty(page)) {
4bef0848
CM
405 btree_lock_page_hook(page);
406 wait_on_page_writeback(page);
777e6bd7
CM
407 err = write_one_page(page, 0);
408 if (err)
409 werr = err;
410 }
105d931d 411 wait_on_page_writeback(page);
777e6bd7
CM
412 page_cache_release(page);
413 cond_resched();
414 }
415 }
7c4452b9
CM
416 if (err)
417 werr = err;
418 return werr;
79154b1b
CM
419}
420
d0c803c4
CM
421int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
422 struct btrfs_root *root)
423{
424 if (!trans || !trans->transaction) {
425 struct inode *btree_inode;
426 btree_inode = root->fs_info->btree_inode;
427 return filemap_write_and_wait(btree_inode->i_mapping);
428 }
429 return btrfs_write_and_wait_marked_extents(root,
430 &trans->transaction->dirty_pages);
431}
432
d352ac68
CM
433/*
434 * this is used to update the root pointer in the tree of tree roots.
435 *
436 * But, in the case of the extent allocation tree, updating the root
437 * pointer may allocate blocks which may change the root of the extent
438 * allocation tree.
439 *
440 * So, this loops and repeats and makes sure the cowonly root didn't
441 * change while the root pointer was being updated in the metadata.
442 */
0b86a832
CM
443static int update_cowonly_root(struct btrfs_trans_handle *trans,
444 struct btrfs_root *root)
79154b1b
CM
445{
446 int ret;
0b86a832
CM
447 u64 old_root_bytenr;
448 struct btrfs_root *tree_root = root->fs_info->tree_root;
79154b1b 449
0b86a832 450 btrfs_write_dirty_block_groups(trans, root);
56bec294 451
d397712b 452 while (1) {
0b86a832
CM
453 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
454 if (old_root_bytenr == root->node->start)
79154b1b 455 break;
87ef2bb4 456
5d4f98a2 457 btrfs_set_root_node(&root->root_item, root->node);
79154b1b 458 ret = btrfs_update_root(trans, tree_root,
0b86a832
CM
459 &root->root_key,
460 &root->root_item);
79154b1b 461 BUG_ON(ret);
56bec294 462
4a8c9a62 463 ret = btrfs_write_dirty_block_groups(trans, root);
56bec294 464 BUG_ON(ret);
0b86a832 465 }
276e680d
YZ
466
467 if (root != root->fs_info->extent_root)
468 switch_commit_root(root);
469
0b86a832
CM
470 return 0;
471}
472
d352ac68
CM
473/*
474 * update all the cowonly tree roots on disk
475 */
5d4f98a2
YZ
476static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
477 struct btrfs_root *root)
0b86a832
CM
478{
479 struct btrfs_fs_info *fs_info = root->fs_info;
480 struct list_head *next;
84234f3a 481 struct extent_buffer *eb;
56bec294 482 int ret;
84234f3a 483
56bec294
CM
484 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
485 BUG_ON(ret);
87ef2bb4 486
84234f3a 487 eb = btrfs_lock_root_node(fs_info->tree_root);
9fa8cfe7 488 btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb);
84234f3a
YZ
489 btrfs_tree_unlock(eb);
490 free_extent_buffer(eb);
0b86a832 491
56bec294
CM
492 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
493 BUG_ON(ret);
87ef2bb4 494
d397712b 495 while (!list_empty(&fs_info->dirty_cowonly_roots)) {
0b86a832
CM
496 next = fs_info->dirty_cowonly_roots.next;
497 list_del_init(next);
498 root = list_entry(next, struct btrfs_root, dirty_list);
87ef2bb4 499
0b86a832 500 update_cowonly_root(trans, root);
79154b1b 501 }
276e680d
YZ
502
503 down_write(&fs_info->extent_commit_sem);
504 switch_commit_root(fs_info->extent_root);
505 up_write(&fs_info->extent_commit_sem);
506
79154b1b
CM
507 return 0;
508}
509
d352ac68
CM
510/*
511 * dead roots are old snapshots that need to be deleted. This allocates
512 * a dirty root struct and adds it into the list of dead roots that need to
513 * be deleted
514 */
5d4f98a2 515int btrfs_add_dead_root(struct btrfs_root *root)
5eda7b5e 516{
b48652c1 517 mutex_lock(&root->fs_info->trans_mutex);
5d4f98a2 518 list_add(&root->root_list, &root->fs_info->dead_roots);
b48652c1 519 mutex_unlock(&root->fs_info->trans_mutex);
5eda7b5e
CM
520 return 0;
521}
522
d352ac68 523/*
5d4f98a2 524 * update all of the per-subvolume (fs tree) roots on disk
d352ac68 525 */
5d4f98a2
YZ
526static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
527 struct btrfs_root *root)
0f7d52f4 528{
0f7d52f4 529 struct btrfs_root *gang[8];
5d4f98a2 530 struct btrfs_fs_info *fs_info = root->fs_info;
0f7d52f4
CM
531 int i;
532 int ret;
54aa1f4d
CM
533 int err = 0;
534
d397712b 535 while (1) {
5d4f98a2
YZ
536 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
537 (void **)gang, 0,
0f7d52f4
CM
538 ARRAY_SIZE(gang),
539 BTRFS_ROOT_TRANS_TAG);
540 if (ret == 0)
541 break;
542 for (i = 0; i < ret; i++) {
543 root = gang[i];
5d4f98a2
YZ
544 radix_tree_tag_clear(&fs_info->fs_roots_radix,
545 (unsigned long)root->root_key.objectid,
546 BTRFS_ROOT_TRANS_TAG);
31153d81 547
e02119d5 548 btrfs_free_log(trans, root);
5d4f98a2 549 btrfs_update_reloc_root(trans, root);
bcc63abb 550
978d910d 551 if (root->commit_root != root->node) {
817d52f8 552 switch_commit_root(root);
978d910d
YZ
553 btrfs_set_root_node(&root->root_item,
554 root->node);
555 }
5d4f98a2 556
5d4f98a2 557 err = btrfs_update_root(trans, fs_info->tree_root,
0f7d52f4
CM
558 &root->root_key,
559 &root->root_item);
54aa1f4d
CM
560 if (err)
561 break;
0f7d52f4
CM
562 }
563 }
54aa1f4d 564 return err;
0f7d52f4
CM
565}
566
d352ac68
CM
567/*
568 * defrag a given btree. If cacheonly == 1, this won't read from the disk,
569 * otherwise every leaf in the btree is read and defragged.
570 */
e9d0b13b
CM
571int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
572{
573 struct btrfs_fs_info *info = root->fs_info;
574 int ret;
575 struct btrfs_trans_handle *trans;
d3c2fdcf 576 unsigned long nr;
e9d0b13b 577
a2135011 578 smp_mb();
e9d0b13b
CM
579 if (root->defrag_running)
580 return 0;
e9d0b13b 581 trans = btrfs_start_transaction(root, 1);
6b80053d 582 while (1) {
e9d0b13b
CM
583 root->defrag_running = 1;
584 ret = btrfs_defrag_leaves(trans, root, cacheonly);
d3c2fdcf 585 nr = trans->blocks_used;
e9d0b13b 586 btrfs_end_transaction(trans, root);
d3c2fdcf 587 btrfs_btree_balance_dirty(info->tree_root, nr);
e9d0b13b
CM
588 cond_resched();
589
e9d0b13b 590 trans = btrfs_start_transaction(root, 1);
3f157a2f 591 if (root->fs_info->closing || ret != -EAGAIN)
e9d0b13b
CM
592 break;
593 }
594 root->defrag_running = 0;
a2135011 595 smp_mb();
e9d0b13b
CM
596 btrfs_end_transaction(trans, root);
597 return 0;
598}
599
#if 0
/*
 * when dropping snapshots, we generate a ton of delayed refs, and it makes
 * sense not to join the transaction while it is trying to flush the current
 * queue of delayed refs out.
 *
 * This is used by the drop snapshot code only.
 *
 * NOTE: this whole region is compiled out (#if 0); kept for reference.
 */
static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
{
	DEFINE_WAIT(wait);

	mutex_lock(&info->trans_mutex);
	while (info->running_transaction &&
	       info->running_transaction->delayed_refs.flushing) {
		prepare_to_wait(&info->transaction_wait, &wait,
				TASK_UNINTERRUPTIBLE);
		mutex_unlock(&info->trans_mutex);

		schedule();

		mutex_lock(&info->trans_mutex);
		finish_wait(&info->transaction_wait, &wait);
	}
	mutex_unlock(&info->trans_mutex);
	return 0;
}

/*
 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
 * all of them
 */
int btrfs_drop_dead_root(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	unsigned long nr;
	int ret;

	while (1) {
		/*
		 * we don't want to jump in and create a bunch of
		 * delayed refs if the transaction is starting to close
		 */
		wait_transaction_pre_flush(tree_root->fs_info);
		trans = btrfs_start_transaction(tree_root, 1);

		/*
		 * we've joined a transaction, make sure it isn't
		 * closing right now
		 */
		if (trans->transaction->delayed_refs.flushing) {
			btrfs_end_transaction(trans, tree_root);
			continue;
		}

		ret = btrfs_drop_snapshot(trans, root);
		if (ret != -EAGAIN)
			break;

		ret = btrfs_update_root(trans, tree_root,
					&root->root_key,
					&root->root_item);
		if (ret)
			break;

		nr = trans->blocks_used;
		ret = btrfs_end_transaction(trans, tree_root);
		BUG_ON(ret);

		btrfs_btree_balance_dirty(tree_root, nr);
		cond_resched();
	}
	BUG_ON(ret);

	ret = btrfs_del_root(trans, tree_root, &root->root_key);
	BUG_ON(ret);

	nr = trans->blocks_used;
	ret = btrfs_end_transaction(trans, tree_root);
	BUG_ON(ret);

	free_extent_buffer(root->node);
	free_extent_buffer(root->commit_root);
	kfree(root);

	btrfs_btree_balance_dirty(tree_root, nr);
	return ret;
}
#endif
0f7d52f4 690
d352ac68
CM
691/*
692 * new snapshots need to be created at a very specific time in the
693 * transaction commit. This does the actual creation
694 */
80b6794d 695static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
3063d29f
CM
696 struct btrfs_fs_info *fs_info,
697 struct btrfs_pending_snapshot *pending)
698{
699 struct btrfs_key key;
80b6794d 700 struct btrfs_root_item *new_root_item;
3063d29f
CM
701 struct btrfs_root *tree_root = fs_info->tree_root;
702 struct btrfs_root *root = pending->root;
703 struct extent_buffer *tmp;
925baedd 704 struct extent_buffer *old;
3063d29f
CM
705 int ret;
706 u64 objectid;
707
80b6794d
CM
708 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
709 if (!new_root_item) {
710 ret = -ENOMEM;
711 goto fail;
712 }
3063d29f
CM
713 ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid);
714 if (ret)
715 goto fail;
716
5d4f98a2 717 record_root_in_trans(trans, root);
80ff3856 718 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
80b6794d 719 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
3063d29f
CM
720
721 key.objectid = objectid;
1c4850e2
YZ
722 /* record when the snapshot was created in key.offset */
723 key.offset = trans->transid;
3063d29f
CM
724 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
725
925baedd 726 old = btrfs_lock_root_node(root);
9fa8cfe7 727 btrfs_cow_block(trans, root, old, NULL, 0, &old);
5d4f98a2 728 btrfs_set_lock_blocking(old);
3063d29f 729
925baedd
CM
730 btrfs_copy_root(trans, root, old, &tmp, objectid);
731 btrfs_tree_unlock(old);
732 free_extent_buffer(old);
3063d29f 733
5d4f98a2 734 btrfs_set_root_node(new_root_item, tmp);
3063d29f 735 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
80b6794d 736 new_root_item);
925baedd 737 btrfs_tree_unlock(tmp);
3063d29f
CM
738 free_extent_buffer(tmp);
739 if (ret)
740 goto fail;
741
3de4586c
CM
742 key.offset = (u64)-1;
743 memcpy(&pending->root_key, &key, sizeof(key));
744fail:
745 kfree(new_root_item);
746 return ret;
747}
748
749static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
750 struct btrfs_pending_snapshot *pending)
751{
752 int ret;
753 int namelen;
754 u64 index = 0;
755 struct btrfs_trans_handle *trans;
756 struct inode *parent_inode;
757 struct inode *inode;
0660b5af 758 struct btrfs_root *parent_root;
3de4586c 759
3394e160 760 parent_inode = pending->dentry->d_parent->d_inode;
0660b5af 761 parent_root = BTRFS_I(parent_inode)->root;
180591bc 762 trans = btrfs_join_transaction(parent_root, 1);
3de4586c 763
3063d29f
CM
764 /*
765 * insert the directory item
766 */
3b96362c 767 namelen = strlen(pending->name);
3de4586c 768 ret = btrfs_set_inode_index(parent_inode, &index);
0660b5af 769 ret = btrfs_insert_dir_item(trans, parent_root,
3de4586c
CM
770 pending->name, namelen,
771 parent_inode->i_ino,
772 &pending->root_key, BTRFS_FT_DIR, index);
3063d29f
CM
773
774 if (ret)
775 goto fail;
0660b5af 776
52c26179
YZ
777 btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2);
778 ret = btrfs_update_inode(trans, parent_root, parent_inode);
779 BUG_ON(ret);
780
0660b5af
CM
781 ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
782 pending->root_key.objectid,
0660b5af
CM
783 parent_root->root_key.objectid,
784 parent_inode->i_ino, index, pending->name,
785 namelen);
786
787 BUG_ON(ret);
788
3de4586c
CM
789 inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
790 d_instantiate(pending->dentry, inode);
3063d29f 791fail:
3de4586c 792 btrfs_end_transaction(trans, fs_info->fs_root);
3063d29f
CM
793 return ret;
794}
795
d352ac68
CM
796/*
797 * create all the snapshots we've scheduled for creation
798 */
80b6794d
CM
799static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
800 struct btrfs_fs_info *fs_info)
3de4586c
CM
801{
802 struct btrfs_pending_snapshot *pending;
803 struct list_head *head = &trans->transaction->pending_snapshots;
3de4586c
CM
804 int ret;
805
c6e30871 806 list_for_each_entry(pending, head, list) {
3de4586c
CM
807 ret = create_pending_snapshot(trans, fs_info, pending);
808 BUG_ON(ret);
809 }
810 return 0;
811}
812
813static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans,
814 struct btrfs_fs_info *fs_info)
3063d29f
CM
815{
816 struct btrfs_pending_snapshot *pending;
817 struct list_head *head = &trans->transaction->pending_snapshots;
818 int ret;
819
d397712b 820 while (!list_empty(head)) {
3063d29f
CM
821 pending = list_entry(head->next,
822 struct btrfs_pending_snapshot, list);
3de4586c 823 ret = finish_pending_snapshot(fs_info, pending);
3063d29f
CM
824 BUG_ON(ret);
825 list_del(&pending->list);
826 kfree(pending->name);
827 kfree(pending);
828 }
dc17ff8f
CM
829 return 0;
830}
831
5d4f98a2
YZ
832static void update_super_roots(struct btrfs_root *root)
833{
834 struct btrfs_root_item *root_item;
835 struct btrfs_super_block *super;
836
837 super = &root->fs_info->super_copy;
838
839 root_item = &root->fs_info->chunk_root->root_item;
840 super->chunk_root = root_item->bytenr;
841 super->chunk_root_generation = root_item->generation;
842 super->chunk_root_level = root_item->level;
843
844 root_item = &root->fs_info->tree_root->root_item;
845 super->root = root_item->bytenr;
846 super->generation = root_item->generation;
847 super->root_level = root_item->level;
848}
849
f36f3042
CM
850int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
851{
852 int ret = 0;
853 spin_lock(&info->new_trans_lock);
854 if (info->running_transaction)
855 ret = info->running_transaction->in_commit;
856 spin_unlock(&info->new_trans_lock);
857 return ret;
858}
859
79154b1b
CM
860int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
861 struct btrfs_root *root)
862{
15ee9bc7
JB
863 unsigned long joined = 0;
864 unsigned long timeout = 1;
79154b1b 865 struct btrfs_transaction *cur_trans;
8fd17795 866 struct btrfs_transaction *prev_trans = NULL;
79154b1b 867 DEFINE_WAIT(wait);
15ee9bc7 868 int ret;
89573b9c
CM
869 int should_grow = 0;
870 unsigned long now = get_seconds();
dccae999 871 int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);
79154b1b 872
5a3f23d5
CM
873 btrfs_run_ordered_operations(root, 0);
874
56bec294
CM
875 /* make a pass through all the delayed refs we have so far
876 * any runnings procs may add more while we are here
877 */
878 ret = btrfs_run_delayed_refs(trans, root, 0);
879 BUG_ON(ret);
880
b7ec40d7 881 cur_trans = trans->transaction;
56bec294
CM
882 /*
883 * set the flushing flag so procs in this transaction have to
884 * start sending their work down.
885 */
b7ec40d7 886 cur_trans->delayed_refs.flushing = 1;
56bec294 887
c3e69d58 888 ret = btrfs_run_delayed_refs(trans, root, 0);
56bec294
CM
889 BUG_ON(ret);
890
79154b1b 891 mutex_lock(&root->fs_info->trans_mutex);
b7ec40d7
CM
892 if (cur_trans->in_commit) {
893 cur_trans->use_count++;
ccd467d6 894 mutex_unlock(&root->fs_info->trans_mutex);
79154b1b 895 btrfs_end_transaction(trans, root);
ccd467d6 896
79154b1b
CM
897 ret = wait_for_commit(root, cur_trans);
898 BUG_ON(ret);
15ee9bc7
JB
899
900 mutex_lock(&root->fs_info->trans_mutex);
79154b1b 901 put_transaction(cur_trans);
15ee9bc7
JB
902 mutex_unlock(&root->fs_info->trans_mutex);
903
79154b1b
CM
904 return 0;
905 }
4313b399 906
2c90e5d6 907 trans->transaction->in_commit = 1;
f9295749 908 trans->transaction->blocked = 1;
ccd467d6
CM
909 if (cur_trans->list.prev != &root->fs_info->trans_list) {
910 prev_trans = list_entry(cur_trans->list.prev,
911 struct btrfs_transaction, list);
912 if (!prev_trans->commit_done) {
913 prev_trans->use_count++;
ccd467d6
CM
914 mutex_unlock(&root->fs_info->trans_mutex);
915
916 wait_for_commit(root, prev_trans);
ccd467d6 917
ccd467d6 918 mutex_lock(&root->fs_info->trans_mutex);
15ee9bc7 919 put_transaction(prev_trans);
ccd467d6
CM
920 }
921 }
15ee9bc7 922
89573b9c
CM
923 if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
924 should_grow = 1;
925
15ee9bc7 926 do {
7ea394f1 927 int snap_pending = 0;
15ee9bc7 928 joined = cur_trans->num_joined;
7ea394f1
YZ
929 if (!list_empty(&trans->transaction->pending_snapshots))
930 snap_pending = 1;
931
2c90e5d6 932 WARN_ON(cur_trans != trans->transaction);
15ee9bc7 933 prepare_to_wait(&cur_trans->writer_wait, &wait,
79154b1b 934 TASK_UNINTERRUPTIBLE);
15ee9bc7
JB
935
936 if (cur_trans->num_writers > 1)
937 timeout = MAX_SCHEDULE_TIMEOUT;
89573b9c 938 else if (should_grow)
15ee9bc7
JB
939 timeout = 1;
940
79154b1b 941 mutex_unlock(&root->fs_info->trans_mutex);
15ee9bc7 942
ebecd3d9
SW
943 if (flush_on_commit) {
944 btrfs_start_delalloc_inodes(root);
945 ret = btrfs_wait_ordered_extents(root, 0);
946 BUG_ON(ret);
947 } else if (snap_pending) {
7ea394f1
YZ
948 ret = btrfs_wait_ordered_extents(root, 1);
949 BUG_ON(ret);
950 }
951
5a3f23d5
CM
952 /*
953 * rename don't use btrfs_join_transaction, so, once we
954 * set the transaction to blocked above, we aren't going
955 * to get any new ordered operations. We can safely run
956 * it here and no for sure that nothing new will be added
957 * to the list
958 */
959 btrfs_run_ordered_operations(root, 1);
960
89573b9c
CM
961 smp_mb();
962 if (cur_trans->num_writers > 1 || should_grow)
963 schedule_timeout(timeout);
15ee9bc7 964
79154b1b 965 mutex_lock(&root->fs_info->trans_mutex);
15ee9bc7
JB
966 finish_wait(&cur_trans->writer_wait, &wait);
967 } while (cur_trans->num_writers > 1 ||
89573b9c 968 (should_grow && cur_trans->num_joined != joined));
15ee9bc7 969
3063d29f
CM
970 ret = create_pending_snapshots(trans, root->fs_info);
971 BUG_ON(ret);
972
56bec294
CM
973 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
974 BUG_ON(ret);
975
2c90e5d6 976 WARN_ON(cur_trans != trans->transaction);
dc17ff8f 977
e02119d5
CM
978 /* btrfs_commit_tree_roots is responsible for getting the
979 * various roots consistent with each other. Every pointer
980 * in the tree of tree roots has to point to the most up to date
981 * root for every subvolume and other tree. So, we have to keep
982 * the tree logging code from jumping in and changing any
983 * of the trees.
984 *
985 * At this point in the commit, there can't be any tree-log
986 * writers, but a little lower down we drop the trans mutex
987 * and let new people in. By holding the tree_log_mutex
988 * from now until after the super is written, we avoid races
989 * with the tree-log code.
990 */
991 mutex_lock(&root->fs_info->tree_log_mutex);
992
5d4f98a2 993 ret = commit_fs_roots(trans, root);
54aa1f4d
CM
994 BUG_ON(ret);
995
5d4f98a2 996 /* commit_fs_roots gets rid of all the tree log roots, it is now
e02119d5
CM
997 * safe to free the root of tree log roots
998 */
999 btrfs_free_log_root_tree(trans, root->fs_info);
1000
5d4f98a2 1001 ret = commit_cowonly_roots(trans, root);
79154b1b 1002 BUG_ON(ret);
54aa1f4d 1003
11833d66
YZ
1004 btrfs_prepare_extent_commit(trans, root);
1005
78fae27e 1006 cur_trans = root->fs_info->running_transaction;
cee36a03 1007 spin_lock(&root->fs_info->new_trans_lock);
78fae27e 1008 root->fs_info->running_transaction = NULL;
cee36a03 1009 spin_unlock(&root->fs_info->new_trans_lock);
5d4f98a2
YZ
1010
1011 btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1012 root->fs_info->tree_root->node);
817d52f8 1013 switch_commit_root(root->fs_info->tree_root);
5d4f98a2
YZ
1014
1015 btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
1016 root->fs_info->chunk_root->node);
817d52f8 1017 switch_commit_root(root->fs_info->chunk_root);
5d4f98a2
YZ
1018
1019 update_super_roots(root);
e02119d5
CM
1020
1021 if (!root->fs_info->log_root_recovering) {
1022 btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
1023 btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0);
1024 }
1025
a061fc8d
CM
1026 memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
1027 sizeof(root->fs_info->super_copy));
ccd467d6 1028
f9295749 1029 trans->transaction->blocked = 0;
b7ec40d7 1030
f9295749 1031 wake_up(&root->fs_info->transaction_wait);
e6dcd2dc 1032
78fae27e 1033 mutex_unlock(&root->fs_info->trans_mutex);
79154b1b
CM
1034 ret = btrfs_write_and_wait_transaction(trans, root);
1035 BUG_ON(ret);
a512bbf8 1036 write_ctree_super(trans, root, 0);
4313b399 1037
e02119d5
CM
1038 /*
1039 * the super is written, we can safely allow the tree-loggers
1040 * to go about their business
1041 */
1042 mutex_unlock(&root->fs_info->tree_log_mutex);
1043
11833d66 1044 btrfs_finish_extent_commit(trans, root);
4313b399 1045
3de4586c
CM
1046 /* do the directory inserts of any pending snapshot creations */
1047 finish_pending_snapshots(trans, root->fs_info);
1048
1a40e23b
ZY
1049 mutex_lock(&root->fs_info->trans_mutex);
1050
2c90e5d6 1051 cur_trans->commit_done = 1;
b7ec40d7 1052
15ee9bc7 1053 root->fs_info->last_trans_committed = cur_trans->transid;
817d52f8 1054
2c90e5d6 1055 wake_up(&cur_trans->commit_wait);
3de4586c 1056
78fae27e 1057 put_transaction(cur_trans);
79154b1b 1058 put_transaction(cur_trans);
58176a96 1059
78fae27e 1060 mutex_unlock(&root->fs_info->trans_mutex);
3de4586c 1061
2c90e5d6 1062 kmem_cache_free(btrfs_trans_handle_cachep, trans);
79154b1b
CM
1063 return ret;
1064}
1065
d352ac68
CM
1066/*
1067 * interface function to delete all the snapshots we have scheduled for deletion
1068 */
e9d0b13b
CM
1069int btrfs_clean_old_snapshots(struct btrfs_root *root)
1070{
5d4f98a2
YZ
1071 LIST_HEAD(list);
1072 struct btrfs_fs_info *fs_info = root->fs_info;
1073
1074 mutex_lock(&fs_info->trans_mutex);
1075 list_splice_init(&fs_info->dead_roots, &list);
1076 mutex_unlock(&fs_info->trans_mutex);
e9d0b13b 1077
5d4f98a2
YZ
1078 while (!list_empty(&list)) {
1079 root = list_entry(list.next, struct btrfs_root, root_list);
76dda93c
YZ
1080 list_del(&root->root_list);
1081
1082 if (btrfs_header_backref_rev(root->node) <
1083 BTRFS_MIXED_BACKREF_REV)
1084 btrfs_drop_snapshot(root, 0);
1085 else
1086 btrfs_drop_snapshot(root, 1);
e9d0b13b
CM
1087 }
1088 return 0;
1089}