Btrfs: properly set blocksize when adding new device.
[linux-2.6-block.git] / fs / btrfs / transaction.c
CommitLineData
6cbd5570
CM
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
79154b1b 19#include <linux/fs.h>
34088780 20#include <linux/sched.h>
d3c2fdcf 21#include <linux/writeback.h>
5f39d397 22#include <linux/pagemap.h>
79154b1b
CM
23#include "ctree.h"
24#include "disk-io.h"
25#include "transaction.h"
925baedd 26#include "locking.h"
31153d81 27#include "ref-cache.h"
e02119d5 28#include "tree-log.h"
79154b1b 29
/* Count of live btrfs_transaction structures; adjusted in this file only. */
static int total_trans;

/* Slab caches for handles and transactions; defined outside this file. */
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_transaction_cachep;

/* Radix tree tag set on roots recorded in the running transaction. */
#define BTRFS_ROOT_TRANS_TAG 0
35
80b6794d 36static noinline void put_transaction(struct btrfs_transaction *transaction)
79154b1b 37{
2c90e5d6 38 WARN_ON(transaction->use_count == 0);
79154b1b 39 transaction->use_count--;
78fae27e
CM
40 if (transaction->use_count == 0) {
41 WARN_ON(total_trans == 0);
42 total_trans--;
8fd17795 43 list_del_init(&transaction->list);
2c90e5d6
CM
44 memset(transaction, 0, sizeof(*transaction));
45 kmem_cache_free(btrfs_transaction_cachep, transaction);
78fae27e 46 }
79154b1b
CM
47}
48
80b6794d 49static noinline int join_transaction(struct btrfs_root *root)
79154b1b
CM
50{
51 struct btrfs_transaction *cur_trans;
52 cur_trans = root->fs_info->running_transaction;
53 if (!cur_trans) {
2c90e5d6
CM
54 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
55 GFP_NOFS);
78fae27e 56 total_trans++;
79154b1b 57 BUG_ON(!cur_trans);
0f7d52f4 58 root->fs_info->generation++;
e18e4809 59 root->fs_info->last_alloc = 0;
4529ba49 60 root->fs_info->last_data_alloc = 0;
e02119d5 61 root->fs_info->last_log_alloc = 0;
15ee9bc7
JB
62 cur_trans->num_writers = 1;
63 cur_trans->num_joined = 0;
0f7d52f4 64 cur_trans->transid = root->fs_info->generation;
79154b1b
CM
65 init_waitqueue_head(&cur_trans->writer_wait);
66 init_waitqueue_head(&cur_trans->commit_wait);
67 cur_trans->in_commit = 0;
f9295749 68 cur_trans->blocked = 0;
d5719762 69 cur_trans->use_count = 1;
79154b1b 70 cur_trans->commit_done = 0;
08607c1b 71 cur_trans->start_time = get_seconds();
3063d29f 72 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
8fd17795 73 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
d1310b2e 74 extent_io_tree_init(&cur_trans->dirty_pages,
5f39d397
CM
75 root->fs_info->btree_inode->i_mapping,
76 GFP_NOFS);
48ec2cf8
CM
77 spin_lock(&root->fs_info->new_trans_lock);
78 root->fs_info->running_transaction = cur_trans;
79 spin_unlock(&root->fs_info->new_trans_lock);
15ee9bc7
JB
80 } else {
81 cur_trans->num_writers++;
82 cur_trans->num_joined++;
79154b1b 83 }
15ee9bc7 84
79154b1b
CM
85 return 0;
86}
87
e02119d5 88noinline int btrfs_record_root_in_trans(struct btrfs_root *root)
6702ed49 89{
f321e491 90 struct btrfs_dirty_root *dirty;
6702ed49
CM
91 u64 running_trans_id = root->fs_info->running_transaction->transid;
92 if (root->ref_cows && root->last_trans < running_trans_id) {
93 WARN_ON(root == root->fs_info->extent_root);
94 if (root->root_item.refs != 0) {
95 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
96 (unsigned long)root->root_key.objectid,
97 BTRFS_ROOT_TRANS_TAG);
31153d81
YZ
98
99 dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
100 BUG_ON(!dirty);
101 dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS);
102 BUG_ON(!dirty->root);
31153d81
YZ
103 dirty->latest_root = root;
104 INIT_LIST_HEAD(&dirty->list);
31153d81 105
925baedd 106 root->commit_root = btrfs_root_node(root);
31153d81
YZ
107
108 memcpy(dirty->root, root, sizeof(*root));
109 spin_lock_init(&dirty->root->node_lock);
bcc63abb 110 spin_lock_init(&dirty->root->list_lock);
31153d81 111 mutex_init(&dirty->root->objectid_mutex);
bcc63abb 112 INIT_LIST_HEAD(&dirty->root->dead_list);
31153d81
YZ
113 dirty->root->node = root->commit_root;
114 dirty->root->commit_root = NULL;
bcc63abb
Y
115
116 spin_lock(&root->list_lock);
117 list_add(&dirty->root->dead_list, &root->dead_list);
118 spin_unlock(&root->list_lock);
119
120 root->dirty_root = dirty;
6702ed49
CM
121 } else {
122 WARN_ON(1);
123 }
124 root->last_trans = running_trans_id;
125 }
126 return 0;
127}
128
37d1aeee 129static void wait_current_trans(struct btrfs_root *root)
79154b1b 130{
f9295749 131 struct btrfs_transaction *cur_trans;
79154b1b 132
f9295749 133 cur_trans = root->fs_info->running_transaction;
37d1aeee 134 if (cur_trans && cur_trans->blocked) {
f9295749
CM
135 DEFINE_WAIT(wait);
136 cur_trans->use_count++;
137 while(1) {
138 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
139 TASK_UNINTERRUPTIBLE);
140 if (cur_trans->blocked) {
141 mutex_unlock(&root->fs_info->trans_mutex);
142 schedule();
143 mutex_lock(&root->fs_info->trans_mutex);
144 finish_wait(&root->fs_info->transaction_wait,
145 &wait);
146 } else {
147 finish_wait(&root->fs_info->transaction_wait,
148 &wait);
149 break;
150 }
151 }
152 put_transaction(cur_trans);
153 }
37d1aeee
CM
154}
155
e02119d5 156static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
9ca9ee09 157 int num_blocks, int wait)
37d1aeee
CM
158{
159 struct btrfs_trans_handle *h =
160 kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
161 int ret;
162
163 mutex_lock(&root->fs_info->trans_mutex);
9ca9ee09 164 if ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2)
37d1aeee 165 wait_current_trans(root);
79154b1b
CM
166 ret = join_transaction(root);
167 BUG_ON(ret);
0f7d52f4 168
e02119d5 169 btrfs_record_root_in_trans(root);
6702ed49 170 h->transid = root->fs_info->running_transaction->transid;
79154b1b
CM
171 h->transaction = root->fs_info->running_transaction;
172 h->blocks_reserved = num_blocks;
173 h->blocks_used = 0;
31f3c99b 174 h->block_group = NULL;
26b8003f
CM
175 h->alloc_exclude_nr = 0;
176 h->alloc_exclude_start = 0;
79154b1b
CM
177 root->fs_info->running_transaction->use_count++;
178 mutex_unlock(&root->fs_info->trans_mutex);
179 return h;
180}
181
f9295749
CM
/*
 * Start a transaction handle on @root with @num_blocks recorded as its
 * reservation.  Uses wait mode 1 of start_transaction(): wait for a blocked
 * transaction unless fs_info->open_ioctl_trans is set.
 */
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
						   int num_blocks)
{
	struct btrfs_trans_handle *handle;

	handle = start_transaction(root, num_blocks, 1);
	return handle;
}
/*
 * Like btrfs_start_transaction(), but uses wait mode 0 of
 * start_transaction(): never wait for a blocked transaction.
 */
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
						  int num_blocks)
{
	struct btrfs_trans_handle *handle;

	handle = start_transaction(root, num_blocks, 0);
	return handle;
}
192
9ca9ee09
SW
/*
 * Start a transaction handle on @r using wait mode 2 of
 * start_transaction(): always wait for a blocked transaction, regardless of
 * fs_info->open_ioctl_trans.
 */
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
							 int num_blocks)
{
	struct btrfs_trans_handle *handle;

	handle = start_transaction(r, num_blocks, 2);
	return handle;
}
198
199
89ce8a63
CM
200static noinline int wait_for_commit(struct btrfs_root *root,
201 struct btrfs_transaction *commit)
202{
203 DEFINE_WAIT(wait);
204 mutex_lock(&root->fs_info->trans_mutex);
205 while(!commit->commit_done) {
206 prepare_to_wait(&commit->commit_wait, &wait,
207 TASK_UNINTERRUPTIBLE);
208 if (commit->commit_done)
209 break;
210 mutex_unlock(&root->fs_info->trans_mutex);
211 schedule();
212 mutex_lock(&root->fs_info->trans_mutex);
213 }
214 mutex_unlock(&root->fs_info->trans_mutex);
215 finish_wait(&commit->commit_wait, &wait);
216 return 0;
217}
218
37d1aeee 219static void throttle_on_drops(struct btrfs_root *root)
ab78c84d
CM
220{
221 struct btrfs_fs_info *info = root->fs_info;
2dd3e67b 222 int harder_count = 0;
ab78c84d 223
2dd3e67b 224harder:
ab78c84d
CM
225 if (atomic_read(&info->throttles)) {
226 DEFINE_WAIT(wait);
227 int thr;
ab78c84d
CM
228 thr = atomic_read(&info->throttle_gen);
229
230 do {
231 prepare_to_wait(&info->transaction_throttle,
232 &wait, TASK_UNINTERRUPTIBLE);
233 if (!atomic_read(&info->throttles)) {
234 finish_wait(&info->transaction_throttle, &wait);
235 break;
236 }
237 schedule();
238 finish_wait(&info->transaction_throttle, &wait);
239 } while (thr == atomic_read(&info->throttle_gen));
2dd3e67b
CM
240 harder_count++;
241
242 if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 &&
243 harder_count < 2)
244 goto harder;
245
246 if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 &&
247 harder_count < 10)
248 goto harder;
249
250 if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 &&
251 harder_count < 20)
252 goto harder;
ab78c84d
CM
253 }
254}
255
37d1aeee
CM
256void btrfs_throttle(struct btrfs_root *root)
257{
258 mutex_lock(&root->fs_info->trans_mutex);
9ca9ee09
SW
259 if (!root->fs_info->open_ioctl_trans)
260 wait_current_trans(root);
37d1aeee
CM
261 mutex_unlock(&root->fs_info->trans_mutex);
262
263 throttle_on_drops(root);
264}
265
89ce8a63
CM
266static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
267 struct btrfs_root *root, int throttle)
79154b1b
CM
268{
269 struct btrfs_transaction *cur_trans;
ab78c84d 270 struct btrfs_fs_info *info = root->fs_info;
d6e4a428 271
ab78c84d
CM
272 mutex_lock(&info->trans_mutex);
273 cur_trans = info->running_transaction;
ccd467d6 274 WARN_ON(cur_trans != trans->transaction);
d5719762 275 WARN_ON(cur_trans->num_writers < 1);
ccd467d6 276 cur_trans->num_writers--;
89ce8a63 277
79154b1b
CM
278 if (waitqueue_active(&cur_trans->writer_wait))
279 wake_up(&cur_trans->writer_wait);
79154b1b 280 put_transaction(cur_trans);
ab78c84d 281 mutex_unlock(&info->trans_mutex);
d6025579 282 memset(trans, 0, sizeof(*trans));
2c90e5d6 283 kmem_cache_free(btrfs_trans_handle_cachep, trans);
ab78c84d
CM
284
285 if (throttle)
37d1aeee 286 throttle_on_drops(root);
ab78c84d 287
79154b1b
CM
288 return 0;
289}
290
89ce8a63
CM
/* End the handle @trans without throttling the caller afterwards. */
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	int ret;

	ret = __btrfs_end_transaction(trans, root, 0);
	return ret;
}
296
/* End the handle @trans, then run the caller through throttle_on_drops(). */
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	int ret;

	ret = __btrfs_end_transaction(trans, root, 1);
	return ret;
}
302
79154b1b
CM
303
304int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
305 struct btrfs_root *root)
306{
7c4452b9 307 int ret;
777e6bd7 308 int err = 0;
7c4452b9 309 int werr = 0;
d1310b2e 310 struct extent_io_tree *dirty_pages;
7c4452b9 311 struct page *page;
7c4452b9 312 struct inode *btree_inode = root->fs_info->btree_inode;
777e6bd7 313 u64 start = 0;
5f39d397
CM
314 u64 end;
315 unsigned long index;
7c4452b9
CM
316
317 if (!trans || !trans->transaction) {
318 return filemap_write_and_wait(btree_inode->i_mapping);
319 }
320 dirty_pages = &trans->transaction->dirty_pages;
321 while(1) {
777e6bd7 322 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
5f39d397
CM
323 EXTENT_DIRTY);
324 if (ret)
7c4452b9 325 break;
5f39d397 326 while(start <= end) {
777e6bd7
CM
327 cond_resched();
328
5f39d397 329 index = start >> PAGE_CACHE_SHIFT;
35ebb934 330 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
5f39d397 331 page = find_lock_page(btree_inode->i_mapping, index);
7c4452b9
CM
332 if (!page)
333 continue;
6702ed49
CM
334 if (PageWriteback(page)) {
335 if (PageDirty(page))
336 wait_on_page_writeback(page);
337 else {
338 unlock_page(page);
339 page_cache_release(page);
340 continue;
341 }
342 }
7c4452b9
CM
343 err = write_one_page(page, 0);
344 if (err)
345 werr = err;
346 page_cache_release(page);
347 }
348 }
777e6bd7
CM
349 while(1) {
350 ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
351 EXTENT_DIRTY);
352 if (ret)
353 break;
354
355 clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
356 while(start <= end) {
357 index = start >> PAGE_CACHE_SHIFT;
358 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
359 page = find_get_page(btree_inode->i_mapping, index);
360 if (!page)
361 continue;
362 if (PageDirty(page)) {
363 lock_page(page);
364 err = write_one_page(page, 0);
365 if (err)
366 werr = err;
367 }
368 wait_on_page_writeback(page);
369 page_cache_release(page);
370 cond_resched();
371 }
372 }
7c4452b9
CM
373 if (err)
374 werr = err;
375 return werr;
79154b1b
CM
376}
377
0b86a832
CM
378static int update_cowonly_root(struct btrfs_trans_handle *trans,
379 struct btrfs_root *root)
79154b1b
CM
380{
381 int ret;
0b86a832
CM
382 u64 old_root_bytenr;
383 struct btrfs_root *tree_root = root->fs_info->tree_root;
79154b1b 384
0b86a832 385 btrfs_write_dirty_block_groups(trans, root);
79154b1b 386 while(1) {
0b86a832
CM
387 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
388 if (old_root_bytenr == root->node->start)
79154b1b 389 break;
0b86a832
CM
390 btrfs_set_root_bytenr(&root->root_item,
391 root->node->start);
392 btrfs_set_root_level(&root->root_item,
393 btrfs_header_level(root->node));
79154b1b 394 ret = btrfs_update_root(trans, tree_root,
0b86a832
CM
395 &root->root_key,
396 &root->root_item);
79154b1b 397 BUG_ON(ret);
0b86a832
CM
398 btrfs_write_dirty_block_groups(trans, root);
399 }
400 return 0;
401}
402
403int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
404 struct btrfs_root *root)
405{
406 struct btrfs_fs_info *fs_info = root->fs_info;
407 struct list_head *next;
408
409 while(!list_empty(&fs_info->dirty_cowonly_roots)) {
410 next = fs_info->dirty_cowonly_roots.next;
411 list_del_init(next);
412 root = list_entry(next, struct btrfs_root, dirty_list);
413 update_cowonly_root(trans, root);
79154b1b
CM
414 }
415 return 0;
416}
417
b48652c1 418int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest)
5eda7b5e 419{
f321e491 420 struct btrfs_dirty_root *dirty;
5eda7b5e
CM
421
422 dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
423 if (!dirty)
424 return -ENOMEM;
5eda7b5e 425 dirty->root = root;
5ce14bbc 426 dirty->latest_root = latest;
b48652c1
YZ
427
428 mutex_lock(&root->fs_info->trans_mutex);
429 list_add(&dirty->list, &latest->fs_info->dead_roots);
430 mutex_unlock(&root->fs_info->trans_mutex);
5eda7b5e
CM
431 return 0;
432}
433
80b6794d
CM
434static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
435 struct radix_tree_root *radix,
436 struct list_head *list)
0f7d52f4 437{
f321e491 438 struct btrfs_dirty_root *dirty;
0f7d52f4
CM
439 struct btrfs_root *gang[8];
440 struct btrfs_root *root;
441 int i;
442 int ret;
54aa1f4d 443 int err = 0;
5eda7b5e 444 u32 refs;
54aa1f4d 445
0f7d52f4
CM
446 while(1) {
447 ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0,
448 ARRAY_SIZE(gang),
449 BTRFS_ROOT_TRANS_TAG);
450 if (ret == 0)
451 break;
452 for (i = 0; i < ret; i++) {
453 root = gang[i];
2619ba1f
CM
454 radix_tree_tag_clear(radix,
455 (unsigned long)root->root_key.objectid,
456 BTRFS_ROOT_TRANS_TAG);
31153d81
YZ
457
458 BUG_ON(!root->ref_tree);
017e5369 459 dirty = root->dirty_root;
31153d81 460
e02119d5
CM
461 btrfs_free_log(trans, root);
462
0f7d52f4 463 if (root->commit_root == root->node) {
db94535d
CM
464 WARN_ON(root->node->start !=
465 btrfs_root_bytenr(&root->root_item));
31153d81 466
5f39d397 467 free_extent_buffer(root->commit_root);
0f7d52f4 468 root->commit_root = NULL;
7ea394f1 469 root->dirty_root = NULL;
bcc63abb
Y
470
471 spin_lock(&root->list_lock);
472 list_del_init(&dirty->root->dead_list);
473 spin_unlock(&root->list_lock);
474
31153d81
YZ
475 kfree(dirty->root);
476 kfree(dirty);
58176a96
JB
477
478 /* make sure to update the root on disk
479 * so we get any updates to the block used
480 * counts
481 */
482 err = btrfs_update_root(trans,
483 root->fs_info->tree_root,
484 &root->root_key,
485 &root->root_item);
0f7d52f4
CM
486 continue;
487 }
9f3a7427
CM
488
489 memset(&root->root_item.drop_progress, 0,
490 sizeof(struct btrfs_disk_key));
491 root->root_item.drop_level = 0;
0f7d52f4 492 root->commit_root = NULL;
7ea394f1 493 root->dirty_root = NULL;
0f7d52f4 494 root->root_key.offset = root->fs_info->generation;
db94535d
CM
495 btrfs_set_root_bytenr(&root->root_item,
496 root->node->start);
497 btrfs_set_root_level(&root->root_item,
498 btrfs_header_level(root->node));
0f7d52f4
CM
499 err = btrfs_insert_root(trans, root->fs_info->tree_root,
500 &root->root_key,
501 &root->root_item);
54aa1f4d
CM
502 if (err)
503 break;
9f3a7427
CM
504
505 refs = btrfs_root_refs(&dirty->root->root_item);
506 btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
5eda7b5e 507 err = btrfs_update_root(trans, root->fs_info->tree_root,
9f3a7427
CM
508 &dirty->root->root_key,
509 &dirty->root->root_item);
5eda7b5e
CM
510
511 BUG_ON(err);
9f3a7427 512 if (refs == 1) {
5eda7b5e 513 list_add(&dirty->list, list);
9f3a7427
CM
514 } else {
515 WARN_ON(1);
31153d81 516 free_extent_buffer(dirty->root->node);
9f3a7427 517 kfree(dirty->root);
5eda7b5e 518 kfree(dirty);
9f3a7427 519 }
0f7d52f4
CM
520 }
521 }
54aa1f4d 522 return err;
0f7d52f4
CM
523}
524
e9d0b13b
CM
525int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
526{
527 struct btrfs_fs_info *info = root->fs_info;
528 int ret;
529 struct btrfs_trans_handle *trans;
d3c2fdcf 530 unsigned long nr;
e9d0b13b 531
a2135011 532 smp_mb();
e9d0b13b
CM
533 if (root->defrag_running)
534 return 0;
e9d0b13b 535 trans = btrfs_start_transaction(root, 1);
6b80053d 536 while (1) {
e9d0b13b
CM
537 root->defrag_running = 1;
538 ret = btrfs_defrag_leaves(trans, root, cacheonly);
d3c2fdcf 539 nr = trans->blocks_used;
e9d0b13b 540 btrfs_end_transaction(trans, root);
d3c2fdcf 541 btrfs_btree_balance_dirty(info->tree_root, nr);
e9d0b13b
CM
542 cond_resched();
543
e9d0b13b 544 trans = btrfs_start_transaction(root, 1);
3f157a2f 545 if (root->fs_info->closing || ret != -EAGAIN)
e9d0b13b
CM
546 break;
547 }
548 root->defrag_running = 0;
a2135011 549 smp_mb();
e9d0b13b
CM
550 btrfs_end_transaction(trans, root);
551 return 0;
552}
553
80b6794d
CM
554static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
555 struct list_head *list)
0f7d52f4 556{
f321e491 557 struct btrfs_dirty_root *dirty;
0f7d52f4 558 struct btrfs_trans_handle *trans;
d3c2fdcf 559 unsigned long nr;
db94535d
CM
560 u64 num_bytes;
561 u64 bytes_used;
bcc63abb 562 u64 max_useless;
54aa1f4d 563 int ret = 0;
9f3a7427
CM
564 int err;
565
0f7d52f4 566 while(!list_empty(list)) {
58176a96
JB
567 struct btrfs_root *root;
568
f321e491 569 dirty = list_entry(list->prev, struct btrfs_dirty_root, list);
0f7d52f4 570 list_del_init(&dirty->list);
5eda7b5e 571
db94535d 572 num_bytes = btrfs_root_used(&dirty->root->root_item);
58176a96 573 root = dirty->latest_root;
a2135011 574 atomic_inc(&root->fs_info->throttles);
58176a96 575
a2135011 576 mutex_lock(&root->fs_info->drop_mutex);
9f3a7427
CM
577 while(1) {
578 trans = btrfs_start_transaction(tree_root, 1);
579 ret = btrfs_drop_snapshot(trans, dirty->root);
580 if (ret != -EAGAIN) {
581 break;
582 }
58176a96 583
9f3a7427
CM
584 err = btrfs_update_root(trans,
585 tree_root,
586 &dirty->root->root_key,
587 &dirty->root->root_item);
588 if (err)
589 ret = err;
d3c2fdcf 590 nr = trans->blocks_used;
017e5369 591 ret = btrfs_end_transaction(trans, tree_root);
9f3a7427 592 BUG_ON(ret);
a2135011
CM
593
594 mutex_unlock(&root->fs_info->drop_mutex);
d3c2fdcf 595 btrfs_btree_balance_dirty(tree_root, nr);
4dc11904 596 cond_resched();
a2135011 597 mutex_lock(&root->fs_info->drop_mutex);
9f3a7427 598 }
0f7d52f4 599 BUG_ON(ret);
a2135011 600 atomic_dec(&root->fs_info->throttles);
017e5369 601 wake_up(&root->fs_info->transaction_throttle);
58176a96 602
a2135011 603 mutex_lock(&root->fs_info->alloc_mutex);
db94535d
CM
604 num_bytes -= btrfs_root_used(&dirty->root->root_item);
605 bytes_used = btrfs_root_used(&root->root_item);
606 if (num_bytes) {
e02119d5 607 btrfs_record_root_in_trans(root);
5f39d397 608 btrfs_set_root_used(&root->root_item,
db94535d 609 bytes_used - num_bytes);
58176a96 610 }
a2135011
CM
611 mutex_unlock(&root->fs_info->alloc_mutex);
612
9f3a7427 613 ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key);
58176a96
JB
614 if (ret) {
615 BUG();
54aa1f4d 616 break;
58176a96 617 }
a2135011
CM
618 mutex_unlock(&root->fs_info->drop_mutex);
619
bcc63abb
Y
620 spin_lock(&root->list_lock);
621 list_del_init(&dirty->root->dead_list);
622 if (!list_empty(&root->dead_list)) {
623 struct btrfs_root *oldest;
624 oldest = list_entry(root->dead_list.prev,
625 struct btrfs_root, dead_list);
626 max_useless = oldest->root_key.offset - 1;
627 } else {
628 max_useless = root->root_key.offset - 1;
629 }
630 spin_unlock(&root->list_lock);
631
d3c2fdcf 632 nr = trans->blocks_used;
0f7d52f4
CM
633 ret = btrfs_end_transaction(trans, tree_root);
634 BUG_ON(ret);
5eda7b5e 635
bcc63abb
Y
636 ret = btrfs_remove_leaf_refs(root, max_useless);
637 BUG_ON(ret);
638
f510cfec 639 free_extent_buffer(dirty->root->node);
9f3a7427 640 kfree(dirty->root);
0f7d52f4 641 kfree(dirty);
d3c2fdcf
CM
642
643 btrfs_btree_balance_dirty(tree_root, nr);
4dc11904 644 cond_resched();
0f7d52f4 645 }
54aa1f4d 646 return ret;
0f7d52f4
CM
647}
648
80b6794d 649static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
3063d29f
CM
650 struct btrfs_fs_info *fs_info,
651 struct btrfs_pending_snapshot *pending)
652{
653 struct btrfs_key key;
80b6794d 654 struct btrfs_root_item *new_root_item;
3063d29f
CM
655 struct btrfs_root *tree_root = fs_info->tree_root;
656 struct btrfs_root *root = pending->root;
657 struct extent_buffer *tmp;
925baedd 658 struct extent_buffer *old;
3063d29f 659 int ret;
3b96362c 660 int namelen;
3063d29f
CM
661 u64 objectid;
662
80b6794d
CM
663 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
664 if (!new_root_item) {
665 ret = -ENOMEM;
666 goto fail;
667 }
3063d29f
CM
668 ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid);
669 if (ret)
670 goto fail;
671
80b6794d 672 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
3063d29f
CM
673
674 key.objectid = objectid;
675 key.offset = 1;
676 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
677
925baedd 678 old = btrfs_lock_root_node(root);
65b51a00 679 btrfs_cow_block(trans, root, old, NULL, 0, &old, 0);
3063d29f 680
925baedd
CM
681 btrfs_copy_root(trans, root, old, &tmp, objectid);
682 btrfs_tree_unlock(old);
683 free_extent_buffer(old);
3063d29f 684
80b6794d
CM
685 btrfs_set_root_bytenr(new_root_item, tmp->start);
686 btrfs_set_root_level(new_root_item, btrfs_header_level(tmp));
3063d29f 687 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
80b6794d 688 new_root_item);
925baedd 689 btrfs_tree_unlock(tmp);
3063d29f
CM
690 free_extent_buffer(tmp);
691 if (ret)
692 goto fail;
693
694 /*
695 * insert the directory item
696 */
697 key.offset = (u64)-1;
3b96362c 698 namelen = strlen(pending->name);
3063d29f 699 ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
3b96362c 700 pending->name, namelen,
3063d29f 701 root->fs_info->sb->s_root->d_inode->i_ino,
aec7477b 702 &key, BTRFS_FT_DIR, 0);
3063d29f
CM
703
704 if (ret)
705 goto fail;
706
707 ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
708 pending->name, strlen(pending->name), objectid,
aec7477b 709 root->fs_info->sb->s_root->d_inode->i_ino, 0);
3b96362c
SW
710
711 /* Invalidate existing dcache entry for new snapshot. */
712 btrfs_invalidate_dcache_root(root, pending->name, namelen);
713
3063d29f 714fail:
80b6794d 715 kfree(new_root_item);
3063d29f
CM
716 return ret;
717}
718
80b6794d
CM
719static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
720 struct btrfs_fs_info *fs_info)
3063d29f
CM
721{
722 struct btrfs_pending_snapshot *pending;
723 struct list_head *head = &trans->transaction->pending_snapshots;
724 int ret;
725
726 while(!list_empty(head)) {
727 pending = list_entry(head->next,
728 struct btrfs_pending_snapshot, list);
729 ret = create_pending_snapshot(trans, fs_info, pending);
730 BUG_ON(ret);
731 list_del(&pending->list);
732 kfree(pending->name);
733 kfree(pending);
734 }
dc17ff8f
CM
735 return 0;
736}
737
79154b1b
CM
738int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
739 struct btrfs_root *root)
740{
15ee9bc7
JB
741 unsigned long joined = 0;
742 unsigned long timeout = 1;
79154b1b 743 struct btrfs_transaction *cur_trans;
8fd17795 744 struct btrfs_transaction *prev_trans = NULL;
0b86a832 745 struct btrfs_root *chunk_root = root->fs_info->chunk_root;
0f7d52f4 746 struct list_head dirty_fs_roots;
d1310b2e 747 struct extent_io_tree *pinned_copy;
79154b1b 748 DEFINE_WAIT(wait);
15ee9bc7 749 int ret;
79154b1b 750
0f7d52f4 751 INIT_LIST_HEAD(&dirty_fs_roots);
79154b1b
CM
752 mutex_lock(&root->fs_info->trans_mutex);
753 if (trans->transaction->in_commit) {
754 cur_trans = trans->transaction;
755 trans->transaction->use_count++;
ccd467d6 756 mutex_unlock(&root->fs_info->trans_mutex);
79154b1b 757 btrfs_end_transaction(trans, root);
ccd467d6 758
79154b1b
CM
759 ret = wait_for_commit(root, cur_trans);
760 BUG_ON(ret);
15ee9bc7
JB
761
762 mutex_lock(&root->fs_info->trans_mutex);
79154b1b 763 put_transaction(cur_trans);
15ee9bc7
JB
764 mutex_unlock(&root->fs_info->trans_mutex);
765
79154b1b
CM
766 return 0;
767 }
4313b399
CM
768
769 pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS);
770 if (!pinned_copy)
771 return -ENOMEM;
772
d1310b2e 773 extent_io_tree_init(pinned_copy,
4313b399
CM
774 root->fs_info->btree_inode->i_mapping, GFP_NOFS);
775
2c90e5d6 776 trans->transaction->in_commit = 1;
f9295749 777 trans->transaction->blocked = 1;
ccd467d6
CM
778 cur_trans = trans->transaction;
779 if (cur_trans->list.prev != &root->fs_info->trans_list) {
780 prev_trans = list_entry(cur_trans->list.prev,
781 struct btrfs_transaction, list);
782 if (!prev_trans->commit_done) {
783 prev_trans->use_count++;
ccd467d6
CM
784 mutex_unlock(&root->fs_info->trans_mutex);
785
786 wait_for_commit(root, prev_trans);
ccd467d6 787
ccd467d6 788 mutex_lock(&root->fs_info->trans_mutex);
15ee9bc7 789 put_transaction(prev_trans);
ccd467d6
CM
790 }
791 }
15ee9bc7
JB
792
793 do {
7ea394f1 794 int snap_pending = 0;
15ee9bc7 795 joined = cur_trans->num_joined;
7ea394f1
YZ
796 if (!list_empty(&trans->transaction->pending_snapshots))
797 snap_pending = 1;
798
2c90e5d6 799 WARN_ON(cur_trans != trans->transaction);
15ee9bc7 800 prepare_to_wait(&cur_trans->writer_wait, &wait,
79154b1b 801 TASK_UNINTERRUPTIBLE);
15ee9bc7
JB
802
803 if (cur_trans->num_writers > 1)
804 timeout = MAX_SCHEDULE_TIMEOUT;
805 else
806 timeout = 1;
807
79154b1b 808 mutex_unlock(&root->fs_info->trans_mutex);
15ee9bc7 809
7ea394f1
YZ
810 if (snap_pending) {
811 ret = btrfs_wait_ordered_extents(root, 1);
812 BUG_ON(ret);
813 }
814
15ee9bc7
JB
815 schedule_timeout(timeout);
816
79154b1b 817 mutex_lock(&root->fs_info->trans_mutex);
15ee9bc7
JB
818 finish_wait(&cur_trans->writer_wait, &wait);
819 } while (cur_trans->num_writers > 1 ||
820 (cur_trans->num_joined != joined));
821
3063d29f
CM
822 ret = create_pending_snapshots(trans, root->fs_info);
823 BUG_ON(ret);
824
2c90e5d6 825 WARN_ON(cur_trans != trans->transaction);
dc17ff8f 826
e02119d5
CM
827 /* btrfs_commit_tree_roots is responsible for getting the
828 * various roots consistent with each other. Every pointer
829 * in the tree of tree roots has to point to the most up to date
830 * root for every subvolume and other tree. So, we have to keep
831 * the tree logging code from jumping in and changing any
832 * of the trees.
833 *
834 * At this point in the commit, there can't be any tree-log
835 * writers, but a little lower down we drop the trans mutex
836 * and let new people in. By holding the tree_log_mutex
837 * from now until after the super is written, we avoid races
838 * with the tree-log code.
839 */
840 mutex_lock(&root->fs_info->tree_log_mutex);
841
54aa1f4d
CM
842 ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
843 &dirty_fs_roots);
844 BUG_ON(ret);
845
e02119d5
CM
846 /* add_dirty_roots gets rid of all the tree log roots, it is now
847 * safe to free the root of tree log roots
848 */
849 btrfs_free_log_root_tree(trans, root->fs_info);
850
79154b1b
CM
851 ret = btrfs_commit_tree_roots(trans, root);
852 BUG_ON(ret);
54aa1f4d 853
78fae27e 854 cur_trans = root->fs_info->running_transaction;
cee36a03 855 spin_lock(&root->fs_info->new_trans_lock);
78fae27e 856 root->fs_info->running_transaction = NULL;
cee36a03 857 spin_unlock(&root->fs_info->new_trans_lock);
4b52dff6
CM
858 btrfs_set_super_generation(&root->fs_info->super_copy,
859 cur_trans->transid);
860 btrfs_set_super_root(&root->fs_info->super_copy,
db94535d
CM
861 root->fs_info->tree_root->node->start);
862 btrfs_set_super_root_level(&root->fs_info->super_copy,
863 btrfs_header_level(root->fs_info->tree_root->node));
5f39d397 864
0b86a832
CM
865 btrfs_set_super_chunk_root(&root->fs_info->super_copy,
866 chunk_root->node->start);
867 btrfs_set_super_chunk_root_level(&root->fs_info->super_copy,
868 btrfs_header_level(chunk_root->node));
e02119d5
CM
869
870 if (!root->fs_info->log_root_recovering) {
871 btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
872 btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0);
873 }
874
a061fc8d
CM
875 memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
876 sizeof(root->fs_info->super_copy));
ccd467d6 877
4313b399 878 btrfs_copy_pinned(root, pinned_copy);
ccd467d6 879
f9295749 880 trans->transaction->blocked = 0;
e6dcd2dc 881 wake_up(&root->fs_info->transaction_throttle);
f9295749 882 wake_up(&root->fs_info->transaction_wait);
e6dcd2dc 883
78fae27e 884 mutex_unlock(&root->fs_info->trans_mutex);
79154b1b
CM
885 ret = btrfs_write_and_wait_transaction(trans, root);
886 BUG_ON(ret);
79154b1b 887 write_ctree_super(trans, root);
4313b399 888
e02119d5
CM
889 /*
890 * the super is written, we can safely allow the tree-loggers
891 * to go about their business
892 */
893 mutex_unlock(&root->fs_info->tree_log_mutex);
894
4313b399 895 btrfs_finish_extent_commit(trans, root, pinned_copy);
78fae27e 896 mutex_lock(&root->fs_info->trans_mutex);
4313b399
CM
897
898 kfree(pinned_copy);
899
2c90e5d6 900 cur_trans->commit_done = 1;
15ee9bc7 901 root->fs_info->last_trans_committed = cur_trans->transid;
2c90e5d6 902 wake_up(&cur_trans->commit_wait);
78fae27e 903 put_transaction(cur_trans);
79154b1b 904 put_transaction(cur_trans);
58176a96 905
bcc63abb 906 list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
facda1e7
CM
907 if (root->fs_info->closing)
908 list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);
58176a96 909
78fae27e 910 mutex_unlock(&root->fs_info->trans_mutex);
2c90e5d6 911 kmem_cache_free(btrfs_trans_handle_cachep, trans);
79154b1b 912
facda1e7 913 if (root->fs_info->closing) {
facda1e7 914 drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
facda1e7 915 }
79154b1b
CM
916 return ret;
917}
918
e9d0b13b
CM
919int btrfs_clean_old_snapshots(struct btrfs_root *root)
920{
921 struct list_head dirty_roots;
922 INIT_LIST_HEAD(&dirty_roots);
a74a4b97 923again:
e9d0b13b
CM
924 mutex_lock(&root->fs_info->trans_mutex);
925 list_splice_init(&root->fs_info->dead_roots, &dirty_roots);
926 mutex_unlock(&root->fs_info->trans_mutex);
927
928 if (!list_empty(&dirty_roots)) {
929 drop_dirty_roots(root, &dirty_roots);
a74a4b97 930 goto again;
e9d0b13b
CM
931 }
932 return 0;
933}