2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
20 #include <linux/sched.h>
21 #include <linux/writeback.h>
24 #include "transaction.h"
/* Counter checked by the WARN_ON() in put_transaction(). */
26 static int total_trans = 0;
/*
 * Slab caches defined outside this file; used here to allocate and free
 * transaction handles and transactions.
 */
27 extern struct kmem_cache *btrfs_trans_handle_cachep;
28 extern struct kmem_cache *btrfs_transaction_cachep;
/* Workqueue on which fs_info->trans_work is queued. */
30 static struct workqueue_struct *trans_wq;
/*
 * Tags applied to entries of fs_info->fs_roots_radix.  Both are set in
 * record_root_in_trans().  The TRANS tag is looked up and cleared in
 * add_dirty_roots(); the DEFRAG tag is looked up in
 * btrfs_defrag_dirty_roots() and cleared in btrfs_defrag_root().
 */
32 #define BTRFS_ROOT_TRANS_TAG 0
33 #define BTRFS_ROOT_DEFRAG_TAG 1
/*
 * Drop one use_count reference on @transaction.  When the count reaches
 * zero the transaction is unlinked from its list, zeroed and returned to
 * btrfs_transaction_cachep.  Every caller in this file invokes it with
 * fs_info->trans_mutex held.
 */
35 static void put_transaction(struct btrfs_transaction *transaction)
/* Dropping a reference that was never taken indicates a refcount bug. */
37 WARN_ON(transaction->use_count == 0);
38 transaction->use_count--;
39 if (transaction->use_count == 0) {
40 WARN_ON(total_trans == 0);
42 list_del_init(&transaction->list);
/* Clear the object before handing it back to the slab cache. */
43 memset(transaction, 0, sizeof(*transaction));
44 kmem_cache_free(btrfs_transaction_cachep, transaction);
/*
 * Register the caller as a writer on root->fs_info->running_transaction.
 *
 * Two paths are visible.  One allocates and initialises a new transaction:
 * the filesystem generation is bumped and used as the transid, the
 * transaction starts with one writer and nothing joined, and it is linked
 * at the tail of fs_info->trans_list.  The other increments num_writers
 * and num_joined on an existing transaction.
 *
 * NOTE(review): the condition selecting between the two paths, the
 * allocation flags and the return value are not visible in this extract;
 * presumably the new-transaction path runs when cur_trans is NULL - confirm.
 * The only caller in this file holds fs_info->trans_mutex.
 */
48 static int join_transaction(struct btrfs_root *root)
50 struct btrfs_transaction *cur_trans;
51 cur_trans = root->fs_info->running_transaction;
53 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
/* A new transaction takes the next generation number as its transid. */
57 root->fs_info->generation++;
58 root->fs_info->running_transaction = cur_trans;
59 cur_trans->num_writers = 1;
60 cur_trans->num_joined = 0;
61 cur_trans->transid = root->fs_info->generation;
62 init_waitqueue_head(&cur_trans->writer_wait);
63 init_waitqueue_head(&cur_trans->commit_wait);
64 cur_trans->in_commit = 0;
/* Initial reference on the transaction itself. */
65 cur_trans->use_count = 1;
66 cur_trans->commit_done = 0;
/* Read by btrfs_transaction_cleaner() to decide when to commit. */
67 cur_trans->start_time = get_seconds();
68 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
69 init_bit_radix(&cur_trans->dirty_pages);
/* Existing transaction: account for one more writer / joiner. */
71 cur_trans->num_writers++;
72 cur_trans->num_joined++;
/*
 * Note that @root is in use by the running transaction.
 *
 * For a root with ref_cows set whose last_trans is older than the running
 * transid, the visible statements tag the root in fs_info->fs_roots_radix
 * with BTRFS_ROOT_TRANS_TAG and BTRFS_ROOT_DEFRAG_TAG (guarded by
 * root_item.refs != 0), record root->node as commit_root and set
 * last_trans to the running transid.
 *
 * NOTE(review): brace lines are missing from this extract, so the exact
 * nesting of the commit_root and last_trans assignments should be
 * confirmed against the full file.  Dereferences
 * fs_info->running_transaction without a NULL check.
 */
78 static int record_root_in_trans(struct btrfs_root *root)
80 u64 running_trans_id = root->fs_info->running_transaction->transid;
81 if (root->ref_cows && root->last_trans < running_trans_id) {
/* The extent root is not expected to take this path. */
82 WARN_ON(root == root->fs_info->extent_root);
83 if (root->root_item.refs != 0) {
84 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
85 (unsigned long)root->root_key.objectid,
86 BTRFS_ROOT_TRANS_TAG);
87 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
88 (unsigned long)root->root_key.objectid,
89 BTRFS_ROOT_DEFRAG_TAG);
/* Remember the node the root had when it was first recorded. */
90 root->commit_root = root->node;
95 root->last_trans = running_trans_id;
/*
 * Allocate a transaction handle (GFP_NOFS) and attach it to the running
 * transaction.
 *
 * With fs_info->trans_mutex held it joins the running transaction via
 * join_transaction(), records @root via record_root_in_trans(), fills in
 * the handle and takes an extra use_count reference on the transaction.
 * btrfs_end_transaction() calls put_transaction() when a handle is ended.
 *
 * NOTE(review): the second parameter line, the checks on the allocation
 * and on ret, and the return statement are not visible in this extract;
 * h->blocks_reserved is set from num_blocks.
 */
100 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
103 struct btrfs_trans_handle *h =
104 kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
107 mutex_lock(&root->fs_info->trans_mutex);
108 ret = join_transaction(root);
111 record_root_in_trans(root);
112 h->transid = root->fs_info->running_transaction->transid;
113 h->transaction = root->fs_info->running_transaction;
114 h->blocks_reserved = num_blocks;
116 h->block_group = NULL;
117 h->alloc_exclude_nr = 0;
118 h->alloc_exclude_start = 0;
/* Reference taken on behalf of the new handle. */
119 root->fs_info->running_transaction->use_count++;
120 mutex_unlock(&root->fs_info->trans_mutex);
/*
 * Finish using a transaction handle.
 *
 * With fs_info->trans_mutex held, removes this writer from the running
 * transaction, wakes anyone sleeping on writer_wait and drops a use_count
 * reference via put_transaction().  The handle is then zeroed and returned
 * to btrfs_trans_handle_cachep; @trans must not be used afterwards.
 *
 * Note that the transaction operated on is fs_info->running_transaction,
 * not trans->transaction; a mismatch only triggers a WARN_ON.
 * The return statement is not visible in this extract.
 */
124 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
125 struct btrfs_root *root)
127 struct btrfs_transaction *cur_trans;
129 mutex_lock(&root->fs_info->trans_mutex);
130 cur_trans = root->fs_info->running_transaction;
131 WARN_ON(cur_trans != trans->transaction);
132 WARN_ON(cur_trans->num_writers < 1);
133 cur_trans->num_writers--;
/* btrfs_commit_transaction() sleeps on writer_wait until writers drain. */
134 if (waitqueue_active(&cur_trans->writer_wait))
135 wake_up(&cur_trans->writer_wait);
136 put_transaction(cur_trans);
137 mutex_unlock(&root->fs_info->trans_mutex);
138 memset(trans, 0, sizeof(*trans));
139 kmem_cache_free(btrfs_trans_handle_cachep, trans);
/*
 * Write out the btree pages dirtied by a transaction and wait for the I/O.
 *
 * Without a handle or a transaction, the whole btree inode mapping is
 * written and waited on with filemap_write_and_wait().  Otherwise the
 * transaction's dirty_pages bit radix is scanned in batches of
 * ARRAY_SIZE(gang) entries; each bit is cleared and the matching page in
 * the btree inode mapping is looked up locked, waited on if under
 * writeback, written with write_one_page() and released.  A final
 * filemap_fdatawait() waits on the mapping.
 *
 * NOTE(review): the enclosing loop, the page-missing / not-dirty branches
 * and the error handling are not visible in this extract.
 */
144 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
145 struct btrfs_root *root)
147 unsigned long gang[16];
153 struct radix_tree_root *dirty_pages;
154 struct inode *btree_inode = root->fs_info->btree_inode;
156 if (!trans || !trans->transaction) {
157 return filemap_write_and_wait(btree_inode->i_mapping);
159 dirty_pages = &trans->transaction->dirty_pages;
/* Fetch the next batch of set bits from the dirty-page radix. */
161 ret = find_first_radix_bit(dirty_pages, gang,
162 0, ARRAY_SIZE(gang));
165 for (i = 0; i < ret; i++) {
/* Clear the bit first so the next lookup does not return it again. */
167 clear_radix_bit(dirty_pages, gang[i]);
168 page = find_lock_page(btree_inode->i_mapping,
172 if (PageWriteback(page)) {
174 wait_on_page_writeback(page);
177 page_cache_release(page);
181 err = write_one_page(page, 0);
184 page_cache_release(page);
187 err = filemap_fdatawait(btree_inode->i_mapping);
/*
 * Bring the extent root's item in the tree root up to date for a commit.
 *
 * The visible statements write the dirty block groups, compare the block
 * number stored in the extent root's root_item with the block number of
 * its current node, store the current block number in the root_item,
 * update that item in the tree root, and write the dirty block groups
 * again.
 *
 * NOTE(review): the loop and break lines are missing from this extract;
 * presumably this repeats until the stored and current block numbers
 * match - confirm against the full file.
 */
193 int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
194 struct btrfs_root *root)
197 u64 old_extent_block;
198 struct btrfs_fs_info *fs_info = root->fs_info;
199 struct btrfs_root *tree_root = fs_info->tree_root;
200 struct btrfs_root *extent_root = fs_info->extent_root;
202 btrfs_write_dirty_block_groups(trans, extent_root);
204 old_extent_block = btrfs_root_blocknr(&extent_root->root_item);
/* Stored block number already matches the extent root's current node. */
205 if (old_extent_block == bh_blocknr(extent_root->node))
207 btrfs_set_root_blocknr(&extent_root->root_item,
208 bh_blocknr(extent_root->node));
209 ret = btrfs_update_root(trans, tree_root,
210 &extent_root->root_key,
211 &extent_root->root_item);
213 btrfs_write_dirty_block_groups(trans, extent_root);
/*
 * Sleep until @commit has commit_done set.
 *
 * Takes fs_info->trans_mutex itself, so the caller must not hold it.
 * Each iteration queues the task on commit->commit_wait in
 * TASK_UNINTERRUPTIBLE state, re-checks commit_done, and drops the mutex
 * before re-taking it.  btrfs_commit_transaction() sets commit_done and
 * calls wake_up() on commit_wait.
 *
 * NOTE(review): the declaration of 'wait', the statement under the inner
 * commit_done check and the sleep call between unlock and lock are not
 * visible in this extract.
 */
218 static int wait_for_commit(struct btrfs_root *root,
219 struct btrfs_transaction *commit)
222 mutex_lock(&root->fs_info->trans_mutex);
223 while(!commit->commit_done) {
224 prepare_to_wait(&commit->commit_wait, &wait,
225 TASK_UNINTERRUPTIBLE);
/* Re-check after queueing so a wake-up in between is not missed. */
226 if (commit->commit_done)
228 mutex_unlock(&root->fs_info->trans_mutex);
230 mutex_lock(&root->fs_info->trans_mutex);
232 mutex_unlock(&root->fs_info->trans_mutex);
233 finish_wait(&commit->commit_wait, &wait);
/*
 * Members of struct dirty_root, as used via 'struct dirty_root *' below
 * (the struct's opening and closing lines are not visible in this extract).
 */
/* Links the entry into a list of dirty/dead roots. */
238 struct list_head list;
/* Root handed to btrfs_drop_snapshot() by drop_dirty_roots(). */
239 struct btrfs_root *root;
/* Root whose blocks_used accounting drop_dirty_roots() adjusts. */
240 struct btrfs_root *latest_root;
/*
 * Queue a root on @dead_list.
 *
 * Allocates a struct dirty_root with GFP_NOFS, records @latest as its
 * latest_root and adds the entry at the head of @dead_list.
 *
 * NOTE(review): the allocation-failure check, the assignment of
 * dirty->root and the return statement are not visible in this extract.
 */
243 int btrfs_add_dead_root(struct btrfs_root *root,
244 struct btrfs_root *latest,
245 struct list_head *dead_list)
247 struct dirty_root *dirty;
249 dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
253 dirty->latest_root = latest;
254 list_add(&dirty->list, dead_list);
/*
 * Collect the roots tagged BTRFS_ROOT_TRANS_TAG in @radix at commit time.
 *
 * Tagged roots are fetched in batches with radix_tree_gang_lookup_tag()
 * and the tag is cleared on each.  Two cases are visible:
 *
 *  - commit_root == node: the buffer reference held in commit_root is
 *    released, commit_root is cleared and the root item is updated in the
 *    tree root.
 *
 *  - otherwise: a struct dirty_root and a copy of the root are allocated
 *    (GFP_NOFS; BUG_ON if the root copy fails).  drop_progress and
 *    drop_level in the root item are reset, the copy's node is set to the
 *    old commit_root, and the live root's commit_root is cleared.  The
 *    live root's key offset is set to the current generation, its item
 *    gets the current node's block number and is inserted into the tree
 *    root.  The copy's root item has its ref count reduced by one and is
 *    updated in the tree root, and the dirty entry is added to @list.
 *
 * NOTE(review): brace, continue/break, loop and error-check lines are
 * missing from this extract, so the exact nesting should be confirmed
 * against the full file.
 */
258 static int add_dirty_roots(struct btrfs_trans_handle *trans,
259 struct radix_tree_root *radix,
260 struct list_head *list)
262 struct dirty_root *dirty;
263 struct btrfs_root *gang[8];
264 struct btrfs_root *root;
271 ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0,
273 BTRFS_ROOT_TRANS_TAG);
276 for (i = 0; i < ret; i++) {
278 radix_tree_tag_clear(radix,
279 (unsigned long)root->root_key.objectid,
280 BTRFS_ROOT_TRANS_TAG);
/* The root still points at the node recorded in commit_root. */
281 if (root->commit_root == root->node) {
282 WARN_ON(bh_blocknr(root->node) !=
283 btrfs_root_blocknr(&root->root_item));
284 brelse(root->commit_root);
285 root->commit_root = NULL;
287 /* make sure to update the root on disk
288 * so we get any updates to the block used
291 err = btrfs_update_root(trans,
292 root->fs_info->tree_root,
297 dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
299 dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS);
300 BUG_ON(!dirty->root);
302 memset(&root->root_item.drop_progress, 0,
303 sizeof(struct btrfs_disk_key));
304 root->root_item.drop_level = 0;
306 memcpy(dirty->root, root, sizeof(*root));
307 dirty->root->node = root->commit_root;
308 dirty->latest_root = root;
309 root->commit_root = NULL;
311 root->root_key.offset = root->fs_info->generation;
312 btrfs_set_root_blocknr(&root->root_item,
313 bh_blocknr(root->node));
314 err = btrfs_insert_root(trans, root->fs_info->tree_root,
320 refs = btrfs_root_refs(&dirty->root->root_item);
321 btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
322 err = btrfs_update_root(trans, root->fs_info->tree_root,
323 &dirty->root->root_key,
324 &dirty->root->root_item);
328 list_add(&dirty->list, list);
/*
 * Defragment the leaves of @root; @cacheonly is passed through to
 * btrfs_defrag_leaves().
 *
 * The visible statements start a transaction, mark the root as
 * defrag_running, call btrfs_defrag_leaves(), end the transaction and then
 * release info->fs_mutex around btrfs_btree_balance_dirty() before taking
 * it again and starting a new transaction.  On the way out defrag_running
 * is cleared, BTRFS_ROOT_DEFRAG_TAG is cleared for the root in
 * fs_roots_radix and the last transaction is ended.
 *
 * fs_mutex is unlocked before it is locked here, so the caller appears to
 * be expected to hold it on entry - NOTE(review): confirm.  The action
 * taken when defrag_running is already set, the loop structure and the
 * return value are not visible in this extract.
 */
339 int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
341 struct btrfs_fs_info *info = root->fs_info;
343 struct btrfs_trans_handle *trans;
346 if (root->defrag_running)
349 trans = btrfs_start_transaction(root, 1);
351 root->defrag_running = 1;
352 ret = btrfs_defrag_leaves(trans, root, cacheonly);
/* Save the count before the handle is freed by btrfs_end_transaction(). */
353 nr = trans->blocks_used;
354 btrfs_end_transaction(trans, root);
355 mutex_unlock(&info->fs_mutex);
357 btrfs_btree_balance_dirty(info->tree_root, nr);
360 mutex_lock(&info->fs_mutex);
361 trans = btrfs_start_transaction(root, 1);
365 root->defrag_running = 0;
366 radix_tree_tag_clear(&info->fs_roots_radix,
367 (unsigned long)root->root_key.objectid,
368 BTRFS_ROOT_DEFRAG_TAG);
369 btrfs_end_transaction(trans, root);
/*
 * Defragment every root tagged BTRFS_ROOT_DEFRAG_TAG in
 * info->fs_roots_radix, then the extent root.
 *
 * Roots are looked up one at a time (gang[] has a single slot); 'last' is
 * advanced past each root's objectid and the root is passed to
 * btrfs_defrag_root() with cacheonly set to 1.
 *
 * NOTE(review): the enclosing loop, its termination test and the return
 * statement are not visible in this extract.
 */
373 int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info)
375 struct btrfs_root *gang[1];
376 struct btrfs_root *root;
383 ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix,
386 BTRFS_ROOT_DEFRAG_TAG);
389 for (i = 0; i < ret; i++) {
391 last = root->root_key.objectid + 1;
392 btrfs_defrag_root(root, 1);
395 btrfs_defrag_root(info->extent_root, 1);
/*
 * Drop every root queued on @list.
 *
 * For each entry, with fs_info->fs_mutex held: the entry is unlinked, the
 * blocks_used value of the dead root is sampled, and btrfs_drop_snapshot()
 * is run inside a transaction on @tree_root.  The visible code tests the
 * result against -EAGAIN, updates the dead root's item in the tree root,
 * ends the transaction and releases fs_mutex around
 * btrfs_btree_balance_dirty().
 *
 * Afterwards the difference between the sampled and the remaining
 * blocks_used is subtracted from the latest root's blocks_used, the latest
 * root is recorded in the transaction, and the dead root's item is deleted
 * with btrfs_del_root().
 *
 * NOTE(review): the inner loop, the body of the -EAGAIN test, the error
 * checks, the freeing of the entry and the return statement are not
 * visible in this extract.
 */
399 static int drop_dirty_roots(struct btrfs_root *tree_root,
400 struct list_head *list)
402 struct dirty_root *dirty;
403 struct btrfs_trans_handle *trans;
410 while(!list_empty(list)) {
411 struct btrfs_root *root;
413 mutex_lock(&tree_root->fs_info->fs_mutex);
414 dirty = list_entry(list->next, struct dirty_root, list);
415 list_del_init(&dirty->list);
/* Sample usage before the drop so the delta can be computed afterwards. */
417 num_blocks = btrfs_root_blocks_used(&dirty->root->root_item);
418 root = dirty->latest_root;
421 trans = btrfs_start_transaction(tree_root, 1);
422 ret = btrfs_drop_snapshot(trans, dirty->root);
423 if (ret != -EAGAIN) {
427 err = btrfs_update_root(trans,
429 &dirty->root->root_key,
430 &dirty->root->root_item);
433 nr = trans->blocks_used;
434 ret = btrfs_end_transaction(trans, tree_root);
436 mutex_unlock(&tree_root->fs_info->fs_mutex);
437 btrfs_btree_balance_dirty(tree_root, nr);
440 mutex_lock(&tree_root->fs_info->fs_mutex);
/* num_blocks now holds the number of blocks released by the drop. */
444 num_blocks -= btrfs_root_blocks_used(&dirty->root->root_item);
445 blocks_used = btrfs_root_blocks_used(&root->root_item);
447 record_root_in_trans(root);
448 btrfs_set_root_blocks_used(&root->root_item,
449 blocks_used - num_blocks);
451 ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key);
456 nr = trans->blocks_used;
457 ret = btrfs_end_transaction(trans, tree_root);
462 mutex_unlock(&tree_root->fs_info->fs_mutex);
464 btrfs_btree_balance_dirty(tree_root, nr);
/*
 * Commit the transaction that @trans belongs to.
 *
 * Visible phases:
 *  1. If the transaction is already in_commit, end this handle, wait for
 *     the other committer via wait_for_commit() and drop the temporary
 *     reference.
 *  2. Otherwise mark the transaction in_commit and, if an earlier
 *     transaction on fs_info->trans_list has not finished committing,
 *     wait for it.
 *  3. Sleep on writer_wait until no other writers remain and nobody
 *     joined during the sleep.
 *  4. Collect dirty roots, commit the tree roots, clear
 *     fs_info->running_transaction and copy the super block.
 *  5. With both mutexes released, write the dirty btree pages and the
 *     super block.
 *  6. Finish the extent commit, mark commit_done, wake waiters and drop
 *     references; hand the dirty roots to fs_info->dead_roots or, when
 *     the filesystem is closing, drop them immediately.
 *
 * fs_mutex is unlocked before it is locked in several places, so the
 * caller appears to be expected to hold it on entry - NOTE(review):
 * confirm.  The handle is freed here; @trans must not be used afterwards.
 * Several lines (declarations of 'wait' and 'ret', 'do {', 'else', return
 * statements, some call arguments) are not visible in this extract.
 */
470 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
471 struct btrfs_root *root)
473 unsigned long joined = 0;
474 unsigned long timeout = 1;
475 struct btrfs_transaction *cur_trans;
476 struct btrfs_transaction *prev_trans = NULL;
477 struct list_head dirty_fs_roots;
478 struct radix_tree_root pinned_copy;
482 init_bit_radix(&pinned_copy);
483 INIT_LIST_HEAD(&dirty_fs_roots);
485 mutex_lock(&root->fs_info->trans_mutex);
/*
 * Someone else is already committing this transaction: keep it alive with
 * an extra reference, end our handle and wait for that commit to finish.
 */
486 if (trans->transaction->in_commit) {
487 cur_trans = trans->transaction;
488 trans->transaction->use_count++;
489 mutex_unlock(&root->fs_info->trans_mutex);
490 btrfs_end_transaction(trans, root);
492 mutex_unlock(&root->fs_info->fs_mutex);
493 ret = wait_for_commit(root, cur_trans);
496 mutex_lock(&root->fs_info->trans_mutex);
497 put_transaction(cur_trans);
498 mutex_unlock(&root->fs_info->trans_mutex);
500 mutex_lock(&root->fs_info->fs_mutex);
/* This task becomes the committer. */
503 trans->transaction->in_commit = 1;
504 cur_trans = trans->transaction;
/*
 * If an earlier transaction is still on trans_list and has not finished
 * committing, wait for it with both mutexes released.
 */
505 if (cur_trans->list.prev != &root->fs_info->trans_list) {
506 prev_trans = list_entry(cur_trans->list.prev,
507 struct btrfs_transaction, list);
508 if (!prev_trans->commit_done) {
509 prev_trans->use_count++;
510 mutex_unlock(&root->fs_info->fs_mutex);
511 mutex_unlock(&root->fs_info->trans_mutex);
513 wait_for_commit(root, prev_trans);
515 mutex_lock(&root->fs_info->fs_mutex);
516 mutex_lock(&root->fs_info->trans_mutex);
517 put_transaction(prev_trans);
/*
 * Wait for the other writers to finish.  num_joined is sampled before the
 * sleep so that anyone joining in the meantime forces another pass.
 */
522 joined = cur_trans->num_joined;
523 WARN_ON(cur_trans != trans->transaction);
524 prepare_to_wait(&cur_trans->writer_wait, &wait,
525 TASK_UNINTERRUPTIBLE);
/* Other writers remain: sleep until woken by btrfs_end_transaction(). */
527 if (cur_trans->num_writers > 1)
528 timeout = MAX_SCHEDULE_TIMEOUT;
532 mutex_unlock(&root->fs_info->fs_mutex);
533 mutex_unlock(&root->fs_info->trans_mutex);
535 schedule_timeout(timeout);
537 mutex_lock(&root->fs_info->fs_mutex);
538 mutex_lock(&root->fs_info->trans_mutex);
539 finish_wait(&cur_trans->writer_wait, &wait);
540 } while (cur_trans->num_writers > 1 ||
541 (cur_trans->num_joined != joined));
543 WARN_ON(cur_trans != trans->transaction);
544 ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
548 ret = btrfs_commit_tree_roots(trans, root);
/*
 * Clear fs_info->running_transaction and stage the generation and the
 * tree root's block number in the super block copy.
 */
551 cur_trans = root->fs_info->running_transaction;
552 root->fs_info->running_transaction = NULL;
553 btrfs_set_super_generation(&root->fs_info->super_copy,
555 btrfs_set_super_root(&root->fs_info->super_copy,
556 bh_blocknr(root->fs_info->tree_root->node));
557 memcpy(root->fs_info->disk_super, &root->fs_info->super_copy,
558 sizeof(root->fs_info->super_copy));
560 btrfs_copy_pinned(root, &pinned_copy);
/* The writes below run with both mutexes released. */
562 mutex_unlock(&root->fs_info->trans_mutex);
563 mutex_unlock(&root->fs_info->fs_mutex);
564 ret = btrfs_write_and_wait_transaction(trans, root);
566 write_ctree_super(trans, root);
567 mutex_lock(&root->fs_info->fs_mutex);
568 btrfs_finish_extent_commit(trans, root, &pinned_copy);
569 mutex_lock(&root->fs_info->trans_mutex);
/* Publish completion and wake tasks sleeping in wait_for_commit(). */
570 cur_trans->commit_done = 1;
571 root->fs_info->last_trans_committed = cur_trans->transid;
572 wake_up(&cur_trans->commit_wait);
/*
 * Two references are dropped.  NOTE(review): presumably the one taken for
 * this handle in btrfs_start_transaction() and the initial one set in
 * join_transaction() - confirm.
 */
573 put_transaction(cur_trans);
574 put_transaction(cur_trans);
/*
 * NOTE(review): an 'else' between the two splices is presumably missing
 * from this extract; as shown, dead_roots is moved into dirty_fs_roots
 * when closing, and dirty_fs_roots is moved onto dead_roots.
 */
576 if (root->fs_info->closing)
577 list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);
579 list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
581 mutex_unlock(&root->fs_info->trans_mutex);
/* The handle is freed directly rather than via btrfs_end_transaction(). */
582 kmem_cache_free(btrfs_trans_handle_cachep, trans);
/* When closing, drop the collected roots now, without fs_mutex held. */
584 if (root->fs_info->closing) {
585 mutex_unlock(&root->fs_info->fs_mutex);
586 drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
587 mutex_lock(&root->fs_info->fs_mutex);
/*
 * Drop the roots currently queued on fs_info->dead_roots.
 *
 * The list is moved onto a local list head under fs_info->trans_mutex and,
 * if anything was queued, processed by drop_dirty_roots() after the mutex
 * has been released.  The return statement is not visible in this extract.
 */
592 int btrfs_clean_old_snapshots(struct btrfs_root *root)
594 struct list_head dirty_roots;
595 INIT_LIST_HEAD(&dirty_roots);
597 mutex_lock(&root->fs_info->trans_mutex);
598 list_splice_init(&root->fs_info->dead_roots, &dirty_roots);
599 mutex_unlock(&root->fs_info->trans_mutex);
601 if (!list_empty(&dirty_roots)) {
602 drop_dirty_roots(root, &dirty_roots);
/*
 * Work function for the periodic transaction commit.
 *
 * Recovers the btrfs_fs_info from @work and, with fs_mutex held, examines
 * the running transaction under trans_mutex.  If the transaction started
 * less than 30 seconds ago (or its start_time is ahead of 'now') the
 * commit is skipped; otherwise the dirty roots are defragmented and a
 * transaction is started and committed.  On the way out fs_mutex is
 * released, old snapshots are cleaned and the work re-queues itself with
 * 'delay' (initially HZ * 30).
 *
 * NOTE(review): the test guarding the first trans_mutex unlock (presumably
 * a NULL check on 'cur'), the assignment of 'now', any change to 'delay'
 * and the goto/label lines are not visible in this extract.
 */
606 void btrfs_transaction_cleaner(struct work_struct *work)
608 struct btrfs_fs_info *fs_info = container_of(work,
609 struct btrfs_fs_info,
612 struct btrfs_root *root = fs_info->tree_root;
613 struct btrfs_transaction *cur;
614 struct btrfs_trans_handle *trans;
616 unsigned long delay = HZ * 30;
619 mutex_lock(&root->fs_info->fs_mutex);
620 mutex_lock(&root->fs_info->trans_mutex);
621 cur = root->fs_info->running_transaction;
623 mutex_unlock(&root->fs_info->trans_mutex);
/* Skip transactions that are too young, or whose start_time is ahead of now. */
627 if (now < cur->start_time || now - cur->start_time < 30) {
628 mutex_unlock(&root->fs_info->trans_mutex);
632 mutex_unlock(&root->fs_info->trans_mutex);
633 btrfs_defrag_dirty_roots(root->fs_info);
634 trans = btrfs_start_transaction(root, 1);
635 ret = btrfs_commit_transaction(trans, root);
637 mutex_unlock(&root->fs_info->fs_mutex);
638 btrfs_clean_old_snapshots(root);
/* Re-arm the delayed work for the next pass. */
639 btrfs_transaction_queue_work(root, delay);
/*
 * Queue fs_info->trans_work on the transaction workqueue; @delay is passed
 * straight to queue_delayed_work().
 */
642 void btrfs_transaction_queue_work(struct btrfs_root *root, int delay)
644 queue_delayed_work(trans_wq, &root->fs_info->trans_work, delay);
/*
 * Cancel the self re-arming fs_info->trans_work and then flush the
 * transaction workqueue.
 */
647 void btrfs_transaction_flush_work(struct btrfs_root *root)
649 cancel_rearming_delayed_workqueue(trans_wq, &root->fs_info->trans_work);
650 flush_workqueue(trans_wq);
/*
 * Create the "btrfs" workqueue used for transaction work.
 * NOTE(review): the result of create_workqueue() is not checked in the
 * visible lines.
 */
653 void __init btrfs_init_transaction_sys(void)
655 trans_wq = create_workqueue("btrfs");
/* Destroy the workqueue created by btrfs_init_transaction_sys(). */
658 void __exit btrfs_exit_transaction_sys(void)
660 destroy_workqueue(trans_wq);