Btrfs: Use async helpers to deal with pages that have been improperly dirtied
[linux-2.6-block.git] / fs / btrfs / disk-io.c
index c6a710a668cb5846955308d2eb1e47f090bd2cc5..66466d125c0521c9dd8f5b2d22faed94166d8ff6 100644 (file)
@@ -16,6 +16,7 @@
  * Boston, MA 021110-1307, USA.
  */
 
+#include <linux/version.h>
 #include <linux/fs.h>
 #include <linux/blkdev.h>
 #include <linux/scatterlist.h>
 #include <linux/writeback.h>
 #include <linux/buffer_head.h> // for block_sync_page
 #include <linux/workqueue.h>
+#include <linux/kthread.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
+# include <linux/freezer.h>
+#else
+# include <linux/sched.h>
+#endif
 #include "crc32c.h"
 #include "ctree.h"
 #include "disk-io.h"
@@ -32,6 +39,7 @@
 #include "volumes.h"
 #include "print-tree.h"
 #include "async-thread.h"
+#include "locking.h"
 
 #if 0
 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
@@ -287,7 +295,6 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
        ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
                                             btrfs_header_generation(eb));
        BUG_ON(ret);
-       btrfs_clear_buffer_defrag(eb);
        found_start = btrfs_header_bytenr(eb);
        if (found_start != start) {
                printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n",
@@ -347,7 +354,6 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
        }
        eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
 
-       btrfs_clear_buffer_defrag(eb);
        found_start = btrfs_header_bytenr(eb);
        if (found_start != start) {
                ret = -EIO;
@@ -401,7 +407,11 @@ static int end_workqueue_bio(struct bio *bio,
        end_io_wq->error = err;
        end_io_wq->work.func = end_workqueue_fn;
        end_io_wq->work.flags = 0;
-       btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work);
+       if (bio->bi_rw & (1 << BIO_RW))
+               btrfs_queue_worker(&fs_info->endio_write_workers,
+                                  &end_io_wq->work);
+       else
+               btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work);
 
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
        return 0;
@@ -681,9 +691,11 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 {
        struct inode *btree_inode = root->fs_info->btree_inode;
        if (btrfs_header_generation(buf) ==
-           root->fs_info->running_transaction->transid)
+           root->fs_info->running_transaction->transid) {
+               WARN_ON(!btrfs_tree_locked(buf));
                clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
                                          buf);
+       }
        return 0;
 }
 
@@ -720,10 +732,13 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        root->in_sysfs = 0;
 
        INIT_LIST_HEAD(&root->dirty_list);
+       spin_lock_init(&root->node_lock);
+       mutex_init(&root->objectid_mutex);
        memset(&root->root_key, 0, sizeof(root->root_key));
        memset(&root->root_item, 0, sizeof(root->root_item));
        memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
        memset(&root->root_kobj, 0, sizeof(root->root_kobj));
+       root->defrag_trans_start = fs_info->generation;
        init_completion(&root->kobj_unregister);
        root->defrag_running = 0;
        root->defrag_level = 0;
@@ -1095,6 +1110,84 @@ static void end_workqueue_fn(struct btrfs_work *work)
 #endif
 }
 
+/*
+ * Kernel thread body that repeatedly calls btrfs_clean_old_snapshots() on
+ * the root passed in @arg, holding fs_info->cleaner_mutex around each call.
+ *
+ * Between passes the thread sleeps in TASK_INTERRUPTIBLE with no timeout, so
+ * it only runs again once something calls wake_up_process() on it
+ * (transaction_kthread() does so at the end of every pass) or kthread_stop()
+ * is issued.  The loop ends when fs_info->closing is set or when
+ * kthread_should_stop() returns true.
+ *
+ * Always returns 0.
+ */
+static int cleaner_kthread(void *arg)
+{
+       struct btrfs_root *root = arg;
+
+       do {
+               /* close_ctree() sets closing and then issues smp_mb() */
+               smp_mb();
+               if (root->fs_info->closing)
+                       break;
+
+               vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
+               mutex_lock(&root->fs_info->cleaner_mutex);
+               btrfs_clean_old_snapshots(root);
+               mutex_unlock(&root->fs_info->cleaner_mutex);
+
+               if (freezing(current)) {
+                       refrigerator();
+               } else {
+                       /*
+                        * Publish the sleeping state BEFORE testing the exit
+                        * conditions.  Testing first leaves a window in which
+                        * kthread_stop() can deliver its single wake-up to a
+                        * task that is still TASK_RUNNING; the wake-up is then
+                        * lost and the untimed schedule() below never returns.
+                        * set_current_state() also provides the barrier the
+                        * closing test needs.
+                        */
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       if (root->fs_info->closing || kthread_should_stop()) {
+                               __set_current_state(TASK_RUNNING);
+                               break;
+                       }
+                       schedule();
+                       __set_current_state(TASK_RUNNING);
+               }
+       } while (!kthread_should_stop());
+       return 0;
+}
+
+/*
+ * Kernel thread body that periodically commits the running transaction of
+ * the filesystem that the root passed in @arg belongs to.
+ *
+ * Each pass, under fs_info->transaction_kthread_mutex:
+ *   - if there is no running transaction, nothing is committed and the
+ *     thread sleeps for 30 seconds;
+ *   - if the running transaction is less than 30 seconds old (or its start
+ *     time lies in the future), nothing is committed and the thread sleeps
+ *     for 5 seconds;
+ *   - otherwise a transaction handle is started and committed, and the
+ *     thread sleeps for 30 seconds.
+ * In every case the cleaner kthread is woken before the mutex is dropped.
+ *
+ * The loop ends when fs_info->closing is set or when kthread_should_stop()
+ * returns true.  Always returns 0.
+ */
+static int transaction_kthread(void *arg)
+{
+       struct btrfs_root *root = arg;
+       struct btrfs_trans_handle *trans;
+       struct btrfs_transaction *cur;
+       unsigned long now;
+       unsigned long delay;
+       int ret;
+
+       do {
+               /* close_ctree() sets closing and then issues smp_mb() */
+               smp_mb();
+               if (root->fs_info->closing)
+                       break;
+
+               delay = HZ * 30;
+               vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
+               mutex_lock(&root->fs_info->transaction_kthread_mutex);
+
+               /* trans_mutex is held only while inspecting running_transaction */
+               mutex_lock(&root->fs_info->trans_mutex);
+               cur = root->fs_info->running_transaction;
+               if (!cur) {
+                       mutex_unlock(&root->fs_info->trans_mutex);
+                       goto sleep;
+               }
+               now = get_seconds();
+               /* first test guards the unsigned subtraction in the second */
+               if (now < cur->start_time || now - cur->start_time < 30) {
+                       mutex_unlock(&root->fs_info->trans_mutex);
+                       delay = HZ * 5;
+                       goto sleep;
+               }
+               mutex_unlock(&root->fs_info->trans_mutex);
+               trans = btrfs_start_transaction(root, 1);
+               ret = btrfs_commit_transaction(trans, root);
+sleep:
+               wake_up_process(root->fs_info->cleaner_kthread);
+               mutex_unlock(&root->fs_info->transaction_kthread_mutex);
+
+               if (freezing(current)) {
+                       refrigerator();
+               } else {
+                       /*
+                        * Publish the sleeping state BEFORE testing the exit
+                        * conditions.  Testing first leaves a window in which
+                        * the wake-up from kthread_stop() hits a task that is
+                        * still TASK_RUNNING and is lost, so the stop request
+                        * is only noticed after the full timeout expires.
+                        * set_current_state() also provides the barrier the
+                        * closing test needs.
+                        */
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       if (root->fs_info->closing || kthread_should_stop()) {
+                               __set_current_state(TASK_RUNNING);
+                               break;
+                       }
+                       schedule_timeout(delay);
+                       __set_current_state(TASK_RUNNING);
+               }
+       } while (!kthread_should_stop());
+       return 0;
+}
+
 struct btrfs_root *open_ctree(struct super_block *sb,
                              struct btrfs_fs_devices *fs_devices,
                              char *options)
@@ -1142,6 +1235,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        INIT_LIST_HEAD(&fs_info->space_info);
        btrfs_mapping_init(&fs_info->mapping_tree);
        atomic_set(&fs_info->nr_async_submits, 0);
+       atomic_set(&fs_info->throttles, 0);
        fs_info->sb = sb;
        fs_info->max_extent = (u64)-1;
        fs_info->max_inline = 8192 * 1024;
@@ -1183,11 +1277,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                             fs_info->btree_inode->i_mapping, GFP_NOFS);
        fs_info->do_barriers = 1;
 
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-       INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info);
-#else
-       INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner);
-#endif
        BTRFS_I(fs_info->btree_inode)->root = tree_root;
        memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
               sizeof(struct btrfs_key));
@@ -1195,7 +1284,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
 
        mutex_init(&fs_info->trans_mutex);
-       mutex_init(&fs_info->fs_mutex);
+       mutex_init(&fs_info->drop_mutex);
+       mutex_init(&fs_info->alloc_mutex);
+       mutex_init(&fs_info->chunk_mutex);
+       mutex_init(&fs_info->transaction_kthread_mutex);
+       mutex_init(&fs_info->cleaner_mutex);
+       mutex_init(&fs_info->volume_mutex);
+       init_waitqueue_head(&fs_info->transaction_throttle);
 
 #if 0
        ret = add_hasher(fs_info, "crc32c");
@@ -1233,10 +1328,17 @@ struct btrfs_root *open_ctree(struct super_block *sb,
         * cannot dynamically grow.
         */
        btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size);
+       btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size);
+       btrfs_init_workers(&fs_info->fixup_workers, 1);
        btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
+       btrfs_init_workers(&fs_info->endio_write_workers,
+                          fs_info->thread_pool_size);
        btrfs_start_workers(&fs_info->workers, 1);
+       btrfs_start_workers(&fs_info->submit_workers, 1);
+       btrfs_start_workers(&fs_info->fixup_workers, 1);
        btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
-
+       btrfs_start_workers(&fs_info->endio_write_workers,
+                           fs_info->thread_pool_size);
 
        err = -EINVAL;
        if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) {
@@ -1270,9 +1372,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                goto fail_sb_buffer;
        }
 
-       mutex_lock(&fs_info->fs_mutex);
-
+       mutex_lock(&fs_info->chunk_mutex);
        ret = btrfs_read_sys_array(tree_root);
+       mutex_unlock(&fs_info->chunk_mutex);
        if (ret) {
                printk("btrfs: failed to read the system array on %s\n",
                       sb->s_id);
@@ -1294,7 +1396,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                 (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
                 BTRFS_UUID_SIZE);
 
+       mutex_lock(&fs_info->chunk_mutex);
        ret = btrfs_read_chunk_tree(chunk_root);
+       mutex_unlock(&fs_info->chunk_mutex);
        BUG_ON(ret);
 
        btrfs_close_extra_devices(fs_devices);
@@ -1329,20 +1433,34 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        fs_info->data_alloc_profile = (u64)-1;
        fs_info->metadata_alloc_profile = (u64)-1;
        fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
+       fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
+                                              "btrfs-cleaner");
+       if (!fs_info->cleaner_kthread)
+               goto fail_extent_root;
+
+       fs_info->transaction_kthread = kthread_run(transaction_kthread,
+                                                  tree_root,
+                                                  "btrfs-transaction");
+       if (!fs_info->transaction_kthread)
+               goto fail_cleaner;
+
 
-       mutex_unlock(&fs_info->fs_mutex);
        return tree_root;
 
+fail_cleaner:
+       kthread_stop(fs_info->cleaner_kthread);
 fail_extent_root:
        free_extent_buffer(extent_root->node);
 fail_tree_root:
        free_extent_buffer(tree_root->node);
 fail_sys_array:
-       mutex_unlock(&fs_info->fs_mutex);
 fail_sb_buffer:
        extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
+       btrfs_stop_workers(&fs_info->fixup_workers);
        btrfs_stop_workers(&fs_info->workers);
        btrfs_stop_workers(&fs_info->endio_workers);
+       btrfs_stop_workers(&fs_info->endio_write_workers);
+       btrfs_stop_workers(&fs_info->submit_workers);
 fail_iput:
        iput(fs_info->btree_inode);
 fail:
@@ -1549,9 +1667,12 @@ int close_ctree(struct btrfs_root *root)
        struct btrfs_fs_info *fs_info = root->fs_info;
 
        fs_info->closing = 1;
-       btrfs_transaction_flush_work(root);
-       mutex_lock(&fs_info->fs_mutex);
-       btrfs_defrag_dirty_roots(root->fs_info);
+       smp_mb();
+
+       kthread_stop(root->fs_info->transaction_kthread);
+       kthread_stop(root->fs_info->cleaner_kthread);
+
+       btrfs_clean_old_snapshots(root);
        trans = btrfs_start_transaction(root, 1);
        ret = btrfs_commit_transaction(trans, root);
        /* run commit again to  drop the original snapshot */
@@ -1561,9 +1682,6 @@ int close_ctree(struct btrfs_root *root)
        BUG_ON(ret);
 
        write_ctree_super(NULL, root);
-       mutex_unlock(&fs_info->fs_mutex);
-
-       btrfs_transaction_flush_work(root);
 
        if (fs_info->delalloc_bytes) {
                printk("btrfs: at unmount delalloc count %Lu\n",
@@ -1595,8 +1713,11 @@ int close_ctree(struct btrfs_root *root)
 
        truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
 
+       btrfs_stop_workers(&fs_info->fixup_workers);
        btrfs_stop_workers(&fs_info->workers);
        btrfs_stop_workers(&fs_info->endio_workers);
+       btrfs_stop_workers(&fs_info->endio_write_workers);
+       btrfs_stop_workers(&fs_info->submit_workers);
 
        iput(fs_info->btree_inode);
 #if 0
@@ -1650,6 +1771,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
        u64 transid = btrfs_header_generation(buf);
        struct inode *btree_inode = root->fs_info->btree_inode;
 
+       WARN_ON(!btrfs_tree_locked(buf));
        if (transid != root->fs_info->generation) {
                printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n",
                        (unsigned long long)buf->start,
@@ -1659,20 +1781,6 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
        set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf);
 }
 
-void btrfs_throttle(struct btrfs_root *root)
-{
-       struct backing_dev_info *bdi;
-
-       bdi = &root->fs_info->bdi;
-       if (root->fs_info->throttles && bdi_write_congested(bdi)) {
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
-               congestion_wait(WRITE, HZ/20);
-#else
-               blk_congestion_wait(WRITE, HZ/20);
-#endif
-       }
-}
-
 void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
 {
        /*
@@ -1697,58 +1805,6 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
        return;
 }
 
-void btrfs_set_buffer_defrag(struct extent_buffer *buf)
-{
-       struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
-       struct inode *btree_inode = root->fs_info->btree_inode;
-       set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start,
-                       buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS);
-}
-
-void btrfs_set_buffer_defrag_done(struct extent_buffer *buf)
-{
-       struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
-       struct inode *btree_inode = root->fs_info->btree_inode;
-       set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start,
-                       buf->start + buf->len - 1, EXTENT_DEFRAG_DONE,
-                       GFP_NOFS);
-}
-
-int btrfs_buffer_defrag(struct extent_buffer *buf)
-{
-       struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
-       struct inode *btree_inode = root->fs_info->btree_inode;
-       return test_range_bit(&BTRFS_I(btree_inode)->io_tree,
-                    buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0);
-}
-
-int btrfs_buffer_defrag_done(struct extent_buffer *buf)
-{
-       struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
-       struct inode *btree_inode = root->fs_info->btree_inode;
-       return test_range_bit(&BTRFS_I(btree_inode)->io_tree,
-                    buf->start, buf->start + buf->len - 1,
-                    EXTENT_DEFRAG_DONE, 0);
-}
-
-int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf)
-{
-       struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
-       struct inode *btree_inode = root->fs_info->btree_inode;
-       return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree,
-                    buf->start, buf->start + buf->len - 1,
-                    EXTENT_DEFRAG_DONE, GFP_NOFS);
-}
-
-int btrfs_clear_buffer_defrag(struct extent_buffer *buf)
-{
-       struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
-       struct inode *btree_inode = root->fs_info->btree_inode;
-       return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree,
-                    buf->start, buf->start + buf->len - 1,
-                    EXTENT_DEFRAG, GFP_NOFS);
-}
-
 int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
 {
        struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;