Btrfs: fix unprotected defragable inode insertion
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 07a2162cdd65ec85911a29aaa6130c7cf2a8cd9b..76b82506bf9299305537b4d1b021a3187c8bfbd6 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -45,6 +45,7 @@
 #include "inode-map.h"
 #include "check-integrity.h"
 #include "rcu-string.h"
+#include "dev-replace.h"
 
 #ifdef CONFIG_X86
 #include <asm/cpufeature.h>
@@ -387,7 +388,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
                if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
                        break;
 
-               num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
+               num_copies = btrfs_num_copies(root->fs_info,
                                              eb->start, eb->len);
                if (num_copies == 1)
                        break;
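
This hunk reflects a signature change rather than a behavioral one: btrfs_num_copies() now takes the fs_info instead of the mapping tree it used to be handed. A plausible reading of the new prototype (the real declaration lives in volumes.h, which is not part of this diff):

	/* sketch: the old version took a struct btrfs_mapping_tree * */
	int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);

Passing fs_info gives the helper access to filesystem-wide state beyond the mapping tree, which the dev-replace work elsewhere in this series needs.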
@@ -852,11 +853,16 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
                                 int mirror_num, unsigned long bio_flags,
                                 u64 bio_offset)
 {
+       int ret;
+
        /*
         * when we're called for a write, we're already in the async
         * submission context.  Just jump into btrfs_map_bio
         */
-       return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
+       ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
+       if (ret)
+               bio_endio(bio, ret);
+       return ret;
 }
 
 static int check_async_write(struct inode *inode, unsigned long bio_flags)
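
Before this change, an error from btrfs_map_bio() in the async completion path was only returned to the worker machinery, which has nowhere useful to put it; the bio itself was never completed, so anyone waiting on it could hang. Completing the bio with the error lets the normal end_io path carry the failure. A minimal sketch of the pattern, assuming the two-argument bio_endio(bio, error) of this kernel generation (the helper name is made up for illustration):

	static int submit_or_complete(struct btrfs_root *root, int rw,
				      struct bio *bio, int mirror_num)
	{
		int ret = btrfs_map_bio(root, rw, bio, mirror_num, 1);

		if (ret)
			bio_endio(bio, ret);	/* waiters see the error via end_io */
		return ret;
	}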
@@ -878,7 +884,6 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
        int ret;
 
        if (!(rw & REQ_WRITE)) {
-
                /*
                 * called for a read, do the setup so that checksum validation
                 * can happen in the async kernel threads
@@ -886,26 +891,32 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
                ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
                                          bio, 1);
                if (ret)
-                       return ret;
-               return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
-                                    mirror_num, 0);
+                       goto out_w_error;
+               ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
+                                   mirror_num, 0);
        } else if (!async) {
                ret = btree_csum_one_bio(bio);
                if (ret)
-                       return ret;
-               return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
-                                    mirror_num, 0);
+                       goto out_w_error;
+               ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
+                                   mirror_num, 0);
+       } else {
+               /*
+                * kthread helpers are used to submit writes so that
+                * checksumming can happen in parallel across all CPUs
+                */
+               ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+                                         inode, rw, bio, mirror_num, 0,
+                                         bio_offset,
+                                         __btree_submit_bio_start,
+                                         __btree_submit_bio_done);
        }
 
-       /*
-        * kthread helpers are used to submit writes so that checksumming
-        * can happen in parallel across all CPUs
-        */
-       return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
-                                  inode, rw, bio, mirror_num, 0,
-                                  bio_offset,
-                                  __btree_submit_bio_start,
-                                  __btree_submit_bio_done);
+       if (ret) {
+out_w_error:
+               bio_endio(bio, ret);
+       }
+       return ret;
 }
 
 #ifdef CONFIG_MIGRATION
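
Note the unusual placement of the out_w_error label inside the if (ret) body: the early gotos in the read and synchronous-write branches jump straight to bio_endio(), while a submission that returned 0 falls through and skips the block entirely. Jumping into a compound statement is legal C, just rarely seen; the shape is:

	if (ret) {
out_w_error:
		bio_endio(bio, ret);	/* reached via goto, or because ret != 0 */
	}
	return ret;

Either way, every failure leaving this function has already completed the bio, matching the contract established for __btree_submit_bio_done() above.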
@@ -2131,6 +2142,11 @@ int open_ctree(struct super_block *sb,
        init_rwsem(&fs_info->extent_commit_sem);
        init_rwsem(&fs_info->cleanup_work_sem);
        init_rwsem(&fs_info->subvol_sem);
+       fs_info->dev_replace.lock_owner = 0;
+       atomic_set(&fs_info->dev_replace.nesting_level, 0);
+       mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
+       mutex_init(&fs_info->dev_replace.lock_management_lock);
+       mutex_init(&fs_info->dev_replace.lock);
 
        spin_lock_init(&fs_info->qgroup_lock);
        fs_info->qgroup_tree = RB_ROOT;
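
These new fields implement a recursion-aware lock around the device-replace state: lock_management_lock protects lock_owner and nesting_level, which together let the task already holding the main lock re-enter it without deadlocking, while lock_finishing_cancel_unmount serializes the finish, cancel and unmount paths against each other. A condensed sketch of how an acquire might use the fields (the real helpers live in dev-replace.c; this body is illustrative, not a quote):

	void dev_replace_lock(struct btrfs_dev_replace *dev_replace)
	{
		mutex_lock(&dev_replace->lock_management_lock);
		if (atomic_read(&dev_replace->nesting_level) > 0 &&
		    dev_replace->lock_owner == current->pid) {
			/* nested acquire by the current owner: bump the count */
			atomic_inc(&dev_replace->nesting_level);
			mutex_unlock(&dev_replace->lock_management_lock);
			return;
		}
		mutex_unlock(&dev_replace->lock_management_lock);

		mutex_lock(&dev_replace->lock);
		mutex_lock(&dev_replace->lock_management_lock);
		dev_replace->lock_owner = current->pid;
		atomic_inc(&dev_replace->nesting_level);
		mutex_unlock(&dev_replace->lock_management_lock);
	}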
@@ -2423,7 +2439,11 @@ int open_ctree(struct super_block *sb,
                goto fail_tree_roots;
        }
 
-       btrfs_close_extra_devices(fs_devices);
+       /*
+        * keep the device that is marked to be the target device for the
+        * dev_replace procedure
+        */
+       btrfs_close_extra_devices(fs_info, fs_devices, 0);
 
        if (!fs_devices->latest_bdev) {
                printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
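
btrfs_close_extra_devices() now takes fs_info plus a step argument because closing extra devices has to happen in two passes: at this point the on-disk dev-replace state has not been read yet, so step 0 must keep anything that might be a replace target; after btrfs_init_dev_replace() has run (next hunk), a second call with step 1 can drop a target that no running replace actually owns. A hypothetical helper showing the semantics this implies (the field name is_tgtdev_for_dev_replace comes from the same patch series; the helper itself is illustrative):

	static bool keep_device(struct btrfs_device *device, int step)
	{
		/*
		 * step 0 runs before the dev-replace state is known, so a
		 * potential replace target must survive; by step 1 the
		 * state is loaded and a stale target can be closed.
		 */
		return step == 0 && device->is_tgtdev_for_dev_replace;
	}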
@@ -2495,6 +2515,14 @@ retry_root_backup:
                goto fail_block_groups;
        }
 
+       ret = btrfs_init_dev_replace(fs_info);
+       if (ret) {
+               pr_err("btrfs: failed to init dev_replace: %d\n", ret);
+               goto fail_block_groups;
+       }
+
+       btrfs_close_extra_devices(fs_info, fs_devices, 1);
+
        ret = btrfs_init_space_info(fs_info);
        if (ret) {
                printk(KERN_ERR "Failed to initial space info: %d\n", ret);
@@ -2643,6 +2671,13 @@ retry_root_backup:
                return ret;
        }
 
+       ret = btrfs_resume_dev_replace_async(fs_info);
+       if (ret) {
+               pr_warn("btrfs: failed to resume dev_replace\n");
+               close_ctree(tree_root);
+               return ret;
+       }
+
        return 0;
 
 fail_qgroup:
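
A replace that was suspended at the previous unmount is restarted only here, after the rest of open_ctree() has succeeded, so the copy work runs against a fully set-up filesystem. In rough terms the helper inspects the state loaded by btrfs_init_dev_replace() and hands any remaining work to a kernel thread; a sketch of that shape, with illustrative names:

	int resume_dev_replace_async(struct btrfs_fs_info *fs_info)
	{
		struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
		struct task_struct *task;

		if (dev_replace->replace_state !=
		    BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED)
			return 0;	/* nothing to resume */

		task = kthread_run(dev_replace_kthread, fs_info, "btrfs-devrepl");
		if (IS_ERR(task))
			return PTR_ERR(task);
		return 0;
	}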
@@ -3283,9 +3318,11 @@ int close_ctree(struct btrfs_root *root)
        smp_mb();
 
        /* pause restriper - we want to resume on mount */
-       btrfs_pause_balance(root->fs_info);
+       btrfs_pause_balance(fs_info);
+
+       btrfs_dev_replace_suspend_for_unmount(fs_info);
 
-       btrfs_scrub_cancel(root);
+       btrfs_scrub_cancel(fs_info);
 
        /* wait for any defraggers to finish */
        wait_event(fs_info->transaction_wait,
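
Symmetrical to the resume call at mount time: a running replace is not cancelled at unmount, it is parked so the next mount can pick it up. Roughly, the helper takes lock_finishing_cancel_unmount and moves a STARTED replace to SUSPENDED (illustrative body; the real one lives in dev-replace.c):

	void suspend_for_unmount(struct btrfs_fs_info *fs_info)
	{
		struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;

		mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
		if (dev_replace->replace_state ==
		    BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED)
			dev_replace->replace_state =
				BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED;
		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
	}

The ordering also matters: the suspend happens before btrfs_scrub_cancel(), since a replace drives its copy work through the scrub machinery.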
@@ -3411,7 +3448,8 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
        }
 }
 
-void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
+static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
+                                       int flush_delayed)
 {
        /*
         * looks as though older kernels can get into trouble with
@@ -3423,7 +3461,8 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
        if (current->flags & PF_MEMALLOC)
                return;
 
-       btrfs_balance_delayed_items(root);
+       if (flush_delayed)
+               btrfs_balance_delayed_items(root);
 
        num_dirty = root->fs_info->dirty_metadata_bytes;
 
@@ -3434,25 +3473,14 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
        return;
 }
 
-void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
+void btrfs_btree_balance_dirty(struct btrfs_root *root)
 {
-       /*
-        * looks as though older kernels can get into trouble with
-        * this code, they end up stuck in balance_dirty_pages forever
-        */
-       u64 num_dirty;
-       unsigned long thresh = 32 * 1024 * 1024;
-
-       if (current->flags & PF_MEMALLOC)
-               return;
-
-       num_dirty = root->fs_info->dirty_metadata_bytes;
+       __btrfs_btree_balance_dirty(root, 1);
+}
 
-       if (num_dirty > thresh) {
-               balance_dirty_pages_ratelimited_nr(
-                                  root->fs_info->btree_inode->i_mapping, 1);
-       }
-       return;
+void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root)
+{
+       __btrfs_btree_balance_dirty(root, 0);
 }
 
 int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
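
The refactor folds two near-identical balance functions into one static worker keyed by a flush_delayed flag, exposed through two thin wrappers. The nodelay variant is for callers that must not recurse into the delayed-item code, such as the delayed-item machinery itself; everyone else keeps the old behavior:

	/* typical call sites, given the wrappers above */
	btrfs_btree_balance_dirty(root);	  /* flush delayed items, then throttle */
	btrfs_btree_balance_dirty_nodelay(root);  /* throttle only */

Dropping the unsigned long nr parameter is consistent with the removed lines above, which passed a hard-coded 1 to balance_dirty_pages_ratelimited_nr() and so never used nr.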