Merge tag 'drm-vc4-fixes-2016-09-14' of https://github.com/anholt/linux into drm...
[linux-2.6-block.git] / fs / btrfs / disk-io.c
index 9a726ded2c6d150e73bd2719a09cb95a8634ab57..54bc8c7c6bcd387ef48ffc9b69e9771de6e2ca2c 100644 (file)
@@ -101,7 +101,7 @@ int __init btrfs_end_io_wq_init(void)
        btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq",
                                        sizeof(struct btrfs_end_io_wq),
                                        0,
-                                       SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+                                       SLAB_MEM_SPREAD,
                                        NULL);
        if (!btrfs_end_io_wq_cache)
                return -ENOMEM;
@@ -559,8 +559,29 @@ static noinline int check_leaf(struct btrfs_root *root,
        u32 nritems = btrfs_header_nritems(leaf);
        int slot;
 
-       if (nritems == 0)
+       if (nritems == 0) {
+               struct btrfs_root *check_root;
+
+               key.objectid = btrfs_header_owner(leaf);
+               key.type = BTRFS_ROOT_ITEM_KEY;
+               key.offset = (u64)-1;
+
+               check_root = btrfs_get_fs_root(root->fs_info, &key, false);
+               /*
+                * The only reason we also check NULL here is that during
+                * open_ctree() some roots have not yet been set up.
+                */
+               if (!IS_ERR_OR_NULL(check_root)) {
+                       /* if leaf is the root, then it's fine */
+                       if (leaf->start !=
+                           btrfs_root_bytenr(&check_root->root_item)) {
+                               CORRUPT("non-root leaf's nritems is 0",
+                                       leaf, root, 0);
+                               return -EIO;
+                       }
+               }
                return 0;
+       }
 
        /* Check the 0 item */
        if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
@@ -612,6 +633,19 @@ static noinline int check_leaf(struct btrfs_root *root,
        return 0;
 }
 
+static int check_node(struct btrfs_root *root, struct extent_buffer *node)
+{
+       unsigned long nr = btrfs_header_nritems(node);
+
+       if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) {
+               btrfs_crit(root->fs_info,
+                          "corrupt node: block %llu root %llu nritems %lu",
+                          node->start, root->objectid, nr);
+               return -EIO;
+       }
+       return 0;
+}
+
 static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
                                      u64 phy_offset, struct page *page,
                                      u64 start, u64 end, int mirror)
@@ -682,6 +716,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
                ret = -EIO;
        }
 
+       if (found_level > 0 && check_node(root, eb))
+               ret = -EIO;
+
        if (!ret)
                set_extent_buffer_uptodate(eb);
 err:
@@ -870,7 +907,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 
        atomic_inc(&fs_info->nr_async_submits);
 
-       if (bio->bi_rw & REQ_SYNC)
+       if (bio->bi_opf & REQ_SYNC)
                btrfs_set_work_high_priority(&async->work);
 
        btrfs_queue_work(fs_info->workers, &async->work);
@@ -1140,7 +1177,7 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
                                                 u64 bytenr)
 {
-       if (btrfs_test_is_dummy_root(root))
+       if (btrfs_is_testing(root->fs_info))
                return alloc_test_extent_buffer(root->fs_info, bytenr,
                                root->nodesize);
        return alloc_extent_buffer(root->fs_info, bytenr);
@@ -1227,6 +1264,7 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
                         struct btrfs_root *root, struct btrfs_fs_info *fs_info,
                         u64 objectid)
 {
+       bool dummy = test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
        root->node = NULL;
        root->commit_root = NULL;
        root->sectorsize = sectorsize;
@@ -1281,14 +1319,14 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
        root->log_transid = 0;
        root->log_transid_committed = -1;
        root->last_log_commit = 0;
-       if (fs_info)
+       if (!dummy)
                extent_io_tree_init(&root->dirty_log_pages,
                                     fs_info->btree_inode->i_mapping);
 
        memset(&root->root_key, 0, sizeof(root->root_key));
        memset(&root->root_item, 0, sizeof(root->root_item));
        memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
-       if (fs_info)
+       if (!dummy)
                root->defrag_trans_start = fs_info->generation;
        else
                root->defrag_trans_start = 0;
@@ -1309,17 +1347,20 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 /* Should only be used by the testing infrastructure */
-struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize)
+struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info,
+                                         u32 sectorsize, u32 nodesize)
 {
        struct btrfs_root *root;
 
-       root = btrfs_alloc_root(NULL, GFP_KERNEL);
+       if (!fs_info)
+               return ERR_PTR(-EINVAL);
+
+       root = btrfs_alloc_root(fs_info, GFP_KERNEL);
        if (!root)
                return ERR_PTR(-ENOMEM);
        /* We don't use the stripesize in selftest, set it as sectorsize */
-       __setup_root(nodesize, sectorsize, sectorsize, root, NULL,
+       __setup_root(nodesize, sectorsize, sectorsize, root, fs_info,
                        BTRFS_ROOT_TREE_OBJECTID);
-       set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state);
        root->alloc_bytenr = 0;
 
        return root;
@@ -1594,14 +1635,14 @@ int btrfs_init_fs_root(struct btrfs_root *root)
 
        ret = get_anon_bdev(&root->anon_dev);
        if (ret)
-               goto free_writers;
+               goto fail;
 
        mutex_lock(&root->objectid_mutex);
        ret = btrfs_find_highest_objectid(root,
                                        &root->highest_objectid);
        if (ret) {
                mutex_unlock(&root->objectid_mutex);
-               goto free_root_dev;
+               goto fail;
        }
 
        ASSERT(root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
@@ -1609,19 +1650,13 @@ int btrfs_init_fs_root(struct btrfs_root *root)
        mutex_unlock(&root->objectid_mutex);
 
        return 0;
-
-free_root_dev:
-       free_anon_bdev(root->anon_dev);
-free_writers:
-       btrfs_free_subvolume_writers(root->subv_writers);
 fail:
-       kfree(root->free_ino_ctl);
-       kfree(root->free_ino_pinned);
+       /* the caller is responsible for calling free_fs_root */
        return ret;
 }
 
-static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
-                                              u64 root_id)
+struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+                                       u64 root_id)
 {
        struct btrfs_root *root;
 
@@ -2300,6 +2335,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
        fs_info->quota_enabled = 0;
        fs_info->pending_quota_state = 0;
        fs_info->qgroup_ulist = NULL;
+       fs_info->qgroup_rescan_running = false;
        mutex_init(&fs_info->qgroup_rescan_lock);
 }
 
@@ -2310,17 +2346,19 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
        unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
 
        fs_info->workers =
-               btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI,
-                                     max_active, 16);
+               btrfs_alloc_workqueue(fs_info, "worker",
+                                     flags | WQ_HIGHPRI, max_active, 16);
 
        fs_info->delalloc_workers =
-               btrfs_alloc_workqueue("delalloc", flags, max_active, 2);
+               btrfs_alloc_workqueue(fs_info, "delalloc",
+                                     flags, max_active, 2);
 
        fs_info->flush_workers =
-               btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0);
+               btrfs_alloc_workqueue(fs_info, "flush_delalloc",
+                                     flags, max_active, 0);
 
        fs_info->caching_workers =
-               btrfs_alloc_workqueue("cache", flags, max_active, 0);
+               btrfs_alloc_workqueue(fs_info, "cache", flags, max_active, 0);
 
        /*
         * a higher idle thresh on the submit workers makes it much more
@@ -2328,41 +2366,48 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
         * devices
         */
        fs_info->submit_workers =
-               btrfs_alloc_workqueue("submit", flags,
+               btrfs_alloc_workqueue(fs_info, "submit", flags,
                                      min_t(u64, fs_devices->num_devices,
                                            max_active), 64);
 
        fs_info->fixup_workers =
-               btrfs_alloc_workqueue("fixup", flags, 1, 0);
+               btrfs_alloc_workqueue(fs_info, "fixup", flags, 1, 0);
 
        /*
         * endios are largely parallel and should have a very
         * low idle thresh
         */
        fs_info->endio_workers =
-               btrfs_alloc_workqueue("endio", flags, max_active, 4);
+               btrfs_alloc_workqueue(fs_info, "endio", flags, max_active, 4);
        fs_info->endio_meta_workers =
-               btrfs_alloc_workqueue("endio-meta", flags, max_active, 4);
+               btrfs_alloc_workqueue(fs_info, "endio-meta", flags,
+                                     max_active, 4);
        fs_info->endio_meta_write_workers =
-               btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
+               btrfs_alloc_workqueue(fs_info, "endio-meta-write", flags,
+                                     max_active, 2);
        fs_info->endio_raid56_workers =
-               btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
+               btrfs_alloc_workqueue(fs_info, "endio-raid56", flags,
+                                     max_active, 4);
        fs_info->endio_repair_workers =
-               btrfs_alloc_workqueue("endio-repair", flags, 1, 0);
+               btrfs_alloc_workqueue(fs_info, "endio-repair", flags, 1, 0);
        fs_info->rmw_workers =
-               btrfs_alloc_workqueue("rmw", flags, max_active, 2);
+               btrfs_alloc_workqueue(fs_info, "rmw", flags, max_active, 2);
        fs_info->endio_write_workers =
-               btrfs_alloc_workqueue("endio-write", flags, max_active, 2);
+               btrfs_alloc_workqueue(fs_info, "endio-write", flags,
+                                     max_active, 2);
        fs_info->endio_freespace_worker =
-               btrfs_alloc_workqueue("freespace-write", flags, max_active, 0);
+               btrfs_alloc_workqueue(fs_info, "freespace-write", flags,
+                                     max_active, 0);
        fs_info->delayed_workers =
-               btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0);
+               btrfs_alloc_workqueue(fs_info, "delayed-meta", flags,
+                                     max_active, 0);
        fs_info->readahead_workers =
-               btrfs_alloc_workqueue("readahead", flags, max_active, 2);
+               btrfs_alloc_workqueue(fs_info, "readahead", flags,
+                                     max_active, 2);
        fs_info->qgroup_rescan_workers =
-               btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
+               btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0);
        fs_info->extent_workers =
-               btrfs_alloc_workqueue("extent-refs", flags,
+               btrfs_alloc_workqueue(fs_info, "extent-refs", flags,
                                      min_t(u64, fs_devices->num_devices,
                                            max_active), 8);
 
@@ -2617,6 +2662,7 @@ int open_ctree(struct super_block *sb,
        atomic_set(&fs_info->qgroup_op_seq, 0);
        atomic_set(&fs_info->reada_works_cnt, 0);
        atomic64_set(&fs_info->tree_mod_seq, 0);
+       fs_info->fs_frozen = 0;
        fs_info->sb = sb;
        fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
        fs_info->metadata_ratio = 0;
@@ -3010,8 +3056,8 @@ retry_root_backup:
        if (IS_ERR(fs_info->transaction_kthread))
                goto fail_cleaner;
 
-       if (!btrfs_test_opt(tree_root, SSD) &&
-           !btrfs_test_opt(tree_root, NOSSD) &&
+       if (!btrfs_test_opt(tree_root->fs_info, SSD) &&
+           !btrfs_test_opt(tree_root->fs_info, NOSSD) &&
            !fs_info->fs_devices->rotating) {
                btrfs_info(fs_info, "detected SSD devices, enabling SSD mode");
                btrfs_set_opt(fs_info->mount_opt, SSD);
@@ -3024,9 +3070,9 @@ retry_root_backup:
        btrfs_apply_pending_changes(fs_info);
 
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
-       if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) {
+       if (btrfs_test_opt(tree_root->fs_info, CHECK_INTEGRITY)) {
                ret = btrfsic_mount(tree_root, fs_devices,
-                                   btrfs_test_opt(tree_root,
+                                   btrfs_test_opt(tree_root->fs_info,
                                        CHECK_INTEGRITY_INCLUDING_EXTENT_DATA) ?
                                    1 : 0,
                                    fs_info->check_integrity_print_mask);
@@ -3042,7 +3088,7 @@ retry_root_backup:
 
        /* do not make disk changes in broken FS or nologreplay is given */
        if (btrfs_super_log_root(disk_super) != 0 &&
-           !btrfs_test_opt(tree_root, NOLOGREPLAY)) {
+           !btrfs_test_opt(tree_root->fs_info, NOLOGREPLAY)) {
                ret = btrfs_replay_log(fs_info, fs_devices);
                if (ret) {
                        err = ret;
@@ -3083,7 +3129,7 @@ retry_root_backup:
        if (sb->s_flags & MS_RDONLY)
                return 0;
 
-       if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) &&
+       if (btrfs_test_opt(tree_root->fs_info, FREE_SPACE_TREE) &&
            !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
                btrfs_info(fs_info, "creating free space tree");
                ret = btrfs_create_free_space_tree(fs_info);
@@ -3120,7 +3166,7 @@ retry_root_backup:
 
        btrfs_qgroup_rescan_resume(fs_info);
 
-       if (btrfs_test_opt(tree_root, CLEAR_CACHE) &&
+       if (btrfs_test_opt(tree_root->fs_info, CLEAR_CACHE) &&
            btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
                btrfs_info(fs_info, "clearing free space tree");
                ret = btrfs_clear_free_space_tree(fs_info);
@@ -3141,7 +3187,7 @@ retry_root_backup:
                        close_ctree(tree_root);
                        return ret;
                }
-       } else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) ||
+       } else if (btrfs_test_opt(tree_root->fs_info, RESCAN_UUID_TREE) ||
                   fs_info->generation !=
                                btrfs_super_uuid_tree_generation(disk_super)) {
                btrfs_info(fs_info, "checking UUID tree");
@@ -3218,7 +3264,7 @@ fail:
        return err;
 
 recovery_tree_root:
-       if (!btrfs_test_opt(tree_root, USEBACKUPROOT))
+       if (!btrfs_test_opt(tree_root->fs_info, USEBACKUPROOT))
                goto fail_tree_roots;
 
        free_root_pointers(fs_info, 0);
@@ -3634,7 +3680,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
        int total_errors = 0;
        u64 flags;
 
-       do_barriers = !btrfs_test_opt(root, NOBARRIER);
+       do_barriers = !btrfs_test_opt(root->fs_info, NOBARRIER);
        backup_super_roots(root->fs_info);
 
        sb = root->fs_info->super_for_commit;
@@ -3732,8 +3778,15 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
        if (btrfs_root_refs(&root->root_item) == 0)
                synchronize_srcu(&fs_info->subvol_srcu);
 
-       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
+       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
                btrfs_free_log(NULL, root);
+               if (root->reloc_root) {
+                       free_extent_buffer(root->reloc_root->node);
+                       free_extent_buffer(root->reloc_root->commit_root);
+                       btrfs_put_fs_root(root->reloc_root);
+                       root->reloc_root = NULL;
+               }
+       }
 
        if (root->free_ino_pinned)
                __btrfs_remove_free_space_cache(root->free_ino_pinned);
@@ -3844,7 +3897,7 @@ void close_ctree(struct btrfs_root *root)
        smp_mb();
 
        /* wait for the qgroup rescan worker to stop */
-       btrfs_qgroup_wait_for_completion(fs_info);
+       btrfs_qgroup_wait_for_completion(fs_info, false);
 
        /* wait for the uuid_scan task to finish */
        down(&fs_info->uuid_tree_rescan_sem);
@@ -3918,7 +3971,7 @@ void close_ctree(struct btrfs_root *root)
        iput(fs_info->btree_inode);
 
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
-       if (btrfs_test_opt(root, CHECK_INTEGRITY))
+       if (btrfs_test_opt(root->fs_info, CHECK_INTEGRITY))
                btrfsic_unmount(root, fs_info->fs_devices);
 #endif