Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
[linux-2.6-block.git] / fs / btrfs / inode.c
index 03708ef3deefb11edd7998ee831bf370e6ad8a66..016c403bfe7e4241b33c6b6c3e4101ba5ea993b2 100644 (file)
@@ -778,8 +778,12 @@ retry:
                                                ins.offset,
                                                BTRFS_ORDERED_COMPRESSED,
                                                async_extent->compress_type);
-               if (ret)
+               if (ret) {
+                       btrfs_drop_extent_cache(inode, async_extent->start,
+                                               async_extent->start +
+                                               async_extent->ram_size - 1, 0);
                        goto out_free_reserve;
+               }
 
                /*
                 * clear dirty, set writeback and unlock the pages.
@@ -971,14 +975,14 @@ static noinline int cow_file_range(struct inode *inode,
                ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
                                               ram_size, cur_alloc_size, 0);
                if (ret)
-                       goto out_reserve;
+                       goto out_drop_extent_cache;
 
                if (root->root_key.objectid ==
                    BTRFS_DATA_RELOC_TREE_OBJECTID) {
                        ret = btrfs_reloc_clone_csums(inode, start,
                                                      cur_alloc_size);
                        if (ret)
-                               goto out_reserve;
+                               goto out_drop_extent_cache;
                }
 
                if (disk_num_bytes < cur_alloc_size)
@@ -1006,6 +1010,8 @@ static noinline int cow_file_range(struct inode *inode,
 out:
        return ret;
 
+out_drop_extent_cache:
+       btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
 out_reserve:
        btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
 out_unlock:
@@ -1096,8 +1102,10 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
                async_cow->end = cur_end;
                INIT_LIST_HEAD(&async_cow->extents);
 
-               btrfs_init_work(&async_cow->work, async_cow_start,
-                               async_cow_submit, async_cow_free);
+               btrfs_init_work(&async_cow->work,
+                               btrfs_delalloc_helper,
+                               async_cow_start, async_cow_submit,
+                               async_cow_free);
 
                nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >>
                        PAGE_CACHE_SHIFT;
@@ -1881,7 +1889,8 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
 
        SetPageChecked(page);
        page_cache_get(page);
-       btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
+       btrfs_init_work(&fixup->work, btrfs_fixup_helper,
+                       btrfs_writepage_fixup_worker, NULL, NULL);
        fixup->page = page;
        btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work);
        return -EBUSY;
@@ -2822,7 +2831,8 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
        struct inode *inode = page->mapping->host;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_ordered_extent *ordered_extent = NULL;
-       struct btrfs_workqueue *workers;
+       struct btrfs_workqueue *wq;
+       btrfs_work_func_t func;
 
        trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
 
@@ -2831,13 +2841,17 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
                                            end - start + 1, uptodate))
                return 0;
 
-       btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL);
+       if (btrfs_is_free_space_inode(inode)) {
+               wq = root->fs_info->endio_freespace_worker;
+               func = btrfs_freespace_write_helper;
+       } else {
+               wq = root->fs_info->endio_write_workers;
+               func = btrfs_endio_write_helper;
+       }
 
-       if (btrfs_is_free_space_inode(inode))
-               workers = root->fs_info->endio_freespace_worker;
-       else
-               workers = root->fs_info->endio_write_workers;
-       btrfs_queue_work(workers, &ordered_extent->work);
+       btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
+                       NULL);
+       btrfs_queue_work(wq, &ordered_extent->work);
 
        return 0;
 }
@@ -4234,7 +4248,8 @@ out:
                        btrfs_abort_transaction(trans, root, ret);
        }
 error:
-       if (last_size != (u64)-1)
+       if (last_size != (u64)-1 &&
+           root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
                btrfs_ordered_update_i_size(inode, last_size, NULL);
        btrfs_free_path(path);
        return err;
@@ -4674,6 +4689,11 @@ static void evict_inode_truncate_pages(struct inode *inode)
                clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
                remove_extent_mapping(map_tree, em);
                free_extent_map(em);
+               if (need_resched()) {
+                       write_unlock(&map_tree->lock);
+                       cond_resched();
+                       write_lock(&map_tree->lock);
+               }
        }
        write_unlock(&map_tree->lock);
 
@@ -4696,6 +4716,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
                                 &cached_state, GFP_NOFS);
                free_extent_state(state);
 
+               cond_resched();
                spin_lock(&io_tree->lock);
        }
        spin_unlock(&io_tree->lock);
@@ -5181,6 +5202,42 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
                        iput(inode);
                        inode = ERR_PTR(ret);
                }
+               /*
+                * If orphan cleanup did remove any orphans, it means the tree
+                * was modified and therefore the commit root is not the same as
+                * the current root anymore. This is a problem, because send
+                * uses the commit root and therefore can see inode items that
+                * don't exist in the current root anymore, and for example make
+                * calls to btrfs_iget, which will do tree lookups based on the
+                * current root and not on the commit root. Those lookups will
+                * fail, returning a -ESTALE error, and making send fail with
+                * that error. So make sure a send does not see any orphans we
+                * have just removed, and that it will see the same inodes
+                * regardless of whether a transaction commit happened before
+                * it started (meaning that the commit root will be the same as
+                * the current root) or not.
+                */
+               if (sub_root->node != sub_root->commit_root) {
+                       u64 sub_flags = btrfs_root_flags(&sub_root->root_item);
+
+                       if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) {
+                               struct extent_buffer *eb;
+
+                               /*
+                                * Assert we can't have races between dentry
+                                * lookup called through the snapshot creation
+                                * ioctl and the VFS.
+                                */
+                               ASSERT(mutex_is_locked(&dir->i_mutex));
+
+                               down_write(&root->fs_info->commit_root_sem);
+                               eb = sub_root->commit_root;
+                               sub_root->commit_root =
+                                       btrfs_root_node(sub_root);
+                               up_write(&root->fs_info->commit_root_sem);
+                               free_extent_buffer(eb);
+                       }
+               }
        }
 
        return inode;
@@ -5577,6 +5634,17 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index)
        return ret;
 }
 
+static int btrfs_insert_inode_locked(struct inode *inode)
+{
+       struct btrfs_iget_args args;
+       args.location = &BTRFS_I(inode)->location;
+       args.root = BTRFS_I(inode)->root;
+
+       return insert_inode_locked4(inode,
+                  btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root),
+                  btrfs_find_actor, &args);
+}
+
 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     struct inode *dir,
@@ -5605,6 +5673,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                return ERR_PTR(-ENOMEM);
        }
 
+       /*
+        * O_TMPFILE, set link count to 0, so that after this point,
+        * we fill in an inode item with the correct link count.
+        */
+       if (!name)
+               set_nlink(inode, 0);
+
        /*
         * we have to initialize this early, so we can reclaim the inode
         * number if we fail afterwards in this function.
@@ -5662,10 +5737,19 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                sizes[1] = name_len + sizeof(*ref);
        }
 
+       location = &BTRFS_I(inode)->location;
+       location->objectid = objectid;
+       location->offset = 0;
+       btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
+
+       ret = btrfs_insert_inode_locked(inode);
+       if (ret < 0)
+               goto fail;
+
        path->leave_spinning = 1;
        ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
        if (ret != 0)
-               goto fail;
+               goto fail_unlock;
 
        inode_init_owner(inode, dir, mode);
        inode_set_bytes(inode, 0);
@@ -5688,11 +5772,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(path->nodes[0]);
        btrfs_free_path(path);
 
-       location = &BTRFS_I(inode)->location;
-       location->objectid = objectid;
-       location->offset = 0;
-       btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
-
        btrfs_inherit_iflags(inode, dir);
 
        if (S_ISREG(mode)) {
@@ -5703,7 +5782,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                                BTRFS_INODE_NODATASUM;
        }
 
-       btrfs_insert_inode_hash(inode);
        inode_tree_add(inode);
 
        trace_btrfs_inode_new(inode);
@@ -5718,6 +5796,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                          btrfs_ino(inode), root->root_key.objectid, ret);
 
        return inode;
+
+fail_unlock:
+       unlock_new_inode(inode);
 fail:
        if (dir && name)
                BTRFS_I(dir)->index_cnt--;
@@ -5852,28 +5933,28 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
        }
 
-       err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
-       if (err) {
-               drop_inode = 1;
-               goto out_unlock;
-       }
-
        /*
        * If the active LSM wants to access the inode during
        * d_instantiate it needs these. Smack checks to see
        * if the filesystem supports xattrs by looking at the
        * ops vector.
        */
-
        inode->i_op = &btrfs_special_inode_operations;
-       err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
+       init_special_inode(inode, inode->i_mode, rdev);
+
+       err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
        if (err)
-               drop_inode = 1;
-       else {
-               init_special_inode(inode, inode->i_mode, rdev);
+               goto out_unlock_inode;
+
+       err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
+       if (err) {
+               goto out_unlock_inode;
+       } else {
                btrfs_update_inode(trans, root, inode);
+               unlock_new_inode(inode);
                d_instantiate(dentry, inode);
        }
+
 out_unlock:
        btrfs_end_transaction(trans, root);
        btrfs_balance_delayed_items(root);
@@ -5883,6 +5964,12 @@ out_unlock:
                iput(inode);
        }
        return err;
+
+out_unlock_inode:
+       drop_inode = 1;
+       unlock_new_inode(inode);
+       goto out_unlock;
+
 }
 
 static int btrfs_create(struct inode *dir, struct dentry *dentry,
@@ -5917,15 +6004,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
        }
        drop_inode_on_err = 1;
-
-       err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
-       if (err)
-               goto out_unlock;
-
-       err = btrfs_update_inode(trans, root, inode);
-       if (err)
-               goto out_unlock;
-
        /*
        * If the active LSM wants to access the inode during
        * d_instantiate it needs these. Smack checks to see
@@ -5934,14 +6012,23 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
        */
        inode->i_fop = &btrfs_file_operations;
        inode->i_op = &btrfs_file_inode_operations;
+       inode->i_mapping->a_ops = &btrfs_aops;
+       inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+
+       err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
+       if (err)
+               goto out_unlock_inode;
+
+       err = btrfs_update_inode(trans, root, inode);
+       if (err)
+               goto out_unlock_inode;
 
        err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
        if (err)
-               goto out_unlock;
+               goto out_unlock_inode;
 
-       inode->i_mapping->a_ops = &btrfs_aops;
-       inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
        BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+       unlock_new_inode(inode);
        d_instantiate(dentry, inode);
 
 out_unlock:
@@ -5953,6 +6040,11 @@ out_unlock:
        btrfs_balance_delayed_items(root);
        btrfs_btree_balance_dirty(root);
        return err;
+
+out_unlock_inode:
+       unlock_new_inode(inode);
+       goto out_unlock;
+
 }
 
 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
@@ -6060,25 +6152,30 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
        }
 
        drop_on_err = 1;
+       /* these must be set before we unlock the inode */
+       inode->i_op = &btrfs_dir_inode_operations;
+       inode->i_fop = &btrfs_dir_file_operations;
 
        err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
        if (err)
-               goto out_fail;
-
-       inode->i_op = &btrfs_dir_inode_operations;
-       inode->i_fop = &btrfs_dir_file_operations;
+               goto out_fail_inode;
 
        btrfs_i_size_write(inode, 0);
        err = btrfs_update_inode(trans, root, inode);
        if (err)
-               goto out_fail;
+               goto out_fail_inode;
 
        err = btrfs_add_link(trans, dir, inode, dentry->d_name.name,
                             dentry->d_name.len, 0, index);
        if (err)
-               goto out_fail;
+               goto out_fail_inode;
 
        d_instantiate(dentry, inode);
+       /*
+        * mkdir is special.  We're unlocking after we call d_instantiate
+        * to avoid a race with nfsd calling d_instantiate.
+        */
+       unlock_new_inode(inode);
        drop_on_err = 0;
 
 out_fail:
@@ -6088,6 +6185,10 @@ out_fail:
        btrfs_balance_delayed_items(root);
        btrfs_btree_balance_dirty(root);
        return err;
+
+out_fail_inode:
+       unlock_new_inode(inode);
+       goto out_fail;
 }
 
 /* helper for btfs_get_extent.  Given an existing extent in the tree,
@@ -6097,14 +6198,14 @@ out_fail:
 static int merge_extent_mapping(struct extent_map_tree *em_tree,
                                struct extent_map *existing,
                                struct extent_map *em,
-                               u64 map_start, u64 map_len)
+                               u64 map_start)
 {
        u64 start_diff;
 
        BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
        start_diff = map_start - em->start;
        em->start = map_start;
-       em->len = map_len;
+       em->len = existing->start - em->start;
        if (em->block_start < EXTENT_MAP_LAST_BYTE &&
            !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
                em->block_start += start_diff;
@@ -6275,6 +6376,8 @@ next:
                        goto not_found;
                if (start + len <= found_key.offset)
                        goto not_found;
+               if (start > found_key.offset)
+                       goto next;
                em->start = start;
                em->orig_start = start;
                em->len = found_key.offset - start;
@@ -6390,8 +6493,7 @@ insert:
                                                         em->len);
                        if (existing) {
                                err = merge_extent_mapping(em_tree, existing,
-                                                          em, start,
-                                                          root->sectorsize);
+                                                          em, start);
                                free_extent_map(existing);
                                if (err) {
                                        free_extent_map(em);
@@ -7158,7 +7260,8 @@ again:
        if (!ret)
                goto out_test;
 
-       btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL);
+       btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
+                       finish_ordered_fn, NULL, NULL);
        btrfs_queue_work(root->fs_info->endio_write_workers,
                         &ordered->work);
 out_test:
@@ -7306,10 +7409,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
        map_length = orig_bio->bi_iter.bi_size;
        ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
                              &map_length, NULL, 0);
-       if (ret) {
-               bio_put(orig_bio);
+       if (ret)
                return -EIO;
-       }
 
        if (map_length >= orig_bio->bi_iter.bi_size) {
                bio = orig_bio;
@@ -7326,6 +7427,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
        bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
        if (!bio)
                return -ENOMEM;
+
        bio->bi_private = dip;
        bio->bi_end_io = btrfs_end_dio_bio;
        atomic_inc(&dip->pending_bios);
@@ -7534,7 +7636,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
        count = iov_iter_count(iter);
        if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
                     &BTRFS_I(inode)->runtime_flags))
-               filemap_fdatawrite_range(inode->i_mapping, offset, count);
+               filemap_fdatawrite_range(inode->i_mapping, offset,
+                                        offset + count - 1);
 
        if (rw & WRITE) {
                /*
@@ -8041,6 +8144,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
 
        set_nlink(inode, 1);
        btrfs_i_size_write(inode, 0);
+       unlock_new_inode(inode);
 
        err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
        if (err)
@@ -8495,7 +8599,9 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
        work->inode = inode;
        work->wait = wait;
        work->delay_iput = delay_iput;
-       btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL);
+       WARN_ON_ONCE(!inode);
+       btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
+                       btrfs_run_delalloc_work, NULL, NULL);
 
        return work;
 }
@@ -8699,12 +8805,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
        }
 
-       err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
-       if (err) {
-               drop_inode = 1;
-               goto out_unlock;
-       }
-
        /*
        * If the active LSM wants to access the inode during
        * d_instantiate it needs these. Smack checks to see
@@ -8713,23 +8813,22 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
        */
        inode->i_fop = &btrfs_file_operations;
        inode->i_op = &btrfs_file_inode_operations;
+       inode->i_mapping->a_ops = &btrfs_aops;
+       inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+       BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+
+       err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
+       if (err)
+               goto out_unlock_inode;
 
        err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
        if (err)
-               drop_inode = 1;
-       else {
-               inode->i_mapping->a_ops = &btrfs_aops;
-               inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
-               BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
-       }
-       if (drop_inode)
-               goto out_unlock;
+               goto out_unlock_inode;
 
        path = btrfs_alloc_path();
        if (!path) {
                err = -ENOMEM;
-               drop_inode = 1;
-               goto out_unlock;
+               goto out_unlock_inode;
        }
        key.objectid = btrfs_ino(inode);
        key.offset = 0;
@@ -8738,9 +8837,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
        err = btrfs_insert_empty_item(trans, root, path, &key,
                                      datasize);
        if (err) {
-               drop_inode = 1;
                btrfs_free_path(path);
-               goto out_unlock;
+               goto out_unlock_inode;
        }
        leaf = path->nodes[0];
        ei = btrfs_item_ptr(leaf, path->slots[0],
@@ -8764,12 +8862,15 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
        inode_set_bytes(inode, name_len);
        btrfs_i_size_write(inode, name_len);
        err = btrfs_update_inode(trans, root, inode);
-       if (err)
+       if (err) {
                drop_inode = 1;
+               goto out_unlock_inode;
+       }
+
+       unlock_new_inode(inode);
+       d_instantiate(dentry, inode);
 
 out_unlock:
-       if (!err)
-               d_instantiate(dentry, inode);
        btrfs_end_transaction(trans, root);
        if (drop_inode) {
                inode_dec_link_count(inode);
@@ -8777,6 +8878,11 @@ out_unlock:
        }
        btrfs_btree_balance_dirty(root);
        return err;
+
+out_unlock_inode:
+       drop_inode = 1;
+       unlock_new_inode(inode);
+       goto out_unlock;
 }
 
 static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
@@ -8960,14 +9066,6 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
                goto out;
        }
 
-       ret = btrfs_init_inode_security(trans, inode, dir, NULL);
-       if (ret)
-               goto out;
-
-       ret = btrfs_update_inode(trans, root, inode);
-       if (ret)
-               goto out;
-
        inode->i_fop = &btrfs_file_operations;
        inode->i_op = &btrfs_file_inode_operations;
 
@@ -8975,10 +9073,26 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
        inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
        BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
 
+       ret = btrfs_init_inode_security(trans, inode, dir, NULL);
+       if (ret)
+               goto out_inode;
+
+       ret = btrfs_update_inode(trans, root, inode);
+       if (ret)
+               goto out_inode;
        ret = btrfs_orphan_add(trans, inode);
        if (ret)
-               goto out;
+               goto out_inode;
 
+       /*
+        * We set number of links to 0 in btrfs_new_inode(), and here we set
+        * it to 1 because d_tmpfile() will issue a warning if the count is 0,
+        * through:
+        *
+        *    d_tmpfile() -> inode_dec_link_count() -> drop_nlink()
+        */
+       set_nlink(inode, 1);
+       unlock_new_inode(inode);
        d_tmpfile(dentry, inode);
        mark_inode_dirty(inode);
 
@@ -8988,8 +9102,12 @@ out:
                iput(inode);
        btrfs_balance_delayed_items(root);
        btrfs_btree_balance_dirty(root);
-
        return ret;
+
+out_inode:
+       unlock_new_inode(inode);
+       goto out;
+
 }
 
 static const struct inode_operations btrfs_dir_inode_operations = {