Btrfs: Wait for IO on the block device inodes of newly added devices
[linux-2.6-block.git] / fs / btrfs / inode.c
index 65b4f864b0df5ab23024bdd61c337712d9bc78d8..404704d268225c376254a2cdc8ecf1920e015c1f 100644 (file)
@@ -135,7 +135,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
 
        BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy));
        mutex_lock(&BTRFS_I(inode)->extent_mutex);
-       btrfs_drop_extent_cache(inode, start, start + num_bytes - 1);
+       btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
        mutex_unlock(&BTRFS_I(inode)->extent_mutex);
 
        while(num_bytes > 0) {
@@ -163,7 +163,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
                                break;
                        }
                        btrfs_drop_extent_cache(inode, start,
-                                               start + ins.offset - 1);
+                                               start + ins.offset - 1, 0);
                }
                mutex_unlock(&BTRFS_I(inode)->extent_mutex);
 
@@ -528,6 +528,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
        struct btrfs_trans_handle *trans;
        struct btrfs_ordered_extent *ordered_extent;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       struct btrfs_file_extent_item *extent_item;
+       struct btrfs_path *path = NULL;
+       struct extent_buffer *leaf;
        u64 alloc_hint = 0;
        struct list_head list;
        struct btrfs_key ins;
@@ -544,20 +547,15 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
        if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
                goto nocow;
 
+       path = btrfs_alloc_path();
+       BUG_ON(!path);
+
        lock_extent(io_tree, ordered_extent->file_offset,
                    ordered_extent->file_offset + ordered_extent->len - 1,
                    GFP_NOFS);
 
        INIT_LIST_HEAD(&list);
 
-       ins.objectid = ordered_extent->start;
-       ins.offset = ordered_extent->len;
-       ins.type = BTRFS_EXTENT_ITEM_KEY;
-
-       ret = btrfs_alloc_reserved_extent(trans, root, root->root_key.objectid,
-                                         trans->transid, inode->i_ino,
-                                         ordered_extent->file_offset, &ins);
-       BUG_ON(ret);
        mutex_lock(&BTRFS_I(inode)->extent_mutex);
 
        ret = btrfs_drop_extents(trans, root, inode,
@@ -566,18 +564,42 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                                 ordered_extent->len,
                                 ordered_extent->file_offset, &alloc_hint);
        BUG_ON(ret);
-       ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
-                                      ordered_extent->file_offset,
-                                      ordered_extent->start,
-                                      ordered_extent->len,
-                                      ordered_extent->len, 0);
+
+       ins.objectid = inode->i_ino;
+       ins.offset = ordered_extent->file_offset;
+       ins.type = BTRFS_EXTENT_DATA_KEY;
+       ret = btrfs_insert_empty_item(trans, root, path, &ins,
+                                     sizeof(*extent_item));
        BUG_ON(ret);
+       leaf = path->nodes[0];
+       extent_item = btrfs_item_ptr(leaf, path->slots[0],
+                                    struct btrfs_file_extent_item);
+       btrfs_set_file_extent_generation(leaf, extent_item, trans->transid);
+       btrfs_set_file_extent_type(leaf, extent_item, BTRFS_FILE_EXTENT_REG);
+       btrfs_set_file_extent_disk_bytenr(leaf, extent_item,
+                                         ordered_extent->start);
+       btrfs_set_file_extent_disk_num_bytes(leaf, extent_item,
+                                            ordered_extent->len);
+       btrfs_set_file_extent_offset(leaf, extent_item, 0);
+       btrfs_set_file_extent_num_bytes(leaf, extent_item,
+                                       ordered_extent->len);
+       btrfs_mark_buffer_dirty(leaf);
 
        btrfs_drop_extent_cache(inode, ordered_extent->file_offset,
                                ordered_extent->file_offset +
-                               ordered_extent->len - 1);
+                               ordered_extent->len - 1, 0);
        mutex_unlock(&BTRFS_I(inode)->extent_mutex);
 
+       ins.objectid = ordered_extent->start;
+       ins.offset = ordered_extent->len;
+       ins.type = BTRFS_EXTENT_ITEM_KEY;
+       ret = btrfs_alloc_reserved_extent(trans, root, leaf->start,
+                                         root->root_key.objectid,
+                                         trans->transid, inode->i_ino,
+                                         ordered_extent->file_offset, &ins);
+       BUG_ON(ret);
+       btrfs_release_path(root, path);
+
        inode->i_blocks += ordered_extent->len >> 9;
        unlock_extent(io_tree, ordered_extent->file_offset,
                    ordered_extent->file_offset + ordered_extent->len - 1,
@@ -586,9 +608,11 @@ nocow:
        add_pending_csums(trans, inode, ordered_extent->file_offset,
                          &ordered_extent->list);
 
+       mutex_lock(&BTRFS_I(inode)->extent_mutex);
        btrfs_ordered_update_i_size(inode, ordered_extent);
        btrfs_update_inode(trans, root, inode);
        btrfs_remove_ordered_extent(inode, ordered_extent);
+       mutex_unlock(&BTRFS_I(inode)->extent_mutex);
 
        /* once for us */
        btrfs_put_ordered_extent(ordered_extent);
@@ -596,6 +620,8 @@ nocow:
        btrfs_put_ordered_extent(ordered_extent);
 
        btrfs_end_transaction(trans, root);
+       if (path)
+               btrfs_free_path(path);
        return 0;
 }
 
@@ -854,7 +880,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
        int ret = 0, nr_unlink = 0, nr_truncate = 0;
 
        /* don't do orphan cleanup if the fs is readonly. */
-       if (root->inode->i_sb->s_flags & MS_RDONLY)
+       if (root->fs_info->sb->s_flags & MS_RDONLY)
                return;
 
        path = btrfs_alloc_path();
@@ -866,8 +892,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
        btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
        key.offset = (u64)-1;
 
-       trans = btrfs_start_transaction(root, 1);
-       btrfs_set_trans_block_group(trans, root->inode);
 
        while (1) {
                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -907,7 +931,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
                 * crossing root thing.  we store the inode number in the
                 * offset of the orphan item.
                 */
-               inode = btrfs_iget_locked(root->inode->i_sb,
+               inode = btrfs_iget_locked(root->fs_info->sb,
                                          found_key.offset, root);
                if (!inode)
                        break;
@@ -939,7 +963,9 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
                 * do a destroy_inode
                 */
                if (is_bad_inode(inode)) {
+                       trans = btrfs_start_transaction(root, 1);
                        btrfs_orphan_del(trans, inode);
+                       btrfs_end_transaction(trans, root);
                        iput(inode);
                        continue;
                }
@@ -962,7 +988,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
                printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
 
        btrfs_free_path(path);
-       btrfs_end_transaction(trans, root);
 }
 
 void btrfs_read_locked_inode(struct inode *inode)
@@ -1317,8 +1342,7 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
        u64 mask = root->sectorsize - 1;
 
        if (root->ref_cows)
-               btrfs_drop_extent_cache(inode,
-                                       new_size & (~mask), (u64)-1);
+               btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
        path = btrfs_alloc_path();
        path->reada = -1;
        BUG_ON(!path);
@@ -1433,10 +1457,7 @@ search_again:
                                        if (root->ref_cows)
                                                dec_i_blocks(inode, num_dec);
                                }
-                               if (root->ref_cows) {
-                                       root_gen =
-                                               btrfs_header_generation(leaf);
-                               }
+                               root_gen = btrfs_header_generation(leaf);
                                root_owner = btrfs_header_owner(leaf);
                        }
                } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
@@ -1477,7 +1498,7 @@ delete:
                if (found_extent) {
                        ret = btrfs_free_extent(trans, root, extent_start,
                                                extent_num_bytes,
-                                               root_owner,
+                                               leaf->start, root_owner,
                                                root_gen, inode->i_ino,
                                                found_key.offset, 0);
                        BUG_ON(ret);
@@ -1654,7 +1675,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
                                                       hole_start, 0, 0,
                                                       hole_size, 0);
                        btrfs_drop_extent_cache(inode, hole_start,
-                                               (u64)-1);
+                                               (u64)-1, 0);
                        btrfs_check_file(root, inode);
                }
                mutex_unlock(&BTRFS_I(inode)->extent_mutex);
@@ -1820,6 +1841,24 @@ static int btrfs_find_actor(struct inode *inode, void *opaque)
                args->root == BTRFS_I(inode)->root);
 }
 
+/* look up an inode by (objectid, root) through ilookup5{,_nowait}() */
+struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
+                           struct btrfs_root *root, int wait)
+{
+       struct btrfs_iget_args key;
+
+       /* match criteria handed to btrfs_find_actor */
+       key.ino = objectid;
+       key.root = root;
+
+       /* nonzero wait picks ilookup5(), zero picks the _nowait form */
+       if (!wait)
+               return ilookup5_nowait(s, objectid, btrfs_find_actor,
+                                      &key);
+
+       return ilookup5(s, objectid, btrfs_find_actor, &key);
+}
+
 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
                                struct btrfs_root *root)
 {
@@ -2050,104 +2089,6 @@ err:
        return ret;
 }
 
-/* Kernels earlier than 2.6.28 still have the NFS deadlock where nfsd
-   will call the file system's ->lookup() method from within its
-   filldir callback, which in turn was called from the file system's
-   ->readdir() method. And will deadlock for many file systems. */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
-
-struct nfshack_dirent {
-       u64             ino;
-       loff_t          offset;
-       int             namlen;
-       unsigned int    d_type;
-       char            name[];
-};
-
-struct nfshack_readdir {
-       char            *dirent;
-       size_t          used;
-       int             full;
-};
-
-
-
-static int btrfs_nfshack_filldir(void *__buf, const char *name, int namlen,
-                             loff_t offset, u64 ino, unsigned int d_type)
-{
-       struct nfshack_readdir *buf = __buf;
-       struct nfshack_dirent *de = (void *)(buf->dirent + buf->used);
-       unsigned int reclen;
-
-       reclen = ALIGN(sizeof(struct nfshack_dirent) + namlen, sizeof(u64));
-       if (buf->used + reclen > PAGE_SIZE) {
-               buf->full = 1;
-               return -EINVAL;
-       }
-
-       de->namlen = namlen;
-       de->offset = offset;
-       de->ino = ino;
-       de->d_type = d_type;
-       memcpy(de->name, name, namlen);
-       buf->used += reclen;
-
-       return 0;
-}
-
-static int btrfs_nfshack_readdir(struct file *file, void *dirent,
-                                filldir_t filldir)
-{
-       struct nfshack_readdir buf;
-       struct nfshack_dirent *de;
-       int err;
-       int size;
-       loff_t offset;
-
-       buf.dirent = (void *)__get_free_page(GFP_KERNEL);
-       if (!buf.dirent)
-               return -ENOMEM;
-
-       offset = file->f_pos;
-
-       do {
-               unsigned int reclen;
-
-               buf.used = 0;
-               buf.full = 0;
-               err = btrfs_real_readdir(file, &buf, btrfs_nfshack_filldir);
-               if (err)
-                       break;
-
-               size = buf.used;
-
-               if (!size)
-                       break;
-
-               de = (struct nfshack_dirent *)buf.dirent;
-               while (size > 0) {
-                       offset = de->offset;
-
-                       if (filldir(dirent, de->name, de->namlen, de->offset,
-                                   de->ino, de->d_type))
-                               goto done;
-                       offset = file->f_pos;
-
-                       reclen = ALIGN(sizeof(*de) + de->namlen,
-                                      sizeof(u64));
-                       size -= reclen;
-                       de = (struct nfshack_dirent *)((char *)de + reclen);
-               }
-       } while (buf.full);
-
- done:
-       free_page((unsigned long)buf.dirent);
-       file->f_pos = offset;
-
-       return err;
-}
-#endif
-
 int btrfs_write_inode(struct inode *inode, int wait)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3288,13 +3229,8 @@ unsigned long btrfs_force_ra(struct address_space *mapping,
 {
        pgoff_t req_size = last_index - offset + 1;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
-       offset = page_cache_readahead(mapping, ra, file, offset, req_size);
-       return offset;
-#else
        page_cache_sync_readahead(mapping, ra, file, offset, req_size);
        return offset + req_size;
-#endif
 }
 
 struct inode *btrfs_alloc_inode(struct super_block *sb)
@@ -3346,18 +3282,11 @@ void btrfs_destroy_inode(struct inode *inode)
                        btrfs_put_ordered_extent(ordered);
                }
        }
-       btrfs_drop_extent_cache(inode, 0, (u64)-1);
+       btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
        kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
 }
 
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
 static void init_once(void *foo)
-#elif LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
-static void init_once(struct kmem_cache * cachep, void *foo)
-#else
-static void init_once(void * foo, struct kmem_cache * cachep,
-                     unsigned long flags)
-#endif
 {
        struct btrfs_inode *ei = (struct btrfs_inode *) foo;
 
@@ -3380,22 +3309,10 @@ void btrfs_destroy_cachep(void)
 
 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
                                       unsigned long extra_flags,
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
-                                      void (*ctor)(void *)
-#elif LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
-                                      void (*ctor)(struct kmem_cache *, void *)
-#else
-                                      void (*ctor)(void *, struct kmem_cache *,
-                                                   unsigned long)
-#endif
-                                    )
+                                      void (*ctor)(void *))
 {
        return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
-                                SLAB_MEM_SPREAD | extra_flags), ctor
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
-                                ,NULL
-#endif
-                               );
+                                SLAB_MEM_SPREAD | extra_flags), ctor);
 }
 
 int btrfs_init_cachep(void)
@@ -3511,19 +3428,36 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root)
 {
        struct list_head *head = &root->fs_info->delalloc_inodes;
        struct btrfs_inode *binode;
+       struct inode *inode;
        unsigned long flags;
 
        spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
        while(!list_empty(head)) {
                binode = list_entry(head->next, struct btrfs_inode,
                                    delalloc_inodes);
-               atomic_inc(&binode->vfs_inode.i_count);
+               inode = igrab(&binode->vfs_inode);
+               if (!inode)
+                       list_del_init(&binode->delalloc_inodes);
                spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
-               filemap_write_and_wait(binode->vfs_inode.i_mapping);
-               iput(&binode->vfs_inode);
+               if (inode) {
+                       filemap_flush(inode->i_mapping);
+                       iput(inode);
+               }
+               cond_resched();
                spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
        }
        spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
+
+       /* the filemap_flush will queue IO into the worker threads, but
+        * we have to make sure the IO is actually started and that
+        * ordered extents get created before we return
+        */
+       atomic_inc(&root->fs_info->async_submit_draining);
+       while(atomic_read(&root->fs_info->nr_async_submits)) {
+               wait_event(root->fs_info->async_submit_wait,
+                  (atomic_read(&root->fs_info->nr_async_submits) == 0));
+       }
+       atomic_dec(&root->fs_info->async_submit_draining);
        return 0;
 }
 
@@ -3643,12 +3577,7 @@ static int btrfs_set_page_dirty(struct page *page)
        return __set_page_dirty_nobuffers(page);
 }
 
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
 static int btrfs_permission(struct inode *inode, int mask)
-#else
-static int btrfs_permission(struct inode *inode, int mask,
-                           struct nameidata *nd)
-#endif
 {
        if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE))
                return -EACCES;
@@ -3679,11 +3608,7 @@ static struct inode_operations btrfs_dir_ro_inode_operations = {
 static struct file_operations btrfs_dir_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
-       .readdir        = btrfs_nfshack_readdir,
-#else /* NFSd readdir/lookup deadlock is fixed */
        .readdir        = btrfs_real_readdir,
-#endif
        .unlocked_ioctl = btrfs_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = btrfs_ioctl,