Btrfs: crash recovery fixes
author Chris Mason <chris.mason@oracle.com>
Thu, 28 Jun 2007 19:57:36 +0000 (15:57 -0400)
committer David Woodhouse <dwmw2@hera.kernel.org>
Thu, 28 Jun 2007 19:57:36 +0000 (15:57 -0400)
Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent-tree.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/transaction.c

index 606a19b5916d3e2a8e9bfc2455fa84a7b1eb18f6..9eb646529edce20c4987cf17a7c13b0210320617 100644 (file)
@@ -75,6 +75,17 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
        struct btrfs_node *cow_node;
        int ret;
 
+       WARN_ON(!buffer_uptodate(buf));
+       if (trans->transaction != root->fs_info->running_transaction) {
+               printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
+                      root->fs_info->running_transaction->transid);
+               WARN_ON(1);
+       }
+       if (trans->transid != root->fs_info->generation) {
+               printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
+                      root->fs_info->generation);
+               WARN_ON(1);
+       }
        if (btrfs_header_generation(btrfs_buffer_header(buf)) ==
                                    trans->transid) {
                *cow_ret = buf;
@@ -107,7 +118,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
                btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
        }
        btrfs_block_release(root, buf);
-       mark_buffer_dirty(cow);
+       btrfs_mark_buffer_dirty(cow);
        *cow_ret = cow;
        return 0;
 }
index 1998f86df08a71875ff44a650d0cef5c721672dc..0287bd51d87c96b7c4d2e11fb41989be44cb69ff 100644 (file)
@@ -1013,18 +1013,13 @@ static inline void btrfs_memmove(struct btrfs_root *root,
        memmove(dst, src, nr);
 }
 
-static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh)
-{
-       WARN_ON(!atomic_read(&bh->b_count));
-       mark_buffer_dirty(bh);
-}
-
 /* helper function to cast into the data area of the leaf. */
 #define btrfs_item_ptr(leaf, slot, type) \
        ((type *)(btrfs_leaf_data(leaf) + \
        btrfs_item_offset((leaf)->items + (slot))))
 
 /* extent-tree.c */
+int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy);
 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
                                                         btrfs_fs_info *info,
                                                         u64 blocknr);
@@ -1044,8 +1039,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                  struct buffer_head *buf);
 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
                      *root, u64 blocknr, u64 num_blocks, int pin);
-int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
-                              btrfs_root *root);
+int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root,
+                              struct radix_tree_root *unpin_radix);
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                u64 blocknr, u64 num_blocks);
index 7081729d5b161dd8bc0e450589d3ae2286c417c7..d1bf5bc1bc14b29bcfaccd442b5c88a4a4f44c90 100644 (file)
@@ -270,14 +270,6 @@ fail:
        return NULL;
 }
 
-int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-                    struct buffer_head *buf)
-{
-       WARN_ON(atomic_read(&buf->b_count) == 0);
-       mark_buffer_dirty(buf);
-       return 0;
-}
-
 int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                     struct buffer_head *buf)
 {
@@ -621,6 +613,20 @@ int close_ctree(struct btrfs_root *root)
        return 0;
 }
 
+void btrfs_mark_buffer_dirty(struct buffer_head *bh) /* dirty bh via mark_buffer_dirty() after two sanity checks */
+{
+       struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; /* root reached through the inode that owns bh's page */
+       u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); /* generation recorded in the block's header */
+       WARN_ON(!atomic_read(&bh->b_count)); /* warn when bh's reference count is zero */
+       if (transid != root->fs_info->generation) { /* header generation differs from fs_info->generation */
+               printk(KERN_CRIT "transid mismatch buffer %llu, found %llu running %llu\n",
+                       (unsigned long long)bh->b_blocknr, (unsigned long long)transid,
+                       (unsigned long long)root->fs_info->generation); /* every %llu argument is cast so the types match on all arches */
+               WARN_ON(1); /* report only: the buffer is still dirtied below */
+       }
+       mark_buffer_dirty(bh);
+}
+
 void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf)
 {
        brelse(buf);
index c4a695ac44f6c3c8846ed7ac364d3207ef9318a5..9e2c261b41aead53c986533e29e69d3dd1e646ab 100644 (file)
@@ -78,4 +78,5 @@ int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh,
 int btrfs_releasepage(struct page *page, gfp_t flags);
 void btrfs_btree_balance_dirty(struct btrfs_root *root);
 int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
+void btrfs_mark_buffer_dirty(struct buffer_head *bh);
 #endif
index 01dc3057928737dd1b8f6c068a8c2b6b9f317ce1..14b93268920e7fa4746880ef375a7ac0719492e9 100644 (file)
@@ -523,6 +523,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
        }
        return 0;
 fail:
+       WARN_ON(1);
        for (i =0; i < faili; i++) {
                if (leaf) {
                        u64 disk_blocknr;
@@ -572,7 +573,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
        bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
                            struct btrfs_block_group_item);
        memcpy(bi, &cache->item, sizeof(*bi));
-       mark_buffer_dirty(path->nodes[0]);
+       btrfs_mark_buffer_dirty(path->nodes[0]);
        btrfs_release_path(extent_root, path);
 fail:
        finish_current_insert(trans, extent_root);
@@ -739,8 +740,30 @@ static int try_remove_page(struct address_space *mapping, unsigned long index)
        return ret;
 }
 
-int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
-                              btrfs_root *root)
+int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy) /* set in copy every bit found in fs_info->pinned_radix; always returns 0 */
+{
+       unsigned long gang[8]; /* batch buffer handed to each find_first_radix_bit() call */
+       u64 last = 0; /* start position passed to the next search */
+       struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
+       int ret;
+       int i;
+
+       while(1) {
+               ret = find_first_radix_bit(pinned_radix, gang, last,
+                                          ARRAY_SIZE(gang)); /* ret is used below as the number of valid gang[] entries */
+               if (!ret) /* nothing found at or after last: done */
+                       break;
+               for (i = 0 ; i < ret; i++) {
+                       set_radix_bit(copy, gang[i]); /* NOTE(review): return value is ignored - confirm set_radix_bit() cannot fail */
+                       last = gang[i] + 1; /* resume the search just past the bit copied */
+               }
+       }
+       return 0;
+}
+
+int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root,
+                              struct radix_tree_root *unpin_radix)
 {
        unsigned long gang[8];
        struct inode *btree_inode = root->fs_info->btree_inode;
@@ -752,7 +775,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
        struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix;
 
        while(1) {
-               ret = find_first_radix_bit(pinned_radix, gang, 0,
+               ret = find_first_radix_bit(unpin_radix, gang, 0,
                                           ARRAY_SIZE(gang));
                if (!ret)
                        break;
@@ -760,6 +783,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
                        first = gang[0];
                for (i = 0; i < ret; i++) {
                        clear_radix_bit(pinned_radix, gang[i]);
+                       clear_radix_bit(unpin_radix, gang[i]);
                        block_group = btrfs_lookup_block_group(root->fs_info,
                                                               gang[i]);
                        if (block_group) {
@@ -1309,6 +1333,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
        if (data) {
                ret = find_free_extent(trans, root, 0, 0,
                                       search_end, 0, &prealloc_key, 0, 0, 0);
+               BUG_ON(ret);
                if (ret)
                        return ret;
                exclude_nr = info->extent_tree_prealloc_nr;
@@ -1319,6 +1344,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
        ret = find_free_extent(trans, root, num_blocks, search_start,
                               search_end, hint_block, ins,
                               exclude_start, exclude_nr, data);
+       BUG_ON(ret);
        if (ret)
                return ret;
 
@@ -1334,10 +1360,12 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
        if (!data) {
                exclude_start = ins->objectid;
                exclude_nr = ins->offset;
+               hint_block = exclude_start + exclude_nr;
                ret = find_free_extent(trans, root, 0, search_start,
                                       search_end, hint_block,
                                       &prealloc_key, exclude_start,
                                       exclude_nr, 0);
+               BUG_ON(ret);
                if (ret)
                        return ret;
        }
@@ -1348,6 +1376,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
        ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
                                sizeof(extent_item));
 
+       BUG_ON(ret);
        finish_current_insert(trans, extent_root);
        pending_ret = del_pending_extents(trans, extent_root);
        if (ret) {
index fef7ba1e707fca9284628718342be9da41e7ddb0..2456cc3e1cfd8bf29dda337baf0f8546c3a7ee70 100644 (file)
@@ -127,7 +127,7 @@ static int insert_inline_extent(struct btrfs_root *root, struct inode *inode,
                     ptr, kaddr + bh_offset(bh),
                     size);
        kunmap_atomic(kaddr, KM_USER0);
-       mark_buffer_dirty(path->nodes[0]);
+       btrfs_mark_buffer_dirty(path->nodes[0]);
 fail:
        btrfs_free_path(path);
        ret = btrfs_end_transaction(trans, root);
@@ -211,11 +211,13 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
        int found_type;
        int found_extent;
        int found_inline;
+       int recow;
 
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
        while(1) {
+               recow = 0;
                btrfs_release_path(root, path);
                ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
                                               search_start, -1);
@@ -244,6 +246,10 @@ next_slot:
                if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY) {
                        goto out;
                }
+               if (recow) {
+                       search_start = key.offset;
+                       continue;
+               }
                if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
                        extent = btrfs_item_ptr(leaf, slot,
                                                struct btrfs_file_extent_item);
@@ -274,6 +280,7 @@ next_slot:
                                nextret = btrfs_next_leaf(root, path);
                                if (nextret)
                                        goto out;
+                               recow = 1;
                        } else {
                                path->slots[0]++;
                        }
@@ -321,7 +328,7 @@ next_slot:
                                }
                                btrfs_set_file_extent_num_blocks(extent,
                                                                 new_num);
-                               mark_buffer_dirty(path->nodes[0]);
+                               btrfs_mark_buffer_dirty(path->nodes[0]);
                        } else {
                                WARN_ON(1);
                        }
@@ -452,6 +459,8 @@ static int prepare_pages(struct btrfs_root *root,
                        err = -ENOMEM;
                        goto failed_release;
                }
+               cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
+               wait_on_page_writeback(pages[i]);
        }
 
        mutex_lock(&root->fs_info->fs_mutex);
@@ -522,8 +531,6 @@ static int prepare_pages(struct btrfs_root *root,
        mutex_unlock(&root->fs_info->fs_mutex);
 
        for (i = 0; i < num_pages; i++) {
-               cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
-               wait_on_page_writeback(pages[i]);
                offset = pos & (PAGE_CACHE_SIZE -1);
                this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes);
                if (!page_has_buffers(pages[i])) {
index eba06e7cf4147509079fff44f10fd5d9e0ce0bca..4fc0367d54f2bbf4dc8b9ee2158b8ec182a11828 100644 (file)
@@ -506,7 +506,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
                                                         extent_num_blocks);
                                inode->i_blocks -= (orig_num_blocks -
                                        extent_num_blocks) << 3;
-                               mark_buffer_dirty(path->nodes[0]);
+                               btrfs_mark_buffer_dirty(path->nodes[0]);
                        } else {
                                extent_start =
                                        btrfs_file_extent_disk_blocknr(fi);
@@ -2020,7 +2020,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen)
        btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
        memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
               sizeof(leaf->header.fsid));
-       mark_buffer_dirty(subvol);
+       btrfs_mark_buffer_dirty(subvol);
 
        inode_item = &root_item.inode;
        memset(inode_item, 0, sizeof(*inode_item));
@@ -2497,7 +2497,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
        ptr = btrfs_file_extent_inline_start(ei);
        btrfs_memcpy(root, path->nodes[0]->b_data,
                     ptr, symname, name_len);
-       mark_buffer_dirty(path->nodes[0]);
+       btrfs_mark_buffer_dirty(path->nodes[0]);
        btrfs_free_path(path);
        inode->i_op = &btrfs_symlink_inode_operations;
        inode->i_mapping->a_ops = &btrfs_symlink_aops;
index 3b2face593e98cf2bb2bb98fc94ed0cddd982cf3..bec38ae8aa11ba16c438ebb81ddc5789a8378d80 100644 (file)
@@ -85,6 +85,8 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
 
        if (root != root->fs_info->tree_root && root->last_trans <
            running_trans_id) {
+               WARN_ON(root == root->fs_info->extent_root);
+               WARN_ON(root->ref_cows != 1);
                if (root->root_item.refs != 0) {
                        radix_tree_tag_set(&root->fs_info->fs_roots_radix,
                                           (unsigned long)root->root_key.objectid,
@@ -113,10 +115,11 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans,
 
        mutex_lock(&root->fs_info->trans_mutex);
        cur_trans = root->fs_info->running_transaction;
+       WARN_ON(cur_trans != trans->transaction);
        WARN_ON(cur_trans->num_writers < 1);
+       cur_trans->num_writers--;
        if (waitqueue_active(&cur_trans->writer_wait))
                wake_up(&cur_trans->writer_wait);
-       cur_trans->num_writers--;
        put_transaction(cur_trans);
        mutex_unlock(&root->fs_info->trans_mutex);
        memset(trans, 0, sizeof(*trans));
@@ -194,6 +197,7 @@ static int wait_for_commit(struct btrfs_root *root,
                           struct btrfs_transaction *commit)
 {
        DEFINE_WAIT(wait);
+       mutex_lock(&root->fs_info->trans_mutex);
        while(!commit->commit_done) {
                prepare_to_wait(&commit->commit_wait, &wait,
                                TASK_UNINTERRUPTIBLE);
@@ -203,6 +207,7 @@ static int wait_for_commit(struct btrfs_root *root,
                schedule();
                mutex_lock(&root->fs_info->trans_mutex);
        }
+       mutex_unlock(&root->fs_info->trans_mutex);
        finish_wait(&commit->commit_wait, &wait);
        return 0;
 }
@@ -279,7 +284,6 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans,
                                                &root->root_item);
                        if (err)
                                break;
-
                        refs = btrfs_root_refs(&tmp_item);
                        btrfs_set_root_refs(&tmp_item, refs - 1);
                        err = btrfs_update_root(trans, root->fs_info->tree_root,
@@ -333,31 +337,53 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        struct btrfs_transaction *cur_trans;
        struct btrfs_transaction *prev_trans = NULL;
        struct list_head dirty_fs_roots;
+       struct radix_tree_root pinned_copy;
        DEFINE_WAIT(wait);
 
+       init_bit_radix(&pinned_copy);
        INIT_LIST_HEAD(&dirty_fs_roots);
 
        mutex_lock(&root->fs_info->trans_mutex);
        if (trans->transaction->in_commit) {
                cur_trans = trans->transaction;
                trans->transaction->use_count++;
+               mutex_unlock(&root->fs_info->trans_mutex);
                btrfs_end_transaction(trans, root);
+
+               mutex_unlock(&root->fs_info->fs_mutex);
                ret = wait_for_commit(root, cur_trans);
                BUG_ON(ret);
                put_transaction(cur_trans);
-               mutex_unlock(&root->fs_info->trans_mutex);
+               mutex_lock(&root->fs_info->fs_mutex);
                return 0;
        }
-       cur_trans = trans->transaction;
        trans->transaction->in_commit = 1;
+       cur_trans = trans->transaction;
+       if (cur_trans->list.prev != &root->fs_info->trans_list) {
+               prev_trans = list_entry(cur_trans->list.prev,
+                                       struct btrfs_transaction, list);
+               if (!prev_trans->commit_done) {
+                       prev_trans->use_count++;
+                       mutex_unlock(&root->fs_info->fs_mutex);
+                       mutex_unlock(&root->fs_info->trans_mutex);
+
+                       wait_for_commit(root, prev_trans);
+                       put_transaction(prev_trans);
+
+                       mutex_lock(&root->fs_info->fs_mutex);
+                       mutex_lock(&root->fs_info->trans_mutex);
+               }
+       }
        while (trans->transaction->num_writers > 1) {
                WARN_ON(cur_trans != trans->transaction);
                prepare_to_wait(&trans->transaction->writer_wait, &wait,
                                TASK_UNINTERRUPTIBLE);
                if (trans->transaction->num_writers <= 1)
                        break;
+               mutex_unlock(&root->fs_info->fs_mutex);
                mutex_unlock(&root->fs_info->trans_mutex);
                schedule();
+               mutex_lock(&root->fs_info->fs_mutex);
                mutex_lock(&root->fs_info->trans_mutex);
                finish_wait(&trans->transaction->writer_wait, &wait);
        }
@@ -372,34 +398,22 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
        cur_trans = root->fs_info->running_transaction;
        root->fs_info->running_transaction = NULL;
-       if (cur_trans->list.prev != &root->fs_info->trans_list) {
-               prev_trans = list_entry(cur_trans->list.prev,
-                                       struct btrfs_transaction, list);
-               if (prev_trans->commit_done)
-                       prev_trans = NULL;
-               else
-                       prev_trans->use_count++;
-       }
        btrfs_set_super_generation(&root->fs_info->super_copy,
                                   cur_trans->transid);
        btrfs_set_super_root(&root->fs_info->super_copy,
                             bh_blocknr(root->fs_info->tree_root->node));
        memcpy(root->fs_info->disk_super, &root->fs_info->super_copy,
               sizeof(root->fs_info->super_copy));
+
+       btrfs_copy_pinned(root, &pinned_copy);
+
        mutex_unlock(&root->fs_info->trans_mutex);
        mutex_unlock(&root->fs_info->fs_mutex);
        ret = btrfs_write_and_wait_transaction(trans, root);
-       if (prev_trans) {
-               mutex_lock(&root->fs_info->trans_mutex);
-               wait_for_commit(root, prev_trans);
-               put_transaction(prev_trans);
-               mutex_unlock(&root->fs_info->trans_mutex);
-       }
        BUG_ON(ret);
        write_ctree_super(trans, root);
-
        mutex_lock(&root->fs_info->fs_mutex);
-       btrfs_finish_extent_commit(trans, root);
+       btrfs_finish_extent_commit(trans, root, &pinned_copy);
        mutex_lock(&root->fs_info->trans_mutex);
        cur_trans->commit_done = 1;
        wake_up(&cur_trans->commit_wait);