Btrfs: use percpu counter for dirty metadata count
[linux-2.6-block.git] / fs / btrfs / disk-io.c
index 04f98e3ffd90e6c6e0bc40f49b7ebdfdeccd0504..34ace168eebcf2a1fa81f78af48865a85a883ada 100644 (file)
@@ -946,18 +946,20 @@ static int btree_writepages(struct address_space *mapping,
                            struct writeback_control *wbc)
 {
        struct extent_io_tree *tree;
+       struct btrfs_fs_info *fs_info;
+       int ret;
+
        tree = &BTRFS_I(mapping->host)->io_tree;
        if (wbc->sync_mode == WB_SYNC_NONE) {
-               struct btrfs_root *root = BTRFS_I(mapping->host)->root;
-               u64 num_dirty;
-               unsigned long thresh = 32 * 1024 * 1024;
 
                if (wbc->for_kupdate)
                        return 0;
 
+               fs_info = BTRFS_I(mapping->host)->root->fs_info;
                /* this is a bit racy, but that's ok */
-               num_dirty = root->fs_info->dirty_metadata_bytes;
-               if (num_dirty < thresh)
+               ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes,
+                                            BTRFS_DIRTY_METADATA_THRESH);
+               if (ret < 0)
                        return 0;
        }
        return btree_write_cache_pages(mapping, wbc);
@@ -1125,24 +1127,16 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                      struct extent_buffer *buf)
 {
+       struct btrfs_fs_info *fs_info = root->fs_info;
+
        if (btrfs_header_generation(buf) ==
-           root->fs_info->running_transaction->transid) {
+           fs_info->running_transaction->transid) {
                btrfs_assert_tree_locked(buf);
 
                if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
-                       spin_lock(&root->fs_info->delalloc_lock);
-                       if (root->fs_info->dirty_metadata_bytes >= buf->len)
-                               root->fs_info->dirty_metadata_bytes -= buf->len;
-                       else {
-                               spin_unlock(&root->fs_info->delalloc_lock);
-                               btrfs_panic(root->fs_info, -EOVERFLOW,
-                                         "Can't clear %lu bytes from "
-                                         " dirty_mdatadata_bytes (%llu)",
-                                         buf->len,
-                                         root->fs_info->dirty_metadata_bytes);
-                       }
-                       spin_unlock(&root->fs_info->delalloc_lock);
-
+                       __percpu_counter_add(&fs_info->dirty_metadata_bytes,
+                                            -buf->len,
+                                            fs_info->dirty_metadata_batch);
                        /* ugh, clear_extent_buffer_dirty needs to lock the page */
                        btrfs_set_lock_blocking(buf);
                        clear_extent_buffer_dirty(buf);
@@ -2008,10 +2002,18 @@ int open_ctree(struct super_block *sb,
                goto fail_srcu;
        }
 
+       ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0);
+       if (ret) {
+               err = ret;
+               goto fail_bdi;
+       }
+       fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE *
+                                       (1 + ilog2(nr_cpu_ids));
+
        fs_info->btree_inode = new_inode(sb);
        if (!fs_info->btree_inode) {
                err = -ENOMEM;
-               goto fail_bdi;
+               goto fail_dirty_metadata_bytes;
        }
 
        mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
@@ -2266,6 +2268,7 @@ int open_ctree(struct super_block *sb,
        leafsize = btrfs_super_leafsize(disk_super);
        sectorsize = btrfs_super_sectorsize(disk_super);
        stripesize = btrfs_super_stripesize(disk_super);
+       fs_info->dirty_metadata_batch = leafsize * (1 + ilog2(nr_cpu_ids));
 
        /*
         * mixed block groups end up with duplicate but slightly offset
@@ -2728,6 +2731,8 @@ fail_iput:
 
        invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
        iput(fs_info->btree_inode);
+fail_dirty_metadata_bytes:
+       percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
 fail_bdi:
        bdi_destroy(&fs_info->bdi);
 fail_srcu:
@@ -3406,6 +3411,7 @@ int close_ctree(struct btrfs_root *root)
        btrfs_close_devices(fs_info->fs_devices);
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
 
+       percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
        bdi_destroy(&fs_info->bdi);
        cleanup_srcu_struct(&fs_info->subvol_srcu);
 
@@ -3448,11 +3454,10 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
                        (unsigned long long)transid,
                        (unsigned long long)root->fs_info->generation);
        was_dirty = set_extent_buffer_dirty(buf);
-       if (!was_dirty) {
-               spin_lock(&root->fs_info->delalloc_lock);
-               root->fs_info->dirty_metadata_bytes += buf->len;
-               spin_unlock(&root->fs_info->delalloc_lock);
-       }
+       if (!was_dirty)
+               __percpu_counter_add(&root->fs_info->dirty_metadata_bytes,
+                                    buf->len,
+                                    root->fs_info->dirty_metadata_batch);
 }
 
 static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
@@ -3462,8 +3467,7 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
         * looks as though older kernels can get into trouble with
         * this code, they end up stuck in balance_dirty_pages forever
         */
-       u64 num_dirty;
-       unsigned long thresh = 32 * 1024 * 1024;
+       int ret;
 
        if (current->flags & PF_MEMALLOC)
                return;
@@ -3471,9 +3475,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
        if (flush_delayed)
                btrfs_balance_delayed_items(root);
 
-       num_dirty = root->fs_info->dirty_metadata_bytes;
-
-       if (num_dirty > thresh) {
+       ret = percpu_counter_compare(&root->fs_info->dirty_metadata_bytes,
+                                    BTRFS_DIRTY_METADATA_THRESH);
+       if (ret > 0) {
                balance_dirty_pages_ratelimited_nr(
                                   root->fs_info->btree_inode->i_mapping, 1);
        }