Btrfs: implement memory reclaim for leaf reference cache
author Yan <zheng.yan@oracle.com>
Wed, 30 Jul 2008 20:29:20 +0000 (16:29 -0400)
committer Chris Mason <chris.mason@oracle.com>
Thu, 25 Sep 2008 15:04:05 +0000 (11:04 -0400)
The memory reclaim problem occurs when snapshots exist. In that
case, some cache entries may never be used while an old snapshot is being
dropped, so they remain in the cache until umount.

The patch adds a field to struct btrfs_leaf_ref to record its creation time.
In addition, the patch links all dead roots of a given snapshot together in
order of creation time. After an old snapshot has been completely dropped, we
check the dead root list and remove all cache entries created before the
oldest dead root in the list.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
15 files changed:
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/dir-item.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/file-item.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/locking.c
fs/btrfs/print-tree.c
fs/btrfs/ref-cache.c
fs/btrfs/ref-cache.h
fs/btrfs/transaction.c
fs/btrfs/volumes.c

index 245eb00435dd9d1dbd63856a91a93b6e1b313633..c4792062dd53cb826d1267f12c6d90274b80a166 100644 (file)
@@ -3275,4 +3275,3 @@ int btrfs_previous_item(struct btrfs_root *root,
        }
        return 1;
 }
-
index 83422088c629e6184f0ff1dcda4da8efe4811ce9..be16cd49ef69df1c797ee88b0465d04144e78be2 100644 (file)
@@ -666,7 +666,8 @@ struct btrfs_root {
        /* the dirty list is only used by non-reference counted roots */
        struct list_head dirty_list;
 
-       spinlock_t orphan_lock;
+       spinlock_t list_lock;
+       struct list_head dead_list;
        struct list_head orphan_list;
 };
 
index eb4dd3d75cf97b137f048c5b88b71291e45008c4..125094617fe8d7d14444263627884ee931d74261 100644 (file)
@@ -340,4 +340,3 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
        }
        return 0;
 }
-
index ec1ba8ddb35f1e91deedb7552cc1318b6d4e6177..e826730d750fe156d64c0ff4ca2efe44e1118e5d 100644 (file)
@@ -735,8 +735,9 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
 
        INIT_LIST_HEAD(&root->dirty_list);
        INIT_LIST_HEAD(&root->orphan_list);
+       INIT_LIST_HEAD(&root->dead_list);
        spin_lock_init(&root->node_lock);
-       spin_lock_init(&root->orphan_lock);
+       spin_lock_init(&root->list_lock);
        mutex_init(&root->objectid_mutex);
 
        btrfs_leaf_ref_tree_init(&root->ref_tree_struct);
@@ -1717,7 +1718,7 @@ int close_ctree(struct btrfs_root *root)
                printk("btrfs: at umount reference cache size %Lu\n",
                        fs_info->total_ref_cache_size);
        }
-       
+
        if (fs_info->extent_root->node)
                free_extent_buffer(fs_info->extent_root->node);
 
index fe1ddbd2bfd68fb439477f4320f82461d4fe7dc0..37ca8df30c30280e3f0f6d619ff555362218c161 100644 (file)
@@ -867,8 +867,8 @@ static int get_reference_status(struct btrfs_root *root, u64 bytenr,
                /*
                 * For (parent_gen > 0 && parent_gen > ref_gen):
                 *
-                * we reach here through the oldest root, therefore 
-                * all other reference from same snapshot should have 
+                * we reach here through the oldest root, therefore
+                * all other reference from same snapshot should have
                 * a larger generation.
                 */
                if ((root_objectid != btrfs_ref_root(leaf, ref_item)) ||
@@ -954,7 +954,7 @@ int btrfs_cross_ref_exists(struct btrfs_root *root,
                        if (!eb)
                                continue;
                        extent_start = eb->start;
-               } else 
+               } else
                        extent_start = bytenr;
 
                ret = get_reference_status(root, extent_start, ref_generation,
@@ -1048,7 +1048,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                struct btrfs_leaf_ref *ref;
                struct btrfs_extent_info *info;
 
-               ref = btrfs_alloc_leaf_ref(nr_file_extents);
+               ref = btrfs_alloc_leaf_ref(root, nr_file_extents);
                if (!ref) {
                        WARN_ON(1);
                        goto out;
@@ -1059,7 +1059,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                ref->generation = btrfs_header_generation(buf);
                ref->nritems = nr_file_extents;
                info = ref->extents;
-               
+
                for (i = 0; nr_file_extents > 0 && i < nritems; i++) {
                        u64 disk_bytenr;
                        btrfs_item_key_to_cpu(buf, &key, i);
@@ -1085,7 +1085,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                BUG_ON(!root->ref_tree);
                ret = btrfs_add_leaf_ref(root, ref);
                WARN_ON(ret);
-               btrfs_free_leaf_ref(ref);
+               btrfs_free_leaf_ref(root, ref);
        }
 out:
        return 0;
@@ -2316,7 +2316,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
 }
 
 static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans,
-                                          struct btrfs_root *root,
+                                          struct btrfs_root *root,
                                           struct extent_buffer *leaf)
 {
        u64 leaf_owner;
@@ -2367,7 +2367,7 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans,
 }
 
 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
-                                        struct btrfs_root *root,
+                                        struct btrfs_root *root,
                                         struct btrfs_leaf_ref *ref)
 {
        int i;
@@ -2521,7 +2521,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
                                ret = drop_leaf_ref(trans, root, ref);
                                BUG_ON(ret);
                                btrfs_remove_leaf_ref(root, ref);
-                               btrfs_free_leaf_ref(ref);
+                               btrfs_free_leaf_ref(root, ref);
                                *level = 0;
                                break;
                        }
index 964ec1622d665ad1fe6b45c339f5edf3c0cdd410..5368e3b6eb962e10e5733cc40283e39d2c135f96 100644 (file)
@@ -3497,4 +3497,3 @@ out:
        return ret;
 }
 EXPORT_SYMBOL(try_release_extent_buffer);
-
index afe42d00b5a61e341915130d25d87fbbf5d6639d..2311061f070e06f87f09b66b3369b0ac0fa0a8d8 100644 (file)
@@ -422,4 +422,3 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
        BUG_ON(ret);
        return ret;
 }
-
index ded5281f8463533cd8346b588432ef17ca6e20b0..412ab4a26382c48b1150bf40d4d396dd3acbe813 100644 (file)
@@ -1095,4 +1095,3 @@ struct file_operations btrfs_file_operations = {
        .compat_ioctl   = btrfs_ioctl,
 #endif
 };
-
index 3aa82cec6bf797967149f39c333bd6527523d93d..7af8be076ee53d0632147787b05e2d86e21612e0 100644 (file)
@@ -835,17 +835,17 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret = 0;
 
-       spin_lock(&root->orphan_lock);
+       spin_lock(&root->list_lock);
 
        /* already on the orphan list, we're good */
        if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
-               spin_unlock(&root->orphan_lock);
+               spin_unlock(&root->list_lock);
                return 0;
        }
 
        list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
 
-       spin_unlock(&root->orphan_lock);
+       spin_unlock(&root->list_lock);
 
        /*
         * insert an orphan item to track this unlinked/truncated file
@@ -864,20 +864,20 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret = 0;
 
-       spin_lock(&root->orphan_lock);
+       spin_lock(&root->list_lock);
 
        if (list_empty(&BTRFS_I(inode)->i_orphan)) {
-               spin_unlock(&root->orphan_lock);
+               spin_unlock(&root->list_lock);
                return 0;
        }
 
        list_del_init(&BTRFS_I(inode)->i_orphan);
        if (!trans) {
-               spin_unlock(&root->orphan_lock);
+               spin_unlock(&root->list_lock);
                return 0;
        }
 
-       spin_unlock(&root->orphan_lock);
+       spin_unlock(&root->list_lock);
 
        ret = btrfs_del_orphan_item(trans, root, inode->i_ino);
 
@@ -973,9 +973,9 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
                 * add this inode to the orphan list so btrfs_orphan_del does
                 * the proper thing when we hit it
                 */
-               spin_lock(&root->orphan_lock);
+               spin_lock(&root->list_lock);
                list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
-               spin_unlock(&root->orphan_lock);
+               spin_unlock(&root->list_lock);
 
                /*
                 * if this is a bad inode, means we actually succeeded in
@@ -3269,13 +3269,13 @@ void btrfs_destroy_inode(struct inode *inode)
            BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED)
                posix_acl_release(BTRFS_I(inode)->i_default_acl);
 
-       spin_lock(&BTRFS_I(inode)->root->orphan_lock);
+       spin_lock(&BTRFS_I(inode)->root->list_lock);
        if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
                printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan"
                       " list\n", inode->i_ino);
                dump_stack();
        }
-       spin_unlock(&BTRFS_I(inode)->root->orphan_lock);
+       spin_unlock(&BTRFS_I(inode)->root->list_lock);
 
        while(1) {
                ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
index d617c29787fac22cf5d79dcf7f1aa4eed19085b4..d43e14c7471add6ba29b203972b19cdafd0e24de 100644 (file)
@@ -56,4 +56,3 @@ int btrfs_tree_locked(struct extent_buffer *eb)
 {
        return mutex_is_locked(&eb->mutex);
 }
-
index 14d8637203028d4d08cb78c503bbddba48870d33..f1374d597a1786e67177590610a53c4295dab913 100644 (file)
@@ -198,4 +198,3 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
                free_extent_buffer(next);
        }
 }
-
index ec9587784a3dc2098e2073b3c9faff827a5d9bf1..272b9890c9826d07a0564e7fe0d7c27bb402b81e 100644 (file)
 #include "ref-cache.h"
 #include "transaction.h"
 
-struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(int nr_extents)
+struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root,
+                                           int nr_extents)
 {
        struct btrfs_leaf_ref *ref;
+       size_t size = btrfs_leaf_ref_size(nr_extents);
 
-       ref = kmalloc(btrfs_leaf_ref_size(nr_extents), GFP_NOFS);
+       ref = kmalloc(size, GFP_NOFS);
        if (ref) {
+               spin_lock(&root->fs_info->ref_cache_lock);
+               root->fs_info->total_ref_cache_size += size;
+               spin_unlock(&root->fs_info->ref_cache_lock);
+
                memset(ref, 0, sizeof(*ref));
                atomic_set(&ref->usage, 1);
                INIT_LIST_HEAD(&ref->list);
@@ -34,14 +40,20 @@ struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(int nr_extents)
        return ref;
 }
 
-void btrfs_free_leaf_ref(struct btrfs_leaf_ref *ref)
+void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
 {
        if (!ref)
                return;
        WARN_ON(atomic_read(&ref->usage) == 0);
        if (atomic_dec_and_test(&ref->usage)) {
+               size_t size = btrfs_leaf_ref_size(ref->nritems);
+
                BUG_ON(ref->in_tree);
                kfree(ref);
+
+               spin_lock(&root->fs_info->ref_cache_lock);
+               root->fs_info->total_ref_cache_size -= size;
+               spin_unlock(&root->fs_info->ref_cache_lock);
        }
 }
 
@@ -64,7 +76,7 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
                else
                        return parent;
        }
-       
+
        entry = rb_entry(node, struct btrfs_leaf_ref, rb_node);
        entry->in_tree = 1;
        rb_link_node(node, parent, p);
@@ -91,9 +103,8 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
        return NULL;
 }
 
-int btrfs_remove_leaf_refs(struct btrfs_root *root)
+int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen)
 {
-       struct rb_node *rb;
        struct btrfs_leaf_ref *ref = NULL;
        struct btrfs_leaf_ref_tree *tree = root->ref_tree;
 
@@ -101,17 +112,18 @@ int btrfs_remove_leaf_refs(struct btrfs_root *root)
                return 0;
 
        spin_lock(&tree->lock);
-       while(!btrfs_leaf_ref_tree_empty(tree)) {
-               rb = rb_first(&tree->root);
-               ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node);
+       while(!list_empty(&tree->list)) {
+               ref = list_entry(tree->list.next, struct btrfs_leaf_ref, list);
+               BUG_ON(!ref->in_tree);
+               if (ref->root_gen > max_root_gen)
+                       break;
+
                rb_erase(&ref->rb_node, &tree->root);
                ref->in_tree = 0;
                list_del_init(&ref->list);
 
                spin_unlock(&tree->lock);
-
-               btrfs_free_leaf_ref(ref);
-
+               btrfs_free_leaf_ref(root, ref);
                cond_resched();
                spin_lock(&tree->lock);
        }
@@ -143,7 +155,6 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
 {
        int ret = 0;
        struct rb_node *rb;
-       size_t size = btrfs_leaf_ref_size(ref->nritems);
        struct btrfs_leaf_ref_tree *tree = root->ref_tree;
 
        spin_lock(&tree->lock);
@@ -151,9 +162,6 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
        if (rb) {
                ret = -EEXIST;
        } else {
-               spin_lock(&root->fs_info->ref_cache_lock);
-               root->fs_info->total_ref_cache_size += size;
-               spin_unlock(&root->fs_info->ref_cache_lock);
                atomic_inc(&ref->usage);
                list_add_tail(&ref->list, &tree->list);
        }
@@ -163,15 +171,10 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
 
 int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
 {
-       size_t size = btrfs_leaf_ref_size(ref->nritems);
        struct btrfs_leaf_ref_tree *tree = root->ref_tree;
 
        BUG_ON(!ref->in_tree);
        spin_lock(&tree->lock);
-       
-       spin_lock(&root->fs_info->ref_cache_lock);
-       root->fs_info->total_ref_cache_size -= size;
-       spin_unlock(&root->fs_info->ref_cache_lock);
 
        rb_erase(&ref->rb_node, &tree->root);
        ref->in_tree = 0;
@@ -179,7 +182,6 @@ int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
 
        spin_unlock(&tree->lock);
 
-       btrfs_free_leaf_ref(ref);
+       btrfs_free_leaf_ref(root, ref);
        return 0;
 }
-
index 823c049f72f184ca8d1269c4963f1d2e90467e3f..c361b321c0c32289b2557c395354587c6f7656ef 100644 (file)
@@ -30,6 +30,7 @@ struct btrfs_leaf_ref {
        int in_tree;
        atomic_t usage;
 
+       u64 root_gen;
        u64 bytenr;
        u64 owner;
        u64 generation;
@@ -41,14 +42,13 @@ struct btrfs_leaf_ref {
 
 static inline size_t btrfs_leaf_ref_size(int nr_extents)
 {
-       return sizeof(struct btrfs_leaf_ref) + 
+       return sizeof(struct btrfs_leaf_ref) +
               sizeof(struct btrfs_extent_info) * nr_extents;
 }
 
 static inline void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree)
 {
        tree->root.rb_node = NULL;
-       tree->last = NULL;
        INIT_LIST_HEAD(&tree->list);
        spin_lock_init(&tree->lock);
 }
@@ -59,12 +59,13 @@ static inline int btrfs_leaf_ref_tree_empty(struct btrfs_leaf_ref_tree *tree)
 }
 
 void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree);
-struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(int nr_extents);
-void btrfs_free_leaf_ref(struct btrfs_leaf_ref *ref);
+struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root,
+                                           int nr_extents);
+void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref);
 struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root,
                                             u64 bytenr);
 int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref);
-int btrfs_remove_leaf_refs(struct btrfs_root *root);
+int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen);
 int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref);
 
 #endif
index 216f31571620928506b37ddb5d6780e08f24cc06..52c5524896a3feb65efebecd1e49a064a58404bb 100644 (file)
@@ -98,20 +98,24 @@ static noinline int record_root_in_trans(struct btrfs_root *root)
                        BUG_ON(!dirty);
                        dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS);
                        BUG_ON(!dirty->root);
-
                        dirty->latest_root = root;
                        INIT_LIST_HEAD(&dirty->list);
 
                        root->commit_root = btrfs_root_node(root);
-                       root->dirty_root = dirty;
 
                        memcpy(dirty->root, root, sizeof(*root));
-                       dirty->root->ref_tree = &root->ref_tree_struct;
-
                        spin_lock_init(&dirty->root->node_lock);
+                       spin_lock_init(&dirty->root->list_lock);
                        mutex_init(&dirty->root->objectid_mutex);
+                       INIT_LIST_HEAD(&dirty->root->dead_list);
                        dirty->root->node = root->commit_root;
                        dirty->root->commit_root = NULL;
+
+                       spin_lock(&root->list_lock);
+                       list_add(&dirty->root->dead_list, &root->dead_list);
+                       spin_unlock(&root->list_lock);
+
+                       root->dirty_root = dirty;
                } else {
                        WARN_ON(1);
                }
@@ -356,8 +360,6 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
                list_del_init(next);
                root = list_entry(next, struct btrfs_root, dirty_list);
                update_cowonly_root(trans, root);
-               if (root->fs_info->closing)
-                       btrfs_remove_leaf_refs(root);
        }
        return 0;
 }
@@ -410,7 +412,11 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
 
                                free_extent_buffer(root->commit_root);
                                root->commit_root = NULL;
-                               
+
+                               spin_lock(&root->list_lock);
+                               list_del_init(&dirty->root->dead_list);
+                               spin_unlock(&root->list_lock);
+
                                kfree(dirty->root);
                                kfree(dirty);
 
@@ -497,6 +503,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
        unsigned long nr;
        u64 num_bytes;
        u64 bytes_used;
+       u64 max_useless;
        int ret = 0;
        int err;
 
@@ -554,10 +561,25 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
                }
                mutex_unlock(&root->fs_info->drop_mutex);
 
+               spin_lock(&root->list_lock);
+               list_del_init(&dirty->root->dead_list);
+               if (!list_empty(&root->dead_list)) {
+                       struct btrfs_root *oldest;
+                       oldest = list_entry(root->dead_list.prev,
+                                           struct btrfs_root, dead_list);
+                       max_useless = oldest->root_key.offset - 1;
+               } else {
+                       max_useless = root->root_key.offset - 1;
+               }
+               spin_unlock(&root->list_lock);
+
                nr = trans->blocks_used;
                ret = btrfs_end_transaction(trans, tree_root);
                BUG_ON(ret);
 
+               ret = btrfs_remove_leaf_refs(root, max_useless);
+               BUG_ON(ret);
+
                free_extent_buffer(dirty->root->node);
                kfree(dirty->root);
                kfree(dirty);
@@ -785,10 +807,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        put_transaction(cur_trans);
        put_transaction(cur_trans);
 
+       list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
        if (root->fs_info->closing)
                list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);
-       else
-               list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
 
        mutex_unlock(&root->fs_info->trans_mutex);
        kmem_cache_free(btrfs_trans_handle_cachep, trans);
@@ -814,4 +835,3 @@ again:
        }
        return 0;
 }
-
index 5e6ee7a6f73831d574ade1c09353b3a1b82441e3..18db4cbe27941ac3d7779c2c2a4d2504975c2940 100644 (file)
@@ -2527,4 +2527,3 @@ again:
 error:
        return ret;
 }
-